From d383a4277b75f3305d78006bb8de673cd17d04a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 11:39:47 +0100 Subject: [PATCH 001/770] futex: Fix OWNER_DEAD fixup commit a97cb0e7b3f4c6297fd857055ae8e895f402f501 upstream. Both Geert and DaveJ reported that the recent futex commit: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") introduced a problem with setting OWNER_DEAD. We set the bit on an uninitialized variable and then entirely optimize it away as a dead-store. Move the setting of the bit to where it is more useful. Reported-by: Geert Uytterhoeven Reported-by: Dave Jones Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") Link: http://lkml.kernel.org/r/20180122103947.GD2228@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Cc: Ozkan Sezer Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 52b3f4703158..046cd780d057 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2311,9 +2311,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; /* * We are here because either: @@ -2374,6 +2371,9 @@ retry: } newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; From d5e06a1867210049bbfe27864ee0a40cfd9b1e9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Jan 2018 16:26:00 -0800 Subject: [PATCH 002/770] loop: fix concurrent lo_open/lo_release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae6650163c66a7eff1acd6eb8b0f752dcfa8eba5 upstream. 范龙飞 reports that KASAN can report a use-after-free in __lock_acquire. The reason is due to insufficient serialization in lo_release(), which will continue to use the loop device even after it has decremented the lo_refcnt to zero. In the meantime, another process can come in, open the loop device again as it is being shut down. Confusion ensues. Reported-by: 范龙飞 Signed-off-by: Linus Torvalds Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 85de67334695..a2a0dce5114e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1576,9 +1576,8 @@ out: return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1605,6 +1604,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, From 00251aedef3195e912184a5798952a7793a77abe Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 6 Nov 2017 11:44:23 -0600 Subject: [PATCH 003/770] KVM: x86: Fix CPUID function for word 6 (80000001_ECX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 50a671d4d15b859f447fa527191073019b6ce9cb upstream. The function for CPUID 80000001 ECX is set to 0xc0000001. Set it to 0x80000001. Signed-off-by: Janakarajan Natarajan Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: Borislav Petkov Fixes: d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers") Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3a6583..c2cea6651279 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, From 636124c8699be4f5f987ef7f95cfda1acaeed88c Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 21 Dec 2017 11:11:31 +1030 Subject: [PATCH 004/770] tools/gpio: Fix build error with musl libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1696784eb7b52b13b62d160c028ef2c2c981d4f2 upstream. The GPIO tools build fails when using a buildroot toolchain that uses musl as it's C library: arm-broomstick-linux-musleabi-gcc -Wp,-MD,./.gpio-event-mon.o.d \ -Wp,-MT,gpio-event-mon.o -O2 -Wall -g -D_GNU_SOURCE \ -Iinclude -D"BUILD_STR(s)=#s" -c -o gpio-event-mon.o gpio-event-mon.c gpio-event-mon.c:30:6: error: unknown type name ‘u_int32_t’; did you mean ‘uint32_t’? u_int32_t handleflags, ^~~~~~~~~ uint32_t The glibc headers installed on my laptop include sys/types.h in unistd.h, but it appears that musl does not. Fixes: 97f69747d8b1 ("tools/gpio: add the gpio-event-mon tool") Signed-off-by: Joel Stanley Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- tools/gpio/gpio-event-mon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c2595158..4b36323ea64b 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include int monitor_device(const char *device_name, From 460c5b9745b8b455be5591b1179e89a8bdcb1afd Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 12 Jan 2018 13:16:08 +0100 Subject: [PATCH 005/770] gpio: stmpe: i2c transfer are forbiden in atomic context commit b888fb6f2a278442933e3bfab70262e9a5365fb3 upstream. Move the workaround from stmpe_gpio_irq_unmask() which is executed in atomic context to stmpe_gpio_irq_sync_unlock() which is not. It fixes the following issue: [ 1.500000] BUG: scheduling while atomic: swapper/1/0x00000002 [ 1.500000] CPU: 0 PID: 1 Comm: swapper Not tainted 4.15.0-rc2-00020-gbd4301f-dirty #28 [ 1.520000] Hardware name: STM32 (Device Tree Support) [ 1.520000] [<0000bfc9>] (unwind_backtrace) from [<0000b347>] (show_stack+0xb/0xc) [ 1.530000] [<0000b347>] (show_stack) from [<0001fc49>] (__schedule_bug+0x39/0x58) [ 1.530000] [<0001fc49>] (__schedule_bug) from [<00168211>] (__schedule+0x23/0x2b2) [ 1.550000] [<00168211>] (__schedule) from [<001684f7>] (schedule+0x57/0x64) [ 1.550000] [<001684f7>] (schedule) from [<0016a513>] (schedule_timeout+0x137/0x164) [ 1.550000] [<0016a513>] (schedule_timeout) from [<00168b91>] (wait_for_common+0x8d/0xfc) [ 1.570000] [<00168b91>] (wait_for_common) from [<00139753>] (stm32f4_i2c_xfer+0xe9/0xfe) [ 1.580000] [<00139753>] (stm32f4_i2c_xfer) from [<00138545>] (__i2c_transfer+0x111/0x148) [ 1.590000] [<00138545>] (__i2c_transfer) from [<001385cf>] (i2c_transfer+0x53/0x70) [ 1.590000] [<001385cf>] (i2c_transfer) from [<001388a5>] (i2c_smbus_xfer+0x12f/0x36e) [ 1.600000] [<001388a5>] (i2c_smbus_xfer) from [<00138b49>] (i2c_smbus_read_byte_data+0x1f/0x2a) [ 1.610000] [<00138b49>] (i2c_smbus_read_byte_data) from [<00124fdd>] (__stmpe_reg_read+0xd/0x24) [ 1.620000] [<00124fdd>] (__stmpe_reg_read) from [<001252b3>] (stmpe_reg_read+0x19/0x24) [ 1.630000] [<001252b3>] (stmpe_reg_read) from [<0002c4d1>] (unmask_irq+0x17/0x22) [ 1.640000] [<0002c4d1>] (unmask_irq) from [<0002c57f>] (irq_startup+0x6f/0x78) [ 1.650000] [<0002c57f>] (irq_startup) from [<0002b7a1>] (__setup_irq+0x319/0x47c) [ 1.650000] [<0002b7a1>] (__setup_irq) from [<0002bad3>] (request_threaded_irq+0x6b/0xe8) [ 1.660000] [<0002bad3>] (request_threaded_irq) from [<0002d0b9>] (devm_request_threaded_irq+0x3b/0x6a) [ 1.670000] [<0002d0b9>] (devm_request_threaded_irq) from [<001446e7>] (mmc_gpiod_request_cd_irq+0x49/0x8a) [ 1.680000] [<001446e7>] (mmc_gpiod_request_cd_irq) from [<0013d45d>] (mmc_start_host+0x49/0x60) [ 1.690000] [<0013d45d>] (mmc_start_host) from [<0013e40b>] (mmc_add_host+0x3b/0x54) [ 1.700000] [<0013e40b>] (mmc_add_host) from [<00148119>] (mmci_probe+0x4d1/0x60c) [ 1.710000] [<00148119>] (mmci_probe) from [<000f903b>] (amba_probe+0x7b/0xbe) [ 1.720000] [<000f903b>] (amba_probe) from [<001170e5>] (driver_probe_device+0x169/0x1f8) [ 1.730000] [<001170e5>] (driver_probe_device) from [<001171b7>] (__driver_attach+0x43/0x5c) [ 1.740000] [<001171b7>] (__driver_attach) from [<0011618d>] (bus_for_each_dev+0x3d/0x46) [ 1.740000] [<0011618d>] (bus_for_each_dev) from [<001165cd>] (bus_add_driver+0xcd/0x124) [ 1.740000] [<001165cd>] (bus_add_driver) from [<00117713>] (driver_register+0x4d/0x7a) [ 1.760000] [<00117713>] (driver_register) from [<001fc765>] (do_one_initcall+0xbd/0xe8) [ 1.770000] [<001fc765>] (do_one_initcall) from [<001fc88b>] (kernel_init_freeable+0xfb/0x134) [ 1.780000] [<001fc88b>] (kernel_init_freeable) from [<00167ee3>] (kernel_init+0x7/0x9c) [ 1.790000] [<00167ee3>] (kernel_init) from [<00009b65>] (ret_from_fork+0x11/0x2c) Signed-off-by: Alexandre TORGUE Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-stmpe.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 16cbc5702865..491b0974c0fe 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -190,6 +190,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) }; int i, j; + /* + * STMPE1600: to be able to get IRQ from pins, + * a read must be done on GPMR register, or a write in + * GPSR or GPCR registers + */ + if (stmpe->partnum == STMPE1600) { + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + } + for (i = 0; i < CACHE_NR_REGS; i++) { /* STMPE801 and STMPE1600 don't have RE and FE registers */ if ((stmpe->partnum == STMPE801 || @@ -227,21 +237,11 @@ static void stmpe_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(gc); - struct stmpe *stmpe = stmpe_gpio->stmpe; int offset = d->hwirq; int regoffset = offset / 8; int mask = BIT(offset % 8); stmpe_gpio->regs[REG_IE][regoffset] |= mask; - - /* - * STMPE1600 workaround: to be able to get IRQ from pins, - * a read must be done on GPMR register, or a write in - * GPSR or GPCR registers - */ - if (stmpe->partnum == STMPE1600) - stmpe_reg_read(stmpe, - stmpe->regs[STMPE_IDX_GPMR_LSB + regoffset]); } static void stmpe_dbg_show_one(struct seq_file *s, From c5845e025423b02235489f154903b8befabebb02 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 22 Jan 2018 13:19:28 +0100 Subject: [PATCH 006/770] gpio: Fix kernel stack leak to userspace commit 24bd3efc9d1efb5f756a7c6f807a36ddb6adc671 upstream. The GPIO event descriptor was leaking kernel stack to userspace because we don't zero the variable before use. Ooops. Fix this. Reported-by: Arnd Bergmann Reviewed-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index eb80dac4e26a..bdd68ff197dc 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -723,6 +723,9 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) struct gpioevent_data ge; int ret, level; + /* Do not leak kernel stack to userspace */ + memset(&ge, 0, sizeof(ge)); + ge.timestamp = ktime_get_real_ns(); level = gpiod_get_value_cansleep(le->desc); From a5ba0b372da3cdc092ce0d98e5433e44aefab3f7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jan 2018 14:18:34 +0100 Subject: [PATCH 007/770] ALSA: hda - Reduce the suspend time consumption for ALC256 commit 1c9609e3a8cf5997bd35205cfda1ff2218ee793b upstream. ALC256 has its own quirk to override the shutup call, and it contains the COEF update for pulling down the headset jack control. Currently, the COEF update is called after clearing the headphone pin, and this seems triggering a stall of the codec communication, and results in a long delay over a second at suspend. A quick resolution is to swap the calls: at first with the COEF update, then clear the headphone pin. Fixes: 4a219ef8f370 ("ALSA: hda/realtek - Add ALC256 HP depop function") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198503 Reported-by: Paul Menzel Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 145e92d6ca94..b2d039537d5e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3131,11 +3131,13 @@ static void alc256_shutup(struct hda_codec *codec) if (hp_pin_sense) msleep(85); + /* 3k pull low control for Headset jack. */ + /* NOTE: call this before clearing the pin, otherwise codec stalls */ + alc_update_coef_idx(codec, 0x46, 0, 3 << 12); + snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - alc_update_coef_idx(codec, 0x46, 0, 3 << 12); /* 3k pull low control for Headset jack. */ - if (hp_pin_sense) msleep(100); From 2992182765762c2d5bfa70ff2da5f1841e7aba89 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 26 Nov 2017 00:16:46 +0100 Subject: [PATCH 008/770] crypto: ecdh - fix typo in KPP dependency of CRYPTO_ECDH commit b5b9007730ce1d90deaf25d7f678511550744bdc upstream. This fixes a typo in the CRYPTO_KPP dependency of CRYPTO_ECDH. Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support") Signed-off-by: Hauke Mehrtens Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index ac5fb37e6f4b..42212b60a0ee 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -130,7 +130,7 @@ config CRYPTO_DH config CRYPTO_ECDH tristate "ECDH algorithm" - select CRYTPO_KPP + select CRYPTO_KPP select CRYPTO_RNG_DEFAULT help Generic implementation of the ECDH algorithm From 6b3dcff7ca7e05c784ece15cc8d9c0bd913fbda4 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 18 Jan 2018 20:41:09 +0100 Subject: [PATCH 009/770] crypto: aesni - handle zero length dst buffer commit 9c674e1e2f9e24fa4392167efe343749008338e0 upstream. GCM can be invoked with a zero destination buffer. This is possible if the AAD and the ciphertext have zero lengths and only the tag exists in the source buffer (i.e. a source buffer cannot be zero). In this case, the GCM cipher only performs the authentication and no decryption operation. When the destination buffer has zero length, it is possible that no page is mapped to the SG pointing to the destination. In this case, sg_page(req->dst) is an invalid access. Therefore, page accesses should only be allowed if the req->dst->length is non-zero which is the indicator that a page must exist. This fixes a crash that can be triggered by user space via AF_ALG. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 5c15d6b57329..fe2c7c16fd37 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -823,7 +823,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || req->src->offset + req->src->length <= PAGE_SIZE) && - sg_is_last(req->dst) && + sg_is_last(req->dst) && req->dst->length && (!PageHighMem(sg_page(req->dst)) || req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; From e50f1d7c0d0747378aa49cf5ef4863dcebd6a485 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 13 Dec 2017 14:53:43 +0100 Subject: [PATCH 010/770] crypto: aesni - fix typo in generic_gcmaes_decrypt commit 106840c41096a01079d3a2025225029c13713802 upstream. generic_gcmaes_decrypt needs to use generic_gcmaes_ctx, not aesni_rfc4106_gcm_ctx. This is actually harmless because the fields in struct generic_gcmaes_ctx share the layout of the same fields in aesni_rfc4106_gcm_ctx. Fixes: cce2ea8d90fe ("crypto: aesni - add generic gcm(aes)") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index fe2c7c16fd37..f54f4730f6b6 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1115,7 +1115,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req) { __be32 counter = cpu_to_be32(1); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); From cffaf2b6b179227b31f8e21d7f65b68630fc6606 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 22 Aug 2017 10:08:08 +0200 Subject: [PATCH 011/770] crypto: gcm - add GCM IV size constant commit ef780324592dd639e4bfbc5b9bf8934b234b7c99 upstream. Many GCM users use directly GCM IV size instead of using some constant. This patch add all IV size constant used by GCM. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/gcm.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/crypto/gcm.h diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h new file mode 100644 index 000000000000..c50e057ea17e --- /dev/null +++ b/include/crypto/gcm.h @@ -0,0 +1,8 @@ +#ifndef _CRYPTO_GCM_H +#define _CRYPTO_GCM_H + +#define GCM_AES_IV_SIZE 12 +#define GCM_RFC4106_IV_SIZE 8 +#define GCM_RFC4543_IV_SIZE 8 + +#endif From e704e550cb7bfcd86044f875abce2e7703139ba9 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 22 Aug 2017 10:08:18 +0200 Subject: [PATCH 012/770] crypto: aesni - Use GCM IV size constant commit 46d93748e5a3628f9f553832cd64d8a59d8bafde upstream. This patch replace GCM IV size value by their constant name. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index f54f4730f6b6..7ff4c3b3322b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1131,7 +1132,7 @@ static struct aead_alg aesni_aead_algs[] = { { .setauthsize = common_rfc4106_set_authsize, .encrypt = helper_rfc4106_encrypt, .decrypt = helper_rfc4106_decrypt, - .ivsize = 8, + .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { .cra_name = "__gcm-aes-aesni", @@ -1149,7 +1150,7 @@ static struct aead_alg aesni_aead_algs[] = { { .setauthsize = rfc4106_set_authsize, .encrypt = rfc4106_encrypt, .decrypt = rfc4106_decrypt, - .ivsize = 8, + .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { .cra_name = "rfc4106(gcm(aes))", @@ -1165,7 +1166,7 @@ static struct aead_alg aesni_aead_algs[] = { { .setauthsize = generic_gcmaes_set_authsize, .encrypt = generic_gcmaes_encrypt, .decrypt = generic_gcmaes_decrypt, - .ivsize = 12, + .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, .base = { .cra_name = "gcm(aes)", From 265502fe9ab62a92d06cc7aa518d349fd60bdba1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 13 Dec 2017 14:54:36 +0100 Subject: [PATCH 013/770] crypto: aesni - add wrapper for generic gcm(aes) commit fc8517bf627c9b834f80274a1bc9ecd39b27231b upstream. When I added generic-gcm-aes I didn't add a wrapper like the one provided for rfc4106(gcm(aes)). We need to add a cryptd wrapper to fall back on in case the FPU is not available, otherwise we might corrupt the FPU state. Fixes: cce2ea8d90fe ("crypto: aesni - add generic gcm(aes)") Reported-by: Ilya Lesokhin Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_glue.c | 66 ++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 7ff4c3b3322b..c690ddc78c03 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -690,8 +690,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } -static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, - unsigned int key_len) +static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key, + unsigned int key_len) { struct cryptd_aead **ctx = crypto_aead_ctx(parent); struct cryptd_aead *cryptd_tfm = *ctx; @@ -716,8 +716,8 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead, /* This is the Integrity Check Value (aka the authentication tag length and can * be 8, 12 or 16 bytes long. */ -static int rfc4106_set_authsize(struct crypto_aead *parent, - unsigned int authsize) +static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent, + unsigned int authsize) { struct cryptd_aead **ctx = crypto_aead_ctx(parent); struct cryptd_aead *cryptd_tfm = *ctx; @@ -929,7 +929,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) aes_ctx); } -static int rfc4106_encrypt(struct aead_request *req) +static int gcmaes_wrapper_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_aead **ctx = crypto_aead_ctx(tfm); @@ -945,7 +945,7 @@ static int rfc4106_encrypt(struct aead_request *req) return crypto_aead_encrypt(req); } -static int rfc4106_decrypt(struct aead_request *req) +static int gcmaes_wrapper_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_aead **ctx = crypto_aead_ctx(tfm); @@ -1127,6 +1127,30 @@ static int generic_gcmaes_decrypt(struct aead_request *req) aes_ctx); } +static int generic_gcmaes_init(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void generic_gcmaes_exit(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} + static struct aead_alg aesni_aead_algs[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, @@ -1146,10 +1170,10 @@ static struct aead_alg aesni_aead_algs[] = { { }, { .init = rfc4106_init, .exit = rfc4106_exit, - .setkey = rfc4106_set_key, - .setauthsize = rfc4106_set_authsize, - .encrypt = rfc4106_encrypt, - .decrypt = rfc4106_decrypt, + .setkey = gcmaes_wrapper_set_key, + .setauthsize = gcmaes_wrapper_set_authsize, + .encrypt = gcmaes_wrapper_encrypt, + .decrypt = gcmaes_wrapper_decrypt, .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { @@ -1168,14 +1192,32 @@ static struct aead_alg aesni_aead_algs[] = { { .decrypt = generic_gcmaes_decrypt, .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, + .base = { + .cra_name = "__generic-gcm-aes-aesni", + .cra_driver_name = "__driver-generic-gcm-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), + .cra_alignmask = AESNI_ALIGN - 1, + .cra_module = THIS_MODULE, + }, +}, { + .init = generic_gcmaes_init, + .exit = generic_gcmaes_exit, + .setkey = gcmaes_wrapper_set_key, + .setauthsize = gcmaes_wrapper_set_authsize, + .encrypt = gcmaes_wrapper_encrypt, + .decrypt = gcmaes_wrapper_decrypt, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = 16, .base = { .cra_name = "gcm(aes)", .cra_driver_name = "generic-gcm-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), - .cra_alignmask = AESNI_ALIGN - 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), .cra_module = THIS_MODULE, }, } }; From f898a39985a30050abdad7a40b5b0098801509d4 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 20 Dec 2017 17:08:37 -0800 Subject: [PATCH 014/770] crypto: aesni - Fix out-of-bounds access of the data buffer in generic-gcm-aesni commit b20209c91e23a9bbad9cac2f80bc16b3c259e10e upstream. The aesni_gcm_enc/dec functions can access memory before the start of the data buffer if the length of the data buffer is less than 16 bytes. This is because they perform the read via a single 16-byte load. This can potentially result in accessing a page that is not mapped and thus causing the machine to crash. This patch fixes that by reading the partial block byte-by-byte and optionally an via 8-byte load if the block was at least 8 bytes. Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen") Signed-off-by: Junaid Shahid Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_asm.S | 87 ++++++++++++++++--------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3d09e3aca18d..63ba858b1a77 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -257,6 +257,37 @@ aad_shift_arr: pxor \TMP1, \GH # result is in TMP1 .endm +# Reads DLEN bytes starting at DPTR and stores in XMMDst +# where 0 < DLEN < 16 +# Clobbers %rax, DLEN and XMM1 +.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst + cmp $8, \DLEN + jl _read_lt8_\@ + mov (\DPTR), %rax + MOVQ_R64_XMM %rax, \XMMDst + sub $8, \DLEN + jz _done_read_partial_block_\@ + xor %eax, %eax +_read_next_byte_\@: + shl $8, %rax + mov 7(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_\@ + MOVQ_R64_XMM %rax, \XMM1 + pslldq $8, \XMM1 + por \XMM1, \XMMDst + jmp _done_read_partial_block_\@ +_read_lt8_\@: + xor %eax, %eax +_read_next_byte_lt8_\@: + shl $8, %rax + mov -1(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_lt8_\@ + MOVQ_R64_XMM %rax, \XMMDst +_done_read_partial_block_\@: +.endm + /* * if a = number of total plaintext bytes * b = floor(a/16) @@ -1386,14 +1417,6 @@ _esb_loop_\@: * * AAD Format with 64-bit Extended Sequence Number * -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. -* * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ @@ -1487,19 +1510,16 @@ _zero_cipher_left_decrypt: PSHUFB_XMM %xmm10, %xmm0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 -# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes -# (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes + lea (%arg3,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 movdqa %xmm1, %xmm2 pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu (%r12), %xmm1 # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm2 @@ -1508,9 +1528,6 @@ _zero_cipher_left_decrypt: pxor %xmm2, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - sub %r13, %r11 - add $16, %r11 # output %r13 bytes MOVQ_R64_XMM %xmm0, %rax @@ -1664,14 +1681,6 @@ ENDPROC(aesni_gcm_dec) * * AAD Format with 64-bit Extended Sequence Number * -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. -* * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) @@ -1764,19 +1773,16 @@ _zero_cipher_left_encrypt: movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks - lea SHIFT_MASK+16(%rip), %r12 + + lea (%arg3,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + + lea ALL_F+16(%rip), %r12 sub %r13, %r12 - # adjust the shuffle mask pointer to be able to shift 16-r13 bytes - # (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu (%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm0 pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 movdqa SHUF_MASK(%rip), %xmm10 @@ -1785,9 +1791,6 @@ _zero_cipher_left_encrypt: pxor %xmm0, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # GHASH computation for the last <16 byte block - sub %r13, %r11 - add $16, %r11 - movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 From 5ca02df0160233e463b043897cd17e32d38cb70e Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 20 Dec 2017 17:08:38 -0800 Subject: [PATCH 015/770] crypto: aesni - Fix out-of-bounds access of the AAD buffer in generic-gcm-aesni commit 1ecdd37e308ca149dc378cce225068cbac54e3a6 upstream. The aesni_gcm_enc/dec functions can access memory after the end of the AAD buffer if the AAD length is not a multiple of 4 bytes. It didn't matter with rfc4106-gcm-aesni as in that case the AAD was always followed by the 8 byte IV, but that is no longer the case with generic-gcm-aesni. This can potentially result in accessing a page that is not mapped and thus causing the machine to crash. This patch fixes that by reading the last <16 byte block of the AAD byte-by-byte and optionally via an 8-byte load if the block was at least 8 bytes. Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen") Signed-off-by: Junaid Shahid Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/aesni-intel_asm.S | 112 ++++-------------------------- 1 file changed, 12 insertions(+), 100 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 63ba858b1a77..12e8484a8ee7 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -90,30 +90,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff .octa 0x00000000000000000000000000000000 -.section .rodata -.align 16 -.type aad_shift_arr, @object -.size aad_shift_arr, 272 -aad_shift_arr: - .octa 0xffffffffffffffffffffffffffffffff - .octa 0xffffffffffffffffffffffffffffff0C - .octa 0xffffffffffffffffffffffffffff0D0C - .octa 0xffffffffffffffffffffffffff0E0D0C - .octa 0xffffffffffffffffffffffff0F0E0D0C - .octa 0xffffffffffffffffffffff0C0B0A0908 - .octa 0xffffffffffffffffffff0D0C0B0A0908 - .octa 0xffffffffffffffffff0E0D0C0B0A0908 - .octa 0xffffffffffffffff0F0E0D0C0B0A0908 - .octa 0xffffffffffffff0C0B0A090807060504 - .octa 0xffffffffffff0D0C0B0A090807060504 - .octa 0xffffffffff0E0D0C0B0A090807060504 - .octa 0xffffffff0F0E0D0C0B0A090807060504 - .octa 0xffffff0C0B0A09080706050403020100 - .octa 0xffff0D0C0B0A09080706050403020100 - .octa 0xff0E0D0C0B0A09080706050403020100 - .octa 0x0F0E0D0C0B0A09080706050403020100 - - .text @@ -304,62 +280,30 @@ _done_read_partial_block_\@: XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r11 # %r11 = aadLen pxor %xmm\i, %xmm\i pxor \XMM2, \XMM2 cmp $16, %r11 - jl _get_AAD_rest8\num_initial_blocks\operation + jl _get_AAD_rest\num_initial_blocks\operation _get_AAD_blocks\num_initial_blocks\operation: movdqu (%r10), %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor %xmm\i, \XMM2 GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 add $16, %r10 - sub $16, %r12 sub $16, %r11 cmp $16, %r11 jge _get_AAD_blocks\num_initial_blocks\operation movdqu \XMM2, %xmm\i + + /* read the last <16B of AAD */ +_get_AAD_rest\num_initial_blocks\operation: cmp $0, %r11 je _get_AAD_done\num_initial_blocks\operation - pxor %xmm\i,%xmm\i - - /* read the last <16B of AAD. since we have at least 4B of - data right after the AAD (the ICV, and maybe some CT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\num_initial_blocks\operation: - cmp $4, %r11 - jle _get_AAD_rest4\num_initial_blocks\operation - movq (%r10), \TMP1 - add $8, %r10 - sub $8, %r11 - pslldq $8, \TMP1 - psrldq $8, %xmm\i - pxor \TMP1, %xmm\i - jmp _get_AAD_rest8\num_initial_blocks\operation -_get_AAD_rest4\num_initial_blocks\operation: - cmp $0, %r11 - jle _get_AAD_rest0\num_initial_blocks\operation - mov (%r10), %eax - movq %rax, \TMP1 - add $4, %r10 - sub $4, %r10 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i -_get_AAD_rest0\num_initial_blocks\operation: - /* finalize: shift out the extra bytes we read, and align - left. since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \TMP1 - PSHUFB_XMM \TMP1, %xmm\i -_get_AAD_rest_final\num_initial_blocks\operation: + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor \XMM2, %xmm\i GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 @@ -563,62 +507,30 @@ _initial_blocks_done\num_initial_blocks\operation: XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r11 # %r11 = aadLen pxor %xmm\i, %xmm\i pxor \XMM2, \XMM2 cmp $16, %r11 - jl _get_AAD_rest8\num_initial_blocks\operation + jl _get_AAD_rest\num_initial_blocks\operation _get_AAD_blocks\num_initial_blocks\operation: movdqu (%r10), %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor %xmm\i, \XMM2 GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 add $16, %r10 - sub $16, %r12 sub $16, %r11 cmp $16, %r11 jge _get_AAD_blocks\num_initial_blocks\operation movdqu \XMM2, %xmm\i + + /* read the last <16B of AAD */ +_get_AAD_rest\num_initial_blocks\operation: cmp $0, %r11 je _get_AAD_done\num_initial_blocks\operation - pxor %xmm\i,%xmm\i - - /* read the last <16B of AAD. since we have at least 4B of - data right after the AAD (the ICV, and maybe some PT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\num_initial_blocks\operation: - cmp $4, %r11 - jle _get_AAD_rest4\num_initial_blocks\operation - movq (%r10), \TMP1 - add $8, %r10 - sub $8, %r11 - pslldq $8, \TMP1 - psrldq $8, %xmm\i - pxor \TMP1, %xmm\i - jmp _get_AAD_rest8\num_initial_blocks\operation -_get_AAD_rest4\num_initial_blocks\operation: - cmp $0, %r11 - jle _get_AAD_rest0\num_initial_blocks\operation - mov (%r10), %eax - movq %rax, \TMP1 - add $4, %r10 - sub $4, %r10 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i -_get_AAD_rest0\num_initial_blocks\operation: - /* finalize: shift out the extra bytes we read, and align - left. since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \TMP1 - PSHUFB_XMM \TMP1, %xmm\i -_get_AAD_rest_final\num_initial_blocks\operation: + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor \XMM2, %xmm\i GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 From bb9eec7b301eeb178fc1305708ae8902fa507953 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Dec 2017 17:21:17 +0100 Subject: [PATCH 016/770] crypto: inside-secure - fix hash when length is a multiple of a block commit 809778e02cd45d0625439fee67688f655627bb3c upstream. This patch fixes the hash support in the SafeXcel driver when the update size is a multiple of a block size, and when a final call is made just after with a size of 0. In such cases the driver should cache the last block from the update to avoid handling 0 length data on the final call (that's a hardware limitation). Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/inside-secure/safexcel_hash.c | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 3980f946874f..ebf1388e1e10 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -185,17 +185,31 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, else cache_len = queued - areq->nbytes; - /* - * If this is not the last request and the queued data does not fit - * into full blocks, cache it for the next send() call. - */ - extra = queued & (crypto_ahash_blocksize(ahash) - 1); - if (!req->last_req && extra) { - sg_pcopy_to_buffer(areq->src, sg_nents(areq->src), - req->cache_next, extra, areq->nbytes - extra); + if (!req->last_req) { + /* If this is not the last request and the queued data does not + * fit into full blocks, cache it for the next send() call. + */ + extra = queued & (crypto_ahash_blocksize(ahash) - 1); + if (!extra) + /* If this is not the last request and the queued data + * is a multiple of a block, cache the last one for now. + */ + extra = queued - crypto_ahash_blocksize(ahash); - queued -= extra; - len -= extra; + if (extra) { + sg_pcopy_to_buffer(areq->src, sg_nents(areq->src), + req->cache_next, extra, + areq->nbytes - extra); + + queued -= extra; + len -= extra; + + if (!queued) { + *commands = 0; + *results = 0; + return 0; + } + } } spin_lock_bh(&priv->ring[ring].egress_lock); From 199d97815d32acd0f1d20cbd04e39e3dead59a8b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Dec 2017 17:21:16 +0100 Subject: [PATCH 017/770] crypto: inside-secure - avoid unmapping DMA memory that was not mapped commit c957f8b3e2e54b29f53ef69decc87bbc858c9b58 upstream. This patch adds a parameter in the SafeXcel ahash request structure to keep track of the number of SG entries mapped. This allows not to call dma_unmap_sg() when dma_map_sg() wasn't called in the first place. This also removes a warning when the debugging of the DMA-API is enabled in the kernel configuration: "DMA-API: device driver tries to free DMA memory it has not allocated". Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/inside-secure/safexcel_hash.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index ebf1388e1e10..0626b33d2886 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -33,6 +33,8 @@ struct safexcel_ahash_req { bool finish; bool hmac; + int nents; + u8 state_sz; /* expected sate size, only set once */ u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; @@ -151,8 +153,10 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, result_sz = crypto_ahash_digestsize(ahash); memcpy(sreq->state, areq->result, result_sz); - dma_unmap_sg(priv->dev, areq->src, - sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE); + if (sreq->nents) { + dma_unmap_sg(priv->dev, areq->src, sreq->nents, DMA_TO_DEVICE); + sreq->nents = 0; + } safexcel_free_context(priv, async, sreq->state_sz); @@ -177,7 +181,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, struct safexcel_command_desc *cdesc, *first_cdesc = NULL; struct safexcel_result_desc *rdesc; struct scatterlist *sg; - int i, nents, queued, len, cache_len, extra, n_cdesc = 0, ret = 0; + int i, queued, len, cache_len, extra, n_cdesc = 0, ret = 0; queued = len = req->len - req->processed; if (queued < crypto_ahash_blocksize(ahash)) @@ -247,15 +251,15 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, } /* Now handle the current ahash request buffer(s) */ - nents = dma_map_sg(priv->dev, areq->src, - sg_nents_for_len(areq->src, areq->nbytes), - DMA_TO_DEVICE); - if (!nents) { + req->nents = dma_map_sg(priv->dev, areq->src, + sg_nents_for_len(areq->src, areq->nbytes), + DMA_TO_DEVICE); + if (!req->nents) { ret = -ENOMEM; goto cdesc_rollback; } - for_each_sg(areq->src, sg, nents, i) { + for_each_sg(areq->src, sg, req->nents, i) { int sglen = sg_dma_len(sg); /* Do not overflow the request */ From b9788e278cec29fef2e837633fa1b97030417e78 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 Jan 2018 12:04:33 +0000 Subject: [PATCH 018/770] crypto: sha3-generic - fixes for alignment and big endian operation commit c013cee99d5a18aec8c71fee8f5f41369cd12595 upstream. Ensure that the input is byte swabbed before injecting it into the SHA3 transform. Use the get_unaligned() accessor for this so that we don't perform unaligned access inadvertently on architectures that do not support that. Fixes: 53964b9ee63b7075 ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/sha3_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 7e8ed96236ce..a68be626017c 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KECCAK_ROUNDS 24 @@ -149,7 +150,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, unsigned int i; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) src)[i]; + sctx->st[i] ^= get_unaligned_le64(src + 8 * i); keccakf(sctx->st); done += sctx->rsiz; @@ -174,7 +175,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) sctx->buf[sctx->rsiz - 1] |= 0x80; for (i = 0; i < sctx->rsizw; i++) - sctx->st[i] ^= ((u64 *) sctx->buf)[i]; + sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i); keccakf(sctx->st); From f41c8a003196df125b32ed8d78a66f9ea22ccce4 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Tue, 2 Jan 2018 08:55:25 +0100 Subject: [PATCH 019/770] crypto: af_alg - whitelist mask and type commit bb30b8848c85e18ca7e371d0a869e94b3e383bdf upstream. The user space interface allows specifying the type and mask field used to allocate the cipher. Only a subset of the possible flags are intended for user space. Therefore, white-list the allowed flags. In case the user space caller uses at least one non-allowed flag, EINVAL is returned. Reported-by: syzbot Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 6ec360213107..53b7fa4cf4ab 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent); static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - const u32 forbidden = CRYPTO_ALG_INTERNAL; + const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct sockaddr_alg *sa = (void *)uaddr; @@ -158,6 +158,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) void *private; int err; + /* If caller uses non-allowed flag, return error. */ + if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed)) + return -EINVAL; + if (sock->state == SS_CONNECTED) return -EINVAL; @@ -176,9 +180,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (IS_ERR(type)) return PTR_ERR(type); - private = type->bind(sa->salg_name, - sa->salg_feat & ~forbidden, - sa->salg_mask & ~forbidden); + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); if (IS_ERR(private)) { module_put(type->owner); return PTR_ERR(private); From e3f7e6f2e03396df38491a484c6381e9301ff0f6 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Thu, 7 Dec 2017 12:31:56 -0800 Subject: [PATCH 020/770] HID: wacom: EKR: ensure devres groups at higher indexes are released commit 791ae273731fa85d3332e45064dab177ae663e80 upstream. Background: ExpressKey Remotes communicate their events via usb dongle. Each dongle can hold up to 5 pairings at one time and one EKR (identified by its serial number) can unfortunately be paired with its dongle more than once. The pairing takes place in a round-robin fashion. Input devices are only created once per EKR, when a new serial number is seen in the list of pairings. However, if a device is created for a "higher" paring index and subsequently a second pairing occurs at a lower pairing index, unpairing the remote with that serial number from any pairing index will currently cause a driver crash. This occurs infrequently, as two remotes are necessary to trigger this bug and most users have only one remote. As an illustration, to trigger the bug you need to have two remotes, and pair them in this order: 1. slot 0 -> remote 1 (input device created for remote 1) 2. slot 1 -> remote 1 (duplicate pairing - no device created) 3. slot 2 -> remote 1 (duplicate pairing - no device created) 4. slot 3 -> remote 1 (duplicate pairing - no device created) 5. slot 4 -> remote 2 (input device created for remote 2) 6. slot 0 -> remote 2 (1 destroyed and recreated at slot 1) 7. slot 1 -> remote 2 (1 destroyed and recreated at slot 2) 8. slot 2 -> remote 2 (1 destroyed and recreated at slot 3) 9. slot 3 -> remote 2 (1 destroyed and not recreated) 10. slot 4 -> remote 2 (2 was already in this slot so no changes) 11. slot 0 -> remote 1 (The current code sees remote 2 was paired over in one of the dongle slots it occupied and attempts to remove all information about remote 2 [1]. It calls wacom_remote_destroy_one for remote 2, but the destroy function assumes the lowest index is where the remote's input device was created. The code "cleans up" the other remote 2 pairings including the one which the input device was based on, assuming they were were just duplicate pairings. However, the cleanup doesn't call the devres release function for the input device that was created in slot 4). This issue is fixed by this commit. [1] Remote 2 should subsequently be re-created on the next packet from the EKR at the lowest numbered slot that it occupies (here slot 1). Fixes: f9036bd43602 ("HID: wacom: EKR: use devres groups to manage resources") Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_sys.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 906e654fb0ba..65f1cfbbe7fe 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2340,23 +2340,23 @@ static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index) int i; unsigned long flags; - spin_lock_irqsave(&remote->remote_lock, flags); - remote->remotes[index].registered = false; - spin_unlock_irqrestore(&remote->remote_lock, flags); - - if (remote->remotes[index].battery.battery) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index].battery.bat_desc); - - if (remote->remotes[index].group.name) - devres_release_group(&wacom->hdev->dev, - &remote->remotes[index]); - for (i = 0; i < WACOM_MAX_REMOTES; i++) { if (remote->remotes[i].serial == serial) { + + spin_lock_irqsave(&remote->remote_lock, flags); + remote->remotes[i].registered = false; + spin_unlock_irqrestore(&remote->remote_lock, flags); + + if (remote->remotes[i].battery.battery) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i].battery.bat_desc); + + if (remote->remotes[i].group.name) + devres_release_group(&wacom->hdev->dev, + &remote->remotes[i]); + remote->remotes[i].serial = 0; remote->remotes[i].group.name = NULL; - remote->remotes[i].registered = false; remote->remotes[i].battery.battery = NULL; wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN; } From c76133afb9033e47a4b944f69a6d8f81450a9983 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 26 Dec 2017 14:53:55 -0800 Subject: [PATCH 021/770] HID: wacom: Fix reporting of touch toggle (WACOM_HID_WD_MUTE_DEVICE) events commit 403c0f681c1964ff1db8c2fb8de8c4067779d081 upstream. Touch toggle softkeys send a '1' while pressed and a '0' while released, requring the kernel to keep track of wether touch should be enabled or disabled. The code does not handle the state transitions properly, however. If the key is pressed repeatedly, the following four states of states are cycled through (assuming touch starts out enabled): Press: shared->is_touch_on => 0, SW_MUTE_DEVICE => 1 Release: shared->is_touch_on => 0, SW_MUTE_DEVICE => 1 Press: shared->is_touch_on => 1, SW_MUTE_DEVICE => 0 Release: shared->is_touch_on => 1, SW_MUTE_DEVICE => 1 The hardware always properly enables/disables touch when the key is pressed but applications that listen for SW_MUTE_DEVICE events to provide feedback about the state will only ever show touch as being enabled while the key is held, and only every-other time. This sequence occurs because the fallthrough WACOM_HID_WD_TOUCHONOFF case is always handled, and it uses the value of the *local* is_touch_on variable as the value to report to userspace. The local value is equal to the shared value when the button is pressed, but equal to zero when the button is released. Reporting the shared value to userspace fixes this problem, but the fallthrough case needs to update the shared value in an incompatible way (which is why the local variable was introduced in the first place). To work around this, we just handle both cases in a single block of code and update the shared variable as appropriate. Fixes: d793ff8187 ("HID: wacom: generic: support touch on/off softkey") Signed-off-by: Jason Gerecke Reviewed-by: Aaron Skomra Tested-by: Aaron Skomra Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index aa692e28b2cd..70cbe1e5a3d2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1924,7 +1924,6 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field struct wacom_features *features = &wacom_wac->features; unsigned equivalent_usage = wacom_equivalent_usage(usage->hid); int i; - bool is_touch_on = value; bool do_report = false; /* @@ -1969,16 +1968,17 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field break; case WACOM_HID_WD_MUTE_DEVICE: - if (wacom_wac->shared->touch_input && value) { - wacom_wac->shared->is_touch_on = !wacom_wac->shared->is_touch_on; - is_touch_on = wacom_wac->shared->is_touch_on; - } - - /* fall through*/ case WACOM_HID_WD_TOUCHONOFF: if (wacom_wac->shared->touch_input) { + bool *is_touch_on = &wacom_wac->shared->is_touch_on; + + if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value) + *is_touch_on = !(*is_touch_on); + else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF) + *is_touch_on = value; + input_report_switch(wacom_wac->shared->touch_input, - SW_MUTE_DEVICE, !is_touch_on); + SW_MUTE_DEVICE, !(*is_touch_on)); input_sync(wacom_wac->shared->touch_input); } break; From aecad437fd709b3eeb9ed7eb336646e413999139 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:27 -0800 Subject: [PATCH 022/770] power: reset: zx-reboot: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 348c7cf5fcbcb68838255759d4cb45d039af36d2 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/power/reset/zx-reboot.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/zx-reboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/reset/zx-reboot.c b/drivers/power/reset/zx-reboot.c index 7549c7f74a3c..c03e96e6a041 100644 --- a/drivers/power/reset/zx-reboot.c +++ b/drivers/power/reset/zx-reboot.c @@ -82,3 +82,7 @@ static struct platform_driver zx_reboot_driver = { }, }; module_platform_driver(zx_reboot_driver); + +MODULE_DESCRIPTION("ZTE SoCs reset driver"); +MODULE_AUTHOR("Jun Nie "); +MODULE_LICENSE("GPL v2"); From 925e26b922c525e21a905bf99b4b20247d521c4f Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:52 -0800 Subject: [PATCH 023/770] gpio: iop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 97b03136e1b637d7a9d2274c099e44ecf23f1103 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-iop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-iop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-iop.c b/drivers/gpio/gpio-iop.c index 98c7ff2a76e7..8d62db447ec1 100644 --- a/drivers/gpio/gpio-iop.c +++ b/drivers/gpio/gpio-iop.c @@ -58,3 +58,7 @@ static int __init iop3xx_gpio_init(void) return platform_driver_register(&iop3xx_gpio_driver); } arch_initcall(iop3xx_gpio_init); + +MODULE_DESCRIPTION("GPIO handling for Intel IOP3xx processors"); +MODULE_AUTHOR("Lennert Buytenhek "); +MODULE_LICENSE("GPL"); From bf8c4b3dd7bfd47920e61ad5f67c827a2e66c01b Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:54:26 -0800 Subject: [PATCH 024/770] gpio: ath79: add missing MODULE_DESCRIPTION/LICENSE commit 539340f37e6d6ed4cd93e8e18c9b2e4eafd4b842 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/gpio/gpio-ath79.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION is also added. Signed-off-by: Jesse Chan Acked-by: Alban Bedel Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ath79.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c index f33d4a5fe671..af0baf8da295 100644 --- a/drivers/gpio/gpio-ath79.c +++ b/drivers/gpio/gpio-ath79.c @@ -323,3 +323,6 @@ static struct platform_driver ath79_gpio_driver = { }; module_platform_driver(ath79_gpio_driver); + +MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X GPIO API support"); +MODULE_LICENSE("GPL v2"); From e6a5fe3180369479988ef4c44f41d9c00e102dbd Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:57:13 -0800 Subject: [PATCH 025/770] mtd: nand: denali_pci: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit d822401d1c6898a4a4ee03977b78b8cec402e88a upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/mtd/nand/denali_pci.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Acked-by: Masahiro Yamada Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/denali_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c index 81370c79aa48..7ad0db65a6fa 100644 --- a/drivers/mtd/nand/denali_pci.c +++ b/drivers/mtd/nand/denali_pci.c @@ -124,3 +124,7 @@ static struct pci_driver denali_pci_driver = { }; module_pci_driver(denali_pci_driver); + +MODULE_DESCRIPTION("PCI driver for Denali NAND controller"); +MODULE_AUTHOR("Intel Corporation and its suppliers"); +MODULE_LICENSE("GPL v2"); From 80baea0e6cf36f64905fd7d50b4fe2fe846b6a4a Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 12 Dec 2017 14:31:30 -0500 Subject: [PATCH 026/770] igb: Free IRQs when device is hotplugged commit 888f22931478a05bc81ceb7295c626e1292bf0ed upstream. Recently I got a Caldigit TS3 Thunderbolt 3 dock, and noticed that upon hotplugging my kernel would immediately crash due to igb: [ 680.825801] kernel BUG at drivers/pci/msi.c:352! [ 680.828388] invalid opcode: 0000 [#1] SMP [ 680.829194] Modules linked in: igb(O) thunderbolt i2c_algo_bit joydev vfat fat btusb btrtl btbcm btintel bluetooth ecdh_generic hp_wmi sparse_keymap rfkill wmi_bmof iTCO_wdt intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul snd_pcm rtsx_pci_ms mei_me snd_timer memstick snd pcspkr mei soundcore i2c_i801 tpm_tis psmouse shpchp wmi tpm_tis_core tpm video hp_wireless acpi_pad rtsx_pci_sdmmc mmc_core crc32c_intel serio_raw rtsx_pci mfd_core xhci_pci xhci_hcd i2c_hid i2c_core [last unloaded: igb] [ 680.831085] CPU: 1 PID: 78 Comm: kworker/u16:1 Tainted: G O 4.15.0-rc3Lyude-Test+ #6 [ 680.831596] Hardware name: HP HP ZBook Studio G4/826B, BIOS P71 Ver. 01.03 06/09/2017 [ 680.832168] Workqueue: kacpi_hotplug acpi_hotplug_work_fn [ 680.832687] RIP: 0010:free_msi_irqs+0x180/0x1b0 [ 680.833271] RSP: 0018:ffffc9000030fbf0 EFLAGS: 00010286 [ 680.833761] RAX: ffff8803405f9c00 RBX: ffff88033e3d2e40 RCX: 000000000000002c [ 680.834278] RDX: 0000000000000000 RSI: 00000000000000ac RDI: ffff880340be2178 [ 680.834832] RBP: 0000000000000000 R08: ffff880340be1ff0 R09: ffff8803405f9c00 [ 680.835342] R10: 0000000000000000 R11: 0000000000000040 R12: ffff88033d63a298 [ 680.835822] R13: ffff88033d63a000 R14: 0000000000000060 R15: ffff880341959000 [ 680.836332] FS: 0000000000000000(0000) GS:ffff88034f440000(0000) knlGS:0000000000000000 [ 680.836817] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 680.837360] CR2: 000055e64044afdf CR3: 0000000001c09002 CR4: 00000000003606e0 [ 680.837954] Call Trace: [ 680.838853] pci_disable_msix+0xce/0xf0 [ 680.839616] igb_reset_interrupt_capability+0x5d/0x60 [igb] [ 680.840278] igb_remove+0x9d/0x110 [igb] [ 680.840764] pci_device_remove+0x36/0xb0 [ 680.841279] device_release_driver_internal+0x157/0x220 [ 680.841739] pci_stop_bus_device+0x7d/0xa0 [ 680.842255] pci_stop_bus_device+0x2b/0xa0 [ 680.842722] pci_stop_bus_device+0x3d/0xa0 [ 680.843189] pci_stop_and_remove_bus_device+0xe/0x20 [ 680.843627] trim_stale_devices+0xf3/0x140 [ 680.844086] trim_stale_devices+0x94/0x140 [ 680.844532] trim_stale_devices+0xa6/0x140 [ 680.845031] ? get_slot_status+0x90/0xc0 [ 680.845536] acpiphp_check_bridge.part.5+0xfe/0x140 [ 680.846021] acpiphp_hotplug_notify+0x175/0x200 [ 680.846581] ? free_bridge+0x100/0x100 [ 680.847113] acpi_device_hotplug+0x8a/0x490 [ 680.847535] acpi_hotplug_work_fn+0x1a/0x30 [ 680.848076] process_one_work+0x182/0x3a0 [ 680.848543] worker_thread+0x2e/0x380 [ 680.848963] ? process_one_work+0x3a0/0x3a0 [ 680.849373] kthread+0x111/0x130 [ 680.849776] ? kthread_create_worker_on_cpu+0x50/0x50 [ 680.850188] ret_from_fork+0x1f/0x30 [ 680.850601] Code: 43 14 85 c0 0f 84 d5 fe ff ff 31 ed eb 0f 83 c5 01 39 6b 14 0f 86 c5 fe ff ff 8b 7b 10 01 ef e8 b7 e4 d2 ff 48 83 78 70 00 74 e3 <0f> 0b 49 8d b5 a0 00 00 00 e8 62 6f d3 ff e9 c7 fe ff ff 48 8b [ 680.851497] RIP: free_msi_irqs+0x180/0x1b0 RSP: ffffc9000030fbf0 As it turns out, normally the freeing of IRQs that would fix this is called inside of the scope of __igb_close(). However, since the device is already gone by the point we try to unregister the netdevice from the driver due to a hotplug we end up seeing that the netif isn't present and thus, forget to free any of the device IRQs. So: make sure that if we're in the process of dismantling the netdev, we always allow __igb_close() to be called so that IRQs may be freed normally. Additionally, only allow igb_close() to be called from __igb_close() if it hasn't already been called for the given adapter. Signed-off-by: Lyude Paul Fixes: 9474933caf21 ("igb: close/suspend race in netif_device_detach") Cc: Todd Fujinaka Cc: Stephen Hemminger Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 667dbc7d4a4e..d1a44a84c97e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3331,7 +3331,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) int igb_close(struct net_device *netdev) { - if (netif_device_present(netdev)) + if (netif_device_present(netdev) || netdev->dismantle) return __igb_close(netdev, false); return 0; } From 516868c59d82705b7c636ec2bdd3d6cb0e989b06 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 17 Jan 2018 20:27:11 +0200 Subject: [PATCH 027/770] ima/policy: fix parsing of fsuuid commit 36447456e1cca853188505f2a964dbbeacfc7a7a upstream. The switch to uuid_t invereted the logic of verfication that &entry->fsuuid is zero during parsing of "fsuuid=" rule. Instead of making sure the &entry->fsuuid field is not attempted to be overwritten, we bail out for perfectly correct rule. Fixes: 787d8c530af7 ("ima/policy: switch to use uuid_t") Signed-off-by: Mike Rapoport Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 95209a5f8595..8daf16e1d421 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -743,7 +743,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) case Opt_fsuuid: ima_log_string(ab, "fsuuid", args[0].from); - if (uuid_is_null(&entry->fsuuid)) { + if (!uuid_is_null(&entry->fsuuid)) { result = -EINVAL; break; } From 791274e773952f80990bc10757d0281bffbe6955 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Tue, 26 Dec 2017 20:34:22 -0800 Subject: [PATCH 028/770] scsi: aacraid: Fix udev inquiry race condition commit f4e8708d3104437fd7716e957f38c265b0c509ef upstream. When udev requests for a devices inquiry string, it might create multiple threads causing a race condition on the shared inquiry resource string. Created a buffer with the string for each thread. Fixes: 3bc8070fb75b3315 ([SCSI] aacraid: SMC vendor identification) Signed-off-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/aachba.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index af3e4d3f9735..548a3e73f10a 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -913,8 +913,16 @@ static void setinqstr(struct aac_dev *dev, void *data, int tindex) memset(str, ' ', sizeof(*str)); if (sup_adap_info->adapter_type_text[0]) { - char *cp = sup_adap_info->adapter_type_text; int c; + char *cp; + char *cname = kmemdup(sup_adap_info->adapter_type_text, + sizeof(sup_adap_info->adapter_type_text), + GFP_ATOMIC); + + if (!cname) + return; + + cp = cname; if ((cp[0] == 'A') && (cp[1] == 'O') && (cp[2] == 'C')) inqstrcpy("SMC", str->vid); else { @@ -923,7 +931,7 @@ static void setinqstr(struct aac_dev *dev, void *data, int tindex) ++cp; c = *cp; *cp = '\0'; - inqstrcpy(sup_adap_info->adapter_type_text, str->vid); + inqstrcpy(cname, str->vid); *cp = c; while (*cp && *cp != ' ') ++cp; @@ -937,8 +945,8 @@ static void setinqstr(struct aac_dev *dev, void *data, int tindex) cp[sizeof(str->pid)] = '\0'; } inqstrcpy (cp, str->pid); - if (c) - cp[sizeof(str->pid)] = c; + + kfree(cname); } else { struct aac_driver_ident *mp = aac_get_driver_ident(dev->cardtype); From d7fec01990e65967855790877ba829d67ebde255 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Tue, 26 Dec 2017 20:34:24 -0800 Subject: [PATCH 029/770] scsi: aacraid: Fix hang in kdump commit c5313ae8e4e037bfaf5e56cb8d6efdb8e92ce437 upstream. Driver attempts to perform a device scan and device add after coming out of reset. At times when the kdump kernel loads and it tries to perform eh recovery, the device scan hangs since its commands are blocked because of the eh recovery. This should have shown up in normal eh recovery path (Should have been obvious) Remove the code that performs scanning.I can live without the rescanning support in the stable kernels but a hanging kdump/eh recovery needs to be fixed. Fixes: a2d0321dd532901e (scsi: aacraid: Reload offlined drives after controller reset) Reported-by: Douglas Miller Tested-by: Guilherme G. Piccoli Fixes: a2d0321dd532901e (scsi: aacraid: Reload offlined drives after controller reset) Signed-off-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/aachba.c | 1 - drivers/scsi/aacraid/commsup.c | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 548a3e73f10a..7173ae53c526 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -918,7 +918,6 @@ static void setinqstr(struct aac_dev *dev, void *data, int tindex) char *cname = kmemdup(sup_adap_info->adapter_type_text, sizeof(sup_adap_info->adapter_type_text), GFP_ATOMIC); - if (!cname) return; diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 525a652dab48..ffbfd042be08 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1672,14 +1672,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) out: aac->in_reset = 0; scsi_unblock_requests(host); - /* - * Issue bus rescan to catch any configuration that might have - * occurred - */ - if (!retval) { - dev_info(&aac->pdev->dev, "Issuing bus rescan\n"); - scsi_scan_host(host); - } + if (jafo) { spin_lock_irq(host->host_lock); } From bdf19237e16f5495dab1edb0ec5eb60049d02a60 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Wed, 11 Oct 2017 07:01:31 +0200 Subject: [PATCH 030/770] VFS: Handle lazytime in do_mount() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d7ee946942bdd12394809305e3df05aa4c8b7b8f upstream. Since commit e462ec50cb5fa ("VFS: Differentiate mount flags (MS_*) from internal superblock flags") the lazytime mount option doesn't get passed on anymore. Fix the issue by handling the option in do_mount(). Reviewed-by: Lukas Czerner Signed-off-by: Markus Trippelsdorf Signed-off-by: Al Viro Cc: Holger Hoffstätte Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namespace.c b/fs/namespace.c index d18deb4c410b..adae9ffce91d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_DIRSYNC | SB_SILENT | SB_POSIXACL | + SB_LAZYTIME | SB_I_VERSION); if (flags & MS_REMOUNT) From 08bb42086b3e9c5619e6e82aed576eae8a6583ef Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 10 Nov 2017 02:05:06 +0100 Subject: [PATCH 031/770] drm/vc4: Account for interrupts in flight [ Upstream commit 253696ccd613fbdaa5aba1de44c461a058e0a114 ] Synchronously disable the IRQ to make the following cancel_work_sync invocation effective. An interrupt in flight could enqueue further overflow mem work. As we free the binner BO immediately following vc4_irq_uninstall this caused a NULL pointer dereference in the work callback vc4_overflow_mem_work. Link: https://github.com/anholt/linux/issues/114 Signed-off-by: Stefan Schake Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1510275907-993-2-git-send-email-stschake@gmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 7d7af3a93d94..61b2e5377993 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,6 +208,9 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + /* Undo the effects of a previous vc4_irq_uninstall. */ + enable_irq(dev->irq); + /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); @@ -225,6 +228,9 @@ vc4_irq_uninstall(struct drm_device *dev) /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + /* Finish any interrupt handler still in flight. */ + disable_irq(dev->irq); + cancel_work_sync(&vc4->overflow_mem_work); } From 9a8215c0a91e9182761d52e933fd635c7a7a47cd Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 23 Oct 2017 09:58:46 +0300 Subject: [PATCH 032/770] btrfs: Fix transaction abort during failure in btrfs_rm_dev_item [ Upstream commit 5e9f2ad5b2904a7e81df6d9a3dbef29478952eac ] btrfs_rm_dev_item calls several function under an active transaction, however it fails to abort it if an error happens. Fix this by adding explicit btrfs_abort_transaction/btrfs_end_transaction calls. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0c11121a8ace..4006b2a1233d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1765,20 +1765,24 @@ static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, key.offset = device->devid; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (ret > 0) { - ret = -ENOENT; + if (ret) { + if (ret > 0) + ret = -ENOENT; + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; + if (ret) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } + out: btrfs_free_path(path); - btrfs_commit_transaction(trans); + if (!ret) + ret = btrfs_commit_transaction(trans); return ret; } From 71341a8a703752278bbaf6e4502f78ff50e645f7 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 30 Oct 2017 11:14:38 -0600 Subject: [PATCH 033/770] Btrfs: bail out gracefully rather than BUG_ON [ Upstream commit 56a0e706fcf870270878d6d72b71092ae42d229c ] If a file's DIR_ITEM key is invalid (due to memory errors) and gets written to disk, a future lookup_path can end up with kernel panic due to BUG_ON(). This gets rid of the BUG_ON(), meanwhile output the corrupted key and return ENOENT if it's invalid. Signed-off-by: Liu Bo Reported-by: Guillaume Bouchard Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d94e3f68b9b1..c71afd424900 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5500,6 +5500,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, goto out_err; btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); + if (location->type != BTRFS_INODE_ITEM_KEY && + location->type != BTRFS_ROOT_ITEM_KEY) { + btrfs_warn(root->fs_info, +"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", + __func__, name, btrfs_ino(BTRFS_I(dir)), + location->objectid, location->type, location->offset); + goto out_err; + } out: btrfs_free_path(path); return ret; @@ -5816,8 +5824,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } - BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); - index = srcu_read_lock(&fs_info->subvol_srcu); ret = fixup_tree_root_location(fs_info, dir, dentry, &location, &sub_root); From 1d285c0440fe784e39e9110cc75ca21719b92ead Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Tue, 7 Nov 2017 15:17:55 +0530 Subject: [PATCH 034/770] cpupowerutils: bench - Fix cpu online check [ Upstream commit 53d1cd6b125fb9d69303516a1179ebc3b72f797a ] cpupower_is_cpu_online was incorrectly checking for 0. This patch fixes this by checking for 1 when the cpu is online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/bench/system.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c index c25a74ae51ba..2bb3eef7d5c1 100644 --- a/tools/power/cpupower/bench/system.c +++ b/tools/power/cpupower/bench/system.c @@ -61,7 +61,7 @@ int set_cpufreq_governor(char *governor, unsigned int cpu) dprintf("set %s as cpufreq governor\n", governor); - if (cpupower_is_cpu_online(cpu) != 0) { + if (cpupower_is_cpu_online(cpu) != 1) { perror("cpufreq_cpu_exists"); fprintf(stderr, "error: cpu %u does not exist\n", cpu); return -1; From f1881befacbe7f91b49544b0860b65eb6af93429 Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 15 Nov 2017 14:10:02 +0530 Subject: [PATCH 035/770] cpupower : Fix cpupower working when cpu0 is offline [ Upstream commit dbdc468f35ee827cab2753caa1c660bdb832243a ] cpuidle_monitor used to assume that cpu0 is always online which is not a valid assumption on POWER machines. This patch fixes this by getting the cpu on which the current thread is running, instead of always using cpu0 for monitoring which may not be online. Signed-off-by: Abhishek Goel Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 1b5da0066ebf..5b3205f16217 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -130,15 +130,18 @@ static struct cpuidle_monitor *cpuidle_register(void) { int num; char *tmp; + int this_cpu; + + this_cpu = sched_getcpu(); /* Assume idle state count is the same for all CPUs */ - cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0); + cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu); if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { - tmp = cpuidle_state_name(0, num); + tmp = cpuidle_state_name(this_cpu, num); if (tmp == NULL) continue; @@ -146,7 +149,7 @@ static struct cpuidle_monitor *cpuidle_register(void) strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); - tmp = cpuidle_state_desc(0, num); + tmp = cpuidle_state_desc(this_cpu, num); if (tmp == NULL) continue; strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); From d003b4bfda3b3b8f42fba2236c774084e437cad0 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 6 Nov 2017 16:15:10 +0200 Subject: [PATCH 036/770] KVM: nVMX/nSVM: Don't intercept #UD when running L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ac9b305caa0df6f5b75d294e4b86c1027648991e ] When running L2, #UD should be intercepted by L1 or just forwarded directly to L2. It should not reach L0 x86 emulator. Therefore, set intercept for #UD only based on L1 exception-bitmap. Also add WARN_ON_ONCE() on L0 #UD intercept handlers to make sure it is never reached while running L2. This improves commit ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode") by removing an unnecessary exit from L2 to L0 on #UD when L1 doesn't intercept it. In addition, SVM L0 #UD intercept handler doesn't handle correctly the case it is raised from L2. In this case, it should forward the #UD to guest instead of x86 emulator. As done in VMX #UD intercept handler. This commit fixes this issue as-well. Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Paolo Bonzini Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 9 ++++++++- arch/x86/kvm/vmx.c | 9 ++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6a8284f72328..c8be4e6d365b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -362,6 +362,7 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; + u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -372,9 +373,14 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; + /* No need to intercept #UD if L1 doesn't intercept it */ + h_intercept_exceptions = + h->intercept_exceptions & ~(1U << UD_VECTOR); + c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = + h_intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2189,6 +2195,7 @@ static int ud_interception(struct vcpu_svm *svm) { int er; + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef16cf0f7cfd..36628ed362d8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1891,7 +1891,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1909,6 +1909,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else + eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5919,10 +5921,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; From fbd81f09793b8ab0696782da0c081599b6620efc Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:33 +0200 Subject: [PATCH 037/770] KVM: x86: emulator: Return to user-mode on L1 CPL=0 emulation failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1f4dcb3b213235e642088709a1c54964d23365e9 ] On this case, handle_emulation_failure() fills kvm_run with internal-error information which it expects to be delivered to user-mode for further processing. However, the code reports a wrong return-value which makes KVM to never return to user-mode on this scenario. Fixes: 6d77dbfc88e3 ("KVM: inject #UD if instruction emulation fails and exit to userspace") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 575c8953cc9a..b25326ad82a1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5416,7 +5416,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); From df54fc5c28d2921ae4eb0d3401db929cb0c62e56 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:56:34 +0200 Subject: [PATCH 038/770] KVM: x86: Don't re-execute instruction when not passing CR2 value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b8ae63798cb97e785a667ff27e43fa6220cb734 ] In case of instruction-decode failure or emulation failure, x86_emulate_instruction() will call reexecute_instruction() which will attempt to use the cr2 value passed to x86_emulate_instruction(). However, when x86_emulate_instruction() is called from emulate_instruction(), cr2 is not passed (passed as 0) and therefore it doesn't make sense to execute reexecute_instruction() logic at all. Fixes: 51d8b66199e9 ("KVM: cleanup emulate_instruction") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index eb38ac9d9a31..4f8b80199672 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1156,7 +1156,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 36628ed362d8..079cedb910bc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6607,7 +6607,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; From 4f5500a63455815c45b28aa740fce91b49630188 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:47 -0800 Subject: [PATCH 039/770] KVM: X86: Fix operand/address-size during instruction decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3853be2603191829b442b64dac6ae8ba0c027bf9 ] Pedro reported: During tests that we conducted on KVM, we noticed that executing a "PUSH %ES" instruction under KVM produces different results on both memory and the SP register depending on whether EPT support is enabled. With EPT the SP is reduced by 4 bytes (and the written value is 0-padded) but without EPT support it is only reduced by 2 bytes. The difference can be observed when the CS.DB field is 1 (32-bit) but not when it's 0 (16-bit). The internal segment descriptor cache exist even in real/vm8096 mode. The CS.D also should be respected instead of just default operand/address-size/66H prefix/67H prefix during instruction decoding. This patch fixes it by also adjusting operand/address-size according to CS.D. Reported-by: Pedro Fonseca Tested-by: Pedro Fonseca Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Pedro Fonseca Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7bbb5da2b49d..1a2543545e5d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5009,6 +5009,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5027,6 +5029,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; From 5e7c270a1e60fcc16e8bcda6c6d659b43eba1ddb Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 5 Nov 2017 16:54:49 -0800 Subject: [PATCH 040/770] KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5af4157388adad82c339e3742fb6b67840721347 ] Commit 4f350c6dbcb (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) can result in L1(run kvm-unit-tests/run_tests.sh vmx_controls in L1) null pointer deference and also L0 calltrace when EPT=0 on both L0 and L1. In L1: BUG: unable to handle kernel paging request at ffffffffc015bf8f IP: vmx_vcpu_run+0x202/0x510 [kvm_intel] PGD 146e13067 P4D 146e13067 PUD 146e15067 PMD 3d2686067 PTE 3d4af9161 Oops: 0003 [#1] PREEMPT SMP CPU: 2 PID: 1798 Comm: qemu-system-x86 Not tainted 4.14.0-rc4+ #6 RIP: 0010:vmx_vcpu_run+0x202/0x510 [kvm_intel] Call Trace: WARNING: kernel stack frame pointer at ffffb86f4988bc18 in qemu-system-x86:1798 has bad value 0000000000000002 In L0: -----------[ cut here ]------------ WARNING: CPU: 6 PID: 4460 at /home/kernel/linux/arch/x86/kvm//vmx.c:9845 vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] CPU: 6 PID: 4460 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc7+ #25 RIP: 0010:vmx_inject_page_fault_nested+0x130/0x140 [kvm_intel] Call Trace: paging64_page_fault+0x500/0xde0 [kvm] ? paging32_gva_to_gpa_nested+0x120/0x120 [kvm] ? nonpaging_page_fault+0x3b0/0x3b0 [kvm] ? __asan_storeN+0x12/0x20 ? paging64_gva_to_gpa+0xb0/0x120 [kvm] ? paging64_walk_addr_generic+0x11a0/0x11a0 [kvm] ? lock_acquire+0x2c0/0x2c0 ? vmx_read_guest_seg_ar+0x97/0x100 [kvm_intel] ? vmx_get_segment+0x2a6/0x310 [kvm_intel] ? sched_clock+0x1f/0x30 ? check_chain_key+0x137/0x1e0 ? __lock_acquire+0x83c/0x2420 ? kvm_multiple_exception+0xf2/0x220 [kvm] ? debug_check_no_locks_freed+0x240/0x240 ? debug_smp_processor_id+0x17/0x20 ? __lock_is_held+0x9e/0x100 kvm_mmu_page_fault+0x90/0x180 [kvm] kvm_handle_page_fault+0x15c/0x310 [kvm] ? __lock_is_held+0x9e/0x100 handle_exception+0x3c7/0x4d0 [kvm_intel] vmx_handle_exit+0x103/0x1010 [kvm_intel] ? kvm_arch_vcpu_ioctl_run+0x1628/0x2e20 [kvm] The commit avoids to load host state of vmcs12 as vmcs01's guest state since vmcs12 is not modified (except for the VM-instruction error field) if the checking of vmcs control area fails. However, the mmu context is switched to nested mmu in prepare_vmcs02() and it will not be reloaded since load_vmcs12_host_state() is skipped when nested VMLAUNCH/VMRESUME fails. This patch fixes it by reloading mmu context when nested VMLAUNCH/VMRESUME fails. Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhukhan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jim Mattson Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 079cedb910bc..3a16e45f0c2e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11339,6 +11339,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 entry_failure_code; + + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; +} + /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11352,7 +11370,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11379,17 +11396,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + load_vmcs12_mmu_host_state(vcpu, vmcs12); if (enable_vpid) { /* @@ -11615,6 +11622,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * accordingly. */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* * The emulated instruction was already skipped in * nested_vmx_run, but the updated RIP was never From f0a3691bf2e5692ec3acf60aea8dc098a6ce2eae Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Nov 2017 18:04:05 +0100 Subject: [PATCH 041/770] KVM: x86: fix em_fxstor() sleeping while in atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4d772cb85f64c16eca00177089ecb3cd5d292120 ] Commit 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") optimize the stack size, but introduced a guest memory access which might sleep while in atomic. Fix it by introducing, again, a second fxregs_state. Try to avoid large stacks by using noinline. Add some helpful comments. Reported by syzbot: in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109 2 locks held by syzkaller879109/2909: #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x1c/0x70 arch/x86/kvm/../../../virt/kvm/kvm_main.c:154 #1: (&kvm->srcu){....}, at: [] vcpu_enter_guest arch/x86/kvm/x86.c:6983 [inline] #1: (&kvm->srcu){....}, at: [] vcpu_run arch/x86/kvm/x86.c:7061 [inline] #1: (&kvm->srcu){....}, at: [] kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222 CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014 __might_sleep+0x95/0x190 kernel/sched/core.c:5967 __might_fault+0xab/0x1d0 mm/memory.c:4383 __copy_from_user include/linux/uaccess.h:71 [inline] __kvm_read_guest_page+0x58/0xa0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771 kvm_vcpu_read_guest_page+0x44/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791 kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407 kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466 segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819 em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022 x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471 x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698 kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854 handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400 vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718 vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline] vcpu_run arch/x86/kvm/x86.c:7061 [inline] kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222 kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x437fc9 RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000 R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000 Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in em_fxrstor") Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1a2543545e5d..eca6a89f2326 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4023,6 +4023,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) fxstate_size(ctxt)); } +/* + * FXRSTOR might restore XMM registers not provided by the guest. Fill + * in the host registers (via FXSAVE) instead, so they won't be modified. + * (preemption has to stay disabled until FXRSTOR). + * + * Use noinline to keep the stack for other functions called by callers small. + */ +static noinline int fxregs_fixup(struct fxregs_state *fx_state, + const size_t used_size) +{ + struct fxregs_state fx_tmp; + int rc; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, + __fxstate_size(16) - used_size); + + return rc; +} + static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; @@ -4033,19 +4053,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + size = fxstate_size(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->get_fpu(ctxt); - size = fxstate_size(ctxt); if (size < __fxstate_size(16)) { - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) goto out; } - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); - if (rc != X86EMUL_CONTINUE) - goto out; - if (fx_state.mxcsr >> 16) { rc = emulate_gp(ctxt, 0); goto out; From b501603be9dfc38725feb8dddf1bdf2bbf3bd6ea Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:29 +0200 Subject: [PATCH 042/770] KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0fc5a36dd6b345eb0d251a65c236e53bead3eef7 ] KVM uses ioapic_handled_vectors to track vectors that need to notify the IOAPIC on EOI. The problem is that IOAPIC can be reconfigured while an interrupt with old configuration is pending or running and ioapic_handled_vectors only remembers the newest configuration; thus EOI from the old interrupt is not delievered to the IOAPIC. A previous commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") addressed this issue by adding pending edge-triggered interrupts to ioapic_handled_vectors, fixing this race for edge-triggered interrupts. The commit explicitly ignored level-triggered interrupts, but this race applies to them as well: 1) IOAPIC sends a level triggered interrupt vector to VCPU0 2) VCPU0's handler deasserts the irq line and reconfigures the IOAPIC to route the vector to VCPU1. The reconfiguration rewrites only the upper 32 bits of the IOREDTBLn register. (Causes KVM to update ioapic_handled_vectors for VCPU0 and it no longer includes the vector.) 3) VCPU0 sends EOI for the vector, but it's not delievered to the IOAPIC because the ioapic_handled_vectors doesn't include the vector. 4) New interrupts are not delievered to VCPU1 because remote_irr bit is set forever. Therefore, the correct behavior is to add all pending and running interrupts to ioapic_handled_vectors. This commit introduces a slight performance hit similar to commit db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") for the rare case that the vector is reused by a non-IOAPIC source on VCPU0. We prefer to keep solution simple and not handle this case just as the original commit does. Fixes: db2bdcbbbd32 ("KVM: x86: fix edge EOI and IOAPIC reconfig race") Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bdff437acbcb..ae0a7dc318b2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } From 408a265107397dc55b415628880a8f6b14dc7044 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:32 +0200 Subject: [PATCH 043/770] KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8bfec2930525808c01f038825d1df3904638631 ] Some OSes (Linux, Xen) use this behavior to clear the Remote IRR bit for IOAPICs without an EOI register. They simulate the EOI message manually by changing the trigger mode to edge and then back to level, with the entry being masked during this. QEMU implements this feature in commit ed1263c363c9 ("ioapic: clear remote irr bit for edge-triggered interrupts") As a side effect, this commit removes an incorrect behavior where Remote IRR was cleared when the redirection table entry was rewritten. This is not consistent with the manual and also opens an opportunity for a strange behavior when a redirection table entry is modified from an interrupt handler that handles the same entry: The modification will clear the Remote IRR bit even though the interrupt handler is still running. Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ae0a7dc318b2..5c9ad29e11fd 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -304,8 +304,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); From b689fc5b79d547f88d464e021d6de815d8612ac3 Mon Sep 17 00:00:00 2001 From: Nikita Leshenko Date: Sun, 5 Nov 2017 15:52:33 +0200 Subject: [PATCH 044/770] KVM: x86: ioapic: Preserve read-only values in the redirection table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b200dded0a6974a3b69599832b2203483920ab25 ] According to 82093AA (IOAPIC) manual, Remote IRR and Delivery Status are read-only. QEMU implements the bits as RO in commit 479c2a1cb7fb ("ioapic: keep RO bits for IOAPIC entry"). Signed-off-by: Nikita Leshenko Reviewed-by: Liran Alon Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Wanpeng Li Reviewed-by: Steve Rutherford Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5c9ad29e11fd..9d270ba9643c 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -276,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -298,6 +299,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; @@ -305,6 +309,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits &= ~0xffffffffULL; e->bits |= (u32) val; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; /* * Some OSes (Linux, Xen) assume that Remote IRR bit will From d46e961f056aaec155bc3e21cf8e31449b5d2117 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 5 Nov 2017 16:07:43 +0200 Subject: [PATCH 045/770] KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 917dc6068bc12a2dafffcf0e9d405ddb1b8780cb ] vmx_check_nested_events() should return -EBUSY only in case there is a pending L1 event which requires a VMExit from L2 to L1 but such a VMExit is currently blocked. Such VMExits are blocked either because nested_run_pending=1 or an event was reinjected to L2. vmx_check_nested_events() should return 0 in case there are no pending L1 events which requires a VMExit from L2 to L1 or if a VMExit from L2 to L1 was done internally. However, upstream commit which introduced blocking in case an event was reinjected to L2 (commit acc9ab601327 ("KVM: nVMX: Fix pending events injection")) contains a bug: It returns -EBUSY even if there are no pending L1 events which requires VMExit from L2 to L1. This commit fix this issue. Fixes: acc9ab601327 ("KVM: nVMX: Fix pending events injection") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3a16e45f0c2e..d073f91bc089 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11114,13 +11114,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; - - if (kvm_event_needs_reinjection(vcpu)) - return -EBUSY; + bool block_nested_events = + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); vcpu->arch.exception.pending = false; @@ -11129,14 +11128,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | @@ -11152,7 +11151,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && nested_exit_on_intr(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; From 26bd01c1affe663196e05302290c08f781d2e597 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:20 +0300 Subject: [PATCH 046/770] nvme-fabrics: introduce init command check for a queue that is not alive [ Upstream commit 48832f8d58cfedb2f9bee11bbfbb657efb42e7e7 ] When the fabrics queue is not alive and fully functional, no commands should be allowed to pass but connect (which moves the queue to a fully functional state). Any other command should be failed, with either temporary status BLK_STS_RESOUCE or permanent status BLK_STS_IOERR. This is shared across all fabrics, hence move the check to fabrics library. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fabrics.h | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/rdma.c | 30 +++++------------------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index bf33663218cd..9ff8529a64a9 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -142,4 +142,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); +static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, + struct request *rq) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + /* + * We cannot accept any other command until the connect command has + * completed, so only allow connect to pass. + */ + if (!blk_rq_is_passthrough(rq) || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) { + /* + * Reconnecting state means transport disruption, which can take + * a long time and even might fail permanently, fail fast to + * give upper layers a chance to failover. + * Deleting state means that the ctrl will never accept commands + * again, fail it permanently. + */ + if (ctrl->state == NVME_CTRL_RECONNECTING || + ctrl->state == NVME_CTRL_DELETING) { + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; + } + return BLK_STS_RESOURCE; /* try again later */ + } + + return BLK_STS_OK; +} + #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0ebb539f3bd3..6de163e6c9eb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1603,31 +1603,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved) * We cannot accept any other command until the Connect command has completed. */ static inline blk_status_t -nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) +nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) { - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = nvme_req(rq)->cmd; - - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * reconnecting state means transport disruption, which - * can take a long time and even might fail permanently, - * fail fast to give upper layers a chance to failover. - * deleting state means that the ctrl will never accept - * commands again, fail it permanently. - */ - if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING || - queue->ctrl->ctrl.state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - } - - return 0; + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -1646,7 +1626,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_queue_is_ready(queue, rq); + ret = nvme_rdma_is_ready(queue, rq); if (unlikely(ret)) return ret; From db2044fc42305d2f8f92f24e16f223b66dff6cfe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:21 +0300 Subject: [PATCH 047/770] nvme-fc: check if queue is ready in queue_rq [ Upstream commit 9e0ed16ab9a9aaf670b81c9cd05b5e50defed654 ] In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_FC_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index be49d0f79381..3148d760d825 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -41,6 +41,7 @@ enum nvme_fc_queue_flags { NVME_FC_Q_CONNECTED = (1 << 0), + NVME_FC_Q_LIVE = (1 << 1), }; #define NVMEFC_QUEUE_DELAY 3 /* ms units */ @@ -1654,6 +1655,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) return; + clear_bit(NVME_FC_Q_LIVE, &queue->flags); /* * Current implementation never disconnects a single queue. * It always terminates a whole association. So there is never @@ -1661,7 +1663,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); } static void @@ -1740,6 +1741,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -2048,6 +2051,14 @@ busy: return BLK_STS_RESOURCE; } +static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2063,6 +2074,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; + ret = nvme_fc_is_ready(queue, rq); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, rq, sqe); if (ret) return ret; @@ -2398,6 +2413,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_disconnect_admin_queue; + set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); + /* * Check controller capabilities * From 7af5f9137c93bd69384a3d12d9f741b63f4d98c4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 24 Oct 2017 15:25:22 +0300 Subject: [PATCH 048/770] nvme-loop: check if queue is ready in queue_rq [ Upstream commit 9d7fab04b95e8c26014a9bfc1c943b8360b44c17 ] In case the queue is not LIVE (fully functional and connected at the nvmf level), we cannot allow any commands other than connect to pass through. Add a new queue state flag NVME_LOOP_Q_LIVE which is set after nvmf connect and cleared in queue teardown. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/loop.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 92628c432926..02aff5cc48bf 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -61,10 +61,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_loop_ctrl, ctrl); } +enum nvme_loop_queue_flags { + NVME_LOOP_Q_LIVE = 0, +}; + struct nvme_loop_queue { struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvme_loop_ctrl *ctrl; + unsigned long flags; }; static struct nvmet_port *nvmet_loop_port; @@ -153,6 +158,14 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) + return nvmf_check_init_req(&queue->ctrl->ctrl, rq); + return BLK_STS_OK; +} + static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -162,6 +175,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; + ret = nvme_loop_is_ready(queue, req); + if (unlikely(ret)) + return ret; + ret = nvme_setup_cmd(ns, req, &iod->cmd); if (ret) return ret; @@ -275,6 +292,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -305,8 +323,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) { int i; - for (i = 1; i < ctrl->ctrl.queue_count; i++) + for (i = 1; i < ctrl->ctrl.queue_count; i++) { + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + } } static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) @@ -346,6 +366,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) return ret; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); } return 0; @@ -387,6 +408,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, From 128dc55f891c79535dbf278e332f0f8fedfa7469 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 9 Nov 2017 01:12:03 -0500 Subject: [PATCH 049/770] nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A [ Upstream commit 8427bbc224863e14d905c87920d4005cb3e88ac3 ] The NVMe device in question drops off the PCIe bus after system suspend. I've tried several approaches to workaround this issue, but none of them works: - NVME_QUIRK_DELAY_BEFORE_CHK_RDY - NVME_QUIRK_NO_DEEPEST_PS - Disable APST before controller shutdown - Delay between controller shutdown and system suspend - Explicitly set power state to 0 before controller shutdown Fortunately it's a desktop, so disable APST won't hurt the battery. Also, change the quirk function name to reflect it's for vendor combination quirks. BugLink: https://bugs.launchpad.net/bugs/1705748 Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 75539f7c58b9..66d872b05024 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2282,7 +2282,7 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } -static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) { if (pdev->vendor == 0x144d && pdev->device == 0xa802) { /* @@ -2297,6 +2297,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) return NVME_QUIRK_NO_DEEPEST_PS; + } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { + /* + * Samsung SSD 960 EVO drops off the PCIe bus after system + * suspend on a Ryzen board, ASUS PRIME B350M-A. + */ + if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && + dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + return NVME_QUIRK_NO_APST; } return 0; @@ -2336,7 +2344,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto unmap; - quirks |= check_dell_samsung_bug(pdev); + quirks |= check_vendor_combination_bug(pdev); result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); From 93a4bcf2c4252a518a128df1d017a7d5b1a814c2 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 17 Nov 2017 01:34:24 +0900 Subject: [PATCH 050/770] nvme-pci: avoid hmb desc array idx out-of-bound when hmmaxd set. [ Upstream commit 244a8fe40a09c218622eb9927b9090b0a9b73a1a ] hmb descriptor idx out-of-bound occurs in case of below conditions. preferred = 128MiB chunk_size = 4MiB hmmaxd = 1 Current code will not allow rmmod which will free hmb descriptors to be done successfully in above case. "descs[i]" will be set in for-loop without seeing any conditions related to "max_entries" after a single "descs" was allocated by (max_entries = 1) in this case. Added a condition into for-loop to check index of descriptors. Fixes: 044a9df1("nvme-pci: implement the HMB entry number and size limitations") Signed-off-by: Minwoo Im Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 66d872b05024..5dae650438c1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1645,7 +1645,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (!bufs) goto out_free_descs; - for (size = 0; size < preferred; size += len) { + for (size = 0; size < preferred && i < max_entries; size += len) { dma_addr_t dma_addr; len = min_t(u64, chunk_size, preferred - size); From 71686d2a17592a79c7174efad05ca79d3270b30d Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 10 Nov 2017 15:38:45 -0800 Subject: [PATCH 051/770] nvmet-fc: correct ref counting error when deferred rcv used [ Upstream commit 619c62dcc62b957d17cccde2081cad527b020883 ] Whenever a cmd is received a reference is taken while looking up the queue. The reference is removed after the cmd is done as the iod is returned for reuse. The fod may be reused for a deferred (recevied but no job context) cmd. Existing code removes the reference only if the fod is not reused for another command. Given the fod may be used for one or more ios, although a reference was taken per io, it won't be matched on the frees. Remove the reference on every fod free. This pairs the references to each io. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 58e010bdda3e..8e21211b904b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -532,15 +532,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + /* release the queue lookup reference on the completed IO */ + nvmet_fc_tgt_q_put(queue); + spin_lock_irqsave(&queue->qlock, flags); deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, struct nvmet_fc_defer_fcp_req, req_list); if (!deferfcp) { list_add_tail(&fod->fcp_list, &fod->queue->fod_list); spin_unlock_irqrestore(&queue->qlock, flags); - - /* Release reference taken at queue lookup and fod allocation */ - nvmet_fc_tgt_q_put(queue); return; } @@ -759,6 +759,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) tgtport->ops->fcp_req_release(&tgtport->fc_target_port, deferfcp->fcp_req); + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + kfree(deferfcp); spin_lock_irqsave(&queue->qlock, flags); From 18c128456eacb6fc022264f178c3a821180854c2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Nov 2017 14:26:36 +0100 Subject: [PATCH 052/770] s390/topology: fix compile error in file arch/s390/kernel/smp.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 38389ec84e835fa31a59b7dabb18343106a6d0d5 ] Commit 1887aa07b676 ("s390/topology: add detection of dedicated vs shared CPUs") introduced following compiler error when CONFIG_SCHED_TOPOLOGY is not set. CC arch/s390/kernel/smp.o ... arch/s390/kernel/smp.c: In function ‘smp_start_secondary’: arch/s390/kernel/smp.c:812:6: error: implicit declaration of function ‘topology_cpu_dedicated’; did you mean ‘topology_cpu_init’? This patch fixes the compiler error by adding function topology_cpu_dedicated() to return false when this config option is not defined. Signed-off-by: Thomas Richter Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/topology.h | 1 + arch/s390/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 55de4eb73604..de0a8b17bcaa 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -51,6 +51,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 092c4154abd7..7ffaf9fd6d19 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "entry.h" enum { From 82b90de5b111deaa09ec61594a6df84a426dceba Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Nov 2017 16:32:22 +0100 Subject: [PATCH 053/770] s390/zcrypt: Fix wrong comparison leading to strange load balancing [ Upstream commit 0b0882672640ced4deeebf84da0b88b6389619c4 ] The function to decide if one zcrypt queue is better than another one compared two pointers instead of comparing the values where the pointers refer to. So within the same zcrypt card when load of each queue was equal just one queue was used. This effect only appears on relatively lite load, typically with one thread applications. This patch fixes the wrong comparison and now the counters show that requests are balanced equally over all available queues within the cards. There is no performance improvement coming with this fix. As long as the queue depth for an APQN queue is not touched, processing is not faster when requests are spread over queues within the same card hardware. So this fix only beautifies the lszcrypt counter printouts. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index b5f4006198b9..a9a56aa9c26b 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -218,8 +218,8 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, weight += atomic_read(&zq->load); pref_weight += atomic_read(&pref_zq->load); if (weight == pref_weight) - return &zq->queue->total_request_count > - &pref_zq->queue->total_request_count; + return zq->queue->total_request_count > + pref_zq->queue->total_request_count; return weight > pref_weight; } From cb78d818c30d4fa700d24baf1d91d6337da272db Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 15 Oct 2017 21:24:49 +0200 Subject: [PATCH 054/770] ACPI / bus: Leave modalias empty for devices which are not present [ Upstream commit 10809bb976648ac58194a629e3d7af99e7400297 ] Most Bay and Cherry Trail devices use a generic DSDT with all possible peripheral devices present in the DSDT, with their _STA returning 0x00 or 0x0f based on AML variables which describe what is actually present on the board. Since ACPI device objects with a 0x00 status (not present) still get an entry under /sys/bus/acpi/devices, and those entry had an acpi:PNPID modalias, userspace would end up loading modules for non present hardware. This commit fixes this by leaving the modalias empty for non present devices. This results in 10 modules less being loaded with a generic distro kernel config on my Cherry Trail test-device (a GPD pocket). Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 24418932612e..a041689e5701 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -146,6 +146,10 @@ static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, int count; struct acpi_hardware_id *id; + /* Avoid unnecessarily loading modules for non present devices. */ + if (!acpi_device_is_present(acpi_dev)) + return 0; + /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the From d290178f00d483df27dc17bf41797355bd952d9d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 15 Nov 2017 21:17:55 +0000 Subject: [PATCH 055/770] cpufreq: Add Loongson machine dependencies [ Upstream commit 0d307935fefa6389eb726c6362351c162c949101 ] The MIPS loongson cpufreq drivers don't build unless configured for the correct machine type, due to dependency on machine specific architecture headers and symbols in machine specific platform code. More specifically loongson1-cpufreq.c uses RST_CPU_EN and RST_CPU, neither of which is defined in asm/mach-loongson32/regs-clk.h unless CONFIG_LOONGSON1_LS1B=y, and loongson2_cpufreq.c references loongson2_clockmod_table[], which is only defined in arch/mips/loongson64/lemote-2f/clock.c, i.e. when CONFIG_LEMOTE_MACH2F=y. Add these dependencies to Kconfig to avoid randconfig / allyesconfig build failures (e.g. when based on BMIPS which also has a cpufreq driver). Signed-off-by: James Hogan Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4ebae43118ef..d8addbce40bc 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -275,6 +275,7 @@ config BMIPS_CPUFREQ config LOONGSON2_CPUFREQ tristate "Loongson2 CPUFreq Driver" + depends on LEMOTE_MACH2F help This option adds a CPUFreq driver for loongson processors which support software configurable cpu frequency. @@ -287,6 +288,7 @@ config LOONGSON2_CPUFREQ config LOONGSON1_CPUFREQ tristate "Loongson1 CPUFreq Driver" + depends on LOONGSON1_LS1B help This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. From 2e194c9c55964455366e9e1b93fe02f932ecf346 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 8 Nov 2017 17:29:44 +0100 Subject: [PATCH 056/770] null_blk: fix dev->badblocks leak [ Upstream commit 1addb798e93893d33c8dfab743cd44f09fd7719a ] null_alloc_dev() allocates memory for dev->badblocks, but cleanup currently only occurs in the configfs release codepath, missing a number of other places. This bug was found running the blktests block/010 test, alongside kmemleak: rapido1:/blktests# ./check block/010 ... rapido1:/blktests# echo scan > /sys/kernel/debug/kmemleak [ 306.966708] kmemleak: 32 new suspected memory leaks (see /sys/kernel/debug/kmemleak) rapido1:/blktests# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88001f86d000 (size 4096): comm "modprobe", pid 231, jiffies 4294892415 (age 318.252s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x9f/0xe0 [] badblocks_init+0x2f/0x60 [] 0xffffffffa0019fae [] nullb_device_badblocks_store+0x63/0x130 [null_blk] [] do_one_initcall+0x3d/0x170 [] do_init_module+0x56/0x1e9 [] load_module+0x1c47/0x26a0 [] SyS_finit_module+0xa9/0xd0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Fixes: 2f54a613c942 ("nullb: badbblocks support") Reviewed-by: Shaohua Li Signed-off-by: David Disseldorp Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/null_blk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 4d55af5c6e5b..69dfa1d3f453 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -467,7 +467,6 @@ static void nullb_device_release(struct config_item *item) { struct nullb_device *dev = to_nullb_device(item); - badblocks_exit(&dev->badblocks); null_free_device_storage(dev, false); null_free_dev(dev); } @@ -578,6 +577,10 @@ static struct nullb_device *null_alloc_dev(void) static void null_free_dev(struct nullb_device *dev) { + if (!dev) + return; + + badblocks_exit(&dev->badblocks); kfree(dev); } From e2443fb287cc52a98a3b6f417a5e46bf2383e5ba Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 22 Nov 2017 17:19:32 +0100 Subject: [PATCH 057/770] s390: fix alloc_pgste check in init_new_context again [ Upstream commit 53c4ab70c11c3ba1b9e3caa8e8c17e9c16d9cbc0 ] git commit badb8bb983e9 "fix alloc_pgste check in init_new_context" fixed the problem of 'current->mm == NULL' in init_new_context back in 2011. git commit 3eabaee998c7 "KVM: s390: allow sie enablement for multi- threaded programs" completely removed the check against alloc_pgste. git commit 23fefe119ceb "s390/kvm: avoid global config of vm.alloc_pgste=1" re-added a check against the alloc_pgste flag but without the required check for current->mm != NULL. For execve() called by a kernel thread init_new_context() reads from ((struct mm_struct *) NULL)->context.alloc_pgste to decide between 2K vs 4K page tables. If the bit happens to be set for the init process it will be created with large page tables. This decision is inherited by all the children of init, this waste quite some memory. Re-add the check for 'current->mm != NULL'. Fixes: 23fefe119ceb ("s390/kvm: avoid global config of vm.alloc_pgste=1") Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 43607bb12cc2..a6cc744ff5fb 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || - current->mm->context.alloc_pgste; + (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; From 92c131bebf4737866ffb31aeed45085ce827e0af Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: [PATCH 058/770] rxrpc: The mutex lock returned by rxrpc_accept_call() needs releasing [ Upstream commit 03a6c82218b9a87014b2c6c4e178294fdc8ebd8a ] The caller of rxrpc_accept_call() must release the lock on call->user_mutex returned by that function. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/sendmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9ea6f972767e..d2f51d6a253c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -563,8 +563,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call, rxrpc_call_put); - return 0; + ret = 0; + goto out_put_unlock; } call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); @@ -633,6 +633,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len, NULL); } +out_put_unlock: mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); From b89372f234a83d64a8f7e80fdcfadb64d9a9aeca Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:40 +0000 Subject: [PATCH 059/770] rxrpc: Provide a different lockdep key for call->user_mutex for kernel calls [ Upstream commit 9faaff593404a9c4e5abc6839a641635d7b9d0cd ] Provide a different lockdep key for rxrpc_call::user_mutex when the call is made on a kernel socket, such as by the AFS filesystem. The problem is that lockdep registers a false positive between userspace calling the sendmsg syscall on a user socket where call->user_mutex is held whilst userspace memory is accessed whereas the AFS filesystem may perform operations with mmap_sem held by the caller. In such a case, the following warning is produced. ====================================================== WARNING: possible circular locking dependency detected 4.14.0-fscache+ #243 Tainted: G E ------------------------------------------------------ modpost/16701 is trying to acquire lock: (&vnode->io_lock){+.+.}, at: [] afs_begin_vnode_operation+0x33/0x77 [kafs] but task is already holding lock: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++}: __might_fault+0x61/0x89 _copy_from_iter_full+0x40/0x1fa rxrpc_send_data+0x8dc/0xff3 rxrpc_do_sendmsg+0x62f/0x6a1 rxrpc_sendmsg+0x166/0x1b7 sock_sendmsg+0x2d/0x39 ___sys_sendmsg+0x1ad/0x22b __sys_sendmsg+0x41/0x62 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #2 (&call->user_mutex){+.+.}: __mutex_lock+0x86/0x7d2 rxrpc_new_client_call+0x378/0x80e rxrpc_kernel_begin_call+0xf3/0x154 afs_make_call+0x195/0x454 [kafs] afs_vl_get_capabilities+0x193/0x198 [kafs] afs_vl_lookup_vldb+0x5f/0x151 [kafs] afs_create_volume+0x2e/0x2f4 [kafs] afs_mount+0x56a/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #1 (k-sk_lock-AF_RXRPC){+.+.}: lock_sock_nested+0x74/0x8a rxrpc_kernel_begin_call+0x8a/0x154 afs_make_call+0x195/0x454 [kafs] afs_fs_get_capabilities+0x17a/0x17f [kafs] afs_probe_fileserver+0xf7/0x2f0 [kafs] afs_select_fileserver+0x83f/0x903 [kafs] afs_fetch_status+0x89/0x11d [kafs] afs_iget+0x16f/0x4f8 [kafs] afs_mount+0x6c6/0x8d7 [kafs] mount_fs+0x6a/0x109 vfs_kern_mount+0x67/0x135 do_mount+0x90b/0xb57 SyS_mount+0x72/0x98 do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 -> #0 (&vnode->io_lock){+.+.}: lock_acquire+0x174/0x19f __mutex_lock+0x86/0x7d2 afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 __clear_user+0x3d/0x60 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be return_from_SYSCALL_64+0x0/0x75 other info that might help us debug this: Chain exists of: &vnode->io_lock --> &call->user_mutex --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&call->user_mutex); lock(&mm->mmap_sem); lock(&vnode->io_lock); *** DEADLOCK *** 1 lock held by modpost/16701: #0: (&mm->mmap_sem){++++}, at: [] __do_page_fault+0x1ef/0x486 stack backtrace: CPU: 0 PID: 16701 Comm: modpost Tainted: G E 4.14.0-fscache+ #243 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack+0x67/0x8e print_circular_bug+0x341/0x34f check_prev_add+0x11f/0x5d4 ? add_lock_to_list.isra.12+0x8b/0x8b ? add_lock_to_list.isra.12+0x8b/0x8b ? __lock_acquire+0xf77/0x10b4 __lock_acquire+0xf77/0x10b4 lock_acquire+0x174/0x19f ? afs_begin_vnode_operation+0x33/0x77 [kafs] __mutex_lock+0x86/0x7d2 ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] ? afs_begin_vnode_operation+0x33/0x77 [kafs] afs_begin_vnode_operation+0x33/0x77 [kafs] afs_fetch_data+0x80/0x12a [kafs] afs_readpages+0x314/0x405 [kafs] __do_page_cache_readahead+0x203/0x2ba ? filemap_fault+0x179/0x54d filemap_fault+0x179/0x54d __do_fault+0x17/0x60 __handle_mm_fault+0x6d7/0x95c handle_mm_fault+0x24e/0x2a3 __do_page_fault+0x301/0x486 do_page_fault+0x236/0x259 page_fault+0x22/0x30 RIP: 0010:__clear_user+0x3d/0x60 RSP: 0018:ffff880071e93da0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000000000011c RCX: 000000000000011c RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000060f720 RBP: 000000000060f720 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffff8800b5459b68 R12: ffff8800ce150e00 R13: 000000000060f720 R14: 00000000006127a8 R15: 0000000000000000 padzero+0x1c/0x2b load_elf_binary+0x785/0xdc7 search_binary_handler+0x81/0x1ff do_execveat_common.isra.14+0x600/0x888 do_execve+0x1f/0x21 SyS_execve+0x28/0x2f do_syscall_64+0x89/0x1be entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fdb6009ee07 RSP: 002b:00007fff566d9728 EFLAGS: 00000246 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 000055ba57280900 RCX: 00007fdb6009ee07 RDX: 000055ba5727f270 RSI: 000055ba5727cac0 RDI: 000055ba57280900 RBP: 000055ba57280900 R08: 00007fff566d9700 R09: 0000000000000000 R10: 000055ba5727cac0 R11: 0000000000000246 R12: 0000000000000000 R13: 000055ba5727cac0 R14: 000055ba5727f270 R15: 0000000000000000 Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_object.c | 19 +++++++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ea5600b747cc..2b3992c5060e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -671,7 +671,7 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cbd1701e813a..3028298ca561 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. */ - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fcdd6555a820..8a5a42e8ec23 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -55,6 +55,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } +static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; + /* * find an extant server call * - called in process context with IRQs enabled @@ -95,7 +97,7 @@ found_extant_call: /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_call *call; @@ -114,6 +116,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) goto nomem_2; mutex_init(&call->user_mutex); + + /* Prevent lockdep reporting a deadlock false positive between the afs + * filesystem and sys_sendmsg() via the mmap sem. + */ + if (rx->sk.sk_kern_sock) + lockdep_set_class(&call->user_mutex, + &rxrpc_call_user_mutex_lock_class_key); + setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -150,7 +160,8 @@ nomem: /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; @@ -158,7 +169,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, _enter(""); - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -209,7 +220,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); From 1392633bafde919b3d78449b8a1a1c9f50f2b72e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2017 10:18:42 +0000 Subject: [PATCH 060/770] rxrpc: Fix service endpoint expiry [ Upstream commit f859ab61875978eeaa539740ff7f7d91f5d60006 ] RxRPC service endpoints expire like they're supposed to by the following means: (1) Mark dead rxrpc_net structs (with ->live) rather than twiddling the global service conn timeout, otherwise the first rxrpc_net struct to die will cause connections on all others to expire immediately from then on. (2) Mark local service endpoints for which the socket has been closed (->service_closed) so that the expiration timeout can be much shortened for service and client connections going through that endpoint. (3) rxrpc_put_service_conn() needs to schedule the reaper when the usage count reaches 1, not 0, as idle conns have a 1 count. (4) The accumulator for the earliest time we might want to schedule for should be initialised to jiffies + MAX_JIFFY_OFFSET, not ULONG_MAX as the comparison functions use signed arithmetic. (5) Simplify the expiration handling, adding the expiration value to the idle timestamp each time rather than keeping track of the time in the past before which the idle timestamp must go to be expired. This is much easier to read. (6) Ignore the timeouts if the net namespace is dead. (7) Restart the service reaper work item rather the client reaper. Signed-off-by: David Howells Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 13 ++++++++++++ net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/conn_object.c | 40 +++++++++++++++++++++--------------- net/rxrpc/net_ns.c | 3 +++ 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ebe96796027a..a58caf5807ff 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -49,6 +49,7 @@ enum rxrpc_conn_trace { rxrpc_conn_put_client, rxrpc_conn_put_service, rxrpc_conn_queued, + rxrpc_conn_reap_service, rxrpc_conn_seen, }; @@ -206,6 +207,7 @@ enum rxrpc_congest_change { EM(rxrpc_conn_put_client, "PTc") \ EM(rxrpc_conn_put_service, "PTs") \ EM(rxrpc_conn_queued, "QUE") \ + EM(rxrpc_conn_reap_service, "RPs") \ E_(rxrpc_conn_seen, "SEE") #define rxrpc_client_traces \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 4b0a8288c98a..7c1cb08874d5 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -823,6 +823,19 @@ static int rxrpc_release_sock(struct sock *sk) sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2b3992c5060e..e6c2c4f56fb1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -84,6 +84,7 @@ struct rxrpc_net { unsigned int nr_client_conns; unsigned int nr_active_client_conns; bool kill_all_client_conns; + bool live; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head waiting_client_conns; @@ -265,6 +266,7 @@ struct rxrpc_local { rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; + bool service_closed; /* Service socket closed */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -824,6 +826,7 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f9624bd311c..78a154173d90 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1061,6 +1061,8 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fe575798592f..a48c817b792b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -20,7 +20,8 @@ /* * Time till a connection expires after last use (in seconds). */ -unsigned int rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_destroy_connection(struct rcu_head *); @@ -312,7 +313,7 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) { + if (n == 1) { rxnet = conn->params.local->rxnet; rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); } @@ -353,15 +354,14 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(to_delayed_work(work), struct rxrpc_net, service_conn_reaper); - unsigned long reap_older_than, earliest, idle_timestamp, now; + unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); now = jiffies; - reap_older_than = now - rxrpc_connection_expiry * HZ; - earliest = ULONG_MAX; + earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { @@ -371,15 +371,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - idle_timestamp = READ_ONCE(conn->idle_timestamp); - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), - (long)reap_older_than - (long)idle_timestamp); + if (rxnet->live) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; - if (time_after(idle_timestamp, reap_older_than)) { - if (time_before(idle_timestamp, earliest)) - earliest = idle_timestamp; - continue; + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } } /* The usage count sits at 1 whilst the object is unused on the @@ -387,6 +393,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; + trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); if (rxrpc_conn_is_client(conn)) BUG(); @@ -397,10 +404,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } write_unlock(&rxnet->conn_lock); - if (earliest != ULONG_MAX) { - _debug("reschedule reaper %ld", (long) earliest - now); + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, + rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, earliest - now); } @@ -429,7 +436,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - rxrpc_connection_expiry = 0; cancel_delayed_work(&rxnet->client_conn_reaper); rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); flush_workqueue(rxrpc_workqueue); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5..684c51d600c7 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -22,6 +22,7 @@ static __net_init int rxrpc_init_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); int ret; + rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; @@ -60,6 +61,7 @@ static __net_init int rxrpc_init_net(struct net *net) return 0; err_proc: + rxnet->live = false; return ret; } @@ -70,6 +72,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); + rxnet->live = false; rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_locals(rxnet); From f9f175778bb9e4a5fe6d46efde1b353f15fde1fa Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Fri, 24 Nov 2017 15:14:27 -0800 Subject: [PATCH 061/770] bcache: check return value of register_shrinker [ Upstream commit 6c4ca1e36cdc1a0a7a84797804b87920ccbebf51 ] register_shrinker is now __must_check, so check it to kill a warning. Caller of bch_btree_cache_alloc in super.c appropriately checks return value so this is fully plumbed through. This V2 fixes checkpatch warnings and improves the commit description, as I was too hasty getting the previous version out. Signed-off-by: Michael Lyle Reviewed-by: Vojtech Pavlik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 658c54b3b07a..1598d1e04989 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; - register_shrinker(&c->shrink); + + if (register_shrinker(&c->shrink)) + pr_warn("bcache: %s: could not register shrinker", + __func__); return 0; } From a595f190fc6be88c3942b468819378a977379fe1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:55 -0400 Subject: [PATCH 062/770] drm/amdgpu: Fix SDMA load/unload sequence on HWS disabled mode [ Upstream commit cf21654b40968609779751b34e7923180968fe5b ] Fix the SDMA load and unload sequence as suggested by HW document. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b9dbbf9cb8b0..bdabaa3399db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -369,29 +369,50 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; + unsigned long end_jiffies; uint32_t sdma_base_addr; + uint32_t data; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, - m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, - m->sdma_rlc_rb_base); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, - m->sdma_rlc_rb_base_hi); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, - m->sdma_rlc_rb_rptr_addr_lo); - - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, - m->sdma_rlc_rb_rptr_addr_hi); + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - + m->sdma_rlc_doorbell); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl); @@ -564,9 +585,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); return 0; } From 64aca9911a1097b224739e2cf060c3114e6bd59f Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 1 Nov 2017 19:21:56 -0400 Subject: [PATCH 063/770] drm/amdkfd: Fix SDMA ring buffer size calculation [ Upstream commit d12fb13f23199faa7e536acec1db49068e5a067d ] ffs function return the position of the first bit set on 1 based. (bit zero returns 1). Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 44ffd23348fc..164fa4b1f9a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -205,8 +205,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << - SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; From b15f53b46d880fae35c976d8285c09debbb760bb Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 1 Nov 2017 19:21:57 -0400 Subject: [PATCH 064/770] drm/amdkfd: Fix SDMA oversubsription handling [ Upstream commit 8c946b8988acec785bcf67088b6bd0747f36d2d3 ] SDMA only supports a fixed number of queues. HWS cannot handle oversubscription. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 03bec765b03d..f9a1a4db9be7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -184,6 +184,24 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->queue_count >= + CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + pr_err("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + pr_debug("DQM returned %d for create_queue\n", retval); + print_queue(q); + break; + case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && From 120c41af36df87401576f09498abf5696fb3d474 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Nov 2017 03:35:27 +0300 Subject: [PATCH 065/770] uapi: fix linux/kfd_ioctl.h userspace compilation errors [ Upstream commit b4d085201d86af69cbda2214c6dafc0be240ef9f ] Consistently use types provided by via to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:236:2: error: unknown type name 'uint64_t' uint64_t va_addr; /* to KFD */ /usr/include/linux/kfd_ioctl.h:237:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:238:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/linux/kfd_ioctl.h:243:2: error: unknown type name 'uint64_t' uint64_t tile_config_ptr; /usr/include/linux/kfd_ioctl.h:245:2: error: unknown type name 'uint64_t' uint64_t macro_tile_config_ptr; /usr/include/linux/kfd_ioctl.h:249:2: error: unknown type name 'uint32_t' uint32_t num_tile_configs; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t num_macro_tile_configs; /usr/include/linux/kfd_ioctl.h:255:2: error: unknown type name 'uint32_t' uint32_t gpu_id; /* to KFD */ /usr/include/linux/kfd_ioctl.h:256:2: error: unknown type name 'uint32_t' uint32_t gb_addr_config; /* from KFD */ /usr/include/linux/kfd_ioctl.h:257:2: error: unknown type name 'uint32_t' uint32_t num_banks; /* from KFD */ /usr/include/linux/kfd_ioctl.h:258:2: error: unknown type name 'uint32_t' uint32_t num_ranks; /* from KFD */ Fixes: 6a1c9510694fe ("drm/amdkfd: Adding new IOCTL for scratch memory v2") Fixes: 5d71dbc3a5886 ("drm/amdkfd: Implement image tiling mode support v2") Signed-off-by: Dmitry V. Levin Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/kfd_ioctl.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 26283fefdf5f..f7015aa12347 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -233,29 +233,29 @@ struct kfd_ioctl_wait_events_args { }; struct kfd_ioctl_set_scratch_backing_va_args { - uint64_t va_addr; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - uint64_t tile_config_ptr; + __u64 tile_config_ptr; /* to KFD: pointer to macro tile array */ - uint64_t macro_tile_config_ptr; + __u64 macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_tile_configs; + __u32 num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - uint32_t num_macro_tile_configs; + __u32 num_macro_tile_configs; - uint32_t gpu_id; /* to KFD */ - uint32_t gb_addr_config; /* from KFD */ - uint32_t num_banks; /* from KFD */ - uint32_t num_ranks; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ From dd45c5e5be071fb9fee6e5f82fed6e0bf67737d5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Nov 2017 17:35:22 +0200 Subject: [PATCH 066/770] nvme-rdma: don't complete requests before a send work request has completed [ Upstream commit 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 ] In order to guarantee that the HCA will never get an access violation (either from invalidated rkey or from iommu) when retrying a send operation we must complete a request only when both send completion and the nvme cqe has arrived. We need to set the send/recv completions flags atomically because we might have more than a single context accessing the request concurrently (one is cq irq-poll context and the other is user-polling used in IOCB_HIPRI). Only then we are safe to invalidate the rkey (if needed), unmap the host buffers, and complete the IO. Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/rdma.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6de163e6c9eb..33d4431c2b4b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -67,6 +67,9 @@ struct nvme_rdma_request { struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; + union nvme_result result; + __le16 status; + refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; int nents; @@ -1177,6 +1180,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; req->mr->need_inval = false; + refcount_set(&req->ref, 2); /* send and recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1213,8 +1217,19 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { - if (unlikely(wc->status != IB_WC_SUCCESS)) + struct nvme_rdma_qe *qe = + container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); + struct nvme_rdma_request *req = + container_of(qe, struct nvme_rdma_request, sqe); + struct request *rq = blk_mq_rq_from_pdu(req); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "SEND"); + return; + } + + if (refcount_dec_and_test(&req->ref)) + nvme_end_request(rq, req->status, req->result); } /* @@ -1359,14 +1374,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->tag == tag) - ret = 1; + req->status = cqe->status; + req->result = cqe->result; if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - nvme_end_request(rq, cqe->status, cqe->result); + if (refcount_dec_and_test(&req->ref)) { + if (rq->tag == tag) + ret = 1; + nvme_end_request(rq, req->status, req->result); + } + return ret; } From 9be97a9abe9e0a3166320f21a42dc5e0966cb5c4 Mon Sep 17 00:00:00 2001 From: zhangliping Date: Sat, 25 Nov 2017 22:02:12 +0800 Subject: [PATCH 067/770] openvswitch: fix the incorrect flow action alloc size [ Upstream commit 67c8d22a73128ff910e2287567132530abcf5b71 ] If we want to add a datapath flow, which has more than 500 vxlan outputs' action, we will get the following error reports: openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Flow action size 32832 bytes exceeds max openvswitch: netlink: Actions may not be safe on all matching packets ... ... It seems that we can simply enlarge the MAX_ACTIONS_BUFSIZE to fix it, but this is not the root cause. For example, for a vxlan output action, we need about 60 bytes for the nlattr, but after it is converted to the flow action, it only occupies 24 bytes. This means that we can still support more than 1000 vxlan output actions for a single datapath flow under the the current 32k max limitation. So even if the nla_len(attr) is larger than MAX_ACTIONS_BUFSIZE, we shouldn't report EINVAL and keep it move on, as the judgement can be done by the reserve_sfa_size. Signed-off-by: zhangliping Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index e8eb427ce6d1..0d9f6afa266c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1903,14 +1903,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -1983,12 +1980,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -2660,7 +2660,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); From 093a5cb46d05895d02c36d5be784c7a71cd0c585 Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Wed, 15 Nov 2017 08:24:46 +0000 Subject: [PATCH 068/770] drm/rockchip: dw-mipi-dsi: fix possible un-balanced runtime PM enable [ Upstream commit 517f56839f581618d24f2e67a35738a5c6cbaecb ] In the case where the bind gets deferred we would end up with a un-balanced runtime PM enable call. Fix this by simply moving the pm_runtime_enable call to the end of the bind function when all paths have succeeded. Signed-off-by: Mirza Krak Signed-off-by: Sandy Huang Link: https://patchwork.freedesktop.org/patch/msgid/1510734286-37434-1-git-send-email-mirza.krak@endian.se Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/dw-mipi-dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c index 9a20b9dc27c8..f7fc652b0027 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c @@ -1275,8 +1275,6 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, goto err_pllref; } - pm_runtime_enable(dev); - dsi->dsi_host.ops = &dw_mipi_dsi_host_ops; dsi->dsi_host.dev = dev; ret = mipi_dsi_host_register(&dsi->dsi_host); @@ -1291,6 +1289,7 @@ static int dw_mipi_dsi_bind(struct device *dev, struct device *master, } dev_set_drvdata(dev, dsi); + pm_runtime_enable(dev); return 0; err_mipi_dsi_host: From e23090a7d8f05f03cf564148472130286f5ca9bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Nov 2017 14:46:08 +0100 Subject: [PATCH 069/770] mac80211: use QoS NDP for AP probing [ Upstream commit 7b6ddeaf27eca72795ceeae2f0f347db1b5f9a30 ] When connected to a QoS/WMM AP, mac80211 should use a QoS NDP for probing it, instead of a regular non-QoS one, fix this. Change all the drivers to *not* allow QoS NDP for now, even though it looks like most of them should be OK with that. Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/st/cw1200/sta.c | 4 ++-- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/cmd.c | 5 ++-- include/net/mac80211.h | 8 ++++++- net/mac80211/mlme.c | 2 +- net/mac80211/tx.c | 29 ++++++++++++++++++++++-- 7 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index f0439f2d566b..173891b11b2d 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -1112,7 +1112,7 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp, if (!avp->assoc) return false; - skb = ieee80211_nullfunc_get(sc->hw, vif); + skb = ieee80211_nullfunc_get(sc->hw, vif, false); if (!skb) return false; diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index a52224836a2b..666b88cb2cfe 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -198,7 +198,7 @@ void __cw1200_cqm_bssloss_sm(struct cw1200_common *priv, priv->bss_loss_state++; - skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); WARN_ON(!skb); if (skb) cw1200_tx(priv->hw, NULL, skb); @@ -2266,7 +2266,7 @@ static int cw1200_upload_null(struct cw1200_common *priv) .rate = 0xFF, }; - frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif); + frame.skb = ieee80211_nullfunc_get(priv->hw, priv->vif, false); if (!frame.skb) return -ENOMEM; diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 9915d83a4a30..6d02c660b4ab 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -566,7 +566,7 @@ static int wl1251_build_null_data(struct wl1251 *wl) size = sizeof(struct wl12xx_null_data_template); ptr = NULL; } else { - skb = ieee80211_nullfunc_get(wl->hw, wl->vif); + skb = ieee80211_nullfunc_get(wl->hw, wl->vif, false); if (!skb) goto out; size = skb->len; diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 2bfc12fdc929..761cf8573a80 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1069,7 +1069,8 @@ int wl12xx_cmd_build_null_data(struct wl1271 *wl, struct wl12xx_vif *wlvif) ptr = NULL; } else { skb = ieee80211_nullfunc_get(wl->hw, - wl12xx_wlvif_to_vif(wlvif)); + wl12xx_wlvif_to_vif(wlvif), + false); if (!skb) goto out; size = skb->len; @@ -1096,7 +1097,7 @@ int wl12xx_cmd_build_klv_null_data(struct wl1271 *wl, struct sk_buff *skb = NULL; int ret = -ENOMEM; - skb = ieee80211_nullfunc_get(wl->hw, vif); + skb = ieee80211_nullfunc_get(wl->hw, vif, false); if (!skb) goto out; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885690fa39c8..4f1d2dec43ce 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4470,18 +4470,24 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, * ieee80211_nullfunc_get - retrieve a nullfunc template * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @qos_ok: QoS NDP is acceptable to the caller, this should be set + * if at all possible * * Creates a Nullfunc template which can, for example, uploaded to * hardware. The template must be updated after association so that correct * BSSID and address is used. * + * If @qos_ndp is set and the association is to an AP with QoS/WMM, the + * returned packet will be QoS NDP. + * * Note: Caller (or hardware) is responsible for setting the * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. * * Return: The nullfunc template. %NULL on error. */ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + bool qos_ok); /** * ieee80211_probereq_get - retrieve a Probe Request template diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b8e2709d8de..9115cc52ce83 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -908,7 +908,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); if (!skb) return; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 94826680cf2b..73429841f115 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4404,13 +4404,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + bool qos_ok) { struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; struct ieee80211_local *local; struct sk_buff *skb; + bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; @@ -4419,7 +4421,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, ifmgd = &sdata->u.mgd; local = sdata->local; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (qos_ok) { + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, ifmgd->bssid); + qos = sta && sta->sta.wme; + rcu_read_unlock(); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); if (!skb) return NULL; @@ -4429,6 +4441,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_TODS); + if (qos) { + __le16 qos = cpu_to_le16(7); + + BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_STYPE_NULLFUNC) != + IEEE80211_STYPE_QOS_NULLFUNC); + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC); + skb->priority = 7; + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb_put_data(skb, &qos, sizeof(qos)); + } + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); From 841211271eccb49f7f4a71442c13a09d4b6a92b0 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 14 Nov 2017 23:20:05 +0800 Subject: [PATCH 070/770] mac80211: fix the update of path metric for RANN frame [ Upstream commit fbbdad5edf0bb59786a51b94a9d006bc8c2da9a2 ] The previous path metric update from RANN frame has not considered the own link metric toward the transmitting mesh STA. Fix this. Reported-by: Michael65535 Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_hwmp.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d8bbd0d2225a..d6d3f316de4c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); From 928066e61940e3c19f85d543d15c55fcae20208f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Nov 2017 16:20:52 -0500 Subject: [PATCH 071/770] btrfs: fix deadlock when writing out space cache [ Upstream commit b77000ed558daa3bef0899d29bf171b8c9b5e6a8 ] If we fail to prepare our pages for whatever reason (out of memory in our case) we need to make sure to drop the block_group->data_rwsem, otherwise hilarity ensues. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba [ add label and use existing unlocking code ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cdc9f4015ec3..4426d1c73e50 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1264,7 +1264,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Lock all pages first so we can lock the extent safely. */ ret = io_ctl_prepare_pages(io_ctl, inode, 0); if (ret) - goto out; + goto out_unlock; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state); @@ -1358,6 +1358,7 @@ out_nospc_locked: out_nospc: cleanup_write_cache_enospc(inode, io_ctl, &cached_state); +out_unlock: if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); From d4c72a410f3bbcddac69f666024934a492dd8c38 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:34 +0800 Subject: [PATCH 072/770] sctp: only allow the asoc reset when the asoc outq is empty [ Upstream commit 5c6144a0eb5366ae07fc5059301b139338f39bbd ] As it says in rfc6525#section5.1.4, before sending the request, C2: The sender has either no outstanding TSNs or considers all outstanding TSNs abandoned. Prior to this patch, it tried to consider all outstanding TSNs abandoned by dropping all chunks in all outqs with sctp_outq_free (even including sacked, retransmit and transmitted queues) when doing this reset, which is too aggressive. To make it work gently, this patch will only allow the asoc reset when the sender has no outstanding TSNs by checking if unsent, transmitted and retransmit are all empty with sctp_outq_is_empty before sending and processing the request. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 724adf2786a2..7710133238ea 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -224,6 +224,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) if (asoc->strreset_outstanding) return -EINPROGRESS; + if (!sctp_outq_is_empty(&asoc->outqueue)) + return -EAGAIN; + chunk = sctp_make_strreset_tsnreq(asoc); if (!chunk) return -ENOMEM; @@ -544,6 +547,12 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( } goto err; } + + if (!sctp_outq_is_empty(&asoc->outqueue)) { + result = SCTP_STRRESET_IN_PROGRESS; + goto err; + } + asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) From 55f3de731c0b21ad37f80c7819a925278356ddf1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:35 +0800 Subject: [PATCH 073/770] sctp: avoid flushing unsent queue when doing asoc reset [ Upstream commit 159f2a7456c6ae95c1e1a58e8b8ec65ef12d51cf ] Now when doing asoc reset, it cleans up sacked and abandoned queues by calling sctp_outq_free where it also cleans up unsent, retransmit and transmitted queues. It's safe for the sender of response, as these 3 queues are empty at that time. But when the receiver of response is doing the reset, the users may already enqueue some chunks into unsent during the time waiting the response, and these chunks should not be flushed. To void the chunks in it would be removed, it moves the queue into a temp list, then gets it back after sctp_outq_free is done. The patch also fixes some incorrect comments in sctp_process_strreset_tsnreq. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 7710133238ea..aa629654d27e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -563,9 +563,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( goto out; } - /* G3: The same processing as though a SACK chunk with no gap report - * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were - * received MUST be performed. + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); @@ -580,10 +581,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); - /* G4: The same processing as though a FWD-TSN chunk (as defined in - * [RFC3758]) with all streams affected and a new cumulative TSN - * ACK of the Receiver's Next TSN minus 1 were received MUST be - * performed. + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. */ sctp_outq_free(&asoc->outqueue); @@ -844,6 +844,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) { __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( &asoc->peer.tsn_map); + LIST_HEAD(temp); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); @@ -852,7 +853,13 @@ struct sctp_chunk *sctp_process_strreset_resp( SCTP_TSN_MAP_INITIAL, stsn, GFP_ATOMIC); + /* Clean up sacked and abandoned queues only. As the + * out_chunk_list may not be empty, splice it to temp, + * then get it back after sctp_outq_free is done. + */ + list_splice_init(&asoc->outqueue.out_chunk_list, &temp); sctp_outq_free(&asoc->outqueue); + list_splice_init(&temp, &asoc->outqueue.out_chunk_list); asoc->next_tsn = rtsn; asoc->ctsn_ack_point = asoc->next_tsn - 1; From 14a4e9f6bde8cdba1c0896c0af31304a934e0427 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 25 Nov 2017 21:05:36 +0800 Subject: [PATCH 074/770] sctp: set sender next_tsn for the old result with ctsn_ack_point plus 1 [ Upstream commit 52a395896a051a3d5c34fba67c324f69ec5e67c6 ] When doing asoc reset, if the sender of the response has already sent some chunk and increased asoc->next_tsn before the duplicate request comes, the response will use the old result with an incorrect sender next_tsn. Better than asoc->next_tsn, asoc->ctsn_ack_point can't be changed after the sender of the response has performed the asoc reset and before the peer has confirmed it, and it's value is still asoc->next_tsn original value minus 1. This patch sets sender next_tsn for the old result with ctsn_ack_point plus 1 when processing the duplicate request, to make sure the sender next_tsn value peer gets will be always right. Fixes: 692787cef651 ("sctp: implement receiver-side procedures for the SSN/TSN Reset Request Parameter") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index aa629654d27e..9ea6057ed28b 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -541,7 +541,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) { - next_tsn = asoc->next_tsn; + next_tsn = asoc->ctsn_ack_point + 1; init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; } From 9a447435afabf52b16d1295d0f18bef5ab3abb65 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:20:15 -0400 Subject: [PATCH 075/770] reiserfs: remove unneeded i_version bump [ Upstream commit 9f97df50c52c2887432debb6238f4e43567386a5 ] The i_version field in reiserfs is not initialized and is only ever updated here. Nothing ever views it, so just remove it. Signed-off-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5464ec517702..4885c7b6e44f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2591,7 +2591,6 @@ out: return err; if (inode->i_size < off + len - towrite) i_size_write(inode, off + len - towrite); - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; From 2f1b5183aee337be1c2444c7f6be189ddf14d068 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:55:05 -0800 Subject: [PATCH 076/770] KVM: X86: Fix softlockup when get the current kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e70b57a6ce4e8b92a56a615ae79bdb2bd66035e7 ] watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [qemu-system-x86:10185] CPU: 6 PID: 10185 Comm: qemu-system-x86 Tainted: G OE 4.14.0-rc4+ #4 RIP: 0010:kvm_get_time_scale+0x4e/0xa0 [kvm] Call Trace: get_time_ref_counter+0x5a/0x80 [kvm] kvm_hv_process_stimers+0x120/0x5f0 [kvm] kvm_arch_vcpu_ioctl_run+0x4b4/0x1690 [kvm] kvm_vcpu_ioctl+0x33a/0x620 [kvm] do_vfs_ioctl+0xa1/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1e/0xa9 This can be reproduced when running kvm-unit-tests/hyperv_stimer.flat and cpu-hotplug stress simultaneously. __this_cpu_read(cpu_tsc_khz) returns 0 (set in kvmclock_cpu_down_prep()) when the pCPU is unhotplug which results in kvm_get_time_scale() gets into an infinite loop. This patch fixes it by treating the unhotplug pCPU as not using master clock. Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b25326ad82a1..2dd19f234245 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1795,10 +1795,13 @@ u64 get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); From 2ce1bbfb5e67563bc01c0ba025fefc3b4b1fc391 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Nov 2017 14:52:21 -0800 Subject: [PATCH 077/770] KVM: VMX: Fix rflags cache during vCPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c37c28730bb031cc8a44a130c2555c0f3efbe2d0 ] Reported by syzkaller: *** Guest State *** CR0: actual=0x0000000080010031, shadow=0x0000000060000010, gh_mask=fffffffffffffff7 CR4: actual=0x0000000000002061, shadow=0x0000000000000000, gh_mask=ffffffffffffe8f1 CR3 = 0x000000002081e000 RSP = 0x000000000000fffa RIP = 0x0000000000000000 RFLAGS=0x00023000 DR7 = 0x00000000000000 ^^^^^^^^^^ ------------[ cut here ]------------ WARNING: CPU: 6 PID: 24431 at /home/kernel/linux/arch/x86/kvm//x86.c:7302 kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] CPU: 6 PID: 24431 Comm: reprotest Tainted: G W OE 4.14.0+ #26 RIP: 0010:kvm_arch_vcpu_ioctl_run+0x651/0x2ea0 [kvm] RSP: 0018:ffff880291d179e0 EFLAGS: 00010202 Call Trace: kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The failed vmentry is triggered by the following beautified testcase: #include #include #include #include #include #include #include long r[5]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); struct kvm_guest_debug debug = { .control = 0xf0403, .arch = { .debugreg[6] = 0x2, .debugreg[7] = 0x2 } }; ioctl(r[4], KVM_SET_GUEST_DEBUG, &debug); ioctl(r[4], KVM_RUN, 0); } which testcase tries to setup the processor specific debug registers and configure vCPU for handling guest debug events through KVM_SET_GUEST_DEBUG. The KVM_SET_GUEST_DEBUG ioctl will get and set rflags in order to set TF bit if single step is needed. All regs' caches are reset to avail and GUEST_RFLAGS vmcs field is reset to 0x2 during vCPU reset. However, the cache of rflags is not reset during vCPU reset. The function vmx_get_rflags() returns an unreset rflags cache value since the cache is marked avail, it is 0 after boot. Vmentry fails if the rflags reserved bit 1 is 0. This patch fixes it by resetting both the GUEST_RFLAGS vmcs field and its cache to 0x2 during vCPU reset. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d073f91bc089..5242fac8165b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5608,7 +5608,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); From 809981870b6670d5ac2338325865900d88acf2b4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Nov 2017 14:35:40 -0700 Subject: [PATCH 078/770] Btrfs: fix list_add corruption and soft lockups in fsync [ Upstream commit ebb70442cdd4872260c2415929c456be3562da82 ] Xfstests btrfs/146 revealed this corruption, [ 58.138831] Buffer I/O error on dev dm-0, logical block 2621424, async page read [ 58.151233] BTRFS error (device sdf): bdev /dev/mapper/error-test errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 [ 58.152403] list_add corruption. prev->next should be next (ffff88005e6775d8), but was ffffc9000189be88. (prev=ffffc9000189be88). [ 58.153518] ------------[ cut here ]------------ [ 58.153892] WARNING: CPU: 1 PID: 1287 at lib/list_debug.c:31 __list_add_valid+0x169/0x1f0 ... [ 58.157379] RIP: 0010:__list_add_valid+0x169/0x1f0 ... [ 58.161956] Call Trace: [ 58.162264] btrfs_log_inode_parent+0x5bd/0xfb0 [btrfs] [ 58.163583] btrfs_log_dentry_safe+0x60/0x80 [btrfs] [ 58.164003] btrfs_sync_file+0x4c2/0x6f0 [btrfs] [ 58.164393] vfs_fsync_range+0x5f/0xd0 [ 58.164898] do_fsync+0x5a/0x90 [ 58.165170] SyS_fsync+0x10/0x20 [ 58.165395] entry_SYSCALL_64_fastpath+0x1f/0xbe ... It turns out that we could record btrfs_log_ctx:io_err in log_one_extents when IO fails, but make log_one_extents() return '0' instead of -EIO, so the IO error is not acknowledged by the callers, i.e. btrfs_log_inode_parent(), which would remove btrfs_log_ctx:list from list head 'root->log_ctxs'. Since btrfs_log_ctx is allocated from stack memory, it'd get freed with a object alive on the list. then a future list_add will throw the above warning. This returns the correct error in the above case. Jeff also reported this while testing against his fsync error patch set[1]. [1]: https://www.spinics.net/lists/linux-btrfs/msg65308.html "btrfs list corruption and soft lockups while testing writeback error handling" Fixes: 8407f553268a4611f254 ("Btrfs: fix data corruption after fast fsync and writeback error") Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 5 +++-- fs/btrfs/tree-log.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index aafcc785f840..d564a7049d7f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2056,6 +2056,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); + btrfs_init_log_ctx(&ctx, inode); + /* * We write the dirty pages in the range and wait until they complete * out of the ->i_mutex. If so, we can flush the dirty pages by @@ -2202,8 +2204,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx, inode); - ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ @@ -2261,6 +2261,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = btrfs_end_transaction(trans); } out: + ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); if (!ret) ret = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c800d067fcbf..d3002842d7f6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4100,7 +4100,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (ordered_io_err) { ctx->io_err = -EIO; - return 0; + return ctx->io_err; } btrfs_init_map_token(&token); From 40ba283e2602d319d00d4f6539b5113eb8d25d24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 24 Nov 2017 22:39:01 +0100 Subject: [PATCH 079/770] KVM: Let KVM_SET_SIGNAL_MASK work as advertised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f ] KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that "any unblocked signal received [...] will cause KVM_RUN to return with -EINTR" and that "the signal will only be delivered if not blocked by the original signal mask". This, however, is only true, when the calling task has a signal handler registered for a signal. If not, signal evaluation is short-circuited for SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN returning or the whole process is terminated. Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar to that in do_sigtimedwait() to avoid short-circuiting of signals. Signed-off-by: Jan H. Schönherr Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 7 ++----- arch/powerpc/kvm/powerpc.c | 7 ++----- arch/s390/kvm/kvm-s390.c | 7 ++----- arch/x86/kvm/x86.c | 7 ++----- include/linux/kvm_host.h | 3 +++ virt/kvm/arm/arm.c | 8 +++----- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d535edc01434..75fdeaa8c62f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; - sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) @@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ee279c7f4802..2b02d51d14d8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; - sigset_t sigsaved; if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; @@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (run->immediate_exit) r = -EINTR; else r = kvmppc_vcpu_run(run, vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6c88cb18ace2..6e3d80b2048e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3378,7 +3378,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; - sigset_t sigsaved; if (kvm_run->immediate_exit) return -EINTR; @@ -3388,8 +3387,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); @@ -3423,8 +3421,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; return rc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2dd19f234245..8c28023a43b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7245,12 +7245,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct fpu *fpu = ¤t->thread.fpu; int r; - sigset_t sigsaved; fpu__initialize(fpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { @@ -7293,8 +7291,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: post_kvm_run_save(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6882538eda32..5a8019befafd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -714,6 +714,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_sigset_activate(struct kvm_vcpu *vcpu); +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 95cba0799828..9a07ee94a230 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -612,7 +612,6 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - sigset_t sigsaved; if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; @@ -630,8 +629,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (run->immediate_exit) return -EINTR; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); ret = 1; run->exit_reason = KVM_EXIT_UNKNOWN; @@ -753,8 +751,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_update_run(vcpu); } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); + return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2447d7c017e7..8401774f5aeb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2073,6 +2073,29 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); +void kvm_sigset_activate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + /* + * This does a lockless modification of ->real_blocked, which is fine + * because, only current can change ->real_blocked and all readers of + * ->real_blocked don't care as long ->real_blocked is always a subset + * of ->blocked. + */ + sigprocmask(SIG_SETMASK, &vcpu->sigset, ¤t->real_blocked); +} + +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->sigset_active) + return; + + sigprocmask(SIG_SETMASK, ¤t->real_blocked, NULL); + sigemptyset(¤t->real_blocked); +} + static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { unsigned int old, val, grow; From 1eccdbd4836a417b2b4e8f1f56db97c541d58e57 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Nov 2017 12:21:07 -0800 Subject: [PATCH 080/770] xfs: always free inline data before resetting inode fork during ifree [ Upstream commit 98c4f78dcdd8cec112d1cbc5e9a792ee6e5ab7a6 ] In xfs_ifree, we reset the data/attr forks to extents format without bothering to free any inline data buffer that might still be around after all the blocks have been truncated off the file. Prior to commit 43518812d2 ("xfs: remove support for inlining data/extents into the inode fork") nobody noticed because the leftover inline data after truncation was small enough to fit inside the inline buffer inside the fork itself. However, now that we've removed the inline buffer, we /always/ have to free the inline data buffer or else we leak them like crazy. This test was found by turning on kmemleak for generic/001 or generic/388. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 63350906961a..cb4833d06467 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2421,6 +2421,24 @@ retry: return 0; } +/* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + /* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have @@ -2458,6 +2476,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; From 30ac846da3600e299cd470ade18e1e859b6a41f8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Nov 2017 20:53:02 -0800 Subject: [PATCH 081/770] xfs: log recovery should replay deferred ops in order [ Upstream commit 509955823cc9cc225c05673b1b83d70ca70c5c60 ] As part of testing log recovery with dm_log_writes, Amir Goldstein discovered an error in the deferred ops recovery that lead to corruption of the filesystem metadata if a reflink+rmap filesystem happened to shut down midway through a CoW remap: "This is what happens [after failed log recovery]: "Phase 1 - find and verify superblock... "Phase 2 - using internal log " - zero log... " - scan filesystem freespace and inode maps... " - found root inode chunk "Phase 3 - for each AG... " - scan (but don't clear) agi unlinked lists... " - process known inodes and perform inode discovery... " - agno = 0 "data fork in regular inode 134 claims CoW block 376 "correcting nextents for inode 134 "bad data fork in inode 134 "would have cleared inode 134" Hou Tao dissected the log contents of exactly such a crash: "According to the implementation of xfs_defer_finish(), these ops should be completed in the following sequence: "Have been done: "(1) CUI: Oper (160) "(2) BUI: Oper (161) "(3) CUD: Oper (194), for CUI Oper (160) "(4) RUI A: Oper (197), free rmap [0x155, 2, -9] "Should be done: "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI A "(8) RUD: for RUI B "Actually be done by xlog_recover_process_intents() "(5) BUD: for BUI Oper (161) "(6) RUI B: add rmap [0x155, 2, 137] "(7) RUD: for RUI B "(8) RUD: for RUI A "So the rmap entry [0x155, 2, -9] for COW should be freed firstly, then a new rmap entry [0x155, 2, 137] will be added. However, as we can see from the log record in post_mount.log (generated after umount) and the trace print, the new rmap entry [0x155, 2, 137] are added firstly, then the rmap entry [0x155, 2, -9] are freed." When reconstructing the internal log state from the log items found on disk, it's required that deferred ops replay in exactly the same order that they would have had the filesystem not gone down. However, replaying unfinished deferred ops can create /more/ deferred ops. These new deferred ops are finished in the wrong order. This causes fs corruption and replay crashes, so let's create a single defer_ops to handle the subsequent ops created during replay, then use one single transaction at the end of log recovery to ensure that everything is replayed in the same order as they're supposed to be. Reported-by: Amir Goldstein Analyzed-by: Hou Tao Reviewed-by: Christoph Hellwig Tested-by: Amir Goldstein Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_item.c | 23 ++++-------- fs/xfs/xfs_bmap_item.h | 3 +- fs/xfs/xfs_log_recover.c | 75 ++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_refcount_item.c | 21 ++++------- fs/xfs/xfs_refcount_item.h | 3 +- 5 files changed, 85 insertions(+), 40 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4..e5fb008d75e8 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init( int xfs_bui_recover( struct xfs_mount *mp, - struct xfs_bui_log_item *buip) + struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops) { int error = 0; unsigned int bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover( xfs_exntst_t state; struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec irec; - xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover( if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover( break; default: error = -EFSCORRUPTED; - goto err_dfops; + goto err_inode; } xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, + error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); if (error) - goto err_dfops; + goto err_inode; if (count > 0) { ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); if (error) - goto err_dfops; + goto err_inode; } - /* Finish transaction, free inodes. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto err_dfops; - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover( return error; -err_dfops: - xfs_defer_cancel(&dfops); err_inode: xfs_trans_cancel(tp); if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3c..24b354a2c836 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d6e049fdd977..eaf29646c28f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" @@ -4714,7 +4715,8 @@ STATIC int xlog_recover_process_cui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_cui_log_item *cuip; int error; @@ -4727,7 +4729,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_cui_recover(mp, cuip); + error = xfs_cui_recover(mp, cuip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4754,7 +4756,8 @@ STATIC int xlog_recover_process_bui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_bui_log_item *buip; int error; @@ -4767,7 +4770,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_bui_recover(mp, buip); + error = xfs_bui_recover(mp, buip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4803,6 +4806,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) } } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops) +{ + struct xfs_trans *tp; + int64_t freeblks; + uint resblks; + int error; + + /* + * We're finishing the defer_ops that accumulated as a result of + * recovering unfinished intent items during log recovery. We + * reserve an itruncate transaction because it is the largest + * permanent transaction type. Since we're the only user of the fs + * right now, take 93% (15/16) of the available free blocks. Use + * weird math to avoid a 64-bit division. + */ + freeblks = percpu_counter_sum(&mp->m_fdblocks); + if (freeblks <= 0) + return -ENOSPC; + resblks = min_t(int64_t, UINT_MAX, freeblks); + resblks = (resblks * 15) >> 4; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + error = xfs_defer_finish(&tp, dfops); + if (error) + goto out_cancel; + + return xfs_trans_commit(tp); + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -4823,10 +4866,12 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_log_item *lip; - int error = 0; + struct xfs_defer_ops dfops; struct xfs_ail_cursor cur; + struct xfs_log_item *lip; struct xfs_ail *ailp; + xfs_fsblock_t firstfsb; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif @@ -4837,6 +4882,7 @@ xlog_recover_process_intents( #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif + xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4857,6 +4903,12 @@ xlog_recover_process_intents( */ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + /* + * NOTE: If your intent processing routine can create more + * deferred ops, you /must/ attach them to the dfops in this + * routine or else those subsequent intents will get + * replayed in the wrong order! + */ switch (lip->li_type) { case XFS_LI_EFI: error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4865,10 +4917,12 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip); + error = xlog_recover_process_cui(log->l_mp, ailp, lip, + &dfops); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip); + error = xlog_recover_process_bui(log->l_mp, ailp, lip, + &dfops); break; } if (error) @@ -4878,6 +4932,11 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); + if (error) + xfs_defer_cancel(&dfops); + else + error = xlog_finish_defer_ops(log->l_mp, &dfops); + return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255..3a55d6fc271b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init( int xfs_cui_recover( struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops) { int i; int error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; enum xfs_refcount_intent_type type; - xfs_fsblock_t firstfsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; struct xfs_bmbt_irec irec; - struct xfs_defer_ops dfops; bool requeue_only = false; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover( return error; cudp = xfs_trans_get_cud(tp, cuip); - xfs_defer_init(&dfops, &firstfsb); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - &dfops, type, refc->pe_startblock, refc->pe_len, + dfops, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_increase_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_decrease_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_ALLOC_COW: error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: error = xfs_refcount_free_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; @@ -525,17 +523,12 @@ xfs_cui_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64b..0e5327349a13 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops); #endif /* __XFS_REFCOUNT_ITEM_H__ */ From 53c045c6d07d78bf0d547352df184c250cc29fe3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Nov 2017 17:52:24 +0000 Subject: [PATCH 082/770] i2c: i2c-boardinfo: fix memory leaks on devinfo [ Upstream commit 66a7c84d677e8e4a5a2ef4afdb9bd52e1399a866 ] Currently when an error occurs devinfo is still allocated but is unused when the error exit paths break out of the for-loop. Fix this by kfree'ing devinfo to avoid the leak. Detected by CoverityScan, CID#1416590 ("Resource Leak") Fixes: 4124c4eba402 ("i2c: allow attaching IRQ resources to i2c_board_info") Fixes: 0daaf99d8424 ("i2c: copy device properties when using i2c_register_board_info()") Signed-off-by: Colin Ian King Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-boardinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 31186ead5a40..509a6007cdf6 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -86,6 +86,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig property_entries_dup(info->properties); if (IS_ERR(devinfo->board_info.properties)) { status = PTR_ERR(devinfo->board_info.properties); + kfree(devinfo); break; } } @@ -98,6 +99,7 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig GFP_KERNEL); if (!devinfo->board_info.resources) { status = -ENOMEM; + kfree(devinfo); break; } } From 0ffb252208a288589251ed0ada38424ccde217d5 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Thu, 23 Nov 2017 15:18:35 +0100 Subject: [PATCH 083/770] xen-netfront: remove warning when unloading module [ Upstream commit 5b5971df3bc2775107ddad164018a8a8db633b81 ] v2: * Replace busy wait with wait_event()/wake_up_all() * Cannot garantee that at the time xennet_remove is called, the xen_netback state will not be XenbusStateClosed, so added a condition for that * There's a small chance for the xen_netback state is XenbusStateUnknown by the time the xen_netfront switches to Closed, so added a condition for that. When unloading module xen_netfront from guest, dmesg would output warning messages like below: [ 105.236836] xen:grant_table: WARNING: g.e. 0x903 still in use! [ 105.236839] deferring g.e. 0x903 (pfn 0x35805) This problem relies on netfront and netback being out of sync. By the time netfront revokes the g.e.'s netback didn't have enough time to free all of them, hence displaying the warnings on dmesg. The trick here is to make netfront to wait until netback frees all the g.e.'s and only then continue to cleanup for the module removal, and this is done by manipulating both device states. Signed-off-by: Eduardo Otubo Acked-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8b8689c6d887..391432e2725d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,6 +87,8 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) +static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); + struct netfront_stats { u64 packets; u64 bytes; @@ -2021,10 +2023,12 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: + wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: + wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2130,6 +2134,20 @@ static int xennet_remove(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { + xenbus_switch_state(dev, XenbusStateClosing); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosing); + + xenbus_switch_state(dev, XenbusStateClosed); + wait_event(module_unload_q, + xenbus_read_driver_state(dev->otherend) == + XenbusStateClosed || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); + } + xennet_disconnect_backend(info); unregister_netdev(info->netdev); From c900ee9118d7cd0d1e5eba6727dd6c3c37a46e7f Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 10 Aug 2017 10:53:53 +0200 Subject: [PATCH 084/770] auxdisplay: img-ascii-lcd: Only build on archs that have IOMEM [ Upstream commit 141cbfba1d0502006463aa80f57c64086226af1a ] This avoids the MODPOST error: ERROR: "devm_ioremap_resource" [drivers/auxdisplay/img-ascii-lcd.ko] undefined! Signed-off-by: Thomas Meyer Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index d7d21118d3e0..2c2ed9cf8796 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -136,6 +136,7 @@ config CFAG12864B_RATE config IMG_ASCII_LCD tristate "Imagination Technologies ASCII LCD Display" + depends on HAS_IOMEM default y if MIPS_MALTA || MIPS_SEAD3 select SYSCON help From 5f71ff51063177085e7dc2eb33913a2a222700ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:12 -0400 Subject: [PATCH 085/770] nfsd: CLOSE SHOULD return the invalid special stateid for NFSv4.x (x>0) [ Upstream commit fb500a7cfee7f2f447d2bbf30cb59629feab6ac1 ] Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a439a70177a4..61f38346ce9d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -63,6 +63,9 @@ static const stateid_t zero_stateid = { static const stateid_t currentstateid = { .si_generation = 1, }; +static const stateid_t close_stateid = { + .si_generation = 0xffffffffU, +}; static u64 current_sessionid = 1; @@ -5411,6 +5414,11 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + /* See RFC5661 sectionm 18.2.4 */ + if (stp->st_stid.sc_client->cl_minorversion) + memcpy(&close->cl_stateid, &close_stateid, + sizeof(close->cl_stateid)); + /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: From 3bd364d156b13a7f5ba6fd5892b1a4439aa1666b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Nov 2017 08:00:15 -0400 Subject: [PATCH 086/770] nfsd: Ensure we check stateid validity in the seqid operation checks [ Upstream commit 9271d7e509c1bfc0b9a418caec29ec8d1ac38270 ] After taking the stateid st_mutex, we want to know that the stateid still represents valid state before performing any non-idempotent actions. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 61f38346ce9d..15343dd6e415 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5180,15 +5180,9 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID - || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) - /* - * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step, and - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; - mutex_lock(&stp->st_mutex); + status = nfsd4_lock_ol_stateid(stp); + if (status != nfs_ok) + return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); From e0849eb97066b741c5dacf96349741fad983b7cd Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 6 Nov 2017 16:22:48 +0300 Subject: [PATCH 087/770] grace: replace BUG_ON by WARN_ONCE in exit_net hook [ Upstream commit b872285751c1af010e12d02bce7069e2061a58ca ] Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 420d3a0ab258..1bd659938646 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -104,7 +104,9 @@ grace_exit_net(struct net *net) { struct list_head *grace_list = net_generic(net, grace_net_id); - BUG_ON(!list_empty(grace_list)); + WARN_ONCE(!list_empty(grace_list), + "net %x %s: grace_list is not empty\n", + net->ns.inum, __func__); } static struct pernet_operations grace_net_ops = { From 631db7f3895bd4138a7fb0c02f0db1750c79ee23 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 9 Nov 2017 13:41:10 -0500 Subject: [PATCH 088/770] nfsd: check for use of the closed special stateid [ Upstream commit ae254dac721d44c0bfebe2795df87459e2e88219 ] Prevent the use of the closed (invalid) special stateid by clients. Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 15343dd6e415..2f80340a1800 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -72,6 +72,7 @@ static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) +#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t))) /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); @@ -4869,7 +4870,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return status; /* Client debugging aid. */ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { @@ -4927,7 +4929,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, else if (typemask & NFS4_DELEG_STID) typemask |= NFS4_REVOKED_DELEG_STID; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || + CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { From 156c80d4e197be386103a0ead001d14c4492ed3c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Nov 2017 10:19:26 +0300 Subject: [PATCH 089/770] race of lockd inetaddr notifiers vs nlmsvc_rqst change [ Upstream commit 6b18dd1c03e07262ea0866084856b2a3c5ba8d09 ] lockd_inet[6]addr_event use nlmsvc_rqst without taken nlmsvc_mutex, nlmsvc_rqst can be changed during execution of notifiers and crash the host. Patch enables access to nlmsvc_rqst only when it was correctly initialized and delays its cleanup until notifiers are no longer in use. Note that nlmsvc_rqst can be temporally set to ERR_PTR, so the "if (nlmsvc_rqst)" check in notifiers is insufficient on its own. Signed-off-by: Vasily Averin Tested-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 45e96549ebd2..809cbccbad28 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,6 +57,9 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); + unsigned int lockd_net_id; /* @@ -292,7 +295,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -303,6 +307,8 @@ static int lockd_inetaddr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -319,7 +325,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; if (nlmsvc_rqst) { @@ -331,6 +338,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin6); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -347,10 +356,12 @@ static void lockd_unregister_notifiers(void) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif + wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); } static void lockd_svc_exit_thread(void) { + atomic_dec(&nlm_ntf_refcnt); lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -375,6 +386,7 @@ static int lockd_start_svc(struct svc_serv *serv) goto out_rqst; } + atomic_inc(&nlm_ntf_refcnt); svc_sock_update_bufs(serv); serv->sv_maxconn = nlm_max_connections; From 02cfbaa6cd42cb7bb041519ffd8cd7f3e5a16863 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 13 Nov 2017 07:25:40 +0300 Subject: [PATCH 090/770] lockd: fix "list_add double add" caused by legacy signal interface [ Upstream commit 81833de1a46edce9ca20cfe079872ac1c20ef359 ] restart_grace() uses hardcoded init_net. It can cause to "list_add double add" in following scenario: 1) nfsd and lockd was started in several net namespaces 2) nfsd in init_net was stopped (lockd was not stopped because it have users from another net namespaces) 3) lockd got signal, called restart_grace() -> set_grace_period() and enabled lock_manager in hardcoded init_net. 4) nfsd in init_net is started again, its lockd_up() calls set_grace_period() and tries to add lock_manager into init_net 2nd time. Jeff Layton suggest: "Make it safe to call locks_start_grace multiple times on the same lock_manager. If it's already on the global grace_list, then don't try to add it again. (But we don't intentionally add twice, so for now we WARN about that case.) With this change, we also need to ensure that the nfsd4 lock manager initializes the list before we call locks_start_grace. While we're at it, move the rest of the nfsd_net initialization into nfs4_state_create_net. I see no reason to have it spread over two functions like it is today." Suggested patch was updated to generate warning in described situation. Suggested-by: Jeff Layton Signed-off-by: Vasily Averin Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs_common/grace.c | 6 +++++- fs/nfsd/nfs4state.c | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 1bd659938646..3b13fb3b0553 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -30,7 +30,11 @@ locks_start_grace(struct net *net, struct lock_manager *lm) struct list_head *grace_list = net_generic(net, grace_net_id); spin_lock(&grace_lock); - list_add(&lm->list, grace_list); + if (list_empty(&lm->list)) + list_add(&lm->list, grace_list); + else + WARN(1, "double list_add attempt detected in net %x %s\n", + net->ns.inum, (net == &init_net) ? "(init_net)" : ""); spin_unlock(&grace_lock); } EXPORT_SYMBOL_GPL(locks_start_grace); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2f80340a1800..d89e6ccd33ba 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7021,6 +7021,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; + nn->boot_time = get_seconds(); + nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; + INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); @@ -7078,9 +7082,6 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nn->boot_time = get_seconds(); - nn->grace_ended = false; - nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", From 1271aeb0027cad65ae21737d9b0c60440dae9638 Mon Sep 17 00:00:00 2001 From: Robert Lippert Date: Mon, 27 Nov 2017 15:51:55 -0800 Subject: [PATCH 091/770] hwmon: (pmbus) Use 64bit math for DIRECT format values [ Upstream commit bd467e4eababe4c04272c1e646f066db02734c79 ] Power values in the 100s of watt range can easily blow past 32bit math limits when processing everything in microwatts. Use 64bit math instead to avoid these issues on common 32bit ARM BMC platforms. Fixes: 442aba78728e ("hwmon: PMBus device driver") Signed-off-by: Robert Lippert Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 52a58b8b6e1b..a139940cd991 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -499,8 +500,8 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, static long pmbus_reg2data_direct(struct pmbus_data *data, struct pmbus_sensor *sensor) { - long val = (s16) sensor->data; - long m, b, R; + s64 b, val = (s16)sensor->data; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -528,11 +529,12 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val = div_s64(val + 5LL, 10L); /* round closest */ R++; } - return (val - b) / m; + val = div_s64(val - b, m); + return clamp_val(val, LONG_MIN, LONG_MAX); } /* @@ -656,7 +658,8 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, static u16 pmbus_data2reg_direct(struct pmbus_data *data, struct pmbus_sensor *sensor, long val) { - long m, b, R; + s64 b, val64 = val; + s32 m, R; m = data->info->m[sensor->class]; b = data->info->b[sensor->class]; @@ -673,18 +676,18 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } - val = val * m + b; + val64 = val64 * m + b; while (R > 0) { - val *= 10; + val64 *= 10; R--; } while (R < 0) { - val = DIV_ROUND_CLOSEST(val, 10); + val64 = div_s64(val64 + 5LL, 10L); /* round closest */ R++; } - return val; + return (u16)clamp_val(val64, S16_MIN, S16_MAX); } static u16 pmbus_data2reg_vid(struct pmbus_data *data, From 606592f53b877dc77ec1356ea82ea1d7bb6e8580 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Nov 2017 23:01:44 +0800 Subject: [PATCH 092/770] quota: propagate error from __dquot_initialize [ Upstream commit 1a6152d36dee08da2be2a3030dceb45ef680460a ] In commit 6184fc0b8dd7 ("quota: Propagate error from ->acquire_dquot()"), we have propagated error from __dquot_initialize to caller, but we forgot to handle such error in add_dquot_ref(), so, currently, during quota accounting information initialization flow, if we failed for some of inodes, we just ignore such error, and do account for others, which is not a good implementation. In this patch, we choose to let user be aware of such error, so after turning on quota successfully, we can make sure all inodes disk usage can be accounted, which will be more reasonable. Suggested-by: Jan Kara Signed-off-by: Chao Yu Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9f78b5015f2e..8406be97a518 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -934,12 +934,13 @@ static int dqinit_needed(struct inode *inode, int type) } /* This routine is guarded by s_umount semaphore */ -static void add_dquot_ref(struct super_block *sb, int type) +static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif + int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { @@ -959,7 +960,11 @@ static void add_dquot_ref(struct super_block *sb, int type) reserved = 1; #endif iput(old_inode); - __dquot_initialize(inode, type); + err = __dquot_initialize(inode, type); + if (err) { + iput(inode); + goto out; + } /* * We hold a reference to 'inode' so it couldn't have been @@ -974,7 +979,7 @@ static void add_dquot_ref(struct super_block *sb, int type) } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); - +out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " @@ -982,6 +987,7 @@ static void add_dquot_ref(struct super_block *sb, int type) "Please run quotacheck(8)"); } #endif + return err; } /* @@ -2372,10 +2378,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); - add_dquot_ref(sb, type); - - return 0; + error = add_dquot_ref(sb, type); + if (error) + dquot_disable(sb, type, flags); + return error; out_file_init: dqopt->files[type] = NULL; iput(inode); From 5e60a297e78dd7b15679751b5a28428a633161f8 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:19:48 +0100 Subject: [PATCH 093/770] net: mvpp2: fix the txq_init error path [ Upstream commit ba2d8d887d962c2f790e6dc01b2fd25b4608720b ] When an allocation in the txq_init path fails, the allocated buffers end-up being freed twice: in the txq_init error path, and in txq_deinit. This lead to issues as txq_deinit would work on already freed memory regions: kernel BUG at mm/slub.c:3915! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP This patch fixes this by removing the txq_init own error path, as the txq_deinit function is always called on errors. This was introduced by TSO as way more buffers are allocated. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index d147dc7d0f77..1dd3a1264a53 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5597,7 +5597,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, sizeof(*txq_pcpu->buffs), GFP_KERNEL); if (!txq_pcpu->buffs) - goto cleanup; + return -ENOMEM; txq_pcpu->count = 0; txq_pcpu->reserved_num = 0; @@ -5610,26 +5610,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, &txq_pcpu->tso_headers_dma, GFP_KERNEL); if (!txq_pcpu->tso_headers) - goto cleanup; + return -ENOMEM; } return 0; -cleanup: - for_each_present_cpu(cpu) { - txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->buffs); - - dma_free_coherent(port->dev->dev.parent, - txq_pcpu->size * TSO_HEADER_SIZE, - txq_pcpu->tso_headers, - txq_pcpu->tso_headers_dma); - } - - dma_free_coherent(port->dev->dev.parent, - txq->size * MVPP2_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_dma); - - return -ENOMEM; } /* Free allocated TXQ resources */ From c25d803a3b7d82e9549c081e30ce1b62bc1dc9d2 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 28 Nov 2017 14:26:30 +0100 Subject: [PATCH 094/770] net: phy: marvell10g: fix the PHY id mask [ Upstream commit 952b6b3b07877419386e719ff20917170e1ce684 ] The Marvell 10G PHY driver supports different hardware revisions, which have their bits 3..0 differing. To get the correct revision number these bits should be ignored. This patch fixes this by using the already defined MARVELL_PHY_ID_MASK (0xfffffff0) instead of the custom 0xffffffff mask. Fixes: 20b2af32ff3f ("net: phy: add Marvell Alaska X 88X3310 10Gigabit PHY support") Suggested-by: Yan Markman Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell10g.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index aebc08beceba..21b3f36e023a 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -16,6 +16,7 @@ * link takes priority and the other port is completely locked out. */ #include +#include enum { MV_PCS_BASE_T = 0x0000, @@ -338,7 +339,7 @@ static int mv3310_read_status(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = 0x002b09aa, - .phy_id_mask = 0xffffffff, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88x3310", .features = SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Full | @@ -360,7 +361,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { 0x002b09aa, 0xffffffff }, + { 0x002b09aa, MARVELL_PHY_ID_MASK }, { }, }; MODULE_DEVICE_TABLE(mdio, mv3310_tbl); From b274406266744e706c5b741cafa0013e7a032655 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Nov 2017 20:46:49 +0100 Subject: [PATCH 095/770] bnxt_en: Fix an error handling path in 'bnxt_get_module_eeprom()' [ Upstream commit dea521a2b9f96e905fa2bb2f95e23ec00c2ec436 ] Error code returned by 'bnxt_read_sfp_module_eeprom_info()' is handled a few lines above when reading the A0 portion of the EEPROM. The same should be done when reading the A2 portion of the EEPROM. In order to correctly propagate an error, update 'rc' in this 2nd call as well, otherwise 0 (success) is returned. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 3cbe771b3352..a22336fef66b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2133,8 +2133,8 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start, - length, data); + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + start, length, data); } return rc; } From f268e508aeb20f09de9a258ac5d2e41983567775 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Nov 2017 01:54:00 +0000 Subject: [PATCH 096/770] Btrfs: incremental send, fix wrong unlink path after renaming file [ Upstream commit ea37d5998b50a72b9045ba60a132eeb20e1c4230 ] Under some circumstances, an incremental send operation can issue wrong paths for unlink commands related to files that have multiple hard links and some (or all) of those links were renamed between the parent and send snapshots. Consider the following example: Parent snapshot . (ino 256) |---- a/ (ino 257) | |---- b/ (ino 259) | | |---- c/ (ino 260) | | |---- f2 (ino 261) | | | |---- f2l1 (ino 261) | |---- d/ (ino 262) |---- f1l1_2 (ino 258) |---- f2l2 (ino 261) |---- f1_2 (ino 258) Send snapshot . (ino 256) |---- a/ (ino 257) | |---- f2l1/ (ino 263) | |---- b2/ (ino 259) | |---- c/ (ino 260) | | |---- d3 (ino 262) | | |---- f1l1_2 (ino 258) | | |---- f2l2_2 (ino 261) | | |---- f1_2 (ino 258) | | | |---- f2 (ino 261) | |---- f1l2 (ino 258) | |---- d (ino 261) When computing the incremental send stream the following steps happen: 1) When processing inode 261, a rename operation is issued that renames inode 262, which currently as a path of "d", to an orphan name of "o262-7-0". This is done because in the send snapshot, inode 261 has of its hard links with a path of "d" as well. 2) Two link operations are issued that create the new hard links for inode 261, whose names are "d" and "f2l2_2", at paths "/" and "o262-7-0/" respectively. 3) Still while processing inode 261, unlink operations are issued to remove the old hard links of inode 261, with names "f2l1" and "f2l2", at paths "a/" and "d/". However path "d/" does not correspond anymore to the directory inode 262 but corresponds instead to a hard link of inode 261 (link command issued in the previous step). This makes the receiver fail with a ENOTDIR error when attempting the unlink operation. The problem happens because before sending the unlink operation, we failed to detect that inode 262 was one of ancestors for inode 261 in the parent snapshot, and therefore we didn't recompute the path for inode 262 before issuing the unlink operation for the link named "f2l2" of inode 262. The detection failed because the function "is_ancestor()" only follows the first hard link it finds for an inode instead of all of its hard links (as it was originally created for being used with directories only, for which only one hard link exists). So fix this by making "is_ancestor()" follow all hard links of the input inode. A test case for fstests follows soon. Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 130 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8fd195cfe81b..2c35717a3470 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3527,7 +3527,40 @@ out: } /* - * Check if ino ino1 is an ancestor of inode ino2 in the given root. + * Check if inode ino2, or any of its ancestors, is inode ino1. + * Return 1 if true, 0 if false and < 0 on error. + */ +static int check_ino_in_path(struct btrfs_root *root, + const u64 ino1, + const u64 ino1_gen, + const u64 ino2, + const u64 ino2_gen, + struct fs_path *fs_path) +{ + u64 ino = ino2; + + if (ino1 == ino2) + return ino1_gen == ino2_gen; + + while (ino > BTRFS_FIRST_FREE_OBJECTID) { + u64 parent; + u64 parent_gen; + int ret; + + fs_path_reset(fs_path); + ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); + if (ret < 0) + return ret; + if (parent == ino1) + return parent_gen == ino1_gen; + ino = parent; + } + return 0; +} + +/* + * Check if ino ino1 is an ancestor of inode ino2 in the given root for any + * possible path (in case ino2 is not a directory and has multiple hard links). * Return 1 if true, 0 if false and < 0 on error. */ static int is_ancestor(struct btrfs_root *root, @@ -3536,36 +3569,91 @@ static int is_ancestor(struct btrfs_root *root, const u64 ino2, struct fs_path *fs_path) { - u64 ino = ino2; - bool free_path = false; + bool free_fs_path = false; int ret = 0; + struct btrfs_path *path = NULL; + struct btrfs_key key; if (!fs_path) { fs_path = fs_path_alloc(); if (!fs_path) return -ENOMEM; - free_path = true; + free_fs_path = true; } - while (ino > BTRFS_FIRST_FREE_OBJECTID) { - u64 parent; - u64 parent_gen; - - fs_path_reset(fs_path); - ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); - if (ret < 0) { - if (ret == -ENOENT && ino == ino2) - ret = 0; - goto out; - } - if (parent == ino1) { - ret = parent_gen == ino1_gen ? 1 : 0; - goto out; - } - ino = parent; + path = alloc_path_for_send(); + if (!path) { + ret = -ENOMEM; + goto out; } + + key.objectid = ino2; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + u32 cur_offset = 0; + u32 item_size; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino2) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + item_size = btrfs_item_size_nr(leaf, slot); + while (cur_offset < item_size) { + u64 parent; + u64 parent_gen; + + if (key.type == BTRFS_INODE_EXTREF_KEY) { + unsigned long ptr; + struct btrfs_inode_extref *extref; + + ptr = btrfs_item_ptr_offset(leaf, slot); + extref = (struct btrfs_inode_extref *) + (ptr + cur_offset); + parent = btrfs_inode_extref_parent(leaf, + extref); + cur_offset += sizeof(*extref); + cur_offset += btrfs_inode_extref_name_len(leaf, + extref); + } else { + parent = key.offset; + cur_offset = item_size; + } + + ret = get_inode_info(root, parent, NULL, &parent_gen, + NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = check_ino_in_path(root, ino1, ino1_gen, + parent, parent_gen, fs_path); + if (ret) + goto out; + } + path->slots[0]++; + } + ret = 0; out: - if (free_path) + btrfs_free_path(path); + if (free_fs_path) fs_path_free(fs_path); return ret; } From 95a7d23415b79c94b4809200f29f458520e9fc56 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 25 Nov 2017 03:03:00 +0900 Subject: [PATCH 097/770] nvme-pci: fix NULL pointer dereference in nvme_free_host_mem() [ Upstream commit 7e5dd57ef3081ff6c03908d786ed5087f6fbb7ae ] Following condition which will cause NULL pointer dereference will occur in nvme_free_host_mem() when it tries to remove pci device via nvme_remove() especially after a failure of host memory allocation for HMB. "(host_mem_descs == NULL) && (nr_host_mem_descs != 0)" It's because __nr_host_mem_descs__ is not cleared to 0 unlike __host_mem_descs__ is so. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5dae650438c1..cdd2fd509ddc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1617,6 +1617,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->nr_host_mem_descs = 0; } static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, From f730601cdead1ed14be5089e14d8ffcb019373e5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Nov 2017 17:13:40 +0100 Subject: [PATCH 098/770] xfs: fortify xfs_alloc_buftarg error handling [ Upstream commit d210a9874b8f6166579408131cb74495caff1958 ] percpu_counter_init failure path doesn't clean up &btp->bt_lru list. Call list_lru_destroy in that error path. Similarly register_shrinker error path is not handled. While it is unlikely to trigger these error path, it is not impossible especially the later might fail with large NUMAs. Let's handle the failure to make the code more robust. Noticed-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 2f97c12ca75e..16f93d7356b7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1813,22 +1813,27 @@ xfs_alloc_buftarg( btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } From 575c548137f8b397b5e32c66a972c5f0ebcc1cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Nov 2017 11:39:30 +0100 Subject: [PATCH 099/770] drm/amdgpu: don't try to move pinned BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6edc6910ba4cd6eab309263539c8f09b8ad772bf ] Never try to move pinned BOs during CS. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 60d8bedb694d..b5aa8e6f8e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -403,6 +403,10 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, if (candidate->robj == validated) break; + /* We can't move pinned BOs here */ + if (bo->pin_count) + continue; + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* Check if this BO is in one of the domains we need space for */ From 600c904b919a6c3e36cc93754c8941a7b0a1ec6b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2017 11:01:09 +0100 Subject: [PATCH 100/770] net: ethernet: xilinx: Mark XILINX_LL_TEMAC broken on 64-bit [ Upstream commit 15bfe05c8d6386f1a90e9340d15336e85e32aad6 ] On 64-bit (e.g. powerpc64/allmodconfig): drivers/net/ethernet/xilinx/ll_temac_main.c: In function 'temac_start_xmit_done': drivers/net/ethernet/xilinx/ll_temac_main.c:633:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); ^ cdmac_bd.app4 is u32, so it is too small to hold a kernel pointer. Note that several other fields in struct cdmac_bd are also too small to hold physical addresses on 64-bit platforms. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/xilinx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 6d68c8a8f4f2..da4ec575ccf9 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -34,6 +34,7 @@ config XILINX_AXI_EMAC config XILINX_LL_TEMAC tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver" depends on (PPC || MICROBLAZE) + depends on !64BIT || BROKEN select PHYLIB ---help--- This driver supports the Xilinx 10/100/1000 LocalLink TEMAC From 8f8b2c79c4a39a7a0a1f5eb079586fce92794dc2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 29 Nov 2017 22:34:50 +0900 Subject: [PATCH 101/770] quota: Check for register_shrinker() failure. [ Upstream commit 88bc0ede8d35edc969350852894dc864a2dc1859 ] register_shrinker() might return -ENOMEM error since Linux 3.12. Call panic() as with other failure checks in this function if register_shrinker() failed. Fixes: 1d3d4437eae1 ("vmscan: per-node deferred work") Signed-off-by: Tetsuo Handa Cc: Jan Kara Cc: Michal Hocko Reviewed-by: Michal Hocko Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8406be97a518..4cd0c2336624 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2985,7 +2985,8 @@ static int __init dquot_init(void) pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); - register_shrinker(&dqcache_shrinker); + if (register_shrinker(&dqcache_shrinker)) + panic("Cannot register dquot shrinker"); return 0; } From ca4b61373ee86e15b4469285fdd23f9c201fc13d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Nov 2017 12:00:24 -0500 Subject: [PATCH 102/770] SUNRPC: Allow connect to return EHOSTUNREACH [ Upstream commit 4ba161a793d5f43757c35feff258d9f20a082940 ] Reported-by: Dmitry Vyukov Signed-off-by: Trond Myklebust Tested-by: Dmitry Vyukov Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4dad5da388d6..8cb40f8ffa5b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2437,6 +2437,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* From bf6a04c3ffe1486ae7e0ad8fb9f10af9bad6d007 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Wed, 29 Nov 2017 16:10:25 -0800 Subject: [PATCH 103/770] scripts/faddr2line: extend usage on generic arch [ Upstream commit 95a87982541932503d3f59aba4c30b0bde0a6294 ] When cross-compiling, fadd2line should use the binary tool used for the target system, rather than that of the host. Link: http://lkml.kernel.org/r/20171121092911.GA150711@sofia Signed-off-by: Liu Changcheng Cc: Kate Stewart Cc: NeilBrown Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/faddr2line | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f5ce959f596..39e07d8574dd 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,9 +44,16 @@ set -o errexit set -o nounset +READELF="${CROSS_COMPILE}readelf" +ADDR2LINE="${CROSS_COMPILE}addr2line" +SIZE="${CROSS_COMPILE}size" +NM="${CROSS_COMPILE}nm" + command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" -command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" +command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" +command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" usage() { echo "usage: faddr2line ..." >&2 @@ -69,10 +76,10 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return - local file_line=$(addr2line -e $objfile $start_kernel_addr) + local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) [[ -z $file_line ]] && return local prefix=${file_line%init/main.c:*} @@ -104,7 +111,7 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. - file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') + file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} @@ -156,10 +163,10 @@ __faddr2line() { # pass real address to addr2line echo "$func+$offset/$sym_size:" - addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" + ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage From b2ba0bd3469546e5bf6f32a0bb67d6ccf21d4584 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 29 Nov 2017 16:11:08 -0800 Subject: [PATCH 104/770] kmemleak: add scheduling point to kmemleak_scan() [ Upstream commit bde5f6bc68db51128f875a756e9082a6c6ff7b4c ] kmemleak_scan() will scan struct page for each node and it can be really large and resulting in a soft lockup. We have seen a soft lockup when do scan while compile kernel: watchdog: BUG: soft lockup - CPU#53 stuck for 22s! [bash:10287] [...] Call Trace: kmemleak_scan+0x21a/0x4c0 kmemleak_write+0x312/0x350 full_proxy_write+0x5a/0xa0 __vfs_write+0x33/0x150 vfs_write+0xad/0x1a0 SyS_write+0x52/0xc0 do_syscall_64+0x61/0x1a0 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by adding cond_resched every MAX_SCAN_SIZE. Link: http://lkml.kernel.org/r/1511439788-20099-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7780cd83a495..a1ba553816eb 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1532,6 +1532,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); From 345f165a31b8a287b15e91e5aad03d588f1bd141 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Nov 2017 11:16:47 -0800 Subject: [PATCH 105/770] drm/bridge: Fix lvds-encoder since the panel_bridge rework. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dbb58bfd9ae6c885b2ca001a9a5ab8b881fb4ba9 ] The panel_bridge bridge attaches to the panel's OF node, not the lvds-encoder's node. Put in a little no-op bridge of our own so that our consumers can still find a bridge where they expect. This also fixes an unintended unregistration and leak of the panel-bridge on module remove. Signed-off-by: Eric Anholt Fixes: 13dfc0540a57 ("drm/bridge: Refactor out the panel wrapper from the lvds-encoder bri dge.") Tested-by: Lothar Waßmann Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171114191647.22207-1-eric@anholt.net Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/lvds-encoder.c | 48 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c index 0903ba574f61..75b0d3f6e4de 100644 --- a/drivers/gpu/drm/bridge/lvds-encoder.c +++ b/drivers/gpu/drm/bridge/lvds-encoder.c @@ -13,13 +13,37 @@ #include +struct lvds_encoder { + struct drm_bridge bridge; + struct drm_bridge *panel_bridge; +}; + +static int lvds_encoder_attach(struct drm_bridge *bridge) +{ + struct lvds_encoder *lvds_encoder = container_of(bridge, + struct lvds_encoder, + bridge); + + return drm_bridge_attach(bridge->encoder, lvds_encoder->panel_bridge, + bridge); +} + +static struct drm_bridge_funcs funcs = { + .attach = lvds_encoder_attach, +}; + static int lvds_encoder_probe(struct platform_device *pdev) { struct device_node *port; struct device_node *endpoint; struct device_node *panel_node; struct drm_panel *panel; - struct drm_bridge *bridge; + struct lvds_encoder *lvds_encoder; + + lvds_encoder = devm_kzalloc(&pdev->dev, sizeof(*lvds_encoder), + GFP_KERNEL); + if (!lvds_encoder) + return -ENOMEM; /* Locate the panel DT node. */ port = of_graph_get_port_by_id(pdev->dev.of_node, 1); @@ -49,20 +73,30 @@ static int lvds_encoder_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_LVDS); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); + lvds_encoder->panel_bridge = + devm_drm_panel_bridge_add(&pdev->dev, + panel, DRM_MODE_CONNECTOR_LVDS); + if (IS_ERR(lvds_encoder->panel_bridge)) + return PTR_ERR(lvds_encoder->panel_bridge); - platform_set_drvdata(pdev, bridge); + /* The panel_bridge bridge is attached to the panel's of_node, + * but we need a bridge attached to our of_node for our user + * to look up. + */ + lvds_encoder->bridge.of_node = pdev->dev.of_node; + lvds_encoder->bridge.funcs = &funcs; + drm_bridge_add(&lvds_encoder->bridge); + + platform_set_drvdata(pdev, lvds_encoder); return 0; } static int lvds_encoder_remove(struct platform_device *pdev) { - struct drm_bridge *bridge = platform_get_drvdata(pdev); + struct lvds_encoder *lvds_encoder = platform_get_drvdata(pdev); - drm_bridge_remove(bridge); + drm_bridge_remove(&lvds_encoder->bridge); return 0; } From 6d734363f2c5e782384765c5a96caa4473645b8d Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:19 +0300 Subject: [PATCH 106/770] drm/bridge: tc358767: do no fail on hi-res displays [ Upstream commit cffd2b16c01c3431a7a7dd62e722af33490fc436 ] Do not fail data rates higher than 2.7 and more than 2 lanes. Try to fall back to 2.7Gbps and 2 lanes. Acked-by: Philipp Zabel Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-2-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8571cfd877c5..e1996dbfb0a1 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -603,8 +603,15 @@ static int tc_get_display_props(struct tc_data *tc) ret = drm_dp_link_probe(&tc->aux, &tc->link.base); if (ret < 0) goto err_dpcd_read; - if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000)) - goto err_dpcd_inval; + if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) { + dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n"); + tc->link.base.rate = 270000; + } + + if (tc->link.base.num_lanes > 2) { + dev_dbg(tc->dev, "Falling to 2 lanes\n"); + tc->link.base.num_lanes = 2; + } ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp); if (ret < 0) @@ -637,9 +644,6 @@ static int tc_get_display_props(struct tc_data *tc) err_dpcd_read: dev_err(tc->dev, "failed to read DPCD: %d\n", ret); return ret; -err_dpcd_inval: - dev_err(tc->dev, "invalid DPCD\n"); - return -EINVAL; } static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) From 859bacc13cbb34a86f6140430f9fdb767c33d7d4 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:20 +0300 Subject: [PATCH 107/770] drm/bridge: tc358767: filter out too high modes [ Upstream commit 99fc8e963a4c0203dba26a77cf737db6081bca14 ] Pixel clock limitation for DPI is 154 MHz. Do not accept modes with higher pixel clock rate. Reviewed-by: Andrzej Hajda Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-3-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e1996dbfb0a1..e571b5e29ae7 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1103,7 +1103,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static int tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - /* Accept any mode */ + /* DPI interface clock limitation: upto 154 MHz */ + if (mode->clock > 154000) + return MODE_CLOCK_HIGH; + return MODE_OK; } From 0106381870a49d1ce47501410b4a7433e505aa71 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:21 +0300 Subject: [PATCH 108/770] drm/bridge: tc358767: fix DP0_MISC register set [ Upstream commit f3b8adbe1911f66fd3cab1aaa74f0f66b7ceda25 ] Remove shift from TU_SIZE_RECOMMENDED define as it used to calculate max_tu_symbols. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-4-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index e571b5e29ae7..7334cb2121a3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -97,7 +97,7 @@ #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 #define DP0_MISC 0x0658 -#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */ +#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) #define BPC_8 (1 << 5) @@ -716,7 +716,8 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) * Must be less than tu_size. */ max_tu_symbol = TU_SIZE_RECOMMENDED - 1; - tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8); + tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | + BPC_8); return 0; err: From b536eb986e45e2347bd4cfaeff0b48d69fde52d1 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:22 +0300 Subject: [PATCH 109/770] drm/bridge: tc358767: fix timing calculations [ Upstream commit 66d1c3b94d5d59e4325e61a78d520f92c043d645 ] Fields in HTIM01 and HTIM02 regs should be even. Recomended thresh_dly value is max_tu_symbol. Remove set of VPCTRL0.VSDELAY as it is related to DSI input interface. Currently driver supports only DPI. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-5-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 7334cb2121a3..b916346b933a 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -659,6 +659,14 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) int lower_margin = mode->vsync_start - mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; + /* + * Recommended maximum number of symbols transferred in a transfer unit: + * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, + * (output active video bandwidth in bytes)) + * Must be less than tu_size. + */ + max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); dev_dbg(tc->dev, "H margin %d,%d sync %d\n", @@ -668,13 +676,18 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal); - /* LCD Ctl Frame Size */ - tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ | + /* + * LCD Ctl Frame Size + * datasheet is not clear of vsdelay in case of DPI + * assume we do not need any delay when DPI is a source of + * sync signals + */ + tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ | OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED); - tc_write(HTIM01, (left_margin << 16) | /* H back porch */ - (hsync_len << 0)); /* Hsync */ - tc_write(HTIM02, (right_margin << 16) | /* H front porch */ - (mode->hdisplay << 0)); /* width */ + tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */ + (ALIGN(hsync_len, 2) << 0)); /* Hsync */ + tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */ + (ALIGN(mode->hdisplay, 2) << 0)); /* width */ tc_write(VTIM01, (upper_margin << 16) | /* V back porch */ (vsync_len << 0)); /* Vsync */ tc_write(VTIM02, (lower_margin << 16) | /* V front porch */ @@ -693,7 +706,7 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) /* DP Main Stream Attributes */ vid_sync_dly = hsync_len + left_margin + mode->hdisplay; tc_write(DP0_VIDSYNCDELAY, - (0x003e << 16) | /* thresh_dly */ + (max_tu_symbol << 16) | /* thresh_dly */ (vid_sync_dly << 0)); tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal)); @@ -709,13 +722,6 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode) tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); - /* - * Recommended maximum number of symbols transferred in a transfer unit: - * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size, - * (output active video bandwidth in bytes)) - * Must be less than tu_size. - */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) | BPC_8); From 340c9a4ba3031f87fcbfa0d89332d4a5aa468aaf Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:23 +0300 Subject: [PATCH 110/770] drm/bridge: tc358767: fix AUXDATAn registers access [ Upstream commit 9217c1abbc145a77d65c476cf2004a3df02104c7 ] First four bytes should go to DP0_AUXWDATA0. Due to bug if len > 4 first four bytes was writen to DP0_AUXWDATA1 and all data get shifted by 4 bytes. Fix it. Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-6-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index b916346b933a..24f495bf0567 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -318,7 +318,7 @@ static ssize_t tc_aux_transfer(struct drm_dp_aux *aux, tmp = (tmp << 8) | buf[i]; i++; if (((i % 4) == 0) || (i == size)) { - tc_write(DP0_AUXWDATA(i >> 2), tmp); + tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp); tmp = 0; } } From 464711a74ce3238c7ca07f903bed3189efd23831 Mon Sep 17 00:00:00 2001 From: Andrey Gusakov Date: Tue, 7 Nov 2017 19:56:24 +0300 Subject: [PATCH 111/770] drm/bridge: tc358767: fix 1-lane behavior [ Upstream commit 4dbd6c03fbf88299c573d676838896c6e06aade2 ] Use drm_dp_channel_eq_ok helper Acked-by: Philipp Zabel Signed-off-by: Andrey Gusakov Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1510073785-16108-7-git-send-email-andrey.gusakov@cogentembedded.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/tc358767.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 24f495bf0567..8636e7eeb731 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -819,8 +819,6 @@ static int tc_main_link_setup(struct tc_data *tc) unsigned int rate; u32 dp_phy_ctrl; int timeout; - bool aligned; - bool ready; u32 value; int ret; u8 tmp[8]; @@ -965,16 +963,15 @@ static int tc_main_link_setup(struct tc_data *tc) ret = drm_dp_dpcd_read_link_status(aux, tmp + 2); if (ret < 0) goto err_dpcd_read; - ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */ - DP_CHANNEL_EQ_BITS)); /* Lane0 */ - aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE; - } while ((--timeout) && !(ready && aligned)); + } while ((--timeout) && + !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes))); if (timeout == 0) { /* Read DPCD 0x200-0x201 */ ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2); if (ret < 0) goto err_dpcd_read; + dev_err(dev, "channel(s) EQ not ok\n"); dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]); dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n", tmp[1]); @@ -985,10 +982,6 @@ static int tc_main_link_setup(struct tc_data *tc) dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n", tmp[6]); - if (!ready) - dev_err(dev, "Lane0/1 not ready\n"); - if (!aligned) - dev_err(dev, "Lane0/1 not aligned\n"); return -EAGAIN; } From 345dc6d499d8509ec9cc4b9b5b84a070ac87e0d5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2017 08:01:03 +0200 Subject: [PATCH 112/770] drm/omap: Fix error handling path in 'omap_dmm_probe()' [ Upstream commit 8677b1ac2db021ab30bb1fa34f1e56ebe0051ec3 ] If we don't find a matching device node, we must free the memory allocated in 'omap_dmm' a few lines above. Fixes: 7cb0d6c17b96 ("drm/omap: fix TILER on OMAP5") Signed-off-by: Christophe JAILLET Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 1dd3dafc59af..c60a85e82c6d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -638,7 +638,8 @@ static int omap_dmm_probe(struct platform_device *dev) match = of_match_node(dmm_of_match, dev->dev.of_node); if (!match) { dev_err(&dev->dev, "failed to find matching device node\n"); - return -ENODEV; + ret = -ENODEV; + goto fail; } omap_dmm->plat_data = match->data; From 4a96f3d05668cdee90779b07227054d4ec41b32d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:06:41 +0200 Subject: [PATCH 113/770] drm/omap: displays: panel-dpi: add backlight dependency [ Upstream commit 499ec0ed5eb2f6a7fcaab2dd66ffc5993484bda9 ] The new backlight code causes a link failure when backlight support itself is disabled: drivers/gpu/drm/omapdrm/displays/panel-dpi.o: In function `panel_dpi_probe_of': panel-dpi.c:(.text+0x35c): undefined reference to `of_find_backlight_by_node' This adds a Kconfig dependency like we have for the other OMAP display targets. Fixes: 39135a305a0f ("drm/omap: displays: panel-dpi: Support for handling backlight devices") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/displays/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/omapdrm/displays/Kconfig b/drivers/gpu/drm/omapdrm/displays/Kconfig index c226da145fb3..a349cb61961e 100644 --- a/drivers/gpu/drm/omapdrm/displays/Kconfig +++ b/drivers/gpu/drm/omapdrm/displays/Kconfig @@ -35,6 +35,7 @@ config DRM_OMAP_CONNECTOR_ANALOG_TV config DRM_OMAP_PANEL_DPI tristate "Generic DPI panel" + depends on BACKLIGHT_CLASS_DEVICE help Driver for generic DPI panels. From 4e506f41705cf9b2ea64e9d698bc272dd9c25b2d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Nov 2017 09:50:17 -0800 Subject: [PATCH 114/770] xfs: ubsan fixes [ Upstream commit 22a6c83777ac7c17d6c63891beeeac24cf5da450 ] Fix some complaints from the UBSAN about signed integer addition overflows. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5..b0cccf8a81a8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1265,7 +1265,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); From 773a1c5baad243ad30b41700111f0c499f6a259d Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 28 Nov 2017 08:54:10 -0800 Subject: [PATCH 115/770] xfs: Properly retry failed dquot items in case of error during buffer writeback [ Upstream commit 373b0589dc8d58bc09c9a28d03611ae4fb216057 ] Once the inode item writeback errors is already fixed, it's time to fix the same problem in dquot code. Although there were no reports of users hitting this bug in dquot code (at least none I've seen), the bug is there and I was already planning to fix it when the correct approach to fix the inodes part was decided. This patch aims to fix the same problem in dquot code, regarding failed buffers being unable to be resubmitted once they are flush locked. Tested with the recently test-case sent to fstests list by Hou Tao. Reviewed-by: Brian Foster Signed-off-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dquot.c | 14 +++++++++++--- fs/xfs/xfs_dquot_item.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index cd82429d8df7..5a86495127fd 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -987,14 +987,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* From 612959943e409aa9a1dd5cfaa7ade8e270ead7d2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 15 Nov 2017 08:47:02 +0300 Subject: [PATCH 116/770] perf/core: Fix memory leak triggered by perf --namespace [ Upstream commit 0e18dd12064e07519f7cbff4149ca7fff620cbed ] perf with --namespace key leaks various memory objects including namespaces 4.14.0+ pid_namespace 1 12 2568 12 8 user_namespace 1 39 824 39 8 net_namespace 1 5 6272 5 8 This happen because perf_fill_ns_link_info() struct patch ns_path: during initialization ns_path incremented counters on related mnt and dentry, but without lost path_put nobody decremented them back. Leaked dentry is name of related namespace, and its leak does not allow to free unused namespace. Signed-off-by: Vasily Averin Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Hari Bathini Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Thomas Gleixner Fixes: commit e422267322cd ("perf: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/r/c510711b-3904-e5e1-d296-61273d21118d@virtuozzo.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 24ebad5567b4..8c20af8738ac 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6756,6 +6756,7 @@ static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, ns_inode = ns_path.dentry->d_inode; ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); ns_link_info->ino = ns_inode->i_ino; + path_put(&ns_path); } } From 39527e909ea52bfec616536a1b02011f58a08c30 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 17 Nov 2017 19:14:55 -0200 Subject: [PATCH 117/770] scsi: aacraid: Prevent crash in case of free interrupt during scsi EH path [ Upstream commit e4717292ddebcfe231651b5aff9fa19ca158d178 ] As part of the scsi EH path, aacraid performs a reinitialization of the adapter, which encompass freeing resources and IRQs, NULLifying lots of pointers, and then initialize it all over again. We've identified a problem during the free IRQ portion of this path if CONFIG_DEBUG_SHIRQ is enabled on kernel config file. Happens that, in case this flag was set, right after free_irq() effectively clears the interrupt, it checks if it was requested as IRQF_SHARED. In positive case, it performs another call to the IRQ handler on driver. Problem is: since aacraid currently free some resources *before* freeing the IRQ, once free_irq() path calls the handler again (due to CONFIG_DEBUG_SHIRQ), aacraid crashes due to NULL pointer dereference with the following trace: aac_src_intr_message+0xf8/0x740 [aacraid] __free_irq+0x33c/0x4a0 free_irq+0x78/0xb0 aac_free_irq+0x13c/0x150 [aacraid] aac_reset_adapter+0x2e8/0x970 [aacraid] aac_eh_reset+0x3a8/0x5d0 [aacraid] scsi_try_host_reset+0x74/0x180 scsi_eh_ready_devs+0xc70/0x1510 scsi_error_handler+0x624/0xa20 This patch prevents the crash by changing the order of the deinitialization in this path of aacraid: first we clear the IRQ, then we free other resources. No functional change intended. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Raghava Aditya Renukunta Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index ffbfd042be08..c0a4fcb7fd0a 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1583,6 +1583,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) * will ensure that i/o is queisced and the card is flushed in that * case. */ + aac_free_irq(aac); aac_fib_map_free(aac); dma_free_coherent(&aac->pdev->dev, aac->comm_size, aac->comm_addr, aac->comm_phys); @@ -1590,7 +1591,6 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) aac->comm_phys = 0; kfree(aac->queues); aac->queues = NULL; - aac_free_irq(aac); kfree(aac->fsa_dev); aac->fsa_dev = NULL; From 6c27a40c9e182ed318e886c4edc35fbbc182d384 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 20 Nov 2017 08:12:29 -0600 Subject: [PATCH 118/770] scsi: ufs: ufshcd: fix potential NULL pointer dereference in ufshcd_config_vreg [ Upstream commit 727535903bea924c4f73abb202c4b3e85fff0ca4 ] _vreg_ is being dereferenced before it is null checked, hence there is a potential null pointer dereference. Fix this by moving the pointer dereference after _vreg_ has been null checked. This issue was detected with the help of Coccinelle. Fixes: aa4976130934 ("ufs: Add regulator enable support") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 794a4600e952..d344fef01f1d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6555,12 +6555,15 @@ static int ufshcd_config_vreg(struct device *dev, struct ufs_vreg *vreg, bool on) { int ret = 0; - struct regulator *reg = vreg->reg; - const char *name = vreg->name; + struct regulator *reg; + const char *name; int min_uV, uA_load; BUG_ON(!vreg); + reg = vreg->reg; + name = vreg->name; + if (regulator_count_voltages(reg) > 0) { min_uV = on ? vreg->min_uV : 0; ret = regulator_set_voltage(reg, min_uV, vreg->max_uV); From 99f3d5f37e58692f1147e6d4b0ca2e42fe1f6453 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 15 Nov 2017 14:12:30 +0200 Subject: [PATCH 119/770] iwlwifi: mvm: fix the TX queue hang timeout for MONITOR vif type [ Upstream commit d1b275ffec459c5ae12b5c7086c84175696e5a9f ] The MONITOR type is missing in the interface type switch. Add it. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 53e269d54050..0ae7624eac9d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1181,6 +1181,8 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm, return le32_to_cpu(txq_timer->p2p_go); case NL80211_IFTYPE_P2P_DEVICE: return le32_to_cpu(txq_timer->p2p_device); + case NL80211_IFTYPE_MONITOR: + return default_timeout; default: WARN_ON(1); return mvm->cfg->base_params->wd_timeout; From c67fa169456e2179be40b3e398030019879dacd2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Nov 2017 13:25:05 +0200 Subject: [PATCH 120/770] iwlwifi: fix access to prph when transport is stopped [ Upstream commit 0232d2cd7aa8e1b810fe84fb4059a0bd1eabe2ba ] When getting HW rfkill we get stop_device being called from two paths. One path is the IRQ calling stop device, and updating op mode and stack. As a result, cfg80211 is running rfkill sync work that shuts down all devices (second path). In the second path, we eventually get to iwl_mvm_stop_device which calls iwl_fw_dump_conf_clear->iwl_fw_dbg_stop_recording, that access periphery registers. The device may be stopped at this point from the first path, which will result with a failure to access those registers. Simply checking for the trans status is insufficient, since the race will still exist, only minimized. Instead, move the stop from iwl_fw_dump_conf_clear (which is getting called only from stop path) to the transport stop device function, where the access is always safe. This has the added value, of actually stopping dbgc before stopping device even when the stop is initiated from the transport. Fixes: 1efc3843a4ee ("iwlwifi: stop dbgc recording before stopping DMA") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 6 ++++++ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 9 +++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 9c889a32fe24..223fb77a3aa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -209,8 +209,6 @@ static inline void iwl_fw_dbg_stop_recording(struct iwl_fw_runtime *fwrt) static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt) { - iwl_fw_dbg_stop_recording(fwrt); - fwrt->dump.conf = FW_DBG_INVALID; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index c59f4581e972..ac05fd1e74c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -49,6 +49,7 @@ * *****************************************************************************/ #include "iwl-trans.h" +#include "iwl-prph.h" #include "iwl-context-info.h" #include "internal.h" @@ -156,6 +157,11 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans, bool low_power) trans_pcie->is_down = true; + /* Stop dbgc before stopping device */ + iwl_write_prph(trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(trans, DBGC_OUT_CTRL, 0); + /* tell the device to stop sending interrupts */ iwl_disable_interrupts(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2e3e013ec95a..12a9b86d71ea 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1138,6 +1138,15 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) trans_pcie->is_down = true; + /* Stop dbgc before stopping device */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_set_bits_prph(trans, MON_BUFF_SAMPLE_CTL, 0x100); + } else { + iwl_write_prph(trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(trans, DBGC_OUT_CTRL, 0); + } + /* tell the device to stop sending interrupts */ iwl_disable_interrupts(trans); From 12f165f4418aaa904e7412ee178ee43437f65bc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Nov 2017 10:58:34 -0800 Subject: [PATCH 121/770] ARM: dts: NSP: Disable AHCI controller for HR NSP boards [ Upstream commit 77416ab35f5712382e5a792bfa1736ceb70d5bbb ] The AHCI controller is currently enabled for all of these boards: bcm958623hr and bcm958625hr would result in a hard hang on boot that we cannot get rid of. Since this does not appear to have an easy and simple fix, just disable the AHCI controller for now until this gets resolved. Fixes: 70725d6e97ac ("ARM: dts: NSP: Enable SATA on bcm958625hr") Fixes: d454c3762437 ("ARM: dts: NSP: Add new DT file for bcm958623hr") Acked-by: Jon Mason Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm958623hr.dts | 4 ---- arch/arm/boot/dts/bcm958625hr.dts | 4 ---- 2 files changed, 8 deletions(-) diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index 3bc50849d013..b8bde13de90a 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -141,10 +141,6 @@ status = "okay"; }; -&sata { - status = "okay"; -}; - &qspi { bspi-sel = <0>; flash: m25p80@0 { diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index d94d14b3c745..6a44b8021702 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -177,10 +177,6 @@ status = "okay"; }; -&sata { - status = "okay"; -}; - &srab { compatible = "brcm,bcm58625-srab", "brcm,nsp-srab"; status = "okay"; From 504b902b36a9cd487cbcd2f209a9a5c6213cb320 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Nov 2017 11:10:29 -0800 Subject: [PATCH 122/770] ARM: dts: NSP: Fix PPI interrupt types [ Upstream commit 5f1aa51c7a1eef1c5a60b8334e32c89904964245 ] Booting a kernel results in the kernel warning us about the following PPI interrupts configuration: [ 0.105127] smp: Bringing up secondary CPUs ... [ 0.110545] GIC: PPI11 is secure or misconfigured [ 0.110551] GIC: PPI13 is secure or misconfigured Fix this by using the appropriate edge configuration for PPI11 and PPI13, this is similar to what was fixed for Northstar (BCM5301X) in commit 0e34079cd1f6 ("ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags"). Fixes: 7b2e987de207 ("ARM: NSP: add minimal Northstar Plus device tree") Fixes: 1a9d53cabaf4 ("ARM: dts: NSP: Add TWD Support to DT") Acked-by: Jon Mason Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm-nsp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index dff66974feed..d5f5e92e7488 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -85,7 +85,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -93,7 +93,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; From 99a8cad9c1b2236439d7d44c86cdbcdd5831154b Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 16 Apr 2017 02:51:16 -0400 Subject: [PATCH 123/770] media: usbtv: add a new usbid [ Upstream commit 04226916d2360f56d57ad00bc48d2d1854d1e0b0 ] A new usbid of UTV007 is found in a newly bought device. The usbid is 1f71:3301. The ID on the chip is: UTV007 A89029.1 1520L18K1 Both video and audio is tested with the modified usbtv driver. Signed-off-by: Icenowy Zheng Acked-by: Lubomir Rintel Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/usbtv/usbtv-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c index f06f09a0876e..71fb5734995b 100644 --- a/drivers/media/usb/usbtv/usbtv-core.c +++ b/drivers/media/usb/usbtv/usbtv-core.c @@ -144,6 +144,7 @@ static void usbtv_disconnect(struct usb_interface *intf) static const struct usb_device_id usbtv_id_table[] = { { USB_DEVICE(0x1b71, 0x3002) }, + { USB_DEVICE(0x1f71, 0x3301) }, {} }; MODULE_DEVICE_TABLE(usb, usbtv_id_table); From aafb1a7eb0f73cf8bb59ed7cc6d29e19d4de49bc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 24 Nov 2017 09:42:21 +0100 Subject: [PATCH 124/770] x86/xen: Support early interrupts in xen pv guests [ Upstream commit 42b3a4cb5609de757f5445fcad18945ba9239a07 ] Add early interrupt handlers activated by idt_setup_early_handler() to the handlers supported by Xen pv guests. This will allow for early WARN() calls not crashing the guest. Suggested-by: Andy Lutomirski Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Link: https://lkml.kernel.org/r/20171124084221.30172-1-jgross@suse.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/segment.h | 12 +++++++++++ arch/x86/mm/extable.c | 4 +++- arch/x86/xen/enlighten_pv.c | 37 ++++++++++++++++++++++------------ arch/x86/xen/xen-asm_64.S | 14 +++++++++++++ 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b20f9d623f9c..8f09012b92e7 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -236,11 +236,23 @@ */ #define EARLY_IDT_HANDLER_SIZE 9 +/* + * xen_early_idt_handler_array is for Xen pv guests: for each entry in + * early_idt_handler_array it contains a prequel in the form of + * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to + * max 8 bytes. + */ +#define XEN_EARLY_IDT_HANDLER_SIZE 8 + #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; +#endif + /* * Load a segment. Fall back on loading the zero segment if something goes * wrong. This variant assumes that loading zero fully clears the segment. diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 30bc4812ceb8..9fe656c42aa5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. + * Xen pv domains are not using the default __KERNEL_CS. */ - if (regs->cs != __KERNEL_CS) + if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae3a071e1d0f..899a22a02e95 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = { { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; -static bool get_trap_addr(void **addr, unsigned int ist) +static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; @@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist) } } + if (nr == ARRAY_SIZE(trap_array) && + *addr >= (void *)early_idt_handler_array[0] && + *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) { + nr = (*addr - (void *)early_idt_handler_array[0]) / + EARLY_IDT_HANDLER_SIZE; + *addr = (void *)xen_early_idt_handler_array[nr]; + } + if (WARN_ON(ist != 0 && !ist_okay)) return false; @@ -1261,6 +1269,21 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_gdt(0); xen_init_irq_ops(); + + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because idt_setup_early_handler needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); + + idt_setup_early_handler(); + xen_init_capabilities(); #ifdef CONFIG_X86_LOCAL_APIC @@ -1294,18 +1317,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - - /* - * Setup xen_vcpu early because start_kernel needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 8a10c9a9e2b5..417b339e5c8e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include +#include #include .macro xen_pv_trap name @@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat #endif xen_pv_trap hypervisor_callback + __INIT +ENTRY(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +END(xen_early_idt_handler_array) + __FINIT + hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: From 0479bc01767ff58170a2f1ae27e6b98f9dfa01b9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2017 16:18:28 +0000 Subject: [PATCH 125/770] usb: gadget: don't dereference g until after it has been null checked [ Upstream commit b2fc059fa549fe6881d4c1f8d698b0f50bcd16ec ] Avoid dereferencing pointer g until after g has been sanity null checked; move the assignment of cdev much later when it is required into a more local scope. Detected by CoverityScan, CID#1222135 ("Dereference before null check") Fixes: b785ea7ce662 ("usb: gadget: composite: fix ep->maxburst initialization") Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 5d061b3d8224..ed9346f0b000 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -150,7 +150,6 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep) { - struct usb_composite_dev *cdev = get_gadget_data(g); struct usb_endpoint_descriptor *chosen_desc = NULL; struct usb_descriptor_header **speed_desc = NULL; @@ -229,8 +228,12 @@ ep_found: _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) + if (comp_desc->bMaxBurst != 0) { + struct usb_composite_dev *cdev; + + cdev = get_gadget_data(g); ERROR(cdev, "ep0 bMaxBurst must be 0\n"); + } _ep->maxburst = 1; break; } From c789cfe0ae72005f368aef5c361851fabec0e5f5 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 25 Nov 2017 13:32:38 -0600 Subject: [PATCH 126/770] staging: rtl8188eu: Fix incorrect response to SIOCGIWESSID [ Upstream commit b77992d2df9e47144354d1b25328b180afa33442 ] When not associated with an AP, wifi device drivers should respond to the SIOCGIWESSID ioctl with a zero-length string for the SSID, which is the behavior expected by dhcpcd. Currently, this driver returns an error code (-1) from the ioctl call, which causes dhcpcd to assume that the device is not a wireless interface and therefore it fails to work correctly with it thereafter. This problem was reported and tested at https://github.com/lwfinger/rtl8188eu/issues/234. Signed-off-by: Larry Finger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index c0664dc80bf2..446310775e90 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1395,19 +1395,13 @@ static int rtw_wx_get_essid(struct net_device *dev, if ((check_fwstate(pmlmepriv, _FW_LINKED)) || (check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) { len = pcur_bss->Ssid.SsidLength; - - wrqu->essid.length = len; - memcpy(extra, pcur_bss->Ssid.Ssid, len); - - wrqu->essid.flags = 1; } else { - ret = -1; - goto exit; + len = 0; + *extra = 0; } - -exit: - + wrqu->essid.length = len; + wrqu->essid.flags = 1; return ret; } From e9273b08d4fb808baeebcd2637b5257d3f0db18c Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 29 Dec 2017 17:05:43 +0100 Subject: [PATCH 127/770] drm/vc4: Move IRQ enable to PM path [ Upstream commit ce9caf2f79a5aa170a4b6456a03db639eed9c988 ] We were calling enable_irq on bind, where it was already enabled previously by the IRQ helper. Additionally, dev->irq is not set correctly until after postinstall and so was always zero here, triggering a warning in 4.15. Fix both by moving the enable to the power management resume path, where we know there was a previous disable invocation during suspend. Fixes: 253696ccd613 ("drm/vc4: Account for interrupts in flight") Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1514563543-32511-1-git-send-email-stschake@gmail.com Tested-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_irq.c | 3 --- drivers/gpu/drm/vc4/vc4_v3d.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 61b2e5377993..521addec831e 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -208,9 +208,6 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - /* Undo the effects of a previous vc4_irq_uninstall. */ - enable_irq(dev->irq); - /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 622cd43840b8..493f392b3a0a 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev) return ret; vc4_v3d_init_hw(vc4->dev); + + /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */ + enable_irq(vc4->dev->irq); vc4_irq_postinstall(vc4->dev); return 0; From c065b9947b05d63936dad5f3d5aa6e83d1e0346f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2018 16:55:24 +0100 Subject: [PATCH 128/770] KVM: x86: emulate #UD while in guest mode [ Upstream commit bd89525a823ce6edddcedbe9aed79faa1b9cf544 ] This reverts commits ae1f57670703656cc9f293722c3b8b6782f8ab3f and ac9b305caa0df6f5b75d294e4b86c1027648991e. If the hardware doesn't support MOVBE, but L0 sets CPUID.01H:ECX.MOVBE in L1's emulated CPUID information, then L1 is likely to pass that CPUID bit through to L2. L2 will expect MOVBE to work, but if L1 doesn't intercept #UD, then any MOVBE instruction executed in L2 will raise #UD, and the exception will be delivered in L2. Commit ac9b305caa0df6f5b75d294e4b86c1027648991e is a better and more complete version of ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode"); however, neither considers the above case. Suggested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 9 +-------- arch/x86/kvm/vmx.c | 5 +---- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c8be4e6d365b..6a8284f72328 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -362,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; - u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -373,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; - /* No need to intercept #UD if L1 doesn't intercept it */ - h_intercept_exceptions = - h->intercept_exceptions & ~(1U << UD_VECTOR); - c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = - h_intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2195,7 +2189,6 @@ static int ud_interception(struct vcpu_svm *svm) { int er; - WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5242fac8165b..a45063a9219c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1891,7 +1891,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1909,8 +1909,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; - else - eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5921,7 +5919,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; From 7a4b5ee973cfbcf32bff2fe03bed1786d5baa14e Mon Sep 17 00:00:00 2001 From: Dmitry Eremin Date: Thu, 25 Jan 2018 16:51:04 +0300 Subject: [PATCH 129/770] staging: lustre: separate a connection destroy from free struct kib_conn commit 9b046013e5837f8a58453d1e9f8e01d03adb7fe7 upstream. The logic of the original commit 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") was assumed conditional free of struct kib_conn if the second argument free_conn in function kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) is true. But this hunk of code was dropped from original commit. As result the logic works wrong and current code use struct kib_conn after free. > drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c > 3317 kiblnd_destroy_conn(conn, !peer); > ^^^^ Freed always (but should be conditionally) > 3318 > 3319 spin_lock_irqsave(lock, flags); > 3320 if (!peer) > 3321 continue; > 3322 > 3323 conn->ibc_peer = peer; > ^^^^^^^^^^^^^^ Use after free > 3324 if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) > 3325 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3326 &kiblnd_data.kib_reconn_list); > 3327 else > 3328 list_add_tail(&conn->ibc_list, > ^^^^^^^^^^^^^^ Use after free > 3329 &kiblnd_data.kib_reconn_wait); To avoid confusion this fix moved the freeing a struct kib_conn outside of the function kiblnd_destroy_conn() and free as it was intended in original commit. Fixes: 4d99b2581eff ("staging: lustre: avoid intensive reconnecting for ko2iblnd") Signed-off-by: Dmitry Eremin Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 7 +++---- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 2 +- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 64763aacda57..284cdd44a2ee 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -825,14 +825,15 @@ struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cm return conn; failed_2: - kiblnd_destroy_conn(conn, true); + kiblnd_destroy_conn(conn); + LIBCFS_FREE(conn, sizeof(*conn)); failed_1: LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); failed_0: return NULL; } -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) +void kiblnd_destroy_conn(struct kib_conn *conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; struct kib_peer *peer = conn->ibc_peer; @@ -895,8 +896,6 @@ void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn) rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); } - - LIBCFS_FREE(conn, sizeof(*conn)); } int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index a1e994a1cc84..98a5e2c21a83 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -1015,7 +1015,7 @@ int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn); +void kiblnd_destroy_conn(struct kib_conn *conn); void kiblnd_close_conn(struct kib_conn *conn, int error); void kiblnd_close_conn_locked(struct kib_conn *conn, int error); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 8fc191d99927..29e10021b906 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3313,11 +3313,13 @@ kiblnd_connd(void *arg) spin_unlock_irqrestore(lock, flags); dropped_lock = 1; - kiblnd_destroy_conn(conn, !peer); + kiblnd_destroy_conn(conn); spin_lock_irqsave(lock, flags); - if (!peer) + if (!peer) { + kfree(conn); continue; + } conn->ibc_peer = peer; if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) From fad7da7b638a1f5900bee31ed598b7c527d3a14c Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 3 Dec 2017 13:58:19 +0000 Subject: [PATCH 130/770] staging: ccree: NULLify backup_info when unused commit 46df8824982e4fb0198776078d4a8c3e2d531464 upstream. backup_info field is only allocated for decrypt code path. The field was not nullified when not used causing a kfree in an error handling path to attempt to free random addresses as uncovered in stress testing. Fixes: 737aed947f9b ("staging: ccree: save ciphertext for CTS IV") Signed-off-by: Gilad Ben-Yossef Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_cipher.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/ccree/ssi_cipher.c b/drivers/staging/ccree/ssi_cipher.c index 8d31a93fd8b7..087a622f20b2 100644 --- a/drivers/staging/ccree/ssi_cipher.c +++ b/drivers/staging/ccree/ssi_cipher.c @@ -904,6 +904,7 @@ static int ssi_ablkcipher_decrypt(struct ablkcipher_request *req) scatterwalk_map_and_copy(req_ctx->backup_info, req->src, (req->nbytes - ivsize), ivsize, 0); req_ctx->is_giv = false; + req_ctx->backup_info = NULL; return ssi_blkcipher_process(tfm, req_ctx, req->dst, req->src, req->nbytes, req->info, ivsize, (void *)req, DRV_CRYPTO_DIRECTION_DECRYPT); } From eb6de1af2e7235133a0e2563c09926ca28361095 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 14 Dec 2017 14:02:46 +0000 Subject: [PATCH 131/770] staging: ccree: fix fips event irq handling build commit dc5591dc9c03e4cd22d3f0c3659196cc34668452 upstream. When moving from internal for kernel FIPS infrastructure the FIPS event irq handling code was left with the old ifdef by mistake. Fix it. Fixes: b7e607bf33a2 ("staging: ccree: move FIPS support to kernel infrastructure") Signed-off-by: Gilad Ben-Yossef Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/ccree/ssi_driver.c b/drivers/staging/ccree/ssi_driver.c index 9c6f1200c130..eeb995307951 100644 --- a/drivers/staging/ccree/ssi_driver.c +++ b/drivers/staging/ccree/ssi_driver.c @@ -141,7 +141,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id) irr &= ~SSI_COMP_IRQ_MASK; complete_request(drvdata); } -#ifdef CC_SUPPORT_FIPS +#ifdef CONFIG_CRYPTO_FIPS /* TEE FIPS interrupt */ if (likely((irr & SSI_GPR0_IRQ_MASK) != 0)) { /* Mask interrupt - will be unmasked in Deferred service handler */ From 3c538ad935465d7ba4bf0b5747fadcfdf05311c9 Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Tue, 23 Jan 2018 13:16:34 +0530 Subject: [PATCH 132/770] tty: fix data race between tty_init_dev and flush of buf commit b027e2298bd588d6fa36ed2eda97447fb3eac078 upstream. There can be a race, if receive_buf call comes before tty initialization completes in n_tty_open and tty->disc_data may be NULL. CPU0 CPU1 ---- ---- 000|n_tty_receive_buf_common() n_tty_open() -001|n_tty_receive_buf2() tty_ldisc_open.isra.3() -002|tty_ldisc_receive_buf(inline) tty_ldisc_setup() Using ldisc semaphore lock in tty_init_dev till disc_data initializes completely. Signed-off-by: Gaurav Kohli Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 8 +++++++- drivers/tty/tty_ldisc.c | 4 ++-- include/linux/tty.h | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 94cccb6efa32..7892d0be8af9 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1322,6 +1322,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) "%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n", __func__, tty->driver->name); + retval = tty_ldisc_lock(tty, 5 * HZ); + if (retval) + goto err_release_lock; tty->port->itty = tty; /* @@ -1332,6 +1335,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; + tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; @@ -1344,9 +1348,11 @@ err_module_put: /* call the tty release_tty routine to clean out this slot */ err_release_tty: - tty_unlock(tty); + tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); +err_release_lock: + tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 84a8ac2a779f..7c895684c3ef 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -336,7 +336,7 @@ static inline void __tty_ldisc_unlock(struct tty_struct *tty) ldsem_up_write(&tty->ldisc_sem); } -static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) +int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; @@ -347,7 +347,7 @@ static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) return 0; } -static void tty_ldisc_unlock(struct tty_struct *tty) +void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); __tty_ldisc_unlock(tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 7ac8ba208b1f..0a6c71e0ad01 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -405,6 +405,8 @@ extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); +extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); +extern void tty_ldisc_unlock(struct tty_struct *tty); #else static inline void tty_kref_put(struct tty_struct *tty) { } From 94791411130312ba655c944c4623de1ff4c5a614 Mon Sep 17 00:00:00 2001 From: OKAMOTO Yoshiaki Date: Tue, 16 Jan 2018 09:51:17 +0000 Subject: [PATCH 133/770] usb: option: Add support for FS040U modem commit 69341bd15018da0a662847e210f9b2380c71e623 upstream. FS040U modem is manufactured by omega, and sold by Fujisoft. This patch adds ID of the modem to use option1 driver. Interface 3 is used as qmi_wwan, so the interface is ignored. Signed-off-by: Yoshiaki Okamoto Signed-off-by: Hiroyuki Yamamoto Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a9400458ccea..dcf78a498927 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -383,6 +383,9 @@ static void option_instat_callback(struct urb *urb); #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 #define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 +/* Fujisoft products */ +#define FUJISOFT_PRODUCT_FS040U 0x9b02 + /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -1897,6 +1900,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), .driver_info = (kernel_ulong_t)&four_g_w100_blacklist }, + {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist}, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, From bfc372036b41fec5162c38c18aa824698c33b825 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jan 2018 09:48:55 +0100 Subject: [PATCH 134/770] USB: serial: pl2303: new device id for Chilitag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d08dd3f3dd2ae351b793fc5b76abdbf0fd317b12 upstream. This adds a new device id for Chilitag devices to the pl2303 driver. Reported-by: "Chu.Mike [朱堅宜]" Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a585b477415d..34c5a75f98a7 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -41,6 +41,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 3b5a15d1dc0d..123289085ee2 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -17,6 +17,7 @@ #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 #define PL2303_PRODUCT_ID_RSAQ3 0xaaa2 +#define PL2303_PRODUCT_ID_CHILITAG 0xaaa8 #define PL2303_PRODUCT_ID_ALDIGA 0x0611 #define PL2303_PRODUCT_ID_MMX 0x0612 #define PL2303_PRODUCT_ID_GPRS 0x0609 From ca7964983ca453dd9902c40171e93a69f21ee3ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Jan 2018 16:09:00 +0100 Subject: [PATCH 135/770] USB: cdc-acm: Do not log urb submission errors on disconnect commit f0386c083c2ce85284dc0b419d7b89c8e567c09f upstream. When disconnected sometimes the cdc-acm driver logs errors like these: [20278.039417] cdc_acm 2-2:2.1: urb 9 failed submission with -19 [20278.042924] cdc_acm 2-2:2.1: urb 10 failed submission with -19 [20278.046449] cdc_acm 2-2:2.1: urb 11 failed submission with -19 [20278.049920] cdc_acm 2-2:2.1: urb 12 failed submission with -19 [20278.053442] cdc_acm 2-2:2.1: urb 13 failed submission with -19 [20278.056915] cdc_acm 2-2:2.1: urb 14 failed submission with -19 [20278.060418] cdc_acm 2-2:2.1: urb 15 failed submission with -19 Silence these by not logging errors when the result is -ENODEV. Signed-off-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 18c923a4c16e..b133fc68b3fd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -438,7 +438,7 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { - if (res != -EPERM) { + if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); From 34b812ca30389a3759bf89d424c2d256185face3 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 18 Jan 2018 12:13:45 +0100 Subject: [PATCH 136/770] CDC-ACM: apply quirk for card reader commit df1cc78a52491f71d8170d513d0f6f114faa1bda upstream. This devices drops random bytes from messages if you talk to it too fast. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index b133fc68b3fd..4149a965516e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1765,6 +1765,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, + { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ + .driver_info = SINGLE_RX_URB, + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From 9a24d3f8c566bfdb3bb2126055426c6ae952cdd5 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 13 Dec 2017 20:34:36 +0800 Subject: [PATCH 137/770] USB: serial: io_edgeport: fix possible sleep-in-atomic commit c7b8f77872c73f69a16528a9eb87afefcccdc18b upstream. According to drivers/usb/serial/io_edgeport.c, the driver may sleep under a spinlock. The function call path is: edge_bulk_in_callback (acquire the spinlock) process_rcvd_data process_rcvd_status change_port_settings send_iosp_ext_cmd write_cmd_usb usb_kill_urb --> may sleep To fix it, the redundant usb_kill_urb() is removed from the error path after usb_submit_urb() fails. This possible bug is found by my static analysis tool (DSAC) and checked by my code review. Signed-off-by: Jia-Ju Bai Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index bdf8bd814a9a..01f3ac7769f3 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2286,7 +2286,6 @@ static int write_cmd_usb(struct edgeport_port *edge_port, /* something went wrong */ dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n", __func__, status); - usb_kill_urb(urb); usb_free_urb(urb); atomic_dec(&CmdUrbs); return status; From aa7cdae765cf383067cb08a854f0d5738c684a78 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:07:30 -0700 Subject: [PATCH 138/770] usbip: prevent bind loops on devices attached to vhci_hcd commit ef54cf0c600fb8f5737fb001a9e357edda1a1de8 upstream. usbip host binds to devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Unbind followed by bind works, however device is left in a bad state with accesses via the attached busid result in errors and system hangs during shutdown. Fix it to check and bail out if the device is already attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_bind.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68b..e121cfb1746a 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); From d00a0442a99e859ff4b04741abd157fa3bb6b30e Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 17 Jan 2018 12:08:03 -0700 Subject: [PATCH 139/770] usbip: list: don't list devices attached to vhci_hcd commit ef824501f50846589f02173d73ce3fe6021a9d2a upstream. usbip host lists devices attached to vhci_hcd on the same server when user does attach over localhost or specifies the server as the remote. usbip attach -r localhost -b busid or usbip attach -r servername (or server IP) Fix it to check and not list devices that are attached to vhci_hcd. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_list.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7..d65a9f444174 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); From 1c226267c2259112a9825d09a8c715568c3d8d10 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Jan 2018 14:46:41 +1100 Subject: [PATCH 140/770] USB: serial: simple: add Motorola Tetra driver commit 46fe895e22ab3845515ec06b01eaf1282b342e29 upstream. Add new Motorola Tetra (simple) driver for Motorola Solutions TETRA PEI devices. D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0cad ProdID=9011 Rev=24.16 S: Manufacturer=Motorola Solutions Inc. S: Product=Motorola Solutions TETRA PEI interface C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) Note that these devices do not support the CDC SET_CONTROL_LINE_STATE request (for any interface). Reported-by: Max Schulze Tested-by: Max Schulze Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/Kconfig | 1 + drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index a8d5f2e4878d..c66b93664d54 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -63,6 +63,7 @@ config USB_SERIAL_SIMPLE - Google USB serial devices - HP4x calculators - a number of Motorola phones + - Motorola Tetra devices - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index e98b6e57b703..6aa7ff2c1cf7 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -80,6 +80,11 @@ DEVICE(vivopay, VIVOPAY_IDS); { USB_DEVICE(0x22b8, 0x2c64) } /* Motorola V950 phone */ DEVICE(moto_modem, MOTO_IDS); +/* Motorola Tetra driver */ +#define MOTOROLA_TETRA_IDS() \ + { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ +DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -110,6 +115,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &google_device, &vivopay_device, &moto_modem_device, + &motorola_tetra_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -125,6 +131,7 @@ static const struct usb_device_id id_table[] = { GOOGLE_IDS(), VIVOPAY_IDS(), MOTO_IDS(), + MOTOROLA_TETRA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), From 75816a439f6589b8f14d10c4b5f8c835ad76e036 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2018 12:30:53 +0530 Subject: [PATCH 141/770] usb: f_fs: Prevent gadget unbind if it is already unbound commit ce5bf9a50daf2d9078b505aca1cea22e88ecb94a upstream. Upon usb composition switch there is possibility of ep0 file release happening after gadget driver bind. In case of composition switch from adb to a non-adb composition gadget will never gets bound again resulting into failure of usb device enumeration. Fix this issue by checking FFS_FL_BOUND flag and avoid extra gadget driver unbind if it is already done as part of composition switch. This fixes adb reconnection error reported on Android running v4.4 and above kernel versions. Verified on Hikey running vanilla v4.15-rc7 + few out of tree Mali patches. Reviewed-at: https://android-review.googlesource.com/#/c/582632/ Cc: Felipe Balbi Cc: Greg KH Cc: Michal Nazarewicz Cc: John Stultz Cc: Dmitry Shmidt Cc: Badhri Cc: Android Kernel Team Signed-off-by: Hemant Kumar [AmitP: Cherry-picked it from android-4.14 and updated the commit log] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 876cdbec1307..c0491dd73f53 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3704,7 +3704,8 @@ static void ffs_closed(struct ffs_data *ffs) ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; ffs_dev_unlock(); - unregister_gadget_item(ci); + if (test_bit(FFS_FL_BOUND, &ffs->flags)) + unregister_gadget_item(ci); return; done: ffs_dev_unlock(); From e0096f93aa42d47f0277e28c3b26eaa5e1ebadd5 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 11 Jan 2018 13:10:16 +0100 Subject: [PATCH 142/770] usb: uas: unconditionally bring back host after reset commit cbeef22fd611c4f47c494b821b2b105b8af970bb upstream. Quoting Hans: If we return 1 from our post_reset handler, then our disconnect handler will be called immediately afterwards. Since pre_reset blocks all scsi requests our disconnect handler will then hang in the scsi_remove_host call. This is esp. bad because our disconnect handler hanging for ever also stops the USB subsys from enumerating any new USB devices, causes commands like lsusb to hang, etc. In practice this happens when unplugging some uas devices because the hub code may see the device as needing a warm-reset and calls usb_reset_device before seeing the disconnect. In this case uas_configure_endpoints fails with -ENODEV. We do not want to print an error for this, so this commit also silences the shost_printk for -ENODEV. ENDQUOTE However, if we do that we better drop any unconditional execution and report to the SCSI subsystem that we have undergone a reset but we are not operational now. Signed-off-by: Oliver Neukum Reported-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 63cf981ed81c..0bc8543e96b1 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -1076,20 +1076,19 @@ static int uas_post_reset(struct usb_interface *intf) return 0; err = uas_configure_endpoints(devinfo); - if (err) { + if (err && err != ENODEV) shost_printk(KERN_ERR, shost, "%s: alloc streams error %d after reset", __func__, err); - return 1; - } + /* we must unblock the host in every case lest we deadlock */ spin_lock_irqsave(shost->host_lock, flags); scsi_report_bus_reset(shost, 0); spin_unlock_irqrestore(shost->host_lock, flags); scsi_unblock_requests(shost); - return 0; + return err ? 1 : 0; } static int uas_suspend(struct usb_interface *intf, pm_message_t message) From 2ba5966eddefdf04306bd1da36811c8b83a9664d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 17:50:02 +1100 Subject: [PATCH 143/770] usb/gadget: Fix "high bandwidth" check in usb_gadget_ep_match_desc() commit 11fb37998759c48e4e4c200c974593cbeab25d3e upstream. The current code tries to test for bits that are masked out by usb_endpoint_maxp(). Instead, use the proper accessor to access the new high bandwidth bits. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 284bd1a7b570..794bb4958383 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -923,7 +923,7 @@ int usb_gadget_ep_match_desc(struct usb_gadget *gadget, return 0; /* "high bandwidth" works only at high speed */ - if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) + if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1) return 0; switch (type) { From 7a3cee43e935b9d526ad07f20bf005ba7e74d05b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 5 Jan 2018 11:27:07 +0100 Subject: [PATCH 144/770] ANDROID: binder: remove waitqueue when thread exits. commit f5cb779ba16334b45ba8946d6bfa6d9834d1527f upstream. binder_poll() passes the thread->wait waitqueue that can be slept on for work. When a thread that uses epoll explicitly exits using BINDER_THREAD_EXIT, the waitqueue is freed, but it is never removed from the corresponding epoll data structure. When the process subsequently exits, the epoll cleanup code tries to access the waitlist, which results in a use-after-free. Prevent this by using POLLFREE when the thread exits. Signed-off-by: Martijn Coenen Reported-by: syzbot Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a340766b51fe..2ef8bd29e188 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4302,6 +4302,18 @@ static int binder_thread_release(struct binder_proc *proc, if (t) spin_lock(&t->lock); } + + /* + * If this thread used poll, make sure we remove the waitqueue + * from any epoll data structures holding it with POLLFREE. + * waitqueue_active() is safe to use here because we're holding + * the inner lock. + */ + if ((thread->looper & BINDER_LOOPER_STATE_POLL) && + waitqueue_active(&thread->wait)) { + wake_up_poll(&thread->wait, POLLHUP | POLLFREE); + } + binder_inner_proc_unlock(thread->proc); if (send_reply) From 86eda3864a704a98d5444ae60654b898731bfea5 Mon Sep 17 00:00:00 2001 From: Ganesh Mahendran Date: Wed, 10 Jan 2018 10:49:05 +0800 Subject: [PATCH 145/770] android: binder: use VM_ALLOC to get vm area commit aac6830ec1cb681544212838911cdc57f2638216 upstream. VM_IOREMAP is used to access hardware through a mechanism called I/O mapped memory. Android binder is a IPC machanism which will not access I/O memory. And VM_IOREMAP has alignment requiement which may not needed in binder. __get_vm_area_node() { ... if (flags & VM_IOREMAP) align = 1ul << clamp_t(int, fls_long(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); ... } This patch will save some kernel vm area, especially for 32bit os. In 32bit OS, kernel vm area is only 240MB. We may got below error when launching a app: <3>[ 4482.440053] binder_alloc: binder_alloc_mmap_handler: 15728 8ce67000-8cf65000 get_vm_area failed -12 <3>[ 4483.218817] binder_alloc: binder_alloc_mmap_handler: 15745 8ce67000-8cf65000 get_vm_area failed -12 Signed-off-by: Ganesh Mahendran Acked-by: Martijn Coenen Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index c2819a3d58a6..6cb148268676 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -668,7 +668,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, goto err_already_mapped; } - area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP); + area = get_vm_area(vma->vm_end - vma->vm_start, VM_ALLOC); if (area == NULL) { ret = -ENOMEM; failure_string = "get_vm_area"; From e00c5c771877abdb70814ce555f020deab42bd46 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 2 Jan 2018 12:01:41 +0200 Subject: [PATCH 146/770] mei: me: allow runtime pm for platform with D0i3 commit cc365dcf0e56271bedf3de95f88922abe248e951 upstream. >From the pci power documentation: "The driver itself should not call pm_runtime_allow(), though. Instead, it should let user space or some platform-specific code do that (user space can do it via sysfs as stated above)..." However, the S0ix residency cannot be reached without MEI device getting into low power state. Hence, for mei devices that support D0i3, it's better to make runtime power management mandatory and not rely on the system integration such as udev rules. This policy cannot be applied globally as some older platforms were found to have broken power management. Cc: Rafael J. Wysocki Signed-off-by: Tomas Winkler Reviewed-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 78b3172c8e6e..d46cb1f0868f 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -238,8 +238,11 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ mei_me_set_pm_domain(dev); - if (mei_pg_is_enabled(dev)) + if (mei_pg_is_enabled(dev)) { pm_runtime_put_noidle(&pdev->dev); + if (hw->d0i3_supported) + pm_runtime_allow(&pdev->dev); + } dev_dbg(&pdev->dev, "initialization successful.\n"); From aa33208b5a6f0014d9b7d966f61bd6c4f8cd9729 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 27 Dec 2017 14:21:05 +0900 Subject: [PATCH 147/770] serial: 8250_of: fix return code when probe function fails to get reset commit b9820a31691b771db37afe2054dd3d3a680c1eed upstream. The error pointer from devm_reset_control_get_optional_shared() is not propagated. One of the most common problem scenarios is it returns -EPROBE_DEFER when the reset controller has not probed yet. In this case, the probe of the reset consumer should be deferred. Fixes: e2860e1f62f2 ("serial: 8250_of: Add reset support") Signed-off-by: Masahiro Yamada Reviewed-by: Philipp Zabel Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 1222c005fb98..951680640ad5 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -141,8 +141,11 @@ static int of_platform_serial_setup(struct platform_device *ofdev, } info->rst = devm_reset_control_get_optional_shared(&ofdev->dev, NULL); - if (IS_ERR(info->rst)) + if (IS_ERR(info->rst)) { + ret = PTR_ERR(info->rst); goto err_dispose; + } + ret = reset_control_deassert(info->rst); if (ret) goto err_dispose; From 24293a3970e11889cc123d3f0cf9bb9bbb2cf34f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Jan 2018 07:42:15 +0000 Subject: [PATCH 148/770] serial: 8250_uniphier: fix error return code in uniphier_uart_probe() commit 7defa77d2baca4d6eb85234f10f38ab618332e75 upstream. Fix to return a negative error code from the port register error handling case instead of 0, as done elsewhere in this function. Fixes: 39be40ce066d ("serial: 8250_uniphier: fix serial port index in private data") Signed-off-by: Wei Yongjun Acked-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_uniphier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_uniphier.c b/drivers/tty/serial/8250/8250_uniphier.c index 8a10b10e27aa..c206f173f912 100644 --- a/drivers/tty/serial/8250/8250_uniphier.c +++ b/drivers/tty/serial/8250/8250_uniphier.c @@ -259,12 +259,13 @@ static int uniphier_uart_probe(struct platform_device *pdev) up.dl_read = uniphier_serial_dl_read; up.dl_write = uniphier_serial_dl_write; - priv->line = serial8250_register_8250_port(&up); - if (priv->line < 0) { + ret = serial8250_register_8250_port(&up); + if (ret < 0) { dev_err(dev, "failed to register 8250 port\n"); clk_disable_unprepare(priv->clk); return ret; } + priv->line = ret; platform_set_drvdata(pdev, priv); From e6e7d6baa61a57bf61d0d3647a5037ad1013f4db Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jan 2018 15:58:34 -0200 Subject: [PATCH 149/770] serial: imx: Only wakeup via RTSDEN bit if the system has RTS/CTS commit 38b1f0fb42f772b8c9aac53593883a18ff5eb9d7 upstream. The wakeup mechanism via RTSDEN bit relies on the system using the RTS/CTS lines, so only allow such wakeup method when the system actually has RTS/CTS support. Fixes: bc85734b126f ("serial: imx: allow waking up on RTSD") Signed-off-by: Fabio Estevam Reviewed-by: Martin Kaiser Acked-by: Fugang Duan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 3657d745e90f..521500c575c8 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2275,12 +2275,14 @@ static void serial_imx_enable_wakeup(struct imx_port *sport, bool on) val &= ~UCR3_AWAKEN; writel(val, sport->port.membase + UCR3); - val = readl(sport->port.membase + UCR1); - if (on) - val |= UCR1_RTSDEN; - else - val &= ~UCR1_RTSDEN; - writel(val, sport->port.membase + UCR1); + if (sport->have_rtscts) { + val = readl(sport->port.membase + UCR1); + if (on) + val |= UCR1_RTSDEN; + else + val &= ~UCR1_RTSDEN; + writel(val, sport->port.membase + UCR1); + } } static int imx_serial_port_suspend_noirq(struct device *dev) From cfd96cbd607ab5d63a33cd63673221f4d572ea8c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 7 Jan 2018 15:05:49 +0100 Subject: [PATCH 150/770] spi: imx: do not access registers while clocks disabled commit d593574aff0ab846136190b1729c151c736727ec upstream. Since clocks are disabled except during message transfer clocks are also disabled when spi_imx_remove gets called. Accessing registers leads to a freeeze at least on a i.MX 6ULL. Enable clocks before disabling accessing the MXC_CSPICTRL register. Fixes: 9e556dcc55774 ("spi: spi-imx: only enable the clocks when we start to transfer a message") Signed-off-by: Stefan Agner Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-imx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index babb15f07995..d51ca243a028 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1496,12 +1496,23 @@ static int spi_imx_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct spi_imx_data *spi_imx = spi_master_get_devdata(master); + int ret; spi_bitbang_stop(&spi_imx->bitbang); + ret = clk_enable(spi_imx->clk_per); + if (ret) + return ret; + + ret = clk_enable(spi_imx->clk_ipg); + if (ret) { + clk_disable(spi_imx->clk_per); + return ret; + } + writel(0, spi_imx->base + MXC_CSPICTRL); - clk_unprepare(spi_imx->clk_ipg); - clk_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); + clk_disable_unprepare(spi_imx->clk_per); spi_imx_sdma_exit(spi_imx); spi_master_put(master); From e2d4cdb7b8a4b2f8d4dfc2ae067229595ec0190c Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 5 Jan 2018 15:34:54 +0100 Subject: [PATCH 151/770] iio: adc: stm32: fix scan of multiple channels with DMA commit 04e491ca9df60ffe8637d00d68e5ab8bc73b30d5 upstream. By default, watermark is set to '1'. Watermark is used to fine tune cyclic dma buffer period. In case watermark is left untouched (e.g. 1) and several channels are being scanned, buffer period is wrongly set (e.g. to 1 sample). As a consequence, data is never pushed to upper layer. Fix buffer period size, by taking scan channels number into account. Fixes: 2763ea0585c9 ("iio: adc: stm32: add optional dma support") Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/stm32-adc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 4df32cf1650e..172753b14a4f 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1314,6 +1314,7 @@ static int stm32_adc_set_watermark(struct iio_dev *indio_dev, unsigned int val) { struct stm32_adc *adc = iio_priv(indio_dev); unsigned int watermark = STM32_DMA_BUFFER_SIZE / 2; + unsigned int rx_buf_sz = STM32_DMA_BUFFER_SIZE; /* * dma cyclic transfers are used, buffer is split into two periods. @@ -1322,7 +1323,7 @@ static int stm32_adc_set_watermark(struct iio_dev *indio_dev, unsigned int val) * - one buffer (period) driver can push with iio_trigger_poll(). */ watermark = min(watermark, val * (unsigned)(sizeof(u16))); - adc->rx_buf_sz = watermark * 2; + adc->rx_buf_sz = min(rx_buf_sz, watermark * 2 * adc->num_conv); return 0; } From 0db5de4f4e1884c0ceb246bbcf450fbfd01881f7 Mon Sep 17 00:00:00 2001 From: Narcisa Ana Maria Vasile Date: Wed, 6 Dec 2017 18:57:58 +0200 Subject: [PATCH 152/770] iio: chemical: ccs811: Fix output of IIO_CONCENTRATION channels commit 8f114acd4e1a9cfa05b70bcc4219bc88197b5c9b upstream. in_concentration_raw should report, according to sysfs-bus-iio documentation, a "Raw (unscaled no offset etc.) percentage reading of a substance." Modify scale to convert from ppm/ppb to percentage: 1 ppm = 0.0001% 1 ppb = 0.0000001% There is no offset needed to convert the ppm/ppb to percentage, so remove offset from IIO_CONCENTRATION (IIO_MOD_CO2) channel. Cc'd stable to reduce chance of userspace breakage in the long run as we fix this wrong bit of ABI usage. Signed-off-by: Narcisa Ana Maria Vasile Reviewed-by: Matt Ranostay Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/chemical/ccs811.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 840a6cbd5f0f..8cfac6d1cec4 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -91,7 +91,6 @@ static const struct iio_chan_spec ccs811_channels[] = { .channel2 = IIO_MOD_CO2, .modified = 1, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_OFFSET) | BIT(IIO_CHAN_INFO_SCALE), .scan_index = 0, .scan_type = { @@ -245,24 +244,18 @@ static int ccs811_read_raw(struct iio_dev *indio_dev, switch (chan->channel2) { case IIO_MOD_CO2: *val = 0; - *val2 = 12834; + *val2 = 100; return IIO_VAL_INT_PLUS_MICRO; case IIO_MOD_VOC: *val = 0; - *val2 = 84246; - return IIO_VAL_INT_PLUS_MICRO; + *val2 = 100; + return IIO_VAL_INT_PLUS_NANO; default: return -EINVAL; } default: return -EINVAL; } - case IIO_CHAN_INFO_OFFSET: - if (!(chan->type == IIO_CONCENTRATION && - chan->channel2 == IIO_MOD_CO2)) - return -EINVAL; - *val = -400; - return IIO_VAL_INT; default: return -EINVAL; } From aad757b657176409e803b487bb76becf729a4a58 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:12:55 +0000 Subject: [PATCH 153/770] test_firmware: fix missing unlock on error in config_num_requests_store() commit a5e1923356505e46476c2fb518559b7a4d9d25b1 upstream. Add the missing unlock before return from function config_num_requests_store() in the error handling case. Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests") Signed-off-by: Wei Yongjun Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 64a4c76cba2b..e7008688769b 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -371,6 +371,7 @@ static ssize_t config_num_requests_store(struct device *dev, if (test_fw_config->reqs) { pr_err("Must call release_all_firmware prior to changing config\n"); rc = -EINVAL; + mutex_unlock(&test_fw_mutex); goto out; } mutex_unlock(&test_fw_mutex); From fced3c99e7264237aeba0b420e892cfa7bb36b9c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 17 Jan 2018 15:46:18 -0800 Subject: [PATCH 154/770] Input: synaptics-rmi4 - unmask F03 interrupts when port is opened commit 6abe534f0776d2437c8302f58d8eb5abd483e926 upstream. Currently we register the pass-through serio port when we probe the F03 RMI function, and then, in sensor configure phase, we unmask interrupts. Unfortunately this is too late, as other drivers are free probe devices attached to the serio port as soon as it is probed. Because interrupts are masked, the IO times out, which may result in not being able to detect trackpoints on the pass-through port. To fix the issue we implement open() and close() methods for the pass-through serio port and unmask interrupts from there. We also move creation of the pass-through port form probe to configure stage, as RMI driver does not enable transport interrupt until all functions are probed (we should change this, but this is a separate topic). We also try to clear the pending data before unmasking interrupts, because some devices like to spam the system with multiple 0xaa 0x00 announcements, which may interfere with us trying to query ID of the device. Fixes: c5e8848fc98e ("Input: synaptics-rmi4 - add support for F03") Reviewed-by: Lyude Paul Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f03.c | 64 ++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/drivers/input/rmi4/rmi_f03.c b/drivers/input/rmi4/rmi_f03.c index ad71a5e768dc..7ccbb370a9a8 100644 --- a/drivers/input/rmi4/rmi_f03.c +++ b/drivers/input/rmi4/rmi_f03.c @@ -32,6 +32,7 @@ struct f03_data { struct rmi_function *fn; struct serio *serio; + bool serio_registered; unsigned int overwrite_buttons; @@ -138,6 +139,37 @@ static int rmi_f03_initialize(struct f03_data *f03) return 0; } +static int rmi_f03_pt_open(struct serio *serio) +{ + struct f03_data *f03 = serio->port_data; + struct rmi_function *fn = f03->fn; + const u8 ob_len = f03->rx_queue_length * RMI_F03_OB_SIZE; + const u16 data_addr = fn->fd.data_base_addr + RMI_F03_OB_OFFSET; + u8 obs[RMI_F03_QUEUE_LENGTH * RMI_F03_OB_SIZE]; + int error; + + /* + * Consume any pending data. Some devices like to spam with + * 0xaa 0x00 announcements which may confuse us as we try to + * probe the device. + */ + error = rmi_read_block(fn->rmi_dev, data_addr, &obs, ob_len); + if (!error) + rmi_dbg(RMI_DEBUG_FN, &fn->dev, + "%s: Consumed %*ph (%d) from PS2 guest\n", + __func__, ob_len, obs, ob_len); + + return fn->rmi_dev->driver->set_irq_bits(fn->rmi_dev, fn->irq_mask); +} + +static void rmi_f03_pt_close(struct serio *serio) +{ + struct f03_data *f03 = serio->port_data; + struct rmi_function *fn = f03->fn; + + fn->rmi_dev->driver->clear_irq_bits(fn->rmi_dev, fn->irq_mask); +} + static int rmi_f03_register_pt(struct f03_data *f03) { struct serio *serio; @@ -148,6 +180,8 @@ static int rmi_f03_register_pt(struct f03_data *f03) serio->id.type = SERIO_PS_PSTHRU; serio->write = rmi_f03_pt_write; + serio->open = rmi_f03_pt_open; + serio->close = rmi_f03_pt_close; serio->port_data = f03; strlcpy(serio->name, "Synaptics RMI4 PS/2 pass-through", @@ -184,17 +218,27 @@ static int rmi_f03_probe(struct rmi_function *fn) f03->device_count); dev_set_drvdata(dev, f03); - - error = rmi_f03_register_pt(f03); - if (error) - return error; - return 0; } static int rmi_f03_config(struct rmi_function *fn) { - fn->rmi_dev->driver->set_irq_bits(fn->rmi_dev, fn->irq_mask); + struct f03_data *f03 = dev_get_drvdata(&fn->dev); + int error; + + if (!f03->serio_registered) { + error = rmi_f03_register_pt(f03); + if (error) + return error; + + f03->serio_registered = true; + } else { + /* + * We must be re-configuring the sensor, just enable + * interrupts for this function. + */ + fn->rmi_dev->driver->set_irq_bits(fn->rmi_dev, fn->irq_mask); + } return 0; } @@ -204,7 +248,7 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) struct rmi_device *rmi_dev = fn->rmi_dev; struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); struct f03_data *f03 = dev_get_drvdata(&fn->dev); - u16 data_addr = fn->fd.data_base_addr; + const u16 data_addr = fn->fd.data_base_addr + RMI_F03_OB_OFFSET; const u8 ob_len = f03->rx_queue_length * RMI_F03_OB_SIZE; u8 obs[RMI_F03_QUEUE_LENGTH * RMI_F03_OB_SIZE]; u8 ob_status; @@ -226,8 +270,7 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) drvdata->attn_data.size -= ob_len; } else { /* Grab all of the data registers, and check them for data */ - error = rmi_read_block(fn->rmi_dev, data_addr + RMI_F03_OB_OFFSET, - &obs, ob_len); + error = rmi_read_block(fn->rmi_dev, data_addr, &obs, ob_len); if (error) { dev_err(&fn->dev, "%s: Failed to read F03 output buffers: %d\n", @@ -266,7 +309,8 @@ static void rmi_f03_remove(struct rmi_function *fn) { struct f03_data *f03 = dev_get_drvdata(&fn->dev); - serio_unregister_port(f03->serio); + if (f03->serio_registered) + serio_unregister_port(f03->serio); } struct rmi_function_handler rmi_f03_handler = { From 01ab9886ea7815d41a4ee435524c3fce127e7290 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 17 Jan 2018 16:18:27 -0800 Subject: [PATCH 155/770] Input: synaptics-rmi4 - do not delete interrupt memory too early commit a1ab69021a584d952e6548a44b93760547b1b6b5 upstream. We want to free memory reserved for interrupt mask handling only after we free functions, as function drivers might want to mask interrupts. This is needed for the followup patch to the F03 that would implement unmasking and masking interrupts from the serio pass-through port open() and close() methods. Reviewed-by: Lyude Paul Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_driver.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 141ea228aac6..f5954981e9ee 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -41,6 +41,13 @@ void rmi_free_function_list(struct rmi_device *rmi_dev) rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, "Freeing function list\n"); + /* Doing it in the reverse order so F01 will be removed last */ + list_for_each_entry_safe_reverse(fn, tmp, + &data->function_list, node) { + list_del(&fn->node); + rmi_unregister_function(fn); + } + devm_kfree(&rmi_dev->dev, data->irq_memory); data->irq_memory = NULL; data->irq_status = NULL; @@ -50,13 +57,6 @@ void rmi_free_function_list(struct rmi_device *rmi_dev) data->f01_container = NULL; data->f34_container = NULL; - - /* Doing it in the reverse order so F01 will be removed last */ - list_for_each_entry_safe_reverse(fn, tmp, - &data->function_list, node) { - list_del(&fn->node); - rmi_unregister_function(fn); - } } static int reset_one_function(struct rmi_function *fn) From 04178b1709b6e98c929ae7ca2668c85334e886b2 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Jan 2018 09:10:02 +0000 Subject: [PATCH 156/770] x86/efi: Clarify that reset attack mitigation needs appropriate userspace commit a5c03c31af2291f13689d11760c0b59fb70c9a5a upstream. Some distributions have turned on the reset attack mitigation feature, which is designed to force the platform to clear the contents of RAM if the machine is shut down uncleanly. However, in order for the platform to be able to determine whether the shutdown was clean or not, userspace has to be configured to clear the MemoryOverwriteRequest flag on shutdown - otherwise the firmware will end up clearing RAM on every reboot, which is unnecessarily time consuming. Add some additional clarity to the kconfig text to reduce the risk of systems being configured this way. Signed-off-by: Matthew Garrett Acked-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 2b4c39fdfa91..86210f75d233 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -159,7 +159,10 @@ config RESET_ATTACK_MITIGATION using the TCG Platform Reset Attack Mitigation specification. This protects against an attacker forcibly rebooting the system while it still contains secrets in RAM, booting another OS and extracting the - secrets. + secrets. This should only be enabled when userland is configured to + clear the MemoryOverwriteRequest flag on clean shutdown after secrets + have been evicted, since otherwise it will trigger even on clean + reboots. endmenu From 0146985addc322ca518d27fecfe98ae5acda7c1e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 3 Feb 2018 17:39:25 +0100 Subject: [PATCH 157/770] Linux 4.14.17 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 90a4bffa8446..7ed993896dd5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = Petit Gorille From 16d3d10d29f753d0247d94f5e889e93dd13f56bb Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Thu, 14 Dec 2017 15:32:48 -0800 Subject: [PATCH 158/770] scripts/faddr2line: fix CROSS_COMPILE unset error commit 4cc90b4cc3d4955f79eae4f7f9d64e67e17b468e upstream. faddr2line hit var unbound error when CROSS_COMPILE isn't set since nounset option is set in bash script. Link: http://lkml.kernel.org/r/20171206013022.GA83929@sofia Fixes: 95a879825419 ("scripts/faddr2line: extend usage on generic arch") Signed-off-by: Liu Changcheng Reported-by: Richard Weinberger Reviewed-by: Richard Weinberger Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/faddr2line | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 39e07d8574dd..7721d5b2b0c0 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,10 +44,10 @@ set -o errexit set -o nounset -READELF="${CROSS_COMPILE}readelf" -ADDR2LINE="${CROSS_COMPILE}addr2line" -SIZE="${CROSS_COMPILE}size" -NM="${CROSS_COMPILE}nm" +READELF="${CROSS_COMPILE:-}readelf" +ADDR2LINE="${CROSS_COMPILE:-}addr2line" +SIZE="${CROSS_COMPILE:-}size" +NM="${CROSS_COMPILE:-}nm" command -v awk >/dev/null 2>&1 || die "awk isn't installed" command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" From 517bdccc3af650316046801fdcd828d7b6aa42ba Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 21:20:05 +1100 Subject: [PATCH 159/770] powerpc/64s: Wire up cpu_show_meltdown() commit fd6e440f20b1a4304553775fc55938848ff617c9 upstream. The recent commit 87590ce6e373 ("sysfs/cpu: Add vulnerability folder") added a generic folder and set of files for reporting information on CPU vulnerabilities. One of those was for meltdown: /sys/devices/system/cpu/vulnerabilities/meltdown This commit wires up that file for 64-bit Book3S powerpc. For now we default to "Vulnerable" unless the RFI flush is enabled. That may not actually be true on all hardware, further patches will refine the reporting based on the CPU/platform etc. But for now we default to being pessimists. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/setup_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cb782ac1c35d..fe418226df7f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -164,6 +164,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 935059cb9e40..4fac5cca19e1 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -884,4 +884,12 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ From d6eded6c94530cb2f359f1fbffcb5f92ee0fa8b2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 22:17:18 +1100 Subject: [PATCH 160/770] powerpc/64s: Allow control of RFI flush via debugfs commit 236003e6b5443c45c18e613d2b0d776a9f87540e upstream. Expose the state of the RFI flush (enabled/disabled) via debugfs, and allow it to be enabled/disabled at runtime. eg: $ cat /sys/kernel/debug/powerpc/rfi_flush 1 $ echo 0 > /sys/kernel/debug/powerpc/rfi_flush $ cat /sys/kernel/debug/powerpc/rfi_flush 0 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4fac5cca19e1..9527a4c6cbc2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -885,6 +886,35 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) rfi_flush_enable(enable); } +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (rfi_flush) From 4ce354deed2cc18b5b2175cf487011e375e89c75 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 22 Jan 2018 17:09:34 -0500 Subject: [PATCH 161/770] x86/retpoline: Remove the esp/rsp thunk commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2 It doesn't make sense to have an indirect call thunk with esp/rsp as retpoline code won't work correctly with the stack pointer register. Removing it will help compiler writers to catch error in case such a thunk call is emitted incorrectly. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Suggested-by: Jeff Law Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Tom Lendacky Cc: Kees Cook Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 1 - arch/x86/lib/retpoline.S | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 0927cdc4f946..1908214b9125 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,5 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -INDIRECT_THUNK(sp) #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index dfb2ba91b670..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) GENERATE_THUNK(_ASM_SI) GENERATE_THUNK(_ASM_DI) GENERATE_THUNK(_ASM_BP) -GENERATE_THUNK(_ASM_SP) #ifdef CONFIG_64BIT GENERATE_THUNK(r8) GENERATE_THUNK(r9) From 76bee09efb89b0a71566bec5f36d9e65d9034aef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:13 +0100 Subject: [PATCH 162/770] KVM: x86: Make indirect calls in emulator speculation safe commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab Replace the indirect calls with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index eca6a89f2326..fab073b19528 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "x86.h" #include "tss.h" @@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; call *%[fastop]" - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + asm("push %[flags]; popf; " CALL_NOSPEC + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } @@ -5350,9 +5351,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), ASM_CALL_CONSTRAINT + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); From c927726674c7dfe42e8de585b70ffbd9ec775fc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:14 +0100 Subject: [PATCH 163/770] KVM: VMX: Make indirect call speculation safe commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699 Replace indirect call with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a45063a9219c..213c092f2a9f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9118,14 +9118,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif "pushf\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" - "call *%[entry]\n\t" + CALL_NOSPEC : #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif ASM_CALL_CONSTRAINT : - [entry]"r"(entry), + THUNK_TARGET(entry), [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); From 86b5b1eb18aa49eedff2c9a9087fc48d03099844 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 25 Jan 2018 15:50:28 -0800 Subject: [PATCH 164/770] module/retpoline: Warn about missing retpoline in module commit caf7501a1b4ec964190f31f9c3f163de252273b8 There's a risk that a kernel which has full retpoline mitigations becomes vulnerable when a module gets loaded that hasn't been compiled with the right compiler or the right option. To enable detection of that mismatch at module load time, add a module info string "retpoline" at build time when the module was compiled with retpoline support. This only covers compiled C source, but assembler source or prebuilt object files are not checked. If a retpoline enabled kernel detects a non retpoline protected module at load time, print a warning and report it in the sysfs vulnerability file. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: jeyu@kernel.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- include/linux/module.h | 9 +++++++++ kernel/module.c | 11 +++++++++++ scripts/mod/modpost.c | 9 +++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 390b3dc3d438..4a39d7bb4bd8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -93,6 +94,19 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static bool spectre_v2_bad_module; + +#ifdef RETPOLINE +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vunerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} +#endif static void __init spec2_print_if_insecure(const char *reason) { @@ -278,6 +292,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif diff --git a/include/linux/module.h b/include/linux/module.h index fe5aa3736707..b1cc541f2ddf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef RETPOLINE +extern bool retpoline_module_ok(bool has_retpoline); +#else +static inline bool retpoline_module_ok(bool has_retpoline) +{ + return true; +} +#endif + #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..690c0651c40f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) } #endif /* CONFIG_LIVEPATCH */ +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 98314b400a95..54deaa1066cf 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); } +/* Cannot check for assembler */ +static void add_retpoline(struct buffer *b) +{ + buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); + buf_printf(b, "#endif\n"); +} + static void add_staging_flag(struct buffer *b, const char *name) { static const char *staging_dir = "drivers/staging"; @@ -2506,6 +2514,7 @@ int main(int argc, char **argv) err |= check_modname_len(mod); add_header(&buf, mod); add_intree_flag(&buf, !external_module); + add_retpoline(&buf); add_staging_flag(&buf, mod->name); err |= add_versions(&buf, mod); add_depends(&buf, mod, modules); From 76c4bd53969bcbaf19fe12218785411b390fd14b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:09 +0000 Subject: [PATCH 165/770] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 8 +++++--- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/scattered.c | 2 -- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea9a7dde62e5..70eddb3922ff 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -29,6 +29,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 25b9375c1484..7b25cf30d25d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -206,8 +206,6 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ @@ -319,6 +317,10 @@ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index e428e16dd822..c6a3af198294 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -71,6 +71,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d91ba04dd007..fb3a6de7440b 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -106,6 +106,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 372ba3fb400f..e5d66e93ed81 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -745,6 +745,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index d0e69769abfd..df11f5d604be 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -21,8 +21,6 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, From 343c91242d092852ab22411780f886317d7001aa Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:10 +0000 Subject: [PATCH 166/770] x86/cpufeatures: Add Intel feature bits for Speculation Control commit fc67dd70adb711a45d2ef34e12d1a8be75edde61 Add three feature bits exposed by new microcode on Intel CPUs for speculation control. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7b25cf30d25d..0a5107002716 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -320,6 +320,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* * BUG word(s) From 15ee82be40b6895edeb324435621ef1869aac173 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:11 +0000 Subject: [PATCH 167/770] x86/cpufeatures: Add AMD feature bits for Speculation Control commit 5d10cbc91d9eb5537998b65608441b592eec65e7 AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: Tom Lendacky Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0a5107002716..ae3212f14dec 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -269,6 +269,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ From a65710dc584c1c424eb358a1f6e8c01d706e9f81 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:12 +0000 Subject: [PATCH 168/770] x86/msr: Add definitions for new speculation control MSRs commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410 Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fa11fb1fa570..eb83ff1bae8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -39,6 +39,13 @@ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -57,6 +64,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e From d9b47a41160c71b217582b9876129e3fbf587225 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:13 +0000 Subject: [PATCH 169/770] x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown commit fec9434a12f38d3aeafeb75711b71d8a1fdef621 Also, for CPUs which don't speculate at all, don't report that they're vulnerable to the Spectre variants either. Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it for now, even though that could be done with a simple comparison, on the assumption that we'll have more to add. Based on suggestions from Dave Hansen and Alan Cox. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e5d66e93ed81..970ee06dc8aa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -853,6 +855,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -900,11 +937,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } fpu__init_system(c); From 0fd222b19766ba28cf6e45b6ad461f1a303831bd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:14 +0000 Subject: [PATCH 170/770] x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes commit a5b2966364538a0e68c9fa29bc0a3a1651799035 This doesn't refuse to load the affected microcodes; it just refuses to use the Spectre v2 mitigation features if they're detected, by clearing the appropriate feature bits. The AMD CPUID bits are handled here too, because hypervisors *may* have been exposing those bits even on Intel chips, for fine-grained control of what's available. It is non-trivial to use x86_match_cpu() for this table because that doesn't handle steppings. And the approach taken in commit bd9240a18 almost made me lose my lunch. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b720dacac051..5faa487d0477 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) ELF_HWCAP2 |= HWCAP2_RING3MWAIT; } +/* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_STIBP) || + cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || + cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_STIBP); + clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); + clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * From dbbbafce5380f2df32e142e5f08f17cf1b55b59c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:15 +0000 Subject: [PATCH 171/770] x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d Expose indirect_branch_prediction_barrier() for use in subsequent patches. [ tglx: Add IBPB status to spectre_v2 sysfs file ] Co-developed-by: KarimAllah Ahmed Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ae3212f14dec..07934b2f8df2 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -210,6 +210,8 @@ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4ad41087ce0e..34e384c7208f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) #endif } +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4a39d7bb4bd8..bac7a3558db2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -263,6 +263,13 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Filling RSB on context switch\n"); } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { + setup_force_cpu_cap(X86_FEATURE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } } #undef pr_fmt @@ -292,7 +299,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif From 67eb59b8ecfb319438706cee2cb67a3045b54494 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:36 +0100 Subject: [PATCH 172/770] x86/alternative: Print unadorned pointers commit 0e6c16c652cadaffd25a6bb326ec10da5bcec6b4 After commit ad67b74d2469 ("printk: hash addresses printed with %p") pointers are being hashed when printed. However, this makes the alternative debug output completely useless. Switch to %px in order to see the unadorned kernel pointers. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-2-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e0b97e4d1db5..14a52c7d23d4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -298,7 +298,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) tgt_rip = next_rip + o_dspl; n_dspl = tgt_rip - orig_insn; - DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); + DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); if (tgt_rip - orig_insn >= 0) { if (n_dspl - 2 <= 127) @@ -355,7 +355,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins add_nops(instr + (a->instrlen - a->padlen), a->padlen); local_irq_restore(flags); - DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); } @@ -376,7 +376,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insnbuf[MAX_PATCH_LEN]; - DPRINTK("alt table %p -> %p", start, end); + DPRINTK("alt table %px, -> %px", start, end); /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -400,14 +400,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, continue; } - DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", + DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", a->cpuid >> 5, a->cpuid & 0x1f, instr, a->instrlen, replacement, a->replacementlen, a->padlen); - DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); - DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); + DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; @@ -433,7 +433,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, a->instrlen - a->replacementlen); insnbuf_sz += a->instrlen - a->replacementlen; } - DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); + DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); text_poke_early(instr, insnbuf, insnbuf_sz); } From b955239cf4ea953eb5fca5409a5524daf20f642a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:37 +0100 Subject: [PATCH 173/770] x86/nospec: Fix header guards names commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9 ... to adhere to the _ASM_X86_ naming scheme. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 34e384c7208f..865192a2cc31 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NOSPEC_BRANCH_H__ -#define __NOSPEC_BRANCH_H__ +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ #include #include @@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ -#endif /* __NOSPEC_BRANCH_H__ */ +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ From 91ff9a75f360eb89e913fcc4b3a7fbb602c67cbb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:39 +0100 Subject: [PATCH 174/770] x86/bugs: Drop one "mitigation" from dmesg commit 55fa19d3e51f33d9cd4056d25836d93abf9438db Make [ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline into [ 0.031118] Spectre V2: Mitigation: Full generic retpoline to reduce the mitigation mitigations strings. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bac7a3558db2..c988a8acb0d5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -91,7 +91,7 @@ static const char *spectre_v2_strings[] = { }; #undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; static bool spectre_v2_bad_module; From 249b1f7a7f09c9c5243f8c592ea7fb13871968bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2018 15:45:14 +0100 Subject: [PATCH 175/770] x86/cpu/bugs: Make retpoline module warning conditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02 If sysfs is disabled and RETPOLINE not defined: arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used [-Wunused-variable] static bool spectre_v2_bad_module; Hide it. Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c988a8acb0d5..b0b7157df89e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,9 +94,10 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -static bool spectre_v2_bad_module; #ifdef RETPOLINE +static bool spectre_v2_bad_module; + bool retpoline_module_ok(bool has_retpoline) { if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) @@ -106,6 +107,13 @@ bool retpoline_module_ok(bool has_retpoline) spectre_v2_bad_module = true; return false; } + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } #endif static void __init spec2_print_if_insecure(const char *reason) @@ -300,7 +308,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", - spectre_v2_bad_module ? " - vulnerable module loaded" : ""); + boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + spectre_v2_module_string()); } #endif From 7f3e0daa9e125d3d4d7720136370303009edbdeb Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jan 2018 16:24:32 +0000 Subject: [PATCH 176/770] x86/cpufeatures: Clean up Spectre v2 related CPUID flags commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2 We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", "ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them as the user-visible bits. When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP bit is set, set the AMD STIBP that's used for the generic hardware capability. Hide the rest from /proc/cpuinfo by putting "" in the comments. Including RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are patches to make the sysfs vulnerabilities information non-readable by non-root, and the same should apply to all information about which mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. The feature bit for whether IBPB is actually used, which is needed for ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. Originally-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 18 ++++++++-------- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 7 +++---- arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++--------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 07934b2f8df2..73b5fff159a4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,14 +203,14 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -271,9 +271,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -325,8 +325,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 865192a2cc31..19ecb5446b30 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) "movl %[val], %%eax\n\t" "movl $0, %%edx\n\t" "wrmsr", - X86_FEATURE_IBPB) + X86_FEATURE_USE_IBPB) : : [msr] "i" (MSR_IA32_PRED_CMD), [val] "i" (PRED_CMD_IBPB) : "eax", "ecx", "edx", "memory"); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b0b7157df89e..32d8e6cdc09e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -273,9 +273,8 @@ retpoline_auto: } /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || - boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { - setup_force_cpu_cap(X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Enabling Indirect Branch Prediction Barrier\n"); } } @@ -308,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5faa487d0477..0c8b916abced 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -175,17 +175,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); - if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_STIBP) || - cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || - cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + clear_cpu_cap(c, X86_FEATURE_IBRS); + clear_cpu_cap(c, X86_FEATURE_IBPB); clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); - clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); } /* From 52d78bce4965e685eea60763076f688272f963ea Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:33 +0000 Subject: [PATCH 177/770] x86/retpoline: Simplify vmexit_fill_RSB() commit 1dde7415e99933bb7293d6b2843752cbdb43ec11 Simplify it to call an asm-function instead of pasting 41 insn bytes at every call site. Also, add alignment to the macro as suggested here: https://support.google.com/faqs/answer/7625886 [dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 ++ arch/x86/include/asm/nospec-branch.h | 70 +++------------------------ arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 56 +++++++++++++++++++++ 6 files changed, 71 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 60c4c342316c..2a35b1e0fb90 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index be6b66464f6a..4527630cc21e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -495,7 +495,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 1908214b9125..4d111616524b 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,4 +38,7 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ecb5446b30..df4ececa6ebf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,50 +7,6 @@ #include #include -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * We define a CPP macro such that it can be used from both .S files and - * inline assembly. It's possible to do a .macro and then include that - * from C via asm(".include ") but let's not go there. - */ - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -/* - * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - #ifdef __ASSEMBLY__ /* @@ -121,17 +77,10 @@ #endif .endm - /* - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP - * monstrosity above, manually. - */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr -.Lskip_rsb_\@: + ALTERNATIVE "", "call __clear_rsb", \ftr #endif .endm @@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d435c89875c1..d0a3170e6804 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961e678a..480edc3a5e03 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,6 +7,7 @@ #include #include #include +#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) From 9f8955f96bd520efc7def658ae61b67db20b1b46 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:34 +0000 Subject: [PATCH 178/770] x86/speculation: Simplify indirect_branch_prediction_barrier() commit 64e16720ea0879f8ab4547e3b9758936d483909b Make it all a function which does the WRMSR instead of having a hairy inline asm. [dwmw2: export it, fix CONFIG_RETPOLINE issues] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 13 ++++--------- arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/bugs.c | 6 ++++++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index df4ececa6ebf..d15d471348b8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -164,15 +164,10 @@ static inline void vmexit_fill_RSB(void) static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_input("", + "call __ibp_barrier", + X86_FEATURE_USE_IBPB, + ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9c18da64daa9..881ca3b1d6d4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -970,4 +970,7 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); + +void __ibp_barrier(void); + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 32d8e6cdc09e..3bfb2b23d79c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -311,3 +311,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, spectre_v2_module_string()); } #endif + +void __ibp_barrier(void) +{ + __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); +} +EXPORT_SYMBOL_GPL(__ibp_barrier); From c8aa5cd906a583332f831b88a9d7177f0427b986 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Wed, 10 Jan 2018 17:41:10 +0100 Subject: [PATCH 179/770] auxdisplay: img-ascii-lcd: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 09c479f7f1fbfaf848e5813996793966cd50be81 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/auxdisplay/img-ascii-lcd.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/img-ascii-lcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index a9020f82eea7..58403052514f 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -443,3 +443,7 @@ static struct platform_driver img_ascii_lcd_driver = { .remove = img_ascii_lcd_remove, }; module_platform_driver(img_ascii_lcd_driver); + +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); +MODULE_AUTHOR("Paul Burton "); +MODULE_LICENSE("GPL"); From ce094a80f019049a7bfb6860a91f3856bd53ec3b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 13 Nov 2017 11:29:55 +0100 Subject: [PATCH 180/770] iio: adc/accel: Fix up module licenses commit 9a0ebbc93547d88f422905c34dcceebe928f3e9e upstream. The module license checker complains about these two so just fix it up. They are both GPLv2, both written by me or using code I extracted while refactoring from the GPLv2 drivers. Cc: Randy Dunlap Reported-by: Randy Dunlap Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxsd9-i2c.c | 3 +++ drivers/iio/adc/qcom-vadc-common.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c index 98fbb628d5bd..38411e1c155b 100644 --- a/drivers/iio/accel/kxsd9-i2c.c +++ b/drivers/iio/accel/kxsd9-i2c.c @@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = { .id_table = kxsd9_i2c_id, }; module_i2c_driver(kxsd9_i2c_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface"); diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c index 47d24ae5462f..fe3d7826783c 100644 --- a/drivers/iio/adc/qcom-vadc-common.c +++ b/drivers/iio/adc/qcom-vadc-common.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "qcom-vadc-common.h" @@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value) return __ffs64(value / VADC_DECIMATION_MIN); } EXPORT_SYMBOL(qcom_vadc_decimation_from_dt); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Qualcomm ADC common functionality"); From bc484da3e18c13b15badcc861e816514ccd86f8c Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:58:03 -0800 Subject: [PATCH 181/770] pinctrl: pxa: pxa2xx: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0b9335cbd38e3bd2025bcc23b5758df4ac035f75 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/pinctrl/pxa/pinctrl-pxa2xx.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pxa/pinctrl-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c index 866aa3ce1ac9..6cf0006d4c8d 100644 --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); + +MODULE_AUTHOR("Robert Jarzmik "); +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); +MODULE_LICENSE("GPL v2"); From 76f06358ec400cd5f7c8260fd290e7a7ee20fbcf Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Sun, 19 Nov 2017 23:45:49 -0800 Subject: [PATCH 182/770] ASoC: pcm512x: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 0cab20cec0b663b7be8e2be5998d5a4113647f86 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/codecs/snd-soc-pcm512x-spi.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm512x-spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 712ed6598c48..ebdf9bd5a64c 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { }; module_spi_driver(pcm512x_spi_driver); + +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL v2"); From 2e9521197f08d8cec826d3a5ff04131e40a79120 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 27 Nov 2017 17:22:25 -0600 Subject: [PATCH 183/770] KVM: nVMX: Eliminate vmcs02 pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit de3a0021a60635de96aa92713c1a31a96747d72c The potential performance advantages of a vmcs02 pool have never been realized. To simplify the code, eliminate the pool. Instead, a single vmcs02 is allocated per VCPU when the VCPU enters VMX operation. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Jim Mattson Signed-off-by: Mark Kanda Reviewed-by: Ameya More Reviewed-by: David Hildenbrand Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 146 +++++++-------------------------------------- 1 file changed, 23 insertions(+), 123 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 213c092f2a9f..95546c5926e1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -182,7 +182,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -223,7 +222,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -406,13 +405,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -437,15 +429,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -6963,94 +6955,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } -/* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified @@ -7232,6 +7136,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + vmx->nested.vmcs02.vmcs = alloc_vmcs(); + vmx->nested.vmcs02.shadow_vmcs = NULL; + if (!vmx->nested.vmcs02.vmcs) + goto out_vmcs02; + loaded_vmcs_init(&vmx->nested.vmcs02); + if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); @@ -7254,9 +7164,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7271,6 +7178,9 @@ out_cached_vmcs12: free_page((unsigned long)vmx->nested.msr_bitmap); out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + +out_vmcs02: return -ENOMEM; } @@ -7423,7 +7333,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7439,7 +7349,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7482,8 +7392,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } @@ -8395,10 +8303,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) /* * The host physical addresses of some pages of guest memory - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU - * may write to these pages via their host physical address while - * L2 is running, bypassing any address-translation-based dirty - * tracking (e.g. EPT write protection). + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. @@ -10894,20 +10803,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct loaded_vmcs *vmcs02; u32 msr_entry_idx; u32 exit_qual; - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - vmx_switch_vmcs(vcpu, vmcs02); + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { @@ -11522,10 +11426,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); From 92f4b68ed14c7731cadd5bf7501a98da5302c093 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2018 12:16:15 +0100 Subject: [PATCH 184/770] KVM: VMX: introduce alloc_loaded_vmcs commit f21f165ef922c2146cc5bdc620f542953c41714b Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also allocate an MSR bitmap there. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 95546c5926e1..bca85821b0f0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3814,11 +3814,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3837,6 +3832,22 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + return 0; +} + static void free_kvm_area(void) { int cpu; @@ -7135,12 +7146,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + int r; - vmx->nested.vmcs02.vmcs = alloc_vmcs(); - vmx->nested.vmcs02.shadow_vmcs = NULL; - if (!vmx->nested.vmcs02.vmcs) + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) goto out_vmcs02; - loaded_vmcs_init(&vmx->nested.vmcs02); if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = @@ -9535,13 +9545,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) + err = alloc_loaded_vmcs(&vmx->vmcs01); + if (err < 0) goto free_msrs; - loaded_vmcs_init(vmx->loaded_vmcs); + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; From a358df03279e806f8f4217eacf813c5b6c3578ee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:39 -0600 Subject: [PATCH 185/770] objtool: Improve retpoline alternative handling commit a845c7cf4b4cb5e9e3b2823867892b27646f3a98 Currently objtool requires all retpolines to be: a) patched in with alternatives; and b) annotated with ANNOTATE_NOSPEC_ALTERNATIVE. If you forget to do both of the above, objtool segfaults trying to dereference a NULL 'insn->call_dest' pointer. Avoid that situation and print a more helpful error message: quirks.o: warning: objtool: efi_delete_dummy_variable()+0x99: unsupported intra-function call quirks.o: warning: objtool: If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE. Future improvements can be made to make objtool smarter with respect to retpolines, but this is a good incremental improvement for now. Reported-and-tested-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/819e50b6d9c2e1a22e34c1a636c0b2057cc8c6e5.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f40d46e24bcc..bc3490d929ff 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); - /* - * FIXME: Thanks to retpolines, it's now considered - * normal for a function to call within itself. So - * disable this warning for now. - */ -#if 0 - if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at offset 0x%lx", - insn->sec, insn->offset, dest_off); + + if (!insn->call_dest && !insn->ignore) { + WARN_FUNC("unsupported intra-function call", + insn->sec, insn->offset); + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); return -1; } -#endif + } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -648,6 +644,8 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; + insn->ignore = orig_insn->ignore_alts; + if (insn->type != INSN_JUMP_CONDITIONAL && insn->type != INSN_JUMP_UNCONDITIONAL) continue; @@ -729,10 +727,6 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } - /* Ignore retpoline alternatives. */ - if (orig_insn->ignore_alts) - continue; - new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -1089,11 +1083,11 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_call_destinations(file); + ret = add_special_section_alts(file); if (ret) return ret; - ret = add_special_section_alts(file); + ret = add_call_destinations(file); if (ret) return ret; @@ -1720,10 +1714,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, insn->visited = true; - list_for_each_entry(alt, &insn->alts, list) { - ret = validate_branch(file, alt->insn, state); - if (ret) - return 1; + if (!insn->ignore_alts) { + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } } switch (insn->type) { From 3e04e09855c5cbc4f18c2d14bec326b1ff6f11f6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:40 -0600 Subject: [PATCH 186/770] objtool: Add support for alternatives at the end of a section commit 17bc33914bcc98ba3c6b426fd1c49587a25c0597 Now that the previous patch gave objtool the ability to read retpoline alternatives, it shows a new warning: arch/x86/entry/entry_64.o: warning: objtool: .entry_trampoline: don't know how to handle alternatives at end of section This is due to the JMP_NOSPEC in entry_SYSCALL_64_trampoline(). Previously, objtool ignored this situation because it wasn't needed, and it would have required a bit of extra code. Now that this case exists, add proper support for it. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2a30a3c2158af47d891a76e69bb1ef347e0443fd.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 51 +++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index bc3490d929ff..9cd028aa1509 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -594,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; unsigned long dest_off; last_orig_insn = NULL; @@ -610,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file, last_orig_insn = insn; } - if (!next_insn_same_sec(file, last_orig_insn)) { - WARN("%s: don't know how to handle alternatives at end of section", - special_alt->orig_sec->name); - return -1; - } + if (next_insn_same_sec(file, last_orig_insn)) { + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + clear_insn_state(&fake_jump->state); - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { - WARN("malloc failed"); - return -1; + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + fake_jump->ignore = true; } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - clear_insn_state(&fake_jump->state); - - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = -1; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->ignore = true; if (!special_alt->new_len) { + if (!fake_jump) { + WARN("%s: empty alternative at end of section", + special_alt->orig_sec->name); + return -1; + } + *new_insn = fake_jump; return 0; } @@ -654,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = insn->offset + insn->len + insn->immediate; - if (dest_off == special_alt->new_off + special_alt->new_len) + if (dest_off == special_alt->new_off + special_alt->new_len) { + if (!fake_jump) { + WARN("%s: alternative jump to end of section", + special_alt->orig_sec->name); + return -1; + } insn->jump_dest = fake_jump; + } if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -670,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file, return -1; } - list_add(&fake_jump->list, &last_new_insn->list); + if (fake_jump) + list_add(&fake_jump->list, &last_new_insn->list); return 0; } From 23a4ca4e749a300a29abff1e304c55b55b6e59a3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 29 Jan 2018 22:00:41 -0600 Subject: [PATCH 187/770] objtool: Warn on stripped section symbol commit 830c1e3d16b2c1733cd1ec9c8f4d47a398ae31bc With the following fix: 2a0098d70640 ("objtool: Fix seg fault with gold linker") ... a seg fault was avoided, but the original seg fault condition in objtool wasn't fixed. Replace the seg fault with an error message. Suggested-by: Ingo Molnar Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/dc4585a70d6b975c99fc51d1957ccdde7bd52f3a.1517284349.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/orc_gen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e61fe703197b..18384d9be4e1 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; + if (!insn_sec->sym) { + WARN("missing symbol for section %s", insn_sec->name); + return -1; + } + /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); From 04e073072d1494ce9f60cc2db68fb79aee4283dd Mon Sep 17 00:00:00 2001 From: William Grant Date: Tue, 30 Jan 2018 22:22:55 +1100 Subject: [PATCH 188/770] x86/mm: Fix overlap of i386 CPU_ENTRY_AREA with FIX_BTMAP commit 55f49fcb879fbeebf2a8c1ac7c9e6d90df55f798 Since commit 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap"), i386's CPU_ENTRY_AREA has been mapped to the memory area just below FIXADDR_START. But already immediately before FIXADDR_START is the FIX_BTMAP area, which means that early_ioremap can collide with the entry area. It's especially bad on PAE where FIX_BTMAP_BEGIN gets aligned to exactly match CPU_ENTRY_AREA_BASE, so the first early_ioremap slot clobbers the IDT and causes interrupts during early boot to reset the system. The overlap wasn't a problem before the CPU entry area was introduced, as the fixmap has classically been preceded by the pkmap or vmalloc areas, neither of which is used until early_ioremap is out of the picture. Relocate CPU_ENTRY_AREA to below FIX_BTMAP, not just below the permanent fixmap area. Fixes: commit 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Signed-off-by: William Grant Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/7041d181-a019-e8b9-4e4e-48215f841e2c@canonical.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fixmap.h | 6 ++++-- arch/x86/include/asm/pgtable_32_types.h | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 64c4a30e0d39..e203169931c7 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -137,8 +137,10 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index ce245b0cdfca..0777e18a1d23 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ */ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ + & PMD_MASK) #define PKMAP_BASE \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) From 74ae346691dd34d49a5e733af40ca25a4dd42245 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 30 Jan 2018 14:13:50 +0800 Subject: [PATCH 189/770] x86/spectre: Check CONFIG_RETPOLINE in command line parser commit 9471eee9186a46893726e22ebb54cade3f9bc043 The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is enabled. As a consequence it fails to emit the appropriate warning and sets feature flags which have no effect at all. Add the missing IS_ENABLED() check. Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: Tomohiro Cc: dave.hansen@intel.com Cc: bp@alien8.de Cc: arjan@linux.intel.com Cc: dwmw@amazon.co.uk Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3bfb2b23d79c..400c34ec9179 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -213,10 +213,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; From 8459ebcbd6ecbbefd6455315985dc0da1a450b13 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 190/770] x86/entry/64: Remove the SYSCALL64 fast path commit 21d375b6b34ff511a507de27bf316b3dde6938d9 The SYCALLL64 fast path was a nice, if small, optimization back in the good old days when syscalls were actually reasonably fast. Now there is PTI to slow everything down, and indirect branches are verboten, making everything messier. The retpoline code in the fast path is particularly nasty. Just get rid of the fast path. The slow path is barely slower. [ tglx: Split out the 'push all extra regs' part ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 117 ------------------------------------ arch/x86/entry/syscall_64.c | 7 +-- 2 files changed, 2 insertions(+), 122 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4527630cc21e..dd103ed7a622 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -237,86 +237,11 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) TRACE_IRQS_OFF - /* - * If we need to do entry work or if we guess we'll need to do - * exit work, go straight to the slow path. - */ - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz entry_SYSCALL64_slow_path - -entry_SYSCALL_64_fastpath: - /* - * Easy case: enable interrupts and issue the syscall. If the syscall - * needs pt_regs, we'll call a stub that disables interrupts again - * and jumps to the slow path. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx - - /* - * This call instruction is handled specially in stub_ptregs_64. - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ -#ifdef CONFIG_RETPOLINE - movq sys_call_table(, %rax, 8), %rax - call __x86_indirect_thunk_rax -#else - call *sys_call_table(, %rax, 8) -#endif -.Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -1: - - /* - * If we get here, then we know that pt_regs is clean for SYSRET64. - * If we see that no exit work is required (which we are required - * to check with IRQs off), then we can go straight to SYSRET64. - */ - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz 1f - - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON /* user mode is traced as IRQs on */ - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - addq $6*8, %rsp /* skip extra regs -- they were preserved */ - UNWIND_HINT_EMPTY - jmp .Lpop_c_regs_except_rcx_r11_and_sysret - -1: - /* - * The fast path looked good when we started, but something changed - * along the way and we need to switch to the slow path. Calling - * raise(3) will trigger this, for example. IRQs are off. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - SAVE_EXTRA_REGS - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - jmp return_from_SYSCALL_64 - -entry_SYSCALL64_slow_path: /* IRQs are off. */ SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -return_from_SYSCALL_64: TRACE_IRQS_IRETQ /* we're about to change IF */ /* @@ -389,7 +314,6 @@ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ UNWIND_HINT_EMPTY POP_EXTRA_REGS -.Lpop_c_regs_except_rcx_r11_and_sysret: popq %rsi /* skip r11 */ popq %r10 popq %r9 @@ -420,47 +344,6 @@ syscall_return_via_sysret: USERGS_SYSRET64 END(entry_SYSCALL_64) -ENTRY(stub_ptregs_64) - /* - * Syscalls marked as needing ptregs land here. - * If we are on the fast path, we need to save the extra regs, - * which we achieve by trying again on the slow path. If we are on - * the slow path, the extra regs are already saved. - * - * RAX stores a pointer to the C function implementing the syscall. - * IRQs are on. - */ - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) - jne 1f - - /* - * Called from fast path -- disable IRQs again, pop return address - * and jump to slow path - */ - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - popq %rax - UNWIND_HINT_REGS extra=0 - jmp entry_SYSCALL64_slow_path - -1: - JMP_NOSPEC %rax /* Called from C */ -END(stub_ptregs_64) - -.macro ptregs_stub func -ENTRY(ptregs_\func) - UNWIND_HINT_FUNC - leaq \func(%rip), %rax - jmp stub_ptregs_64 -END(ptregs_\func) -.endm - -/* Instantiate ptregs_stub for each ptregs-using syscall */ -#define __SYSCALL_64_QUAL_(sym) -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) -#include - /* * %rdi: prev task * %rsi: next task diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 9c09775e589d..c176d2fab1da 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -7,14 +7,11 @@ #include #include -#define __SYSCALL_64_QUAL_(sym) sym -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym - -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); From 90522d30b223fe35957fc8c8ee3b7a63dc9844b1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:49 -0800 Subject: [PATCH 191/770] x86/entry/64: Push extra regs right away commit d1f7732009e0549eedf8ea1db948dc37be77fd46 With the fast path removed there is no point in splitting the push of the normal and the extra register set. Just push the extra regs right away. [ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Borislav Petkov Cc: Linus Torvalds Cc: Kernel Hardening Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index dd103ed7a622..16e2d72e79a0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -232,13 +232,17 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - UNWIND_HINT_REGS extra=0 + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS TRACE_IRQS_OFF /* IRQs are off. */ - SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ From fb9f2d9ab83d9ab1d5286db9432266b353c201c1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 28 Jan 2018 10:38:50 -0800 Subject: [PATCH 192/770] x86/asm: Move 'status' from thread_struct to thread_info commit 37a8f7c38339b22b69876d6f5a0ab851565284e3 The TS_COMPAT bit is very hot and is accessed from code paths that mostly also touch thread_info::flags. Move it into struct thread_info to improve cache locality. The only reason it was in thread_struct is that there was a brief period during which arch-specific fields were not allowed in struct thread_info. Linus suggested further changing: ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); to: if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED))) ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); on the theory that frequently dirtying the cacheline even in pure 64-bit code that never needs to modify status hurts performance. That could be a reasonable followup patch, but I suspect it matters less on top of this patch. Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Kernel Hardening Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 ++-- arch/x86/include/asm/processor.h | 2 -- arch/x86/include/asm/syscall.h | 6 +++--- arch/x86/include/asm/thread_info.h | 3 ++- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..4858911eea9d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -208,7 +208,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -306,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 881ca3b1d6d4..c57c6e77c29f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -459,8 +459,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5..03eedc21246d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 00223333821a..eda3b6823ca4 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -55,6 +55,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c75466232016..9eb448c7859d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -557,7 +557,7 @@ static void __set_personality_x32(void) * Pretend to come from a x32 execve. */ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; #endif } @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; #endif } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f37d18124648..ed5c4cdf0a34 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b9e00e8f1c9b..4cdc0b27ec82 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI From e72041f70c3c7d149a8c444262173b7e70e3d4f8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 29 Jan 2018 17:02:16 -0800 Subject: [PATCH 193/770] Documentation: Document array_index_nospec commit f84a56f73dddaeac1dba8045b007f742f61cd2da Document the rationale and usage of the new array_index_nospec() helper. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: linux-arch@vger.kernel.org Cc: Jonathan Corbet Cc: Peter Zijlstra Cc: gregkh@linuxfoundation.org Cc: kernel-hardening@lists.openwall.com Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/speculation.txt | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 Documentation/speculation.txt diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt new file mode 100644 index 000000000000..e9e6cbae2841 --- /dev/null +++ b/Documentation/speculation.txt @@ -0,0 +1,90 @@ +This document explains potential effects of speculation, and how undesirable +effects can be mitigated portably using common APIs. + +=========== +Speculation +=========== + +To improve performance and minimize average latencies, many contemporary CPUs +employ speculative execution techniques such as branch prediction, performing +work which may be discarded at a later stage. + +Typically speculative execution cannot be observed from architectural state, +such as the contents of registers. However, in some cases it is possible to +observe its impact on microarchitectural state, such as the presence or +absence of data in caches. Such state may form side-channels which can be +observed to extract secret information. + +For example, in the presence of branch prediction, it is possible for bounds +checks to be ignored by code which is speculatively executed. Consider the +following code: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else + return array[index]; + } + +Which, on arm64, may be compiled to an assembly sequence such as: + + CMP , #MAX_ARRAY_ELEMS + B.LT less + MOV , #0 + RET + less: + LDR , [, ] + RET + +It is possible that a CPU mis-predicts the conditional branch, and +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This +value will subsequently be discarded, but the speculated load may affect +microarchitectural state which can be subsequently measured. + +More complex sequences involving multiple dependent memory accesses may +result in sensitive information being leaked. Consider the following +code, building on the prior example: + + int load_dependent_arrays(int *arr1, int *arr2, int index) + { + int val1, val2, + + val1 = load_array(arr1, index); + val2 = load_array(arr2, val1); + + return val2; + } + +Under speculation, the first call to load_array() may return the value +of an out-of-bounds address, while the second call will influence +microarchitectural state dependent on this value. This may provide an +arbitrary read primitive. + +==================================== +Mitigating speculation side-channels +==================================== + +The kernel provides a generic API to ensure that bounds checks are +respected even under speculation. Architectures which are affected by +speculation-based side-channels are expected to implement these +primitives. + +The array_index_nospec() helper in can be used to +prevent information from being leaked via side-channels. + +A call to array_index_nospec(index, size) returns a sanitized index +value that is bounded to [0, size) even under cpu speculation +conditions. + +This can be used to protect the earlier load_array() example: + + int load_array(int *array, unsigned int index) + { + if (index >= MAX_ARRAY_ELEMS) + return 0; + else { + index = array_index_nospec(index, MAX_ARRAY_ELEMS); + return array[index]; + } + } From 478742cf80b732f6c3227c572184e18b6e8bcb05 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:22 -0800 Subject: [PATCH 194/770] array_index_nospec: Sanitize speculative array de-references commit f3804203306e098dae9ca51540fcd5eb700d7f40 array_index_nospec() is proposed as a generic mechanism to mitigate against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary checks via speculative execution. The array_index_nospec() implementation is expected to be safe for current generation CPUs across multiple architectures (ARM, x86). Based on an original implementation by Linus Torvalds, tweaked to remove speculative flows by Alexei Starovoitov, and tweaked again by Linus to introduce an x86 assembly implementation for the mask generation. Co-developed-by: Linus Torvalds Co-developed-by: Alexei Starovoitov Suggested-by: Cyril Novikov Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/linux/nospec.h diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 000000000000..b99bced39ac2 --- /dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * When @index is out of bounds (@index >= @size), the sign bit will be + * set. Extend the sign bit to all bits and invert, giving a result of + * zero for an out of bounds index, or ~0 if within bounds [0, @size). + */ +#ifndef array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) + return 0; + + /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. + */ + OPTIMIZER_HIDE_VAR(index); + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); +} +#endif + +/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: + * + * if (index < size) { + * index = array_index_nospec(index, size); + * val = array[index]; + * } + * + * ...if the CPU speculates past the bounds check then + * array_index_nospec() will clamp the index within the range of [0, + * size). + */ +#define array_index_nospec(index, size) \ +({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ + _i &= _mask; \ + _i; \ +}) +#endif /* _LINUX_NOSPEC_H */ From 4820d42835b2741fc18b8f6135c90dc1128974bc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:28 -0800 Subject: [PATCH 195/770] x86: Implement array_index_mask_nospec commit babdde2698d482b6c0de1eab4f697cf5856c5859 array_index_nospec() uses a mask to sanitize user controllable array indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask otherwise. While the default array_index_mask_nospec() handles the carry-bit from the (index - size) result in software. The x86 array_index_mask_nospec() does the same, but the carry-bit is handled in the processor CF flag without conditional instructions in the control flow. Suggested-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 01727dbc294a..551ccbe07c0f 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,6 +24,30 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else From 437ac7b6868da05620764c143c0e4f7aa91e8fc7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:33 -0800 Subject: [PATCH 196/770] x86: Introduce barrier_nospec commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a Rename the open coded form of this instruction sequence from rdtsc_ordered() into a generic barrier primitive, barrier_nospec(). One of the mitigations for Spectre variant1 vulnerabilities is to fence speculative execution after successfully validating a bounds check. I.e. force the result of a bounds check to resolve in the instruction pipeline to ensure speculative execution honors that result before potentially operating on out-of-bounds data. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 4 ++++ arch/x86/include/asm/msr.h | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 551ccbe07c0f..1e7c955b6303 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -48,6 +48,10 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, /* Override the default implementation from linux/nospec.h. */ #define array_index_mask_nospec array_index_mask_nospec +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 07962f5f6fba..30df295f6d94 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } From 31c5b332189ec6692ddd92269261db93c835a4d9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:39 -0800 Subject: [PATCH 197/770] x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd For __get_user() paths, do not allow the kernel to speculate on the value of a user controlled pointer. In addition to the 'stac' instruction for Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the access_ok() result to resolve in the pipeline before the CPU might take any speculative action on the pointer value. Given the cost of 'stac' the speculation barrier is placed after 'stac' to hopefully overlap the cost of disabling SMAP with the cost of flushing the instruction pipeline. Since __get_user is a major kernel interface that deals with user controlled pointers, the __uaccess_begin_nospec() mechanism will prevent speculative execution past an access_ok() permission check. While speculative execution past access_ok() is not enough to lead to a kernel memory leak, it is a necessary precondition. To be clear, __uaccess_begin_nospec() is addressing a class of potential problems near __get_user() usages. Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used to protect __get_user(), pointer masking similar to array_index_nospec() will be used for get_user() since it incorporates a bounds check near the usage. uaccess_try_nospec provides the same mechanism for get_user_try. No functional changes. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 574dff4d2913..663e9bde9fc9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -124,6 +124,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ From 2406eb9f4568098f2353f1c9267dbe7e7f4a546b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:44 -0800 Subject: [PATCH 198/770] x86/usercopy: Replace open coded stac/clac with __uaccess_{begin, end} commit b5c4ae4f35325d520b230bab6eb3310613b72ac1 In preparation for converting some __uaccess_begin() instances to __uacess_begin_nospec(), make sure all 'from user' uaccess paths are using the _begin(), _end() helpers rather than open-coded stac() and clac(). No functional changes. Suggested-by: Ingo Molnar Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/usercopy_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1b377f734e64..de3436719e26 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -331,12 +331,12 @@ do { \ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_user_ll); @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); @@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); From 5f40de41ccae0c17105d8336e44d704dc5500a50 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:49 -0800 Subject: [PATCH 199/770] x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec commit 304ec1b050310548db33063e567123fae8fd0301 Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. __uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the limit check is far away from the user pointer de-reference. In those cases a barrier_nospec() prevents speculation with a potential pointer to privileged memory. uaccess_try_nospec covers get_user_try. Suggested-by: Linus Torvalds Suggested-by: Andi Kleen Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/include/asm/uaccess_32.h | 6 +++--- arch/x86/include/asm/uaccess_64.h | 12 ++++++------ arch/x86/lib/usercopy_32.c | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 663e9bde9fc9..aae77eb8491c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -450,7 +450,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -557,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -591,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 72950401b223..ba2dc1930630 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)to, from, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)to, from, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)to, from, ret, "l", "k", "=r", 4); __uaccess_end(); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f07ef3c575db..62546b3a398e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index de3436719e26..7add8ba06887 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -331,7 +331,7 @@ do { \ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - __uaccess_begin(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); From edaf1538d3a5620862544a20aab14cb601787e1a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:54 -0800 Subject: [PATCH 200/770] x86/get_user: Use pointer masking to limit speculation commit c7f631cb07e7da06ac1d231ca178452339e32a94 Quoting Linus: I do think that it would be a good idea to very expressly document the fact that it's not that the user access itself is unsafe. I do agree that things like "get_user()" want to be protected, but not because of any direct bugs or problems with get_user() and friends, but simply because get_user() is an excellent source of a pointer that is obviously controlled from a potentially attacking user space. So it's a prime candidate for then finding _subsequent_ accesses that can then be used to perturb the cache. Unlike the __get_user() case get_user() includes the address limit check near the pointer de-reference. With that locality the speculation can be mitigated with pointer narrowing rather than a barrier, i.e. array_index_nospec(). Where the narrowing is performed by: cmp %limit, %ptr sbb %mask, %mask and %mask, %ptr With respect to speculation the value of %ptr is either less than %limit or NULL. Co-developed-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Kees Cook Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: Andy Lutomirski Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/getuser.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index c97d935a29e8..49b167f73215 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -40,6 +40,8 @@ ENTRY(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -54,6 +56,8 @@ ENTRY(__get_user_2) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax @@ -68,6 +72,8 @@ ENTRY(__get_user_4) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 3: movl -3(%_ASM_AX),%edx xor %eax,%eax @@ -83,6 +89,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movq -7(%_ASM_AX),%rdx xor %eax,%eax @@ -94,6 +102,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movl -7(%_ASM_AX),%edx 5: movl -3(%_ASM_AX),%ecx From 0035134041207f990e0756e2a6f63b7dc3bfe95b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:59 -0800 Subject: [PATCH 201/770] x86/syscall: Sanitize syscall table de-references under speculation commit 2fbd7af5af8665d18bcefae3e9700be07e22b681 The syscall table base is a user controlled function pointer in kernel space. Use array_index_nospec() to prevent any out of bounds speculation. While retpoline prevents speculating into a userspace directed target it does not stop the pointer de-reference, the concern is leaking memory relative to the syscall table base, by observing instruction cache behavior. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Andy Lutomirski Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 4858911eea9d..60e21ccfb6d6 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -284,7 +285,8 @@ __visible void do_syscall_64(struct pt_regs *regs) * regs->orig_ax, which changes the behavior of some syscalls. */ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); + regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); } @@ -320,6 +322,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) } if (likely(nr < IA32_NR_syscalls)) { + nr = array_index_nospec(nr, IA32_NR_syscalls); /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that From 98116c32d3b4b60bc1add46a81ed4f991ef02a7d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:05 -0800 Subject: [PATCH 202/770] vfs, fdtable: Prevent bounds-check bypass via speculative execution commit 56c30ba7b348b90484969054d561f711ba196507 'fd' is a user controlled value that is used as a data dependency to read from the 'fdt->fd' array. In order to avoid potential leaks of kernel memory values, block speculative execution of the instruction stream that could issue reads based on an invalid 'file *' returned from __fcheck_files. Co-developed-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fdtable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 1c65817673db..41615f38bcff 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - if (fd < fdt->max_fds) + if (fd < fdt->max_fds) { + fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); + } return NULL; } From c9daf8144642b9daad69ab50e9cba5ecea2c8e25 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:15 -0800 Subject: [PATCH 203/770] nl80211: Sanitize array index in parse_txq_params commit 259d8c1e984318497c84eef547bbb6b1d9f4eb05 Wireless drivers rely on parse_txq_params to validate that txq_params->ac is less than NL80211_NUM_ACS by the time the low-level driver's ->conf_tx() handler is called. Use a new helper, array_index_nospec(), to sanitize txq_params->ac with respect to speculation. I.e. ensure that any speculation into ->conf_tx() handlers is done with a value of txq_params->ac that is within the bounds of [0, NL80211_NUM_ACS). Reported-by: Christian Lamparter Reported-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Johannes Berg Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: linux-wireless@vger.kernel.org Cc: torvalds@linux-foundation.org Cc: "David S. Miller" Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727419584.33451.7700736761686184303.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d396cb61a280..81bef0676e1d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { + u8 ac; + if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); - if (txq_params->ac >= NL80211_NUM_ACS) + if (ac >= NL80211_NUM_ACS) return -EINVAL; - + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } From 863b308dbb1945949529a0e523c34636430884a7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:21 -0800 Subject: [PATCH 204/770] x86/spectre: Report get_user mitigation for spectre_v1 commit edfbae53dab8348fca778531be9f4855d2ca0360 Reflect the presence of get_user(), __get_user(), and 'syscall' protections in sysfs. The expectation is that new and better tooling will allow the kernel to grow more usages of array_index_nospec(), for now, only claim mitigation for __user pointer de-references. Reported-by: Jiri Slaby Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 400c34ec9179..fdb6068a23b5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -297,7 +297,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, From c962dfa4aca010e05730ff458210b04c4eb9af14 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 30 Jan 2018 19:32:18 +0000 Subject: [PATCH 205/770] x86/spectre: Fix spelling mistake: "vunerable"-> "vulnerable" commit e698dcdfcda41efd0984de539767b4cddd235f1e Trivial fix to spelling mistake in pr_err error message text. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: kernel-janitors@vger.kernel.org Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fdb6068a23b5..dd3a3cc84591 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -103,7 +103,7 @@ bool retpoline_module_ok(bool has_retpoline) if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) return true; - pr_err("System may be vunerable to spectre v2\n"); + pr_err("System may be vulnerable to spectre v2\n"); spectre_v2_bad_module = true; return false; } From 85543d7613c481392d564a4956850591b2025f5d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jan 2018 14:30:23 +0000 Subject: [PATCH 206/770] x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b Despite the fact that all the other code there seems to be doing it, just using set_cpu_cap() in early_intel_init() doesn't actually work. For CPUs with PKU support, setup_pku() calls get_cpu_cap() after c->c_init() has set those feature bits. That resets those bits back to what was queried from the hardware. Turning the bits off for bad microcode is easy to fix. That can just use setup_clear_cpu_cap() to force them off for all CPUs. I was less keen on forcing the feature bits *on* that way, just in case of inconsistencies. I appreciate that the kernel is going to get this utterly wrong if CPU features are not consistent, because it has already applied alternatives by the time secondary CPUs are brought up. But at least if setup_force_cpu_cap() isn't being used, we might have a chance of *detecting* the lack of the corresponding bit and either panicking or refusing to bring the offending CPU online. So ensure that the appropriate feature bits are set within get_cpu_cap() regardless of how many extra times it's called. Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 27 ++++++++------------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 970ee06dc8aa..f9d8460d34a1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -726,6 +726,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -820,6 +840,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); /* * Clear/Set all flags overridden by options, after probe. diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0c8b916abced..4cf4f8cbc69d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -175,28 +175,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* From 7a3f12294da45e779b313b33077f04b87782985b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 29 Jan 2018 22:04:47 +0000 Subject: [PATCH 207/770] x86/speculation: Use Indirect Branch Prediction Barrier in context switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18bf3c3ea8ece8f03b6fc58508f2dfd23c7711c7 Flush indirect branches when switching into a process that marked itself non dumpable. This protects high value processes like gpg better, without having too high performance overhead. If done naïvely, we could switch to a kernel idle thread and then back to the original process, such as: process A -> idle -> process A In such scenario, we do not have to do IBPB here even though the process is non-dumpable, as we are switching back to the same process after a hiatus. To avoid the redundant IBPB, which is expensive, we track the last mm user context ID. The cost is to have an extra u64 mm context id to track the last mm we were using before switching to the init_mm used by idle. Avoiding the extra IBPB is probably worth the extra memory for this common scenario. For those cases where tlb_defer_switch_to_init_mm() returns true (non PCID), lazy tlb will defer switch to init_mm, so we will not be changing the mm for the process A -> idle -> process A switch. So IBPB will be skipped for this case. Thanks to the reviewers and Andy Lutomirski for the suggestion of using ctx_id which got rid of the problem of mm pointer recycling. Signed-off-by: Tim Chen Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: linux@dominikbrodowski.net Cc: peterz@infradead.org Cc: bp@alien8.de Cc: luto@kernel.org Cc: pbonzini@redhat.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517263487-3708-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/mm/tlb.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 3effd3c994af..4405c4b308e8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -174,6 +174,8 @@ struct tlb_state { struct mm_struct *loaded_mm; u16 loaded_mm_asid; u16 next_asid; + /* last user mm's ctx id */ + u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5bfe61a5e8e3..012d02624848 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,13 +6,14 @@ #include #include #include +#include #include #include +#include #include #include #include -#include /* * TLB flushing, formerly SMP-only @@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } else { u16 new_asid; bool need_flush; + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. + * + * As an optimization, flush indirect branches only when + * switching into processes that disable dumping. This + * protects high value processes like gpg, without having + * too high performance overhead. IBPB is *expensive*! + * + * This will not flush branches when switching into kernel + * threads. It will also not flush if we switch to idle + * thread and back to the same process. It will flush if we + * switch to a different non-dumpable process. + */ + if (tsk && tsk->mm && + tsk->mm->context.ctx_id != last_ctx_id && + get_dumpable(tsk->mm) != SUID_DUMP_USER) + indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } + /* + * Record last user mm's context id, so we can avoid + * flushing branch buffer with IBPB if we switch back + * to the same user. + */ + if (next != &init_mm) + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); } @@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ + this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); From ad368e5b2d56ed059ebc8343cd7e713f3a7717d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Jan 2018 22:13:33 -0600 Subject: [PATCH 208/770] x86/paravirt: Remove 'noreplace-paravirt' cmdline option commit 12c69f1e94c89d40696e83804dd2f0965b5250cd The 'noreplace-paravirt' option disables paravirt patching, leaving the original pv indirect calls in place. That's highly incompatible with retpolines, unless we want to uglify paravirt even further and convert the paravirt calls to retpolines. As far as I can tell, the option doesn't seem to be useful for much other than introducing surprising corner cases and making the kernel vulnerable to Spectre v2. It was probably a debug option from the early paravirt days. So just remove it. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Andrea Arcangeli Cc: Peter Zijlstra Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: Tim Chen Cc: Rusty Russell Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jason Baron Cc: Paolo Bonzini Cc: Alok Kataria Cc: Arjan Van De Ven Cc: David Woodhouse Cc: Dan Williams Link: https://lkml.kernel.org/r/20180131041333.2x6blhxirc2kclrq@treble Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 2 -- arch/x86/kernel/alternative.c | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8122b5f98ea1..c76afdcafbef 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2718,8 +2718,6 @@ norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops - noreplace-smp [X86-32,SMP] Don't replace SMP instructions with UP alternatives diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 14a52c7d23d4..21be0193d9dc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; From 7f8da2c8a191f274600bafcf6e58f54e16d5c8de Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:51:18 +0100 Subject: [PATCH 209/770] KVM: VMX: make MSR bitmaps per-VCPU commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6 Place the MSR bitmap in struct loaded_vmcs, and update it in place every time the x2apic or APICv state can change. This is rare and the loop can handle 64 MSRs per iteration, in a similar fashion as nested_vmx_prepare_msr_bitmap. This prepares for choosing, on a per-VM basis, whether to intercept the SPEC_CTRL and PRED_CMD MSRs. Cc: stable@vger.kernel.org # prereq for Spectre mitigation Suggested-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 270 ++++++++++++++++++++++++--------------------- 1 file changed, 147 insertions(+), 123 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bca85821b0f0..ab66374e8c82 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -108,6 +108,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -206,6 +214,7 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -446,8 +455,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -562,6 +569,7 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -919,6 +927,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -938,12 +947,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); enum { VMX_IO_BITMAP_A, VMX_IO_BITMAP_B, - VMX_MSR_BITMAP_LEGACY, - VMX_MSR_BITMAP_LONGMODE, - VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, - VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, - VMX_MSR_BITMAP_LEGACY_X2APIC, - VMX_MSR_BITMAP_LONGMODE_X2APIC, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR @@ -953,12 +956,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) -#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) -#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) -#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) -#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) -#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) -#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) @@ -2559,36 +2556,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2629,7 +2596,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -3829,6 +3796,8 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } @@ -3845,7 +3814,18 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; } static void free_kvm_area(void) @@ -4920,10 +4900,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4957,6 +4935,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __set_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __set_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __set_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __set_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -5003,28 +5025,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, - msr, type); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, type); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } + } +} + +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) @@ -5272,7 +5334,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5459,7 +5521,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -6742,7 +6804,7 @@ void vmx_enable_tdp(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6763,9 +6825,6 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; @@ -6828,42 +6887,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - for (msr = 0x800; msr <= 0x8ff; msr++) { - if (msr == 0x839 /* TMCCT */) - continue; - vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); - } - - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); - - /* EOI */ - vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); - if (enable_ept) vmx_enable_tdp(); else @@ -7152,13 +7177,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; @@ -7185,9 +7203,6 @@ out_shadow_vmcs: kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); - -out_msr_bitmap: free_loaded_vmcs(&vmx->nested.vmcs02); out_vmcs02: @@ -7332,10 +7347,6 @@ static void free_nested(struct vcpu_vmx *vmx) free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); @@ -8851,7 +8862,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -9513,6 +9524,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9549,6 +9561,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err < 0) goto free_msrs; + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); @@ -10018,7 +10039,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) @@ -10595,6 +10616,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -11388,7 +11412,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) From 6f6eb84b14aca572ebe50984be94e036075e6e2b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 31 Jan 2018 17:47:03 -0800 Subject: [PATCH 210/770] x86/kvm: Update spectre-v1 mitigation commit 085331dfc6bbe3501fb936e657331ca943827600 Commit 75f139aaf896 "KVM: x86: Add memory barrier on vmcs field lookup" added a raw 'asm("lfence");' to prevent a bounds check bypass of 'vmcs_field_to_offset_table'. The lfence can be avoided in this path by using the array_index_nospec() helper designed for these types of fixes. Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Andrew Honig Cc: kvm@vger.kernel.org Cc: Jim Mattson Link: https://lkml.kernel.org/r/151744959670.6342.3001723920950249067.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab66374e8c82..93e25e676800 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "kvm_cache_regs.h" #include "x86.h" @@ -887,21 +888,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { static inline short vmcs_field_to_offset(unsigned long field) { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); + unsigned short offset; - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + BUILD_BUG_ON(size > SHRT_MAX); + if (field >= size) return -ENOENT; - /* - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a - * generic mechanism. - */ - asm("lfence"); - - if (vmcs_field_to_offset_table[field] == 0) + field = array_index_nospec(field, size); + offset = vmcs_field_to_offset_table[field]; + if (offset == 0) return -ENOENT; - - return vmcs_field_to_offset_table[field]; + return offset; } static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) From 838dbae0acd06da5fb0d0173dfddc72ca5650eb0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 Feb 2018 11:27:20 +0000 Subject: [PATCH 211/770] x86/retpoline: Avoid retpolines for built-in __init functions commit 66f793099a636862a71c59d4a6ba91387b155e0c There's no point in building init code with retpolines, since it runs before any potentially hostile userspace does. And before the retpoline is actually ALTERNATIVEd into place, for much of it. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/init.h b/include/linux/init.h index f38b993edacb..943139a563e3 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -5,6 +5,13 @@ #include #include +/* Built-in __init functions needn't be compiled with retpoline */ +#if defined(RETPOLINE) && !defined(MODULE) +#define __noretpoline __attribute__((indirect_branch("keep"))) +#else +#define __noretpoline +#endif + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -40,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __inittrace __latent_entropy +#define __init __section(.init.text) __cold __inittrace __latent_entropy __noretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) From ebaf2271a024a7000dc83e061349f7bbf2b44281 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 11:27:21 +0000 Subject: [PATCH 212/770] x86/spectre: Simplify spectre_v2 command line parsing commit 9005c6834c0ffdfe46afa76656bd9276cca864f6 [dwmw2: Use ARRAY_SIZE] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 84 +++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dd3a3cc84591..71949bf2de5a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -119,13 +119,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -140,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ From 6dd1f6989a545446661d0cbecf8748c8d2093549 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 22:39:23 +0100 Subject: [PATCH 213/770] x86/pti: Mark constant arrays as __initconst commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e I'm seeing build failures from the two newly introduced arrays that are marked 'const' and '__initdata', which are mutually exclusive: arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init' arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init' The correct annotation is __initconst. Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Ricardo Neri Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Thomas Garnier Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f9d8460d34a1..92b66e21bae5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -876,7 +876,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -889,7 +889,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; From 4c8298c1fdd3317658a3dcd055b19a4c60df947d Mon Sep 17 00:00:00 2001 From: Darren Kenny Date: Fri, 2 Feb 2018 19:12:20 +0000 Subject: [PATCH 214/770] x86/speculation: Fix typo IBRS_ATT, which should be IBRS_ALL commit af189c95a371b59f493dbe0f50c0a09724868881 Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit") Signed-off-by: Darren Kenny Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Tom Lendacky Cc: Andi Kleen Cc: Borislav Petkov Cc: Masami Hiramatsu Cc: Arjan van de Ven Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d15d471348b8..4d57894635f2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ATT *might* it be avoided. + * CPUs with IBRS_ALL *might* it be avoided. */ static inline void vmexit_fill_RSB(void) { From 8f7c4d52a0e82854e5ccc15a2b53d3ab3efe0452 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:42 +0100 Subject: [PATCH 215/770] KVM/x86: Update the reverse_cpuid list to include CPUID_7_EDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b7b27aa011a1df42728d1768fc181d9ce69e6911 [dwmw2: Stop using KF() for bits in it, too] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jim Mattson Cc: kvm@vger.kernel.org Cc: Radim Krčmář Link: https://lkml.kernel.org/r/1517522386-18410-2-git-send-email-karahmed@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 8 +++----- arch/x86/kvm/cpuid.h | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10eb045..c0eb337e7878 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void) #define F(x) bit(X86_FEATURE_##x) -/* These are scattered features in cpufeatures.h. */ -#define KVM_CPUID_BIT_AVX512_4VNNIW 2 -#define KVM_CPUID_BIT_AVX512_4FMAPS 3 +/* For scattered features from cpufeatures.h; we currently expose none */ #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +390,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -477,7 +475,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; - entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c2cea6651279..9a327d5b6d1f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) From d395d69de67ea95760e1f207eb0f6fdfbcb6e069 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 1 Feb 2018 22:59:43 +0100 Subject: [PATCH 216/770] KVM/x86: Add IBPB support commit 15d45071523d89b3fb7372e2135fbd72f6af9506 The Indirect Branch Predictor Barrier (IBPB) is an indirect branch control mechanism. It keeps earlier branches from influencing later ones. Unlike IBRS and STIBP, IBPB does not define a new mode of operation. It's a command that ensures predicted branch targets aren't used after the barrier. Although IBRS and IBPB are enumerated by the same CPUID enumeration, IBPB is very different. IBPB helps mitigate against three potential attacks: * Mitigate guests from being attacked by other guests. - This is addressed by issing IBPB when we do a guest switch. * Mitigate attacks from guest/ring3->host/ring3. These would require a IBPB during context switch in host, or after VMEXIT. The host process has two ways to mitigate - Either it can be compiled with retpoline - If its going through context switch, and has set !dumpable then there is a IBPB in that path. (Tim's patch: https://patchwork.kernel.org/patch/10192871) - The case where after a VMEXIT you return back to Qemu might make Qemu attackable from guest when Qemu isn't compiled with retpoline. There are issues reported when doing IBPB on every VMEXIT that resulted in some tsc calibration woes in guest. * Mitigate guest/ring0->host/ring0 attacks. When host kernel is using retpoline it is safe against these attacks. If host kernel isn't using retpoline we might need to do a IBPB flush on every VMEXIT. Even when using retpoline for indirect calls, in certain conditions 'ret' can use the BTB on Skylake-era CPUs. There are other mitigations available like RSB stuffing/clearing. * IBPB is issued only for SVM during svm_free_vcpu(). VMX has a vmclear and SVM doesn't. Follow discussion here: https://lkml.org/lkml/2018/1/15/146 Please refer to the following spec for more details on the enumeration and control. Refer here to get documentation about mitigations. https://software.intel.com/en-us/side-channel-security-support [peterz: rebase and changelog rewrite] [karahmed: - rebase - vmx: expose PRED_CMD if guest has it in CPUID - svm: only pass through IBPB if guest has it in CPUID - vmx: support !cpu_has_vmx_msr_bitmap()] - vmx: support nested] [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS) PRED_CMD is a write-only MSR] Signed-off-by: Ashok Raj Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: David Woodhouse Signed-off-by: KarimAllah Ahmed Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: kvm@vger.kernel.org Cc: Asit Mallick Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Arjan Van De Ven Cc: Greg KH Cc: Jun Nakajima Cc: Paolo Bonzini Cc: Dan Williams Cc: Tim Chen Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 11 +++++- arch/x86/kvm/svm.c | 28 ++++++++++++++++ arch/x86/kvm/vmx.c | 80 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 116 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c0eb337e7878..033004de01e6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -365,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(IBPB); + /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -625,7 +629,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* IBPB isn't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; } case 0x80000019: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6a8284f72328..bc310335baa6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -249,6 +249,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -529,6 +530,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -1706,11 +1708,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1739,6 +1747,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } avic_vcpu_load(vcpu, cpu); } @@ -3670,6 +3682,22 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; case MSR_STAR: svm->vmcb->save.star = data; break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 93e25e676800..196f324eb23a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -582,6 +582,7 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; u32 secondary_exec_control; @@ -926,6 +927,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1900,6 +1903,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2278,6 +2304,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -3337,6 +3364,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -10038,9 +10093,23 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd is trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - /* This shortcut is ok because we support only x2APIC MSRs so far. */ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd) return false; page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); @@ -10073,6 +10142,13 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, MSR_TYPE_W); } } + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); kvm_release_page_clean(page); From 0716f551d5a29dc3abb128996aa02269b527fa45 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:44 +0100 Subject: [PATCH 217/770] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the contents will come directly from the hardware, but user-space can still override it. [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Reviewed-by: Darren Kenny Reviewed-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Dan Williams Cc: Tim Chen Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 15 +++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 033004de01e6..1909635fe09c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -394,7 +394,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 196f324eb23a..a66682ac2d16 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -583,6 +583,8 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif + u64 arch_capabilities; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; u32 secondary_exec_control; @@ -3257,6 +3259,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3392,6 +3400,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5652,6 +5665,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c28023a43b1..6fb112071f7e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1006,6 +1006,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; From 4a82531c96a20ab008e7f5a903e85d46a81fd3a3 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 1 Feb 2018 22:59:45 +0100 Subject: [PATCH 218/770] KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL commit d28b387fb74da95d69d2615732f50cceb38e9a4d [ Based on a patch from Ashok Raj ] Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero is written to it. No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest. [dwmw2: Clean up CPUID bits, save/restore manually, handle reset] Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jim Mattson Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 9 ++-- arch/x86/kvm/vmx.c | 105 ++++++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 2 +- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1909635fe09c..13f5d4217e4f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB); + F(IBPB) | F(IBRS); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -394,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -630,9 +631,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBPB isn't necessarily present in hardware cpuid */ + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ if (boot_cpu_has(X86_FEATURE_IBPB)) entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a66682ac2d16..0ae4b1a86168 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -584,6 +584,7 @@ struct vcpu_vmx { #endif u64 arch_capabilities; + u64 spec_ctrl; u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; @@ -1905,6 +1906,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + /* * Check if MSR is intercepted for L01 MSR bitmap. */ @@ -3259,6 +3283,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; case MSR_IA32_ARCH_CAPABILITIES: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) @@ -3372,6 +3404,37 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && @@ -5697,6 +5760,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -9360,6 +9424,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -9478,6 +9551,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -10109,7 +10203,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* - * pred_cmd is trying to verify two things: + * pred_cmd & spec_ctrl are trying to verify two things: * * 1. L0 gave a permission to L1 to actually passthrough the MSR. This * ensures that we do not accidentally generate an L02 MSR bitmap @@ -10122,9 +10216,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, * the MSR. */ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && - !pred_cmd) + !pred_cmd && !spec_ctrl) return false; page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); @@ -10158,6 +10253,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, } } + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + if (pred_cmd) nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6fb112071f7e..f97358423f9c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1006,7 +1006,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_ARCH_CAPABILITIES + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; From f6d90612c96c99d98f204c7641becfd7295a6fc2 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 3 Feb 2018 15:56:23 +0100 Subject: [PATCH 219/770] KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL commit b2ac58f90540e39324e7a29a7ad471407ae0bf48 [ Based on a patch from Paolo Bonzini ] ... basically doing exactly what we do for VMX: - Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) - Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest actually used it. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Reviewed-by: Konrad Rzeszutek Wilk Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Jun Nakajima Cc: kvm@vger.kernel.org Cc: Dave Hansen Cc: Tim Chen Cc: Andy Lutomirski Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Greg KH Cc: Paolo Bonzini Cc: Dan Williams Cc: Linus Torvalds Cc: Ashok Raj Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bc310335baa6..e0bc3ad0f6cd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -184,6 +184,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -249,6 +251,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, @@ -882,6 +885,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1587,6 +1609,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -3591,6 +3615,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3682,6 +3713,33 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) @@ -4950,6 +5008,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5042,6 +5109,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); From a252f37c5f8b998ccd419d1562a3c069890eb1d4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 11 Jan 2018 18:57:26 +0100 Subject: [PATCH 220/770] serial: core: mark port as initialized after successful IRQ change commit 44117a1d1732c513875d5a163f10d9adbe866c08 upstream. setserial changes the IRQ via uart_set_info(). It invokes uart_shutdown() which free the current used IRQ and clear TTY_PORT_INITIALIZED. It will then update the IRQ number and invoke uart_startup() before returning to the caller leaving TTY_PORT_INITIALIZED cleared. The next open will crash with | list_add double add: new=ffffffff839fcc98, prev=ffffffff839fcc98, next=ffffffff839fcc98. since the close from the IOCTL won't free the IRQ (and clean the list) due to the TTY_PORT_INITIALIZED check in uart_shutdown(). There is same pattern in uart_do_autoconfig() and I *think* it also needs to set TTY_PORT_INITIALIZED there. Is there a reason why uart_startup() does not set the flag by itself after the IRQ has been acquired (since it is cleared in uart_shutdown)? Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 3a14cccbd7ff..7948acf14601 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -987,6 +987,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, } } else { retval = uart_startup(tty, state, 1); + if (retval == 0) + tty_port_set_initialized(port, true); if (retval > 0) retval = 0; } From 8d1ed7d4e1ce6d4d08676f5b2bb1a4e83b3ad5f7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 15 Nov 2017 16:33:12 -0600 Subject: [PATCH 221/770] fpga: region: release of_parse_phandle nodes after use commit 0f5eb1545907edeea7672a9c1652c4231150ff22 upstream. Both fpga_region_get_manager() and fpga_region_get_bridges() call of_parse_phandle(), but nothing calls of_node_put() on the returned struct device_node pointers. Make sure to do that to stop their reference counters getting out of whack. Fixes: 0fa20cdfcc1f ("fpga: fpga-region: device tree control for FPGA") Signed-off-by: Ian Abbott Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/fpga-region.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c index d9ab7c75b14f..e0c73ceba2ed 100644 --- a/drivers/fpga/fpga-region.c +++ b/drivers/fpga/fpga-region.c @@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region) mgr_node = of_parse_phandle(np, "fpga-mgr", 0); if (mgr_node) { mgr = of_fpga_mgr_get(mgr_node); + of_node_put(mgr_node); of_node_put(np); return mgr; } @@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region, parent_br = region_np->parent; /* If overlay has a list of bridges, use it. */ - if (of_parse_phandle(overlay, "fpga-bridges", 0)) + br = of_parse_phandle(overlay, "fpga-bridges", 0); + if (br) { + of_node_put(br); np = overlay; - else + } else { np = region_np; + } for (i = 0; ; i++) { br = of_parse_phandle(np, "fpga-bridges", i); @@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region, break; /* If parent bridge is in list, skip it. */ - if (br == parent_br) + if (br == parent_br) { + of_node_put(br); continue; + } /* If node is a bridge, get it and add to list */ ret = fpga_bridge_get_to_list(br, region->info, ®ion->bridge_list); + of_node_put(br); /* If any of the bridges are in use, give up */ if (ret == -EBUSY) { From 81d0cc85caabe062991ea45ddada814835d47fb0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Feb 2018 11:12:26 -0800 Subject: [PATCH 222/770] Linux 4.14.18 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7ed993896dd5..a69e5da9ed86 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 17 +SUBLEVEL = 18 EXTRAVERSION = NAME = Petit Gorille From bafda5d375270277a140d3b70f0ea3c5fe6aacf8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 31 Oct 2017 00:33:45 +0900 Subject: [PATCH 223/770] .gitignore: sort normal pattern rules alphabetically commit 1377dd3e29878b8f5d9f5c9000975f50a428a0cd upstream. We are having more and more ignore patterns. Sort the list alphabetically. We will easily catch duplicated patterns if any. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- .gitignore | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 0c39aa20b6ba..435eeeb81efa 100644 --- a/.gitignore +++ b/.gitignore @@ -7,38 +7,38 @@ # command after changing this file, to see if there are # any tracked files which get ignored after the change. # -# Normal rules +# Normal rules (sorted alphabetically) # .* +*.a +*.bin +*.bz2 +*.c.[012]*.* +*.dwo +*.elf +*.gcno +*.gz +*.i +*.ko +*.ll +*.lst +*.lz4 +*.lzma +*.lzo +*.mod.c *.o *.o.* -*.a +*.order +*.patch *.s -*.ko *.so *.so.dbg -*.mod.c -*.i -*.lst -*.symtypes -*.order -*.elf -*.bin -*.tar -*.gz -*.bz2 -*.lzma -*.xz -*.lz4 -*.lzo -*.patch -*.gcno -*.ll -modules.builtin -Module.symvers -*.dwo *.su -*.c.[012]*.* +*.symtypes +*.tar +*.xz +Module.symvers +modules.builtin # # Top-level generic files From abc5896b778581fb2396f018a82334c701a1c261 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 31 Oct 2017 00:33:46 +0900 Subject: [PATCH 224/770] .gitignore: move *.dtb and *.dtb.S patterns to the top-level .gitignore commit 10b62a2f785ab55857380f0c63d9fa468fd8c676 upstream. Most of DT files are compiled under arch/*/boot/dts/, but we have some other directories, like drivers/of/unittest-data/. We often miss to add gitignore patterns per directory. Since there are no source files that end with .dtb or .dtb.S, we can ignore the patterns globally. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- .gitignore | 2 ++ arch/arc/boot/.gitignore | 1 - arch/arm/boot/.gitignore | 1 - arch/arm64/boot/dts/.gitignore | 1 - arch/metag/boot/.gitignore | 1 - arch/microblaze/boot/.gitignore | 1 - arch/mips/boot/.gitignore | 1 - arch/nios2/boot/.gitignore | 1 - arch/powerpc/boot/.gitignore | 1 - arch/xtensa/boot/.gitignore | 1 - drivers/of/unittest-data/.gitignore | 2 -- 11 files changed, 2 insertions(+), 11 deletions(-) delete mode 100644 arch/arm64/boot/dts/.gitignore delete mode 100644 drivers/of/unittest-data/.gitignore diff --git a/.gitignore b/.gitignore index 435eeeb81efa..6c119eab5d46 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ *.bin *.bz2 *.c.[012]*.* +*.dtb +*.dtb.S *.dwo *.elf *.gcno diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore index 5246969a20c5..c4c5fd529c25 100644 --- a/arch/arc/boot/.gitignore +++ b/arch/arc/boot/.gitignore @@ -1,2 +1 @@ -*.dtb* uImage diff --git a/arch/arm/boot/.gitignore b/arch/arm/boot/.gitignore index 3c79f85975aa..ce1c5ff746e7 100644 --- a/arch/arm/boot/.gitignore +++ b/arch/arm/boot/.gitignore @@ -3,4 +3,3 @@ zImage xipImage bootpImage uImage -*.dtb diff --git a/arch/arm64/boot/dts/.gitignore b/arch/arm64/boot/dts/.gitignore deleted file mode 100644 index b60ed208c779..000000000000 --- a/arch/arm64/boot/dts/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.dtb diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore index 2d6c0c160884..6c662ddb909a 100644 --- a/arch/metag/boot/.gitignore +++ b/arch/metag/boot/.gitignore @@ -1,4 +1,3 @@ vmlinux* uImage* ramdisk.* -*.dtb* diff --git a/arch/microblaze/boot/.gitignore b/arch/microblaze/boot/.gitignore index bf0459186027..679502d64a97 100644 --- a/arch/microblaze/boot/.gitignore +++ b/arch/microblaze/boot/.gitignore @@ -1,3 +1,2 @@ -*.dtb linux.bin* simpleImage.* diff --git a/arch/mips/boot/.gitignore b/arch/mips/boot/.gitignore index d3962cd5ce0c..a73d6e2c4f64 100644 --- a/arch/mips/boot/.gitignore +++ b/arch/mips/boot/.gitignore @@ -5,4 +5,3 @@ zImage zImage.tmp calc_vmlinuz_load_addr uImage -*.dtb diff --git a/arch/nios2/boot/.gitignore b/arch/nios2/boot/.gitignore index 109279ca5a4d..64386a8dedd8 100644 --- a/arch/nios2/boot/.gitignore +++ b/arch/nios2/boot/.gitignore @@ -1,2 +1 @@ -*.dtb vmImage diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 84774ccba1c2..f92d0530ceb1 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -18,7 +18,6 @@ otheros.bld uImage cuImage.* dtbImage.* -*.dtb treeImage.* vmlinux.strip zImage diff --git a/arch/xtensa/boot/.gitignore b/arch/xtensa/boot/.gitignore index be7655998b26..38177c7ebcab 100644 --- a/arch/xtensa/boot/.gitignore +++ b/arch/xtensa/boot/.gitignore @@ -1,3 +1,2 @@ uImage zImage.redboot -*.dtb diff --git a/drivers/of/unittest-data/.gitignore b/drivers/of/unittest-data/.gitignore deleted file mode 100644 index 4b3cf8b16de2..000000000000 --- a/drivers/of/unittest-data/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -testcases.dtb -testcases.dtb.S From 806d61d66900b999c908823471ea9ff64cf7344e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 30 Sep 2017 10:10:10 +0900 Subject: [PATCH 225/770] kbuild: rpm-pkg: keep spec file until make mrproper commit af60e207087975d069858741c44ed4f450330ac4 upstream. If build fails during (bin)rpm-pkg, the spec file is not cleaned by anyone until the next successful build of the package. We do not have to immediately delete the spec file in case somebody may want to take a look at it. Instead, make them ignored by git, and cleaned up by make mrproper. Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- .gitignore | 5 +++++ scripts/package/Makefile | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 6c119eab5d46..f6050b88e95b 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,11 @@ modules.builtin /System.map /Module.markers +# +# RPM spec file (make rpm-pkg) +# +/*.spec + # # Debian directory (make deb-pkg) # diff --git a/scripts/package/Makefile b/scripts/package/Makefile index 34de8b953ecf..348af5b20618 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -50,7 +50,6 @@ rpm-pkg rpm: FORCE $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec $(call cmd,src_tar,$(KERNELPATH),kernel.spec) +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz - rm $(KERNELPATH).tar.gz kernel.spec # binrpm-pkg # --------------------------------------------------------------------------- @@ -59,7 +58,8 @@ binrpm-pkg: FORCE $(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec +rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \ $(UTS_MACHINE) -bb $(objtree)/binkernel.spec - rm binkernel.spec + +clean-files += $(objtree)/*.spec # Deb target # --------------------------------------------------------------------------- From 2726946dfcd2b2f1863dfae198687c6ec6d976dd Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Jan 2018 16:29:30 +0200 Subject: [PATCH 226/770] ip6mr: fix stale iterator [ Upstream commit 4adfa79fc254efb7b0eb3cd58f62c2c3f805f1ba ] When we dump the ip6mr mfc entries via proc, we initialize an iterator with the table to dump but we don't clear the cache pointer which might be initialized from a prior read on the same descriptor that ended. This can result in lock imbalance (an unnecessary unlock) leading to other crashes and hangs. Clear the cache pointer like ipmr does to fix the issue. Thanks for the reliable reproducer. Here's syzbot's trace: WARNING: bad unlock balance detected! 4.15.0-rc3+ #128 Not tainted syzkaller971460/3195 is trying to release lock (mrt_lock) at: [<000000006898068d>] ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 but there are no more locks to release! other info that might help us debug this: 1 lock held by syzkaller971460/3195: #0: (&p->lock){+.+.}, at: [<00000000744a6565>] seq_read+0xd5/0x13d0 fs/seq_file.c:165 stack backtrace: CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_unlock_imbalance_bug+0x12f/0x140 kernel/locking/lockdep.c:3561 __lock_release kernel/locking/lockdep.c:3775 [inline] lock_release+0x5f9/0xda0 kernel/locking/lockdep.c:4023 __raw_read_unlock include/linux/rwlock_api_smp.h:225 [inline] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553 traverse+0x3bc/0xa00 fs/seq_file.c:135 seq_read+0x96a/0x13d0 fs/seq_file.c:189 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 BUG: sleeping function called from invalid context at lib/usercopy.c:25 in_atomic(): 1, irqs_disabled(): 0, pid: 3195, name: syzkaller971460 INFO: lockdep is turned off. CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6060 __might_sleep+0x95/0x190 kernel/sched/core.c:6013 __might_fault+0xab/0x1d0 mm/memory.c:4525 _copy_to_user+0x2c/0xc0 lib/usercopy.c:25 copy_to_user include/linux/uaccess.h:155 [inline] seq_read+0xcb4/0x13d0 fs/seq_file.c:279 proc_reg_read+0xef/0x170 fs/proc/inode.c:217 do_loop_readv_writev fs/read_write.c:673 [inline] do_iter_read+0x3db/0x5b0 fs/read_write.c:897 compat_readv+0x1bf/0x270 fs/read_write.c:1140 do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189 C_SYSC_preadv fs/read_write.c:1209 [inline] compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125 RIP: 0023:0xf7f73c79 RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0 RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 WARNING: CPU: 1 PID: 3195 at lib/usercopy.c:26 _copy_to_user+0xb5/0xc0 lib/usercopy.c:26 Reported-by: syzbot Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f5500f5444e9..e1060f28410d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -496,6 +496,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(-ENOENT); it->mrt = mrt; + it->cache = NULL; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } From 166f27322f8c89a3580a30c339ff34a66544500c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2018 10:26:57 -0800 Subject: [PATCH 227/770] net: igmp: add a missing rcu locking section [ Upstream commit e7aadb27a5415e8125834b84a74477bfbee4eff5 ] Newly added igmpv3_get_srcaddr() needs to be called under rcu lock. Timer callbacks do not ensure this locking. ============================= WARNING: suspicious RCU usage 4.15.0+ #200 Not tainted ----------------------------- ./include/linux/inetdevice.h:216 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syzkaller616973/4074: #0: (&mm->mmap_sem){++++}, at: [<00000000bfce669e>] __do_page_fault+0x32d/0xc90 arch/x86/mm/fault.c:1355 #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] lockdep_copy_map include/linux/lockdep.h:178 [inline] #1: ((&im->timer)){+.-.}, at: [<00000000619d2f71>] call_timer_fn+0x1c6/0x820 kernel/time/timer.c:1316 #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] spin_lock_bh include/linux/spinlock.h:315 [inline] #2: (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] igmpv3_send_report+0x98/0x5b0 net/ipv4/igmp.c:600 stack backtrace: CPU: 0 PID: 4074 Comm: syzkaller616973 Not tainted 4.15.0+ #200 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4592 __in_dev_get_rcu include/linux/inetdevice.h:216 [inline] igmpv3_get_srcaddr net/ipv4/igmp.c:329 [inline] igmpv3_newpack+0xeef/0x12e0 net/ipv4/igmp.c:389 add_grhead.isra.27+0x235/0x300 net/ipv4/igmp.c:432 add_grec+0xbd3/0x1170 net/ipv4/igmp.c:565 igmpv3_send_report+0xd5/0x5b0 net/ipv4/igmp.c:605 igmp_send_report+0xc43/0x1050 net/ipv4/igmp.c:722 igmp_timer_expire+0x322/0x5c0 net/ipv4/igmp.c:831 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:938 Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 013fed55b610..fbeb35ad804b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; + + rcu_read_lock(); pip->saddr = igmpv3_get_srcaddr(dev, &fl4); + rcu_read_unlock(); + pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); From fcee7812ea05e0a9031f723a8f4206c2e0d74f5e Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 29 Jan 2018 17:53:42 +0800 Subject: [PATCH 228/770] qlcnic: fix deadlock bug [ Upstream commit 233ac3891607f501f08879134d623b303838f478 ] The following soft lockup was caught. This is a deadlock caused by recusive locking. Process kworker/u40:1:28016 was holding spin lock "mbx->queue_lock" in qlcnic_83xx_mailbox_worker(), while a softirq came in and ask the same spin lock in qlcnic_83xx_enqueue_mbx_cmd(). This lock should be hold by disable bh.. [161846.962125] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/u40:1:28016] [161846.962367] Modules linked in: tun ocfs2 xen_netback xen_blkback xen_gntalloc xen_gntdev xen_evtchn xenfs xen_privcmd autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs bnx2fc fcoe libfcoe libfc sunrpc 8021q mrp garp bridge stp llc bonding dm_round_robin dm_multipath iTCO_wdt iTCO_vendor_support pcspkr sb_edac edac_core i2c_i801 shpchp lpc_ich mfd_core ioatdma ipmi_devintf ipmi_si ipmi_msghandler sg ext4 jbd2 mbcache2 sr_mod cdrom sd_mod igb i2c_algo_bit i2c_core ahci libahci megaraid_sas ixgbe dca ptp pps_core vxlan udp_tunnel ip6_udp_tunnel qla2xxx scsi_transport_fc qlcnic crc32c_intel be2iscsi bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi ipv6 cxgb3 mdio libiscsi_tcp qla4xxx iscsi_boot_sysfs libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod [161846.962454] [161846.962460] CPU: 1 PID: 28016 Comm: kworker/u40:1 Not tainted 4.1.12-94.5.9.el6uek.x86_64 #2 [161846.962463] Hardware name: Oracle Corporation SUN SERVER X4-2L /ASSY,MB,X4-2L , BIOS 26050100 09/19/2017 [161846.962489] Workqueue: qlcnic_mailbox qlcnic_83xx_mailbox_worker [qlcnic] [161846.962493] task: ffff8801f2e34600 ti: ffff88004ca5c000 task.ti: ffff88004ca5c000 [161846.962496] RIP: e030:[] [] xen_hypercall_sched_op+0xa/0x20 [161846.962506] RSP: e02b:ffff880202e43388 EFLAGS: 00000206 [161846.962509] RAX: 0000000000000000 RBX: ffff8801f6996b70 RCX: ffffffff810013aa [161846.962511] RDX: ffff880202e433cc RSI: ffff880202e433b0 RDI: 0000000000000003 [161846.962513] RBP: ffff880202e433d0 R08: 0000000000000000 R09: ffff8801fe893200 [161846.962516] R10: ffff8801fe400538 R11: 0000000000000206 R12: ffff880202e4b000 [161846.962518] R13: 0000000000000050 R14: 0000000000000001 R15: 000000000000020d [161846.962528] FS: 0000000000000000(0000) GS:ffff880202e40000(0000) knlGS:ffff880202e40000 [161846.962531] CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [161846.962533] CR2: 0000000002612640 CR3: 00000001bb796000 CR4: 0000000000042660 [161846.962536] Stack: [161846.962538] ffff880202e43608 0000000000000000 ffffffff813f0442 ffff880202e433b0 [161846.962543] 0000000000000000 ffff880202e433cc ffffffff00000001 0000000000000000 [161846.962547] 00000009813f03d6 ffff880202e433e0 ffffffff813f0460 ffff880202e43440 [161846.962552] Call Trace: [161846.962555] [161846.962565] [] ? xen_poll_irq_timeout+0x42/0x50 [161846.962570] [] xen_poll_irq+0x10/0x20 [161846.962578] [] xen_lock_spinning+0xe2/0x110 [161846.962583] [] __raw_callee_save_xen_lock_spinning+0x11/0x20 [161846.962592] [] ? _raw_spin_lock+0x57/0x80 [161846.962609] [] qlcnic_83xx_enqueue_mbx_cmd+0x7c/0xe0 [qlcnic] [161846.962623] [] qlcnic_83xx_issue_cmd+0x58/0x210 [qlcnic] [161846.962636] [] qlcnic_83xx_sre_macaddr_change+0x162/0x1d0 [qlcnic] [161846.962649] [] qlcnic_83xx_change_l2_filter+0x2b/0x30 [qlcnic] [161846.962657] [] ? __skb_flow_dissect+0x18b/0x650 [161846.962670] [] qlcnic_send_filter+0x205/0x250 [qlcnic] [161846.962682] [] qlcnic_xmit_frame+0x547/0x7b0 [qlcnic] [161846.962691] [] xmit_one+0x82/0x1a0 [161846.962696] [] dev_hard_start_xmit+0x50/0xa0 [161846.962701] [] sch_direct_xmit+0x112/0x220 [161846.962706] [] __dev_queue_xmit+0x1df/0x5e0 [161846.962710] [] dev_queue_xmit_sk+0x13/0x20 [161846.962721] [] bond_dev_queue_xmit+0x35/0x80 [bonding] [161846.962729] [] __bond_start_xmit+0x1cb/0x210 [bonding] [161846.962736] [] bond_start_xmit+0x31/0x60 [bonding] [161846.962740] [] xmit_one+0x82/0x1a0 [161846.962745] [] dev_hard_start_xmit+0x50/0xa0 [161846.962749] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962754] [] dev_queue_xmit_sk+0x13/0x20 [161846.962760] [] vlan_dev_hard_start_xmit+0xb2/0x150 [8021q] [161846.962764] [] xmit_one+0x82/0x1a0 [161846.962769] [] dev_hard_start_xmit+0x50/0xa0 [161846.962773] [] __dev_queue_xmit+0x4ee/0x5e0 [161846.962777] [] dev_queue_xmit_sk+0x13/0x20 [161846.962789] [] br_dev_queue_push_xmit+0x54/0xa0 [bridge] [161846.962797] [] br_forward_finish+0x2f/0x90 [bridge] [161846.962807] [] ? ttwu_do_wakeup+0x1d/0x100 [161846.962811] [] ? __alloc_skb+0x8b/0x1f0 [161846.962818] [] __br_forward+0x8d/0x120 [bridge] [161846.962822] [] ? __kmalloc_reserve+0x3b/0xa0 [161846.962829] [] ? update_rq_runnable_avg+0xee/0x230 [161846.962836] [] br_forward+0x96/0xb0 [bridge] [161846.962845] [] br_handle_frame_finish+0x1ae/0x420 [bridge] [161846.962853] [] br_handle_frame+0x17f/0x260 [bridge] [161846.962862] [] ? br_handle_frame_finish+0x420/0x420 [bridge] [161846.962867] [] __netif_receive_skb_core+0x1f7/0x870 [161846.962872] [] __netif_receive_skb+0x22/0x70 [161846.962877] [] netif_receive_skb_internal+0x23/0x90 [161846.962884] [] ? xenvif_idx_release+0xea/0x100 [xen_netback] [161846.962889] [] ? _raw_spin_unlock_irqrestore+0x20/0x50 [161846.962893] [] netif_receive_skb_sk+0x24/0x90 [161846.962899] [] xenvif_tx_submit+0x2ca/0x3f0 [xen_netback] [161846.962906] [] xenvif_tx_action+0x9c/0xd0 [xen_netback] [161846.962915] [] xenvif_poll+0x35/0x70 [xen_netback] [161846.962920] [] napi_poll+0xcb/0x1e0 [161846.962925] [] net_rx_action+0x90/0x1c0 [161846.962931] [] __do_softirq+0x10a/0x350 [161846.962938] [] irq_exit+0x125/0x130 [161846.962943] [] xen_evtchn_do_upcall+0x39/0x50 [161846.962950] [] xen_do_hypervisor_callback+0x1e/0x40 [161846.962952] [161846.962959] [] ? _raw_spin_lock+0x4a/0x80 [161846.962964] [] ? _raw_spin_lock_irqsave+0x1e/0xa0 [161846.962978] [] ? qlcnic_83xx_mailbox_worker+0xb9/0x2a0 [qlcnic] [161846.962991] [] ? process_one_work+0x151/0x4b0 [161846.962995] [] ? check_events+0x12/0x20 [161846.963001] [] ? worker_thread+0x120/0x480 [161846.963005] [] ? __schedule+0x30b/0x890 [161846.963010] [] ? process_one_work+0x4b0/0x4b0 [161846.963015] [] ? process_one_work+0x4b0/0x4b0 [161846.963021] [] ? kthread+0xce/0xf0 [161846.963025] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963031] [] ? ret_from_fork+0x42/0x70 [161846.963035] [] ? kthread_freezable_should_stop+0x70/0x70 [161846.963037] Code: cc 51 41 53 b8 1c 00 00 00 0f 05 41 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc Signed-off-by: Junxiao Bi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index f7080d0ab874..46b0372dd032 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3891,7 +3891,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) struct list_head *head = &mbx->cmd_q; struct qlcnic_cmd_args *cmd = NULL; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); while (!list_empty(head)) { cmd = list_entry(head->next, struct qlcnic_cmd_args, list); @@ -3902,7 +3902,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) qlcnic_83xx_notify_cmd_completion(adapter, cmd); } - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); } static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) @@ -3938,12 +3938,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, { struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_del(&cmd->list); mbx->num_cmds--; - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); qlcnic_83xx_notify_cmd_completion(adapter, cmd); } @@ -4008,7 +4008,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, init_completion(&cmd->completion); cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); list_add_tail(&cmd->list, &mbx->cmd_q); mbx->num_cmds++; @@ -4016,7 +4016,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; queue_work(mbx->work_q, &mbx->work); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return 0; } @@ -4112,15 +4112,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; spin_unlock_irqrestore(&mbx->aen_lock, flags); - spin_lock(&mbx->queue_lock); + spin_lock_bh(&mbx->queue_lock); if (list_empty(head)) { - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); return; } cmd = list_entry(head->next, struct qlcnic_cmd_args, list); - spin_unlock(&mbx->queue_lock); + spin_unlock_bh(&mbx->queue_lock); mbx_ops->encode_cmd(adapter, cmd); mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); From a7c2cf702a4021f40a8948b9c5abcb8a81668ca4 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 30 Jan 2018 14:12:55 +0100 Subject: [PATCH 229/770] qmi_wwan: Add support for Quectel EP06 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0b91a56a2e57a5a370655b25d677ae0ebf8a2d0 ] The Quectel EP06 is a Cat. 6 LTE modem. It uses the same interface as the EC20/EC25 for QMI, and requires the same "set DTR"-quirk to work. Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 2092febfcb42..8d9f02b7a71f 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1243,6 +1243,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From e8513f250d1b632f17cf909ff6b76293adc5f682 Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Wed, 31 Jan 2018 01:32:36 +0800 Subject: [PATCH 230/770] r8169: fix RTL8168EP take too long to complete driver initialization. [ Upstream commit 086ca23d03c0d2f4088f472386778d293e15c5f6 ] Driver check the wrong register bit in rtl_ocp_tx_cond() that keep driver waiting until timeout. Fix this by waiting for the right register bit. Signed-off-by: Chunhao Lin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 958ff931e790..619a1b7281a0 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1388,7 +1388,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) { void __iomem *ioaddr = tp->mmio_addr; - return RTL_R8(IBISR0) & 0x02; + return RTL_R8(IBISR0) & 0x20; } static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) @@ -1396,7 +1396,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); } From 6d35430fdaf47880cacda975a7d1b1588f933357 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 26 Jan 2018 16:40:41 +0800 Subject: [PATCH 231/770] tcp: release sk_frag.page in tcp_disconnect [ Upstream commit 9b42d55a66d388e4dd5550107df051a9637564fc ] socket can be disconnected and gets transformed back to a listening socket, if sk_frag.page is not released, which will be cloned into a new socket by sk_clone_lock, but the reference count of this page is increased, lead to a use after free or double free issue Signed-off-by: Li RongQing Cc: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2a65d806b562..fe11128d7df4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2379,6 +2379,12 @@ int tcp_disconnect(struct sock *sk, int flags) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + sk->sk_error_report(sk); return err; } From b9b70c876a7a73d494cb9d9f32a227dcfac54a4c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 25 Jan 2018 22:03:52 +0800 Subject: [PATCH 232/770] vhost_net: stop device during reset owner [ Upstream commit 4cd879515d686849eec5f718aeac62a70b067d82 ] We don't stop device before reset owner, this means we could try to serve any virtqueue kick before reset dev->worker. This will result a warn since the work was pending at llist during owner resetting. Fix this by stopping device during owner reset. Reported-by: syzbot+eb17c6162478cc50632c@syzkaller.appspotmail.com Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index bd15309ac5f1..082891dffd9d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1212,6 +1212,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: From 799a34d5b048644a918b295627c3594124d96d4c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Feb 2018 15:26:57 +0000 Subject: [PATCH 233/770] Revert "defer call to mem_cgroup_sk_alloc()" [ Upstream commit edbe69ef2c90fc86998a74b08319a01c508bd497 ] This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol: defer call to mem_cgroup_sk_alloc()"). Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks memcg socket memory accounting, as packets received before memcg pointer initialization are not accounted and are causing refcounting underflow on socket release. Actually the free-after-use problem was fixed by commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in sk_clone_lock()") for the cgroup pointer. So, let's revert it and call mem_cgroup_sk_alloc() just before cgroup_sk_alloc(). This is safe, as we hold a reference to the socket we're cloning, and it holds a reference to the memcg. Also, let's drop BUG_ON(mem_cgroup_is_root()) check from mem_cgroup_sk_alloc(). I see no reasons why bumping the root memcg counter is a good reason to panic, and there are no realistic ways to hit it. Signed-off-by: Roman Gushchin Cc: Eric Dumazet Cc: David S. Miller Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 14 ++++++++++++++ net/core/sock.c | 5 +---- net/ipv4/inet_connection_sock.c | 1 - 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 53f7c919b916..66e7efabf0a1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5828,6 +5828,20 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; + /* + * Socket cloning can throw us here with sk_memcg already + * filled. It won't however, necessarily happen from + * process context. So the test for root memcg given + * the current task's memcg won't help us in this case. + * + * Respecting the original socket's memcg is a better + * decision in this case. + */ + if (sk->sk_memcg) { + css_get(&sk->sk_memcg->css); + return; + } + rcu_read_lock(); memcg = mem_cgroup_from_task(current); if (memcg == root_mem_cgroup) diff --git a/net/core/sock.c b/net/core/sock.c index 415f441c63b9..beb1e299fed3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,16 +1677,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; - - /* sk->sk_memcg will be populated at accept() time */ - newsk->sk_memcg = NULL; - atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + mem_cgroup_sk_alloc(newsk); cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b47a59cb3573..0cc08c512202 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } - mem_cgroup_sk_alloc(newsk); out: release_sock(sk); if (req) From 07ca93e3176bfdc5e954b79c0dcee0fb95aef6fb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 25 Jan 2018 20:16:29 -0800 Subject: [PATCH 234/770] net: ipv6: send unsolicited NA after DAD [ Upstream commit c76fe2d98c726224a975a0d0198c3fb50406d325 ] Unsolicited IPv6 neighbor advertisements should be sent after DAD completes. Update ndisc_send_unsol_na to skip tentative, non-optimistic addresses and have those sent by addrconf_dad_completed after DAD. Fixes: 4a6e3c5def13c ("net: ipv6: send unsolicited NA on admin up") Reported-by: Vivek Venkatraman Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 30 ++++++++++++++++++++++++++---- net/ipv6/ndisc.c | 5 +++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c5318f5f6a14..6a76e41e6d51 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -184,7 +184,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); -static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, + bool send_na); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -3825,12 +3826,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { + bool send_na = false; + + if (ifp->flags & IFA_F_TENTATIVE && + !(ifp->flags & IFA_F_OPTIMISTIC)) + send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp, bump_id); + addrconf_dad_completed(ifp, bump_id, send_na); return; } @@ -3959,16 +3965,21 @@ static void addrconf_dad_work(struct work_struct *w) } if (ifp->dad_probes == 0) { + bool send_na = false; + /* * DAD was successful */ + if (ifp->flags & IFA_F_TENTATIVE && + !(ifp->flags & IFA_F_OPTIMISTIC)) + send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); - addrconf_dad_completed(ifp, bump_id); + addrconf_dad_completed(ifp, bump_id, send_na); goto out; } @@ -4006,7 +4017,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) return true; } -static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) +static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, + bool send_na) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; @@ -4038,6 +4050,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) if (send_mld) ipv6_mc_dad_complete(ifp->idev); + /* send unsolicited NA if enabled */ + if (send_na && + (ifp->idev->cnf.ndisc_notify || + dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, + /*router=*/ !!ifp->idev->cnf.forwarding, + /*solicited=*/ false, /*override=*/ true, + /*inc_opt=*/ true); + } + if (send_rs) { /* * If a host as already performed a random delay diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 266a530414d7..2a937c8d19e9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -558,6 +558,11 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { + /* skip tentative addresses until dad completes */ + if (ifa->flags & IFA_F_TENTATIVE && + !(ifa->flags & IFA_F_OPTIMISTIC)) + continue; + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, From 1da27118f419208acd895fc95713dcd9ef25a289 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Feb 2018 12:21:15 +0100 Subject: [PATCH 235/770] rocker: fix possible null pointer dereference in rocker_router_fib_event_work [ Upstream commit a83165f00f16c0e0ef5b7cec3cbd0d4788699265 ] Currently, rocker user may experience following null pointer derefence bug: [ 3.062141] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 [ 3.065163] IP: rocker_router_fib_event_work+0x36/0x110 [rocker] The problem is uninitialized rocker->wops pointer that is initialized only with the first initialized port. So move the port initialization before registering the fib events. Fixes: 936bd486564a ("rocker: use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/rocker/rocker_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index fc8f8bdf6579..056cb6093630 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2902,6 +2902,12 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_alloc_ordered_workqueue; } + err = rocker_probe_ports(rocker); + if (err) { + dev_err(&pdev->dev, "failed to probe ports\n"); + goto err_probe_ports; + } + /* Only FIBs pointing to our own netdevs are programmed into * the device, so no need to pass a callback. */ @@ -2918,22 +2924,16 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) rocker->hw.id = rocker_read64(rocker, SWITCH_ID); - err = rocker_probe_ports(rocker); - if (err) { - dev_err(&pdev->dev, "failed to probe ports\n"); - goto err_probe_ports; - } - dev_info(&pdev->dev, "Rocker switch with id %*phN\n", (int)sizeof(rocker->hw.id), &rocker->hw.id); return 0; -err_probe_ports: - unregister_switchdev_notifier(&rocker_switchdev_notifier); err_register_switchdev_notifier: unregister_fib_notifier(&rocker->fib_nb); err_register_fib_notifier: + rocker_remove_ports(rocker); +err_probe_ports: destroy_workqueue(rocker->rocker_owq); err_alloc_ordered_workqueue: free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); @@ -2961,9 +2961,9 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); - rocker_remove_ports(rocker); unregister_switchdev_notifier(&rocker_switchdev_notifier); unregister_fib_notifier(&rocker->fib_nb); + rocker_remove_ports(rocker); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); destroy_workqueue(rocker->rocker_owq); free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); From c04818abad468f5abb45f191eee618b159e1a215 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 31 Jan 2018 15:43:05 -0500 Subject: [PATCH 236/770] tcp_bbr: fix pacing_gain to always be unity when using lt_bw [ Upstream commit 3aff3b4b986e51bcf4ab249e5d48d39596e0df6a ] This commit fixes the pacing_gain to remain at BBR_UNIT (1.0) when using lt_bw and returning from the PROBE_RTT state to PROBE_BW. Previously, when using lt_bw, upon exiting PROBE_RTT and entering PROBE_BW the bbr_reset_probe_bw_mode() code could sometimes randomly end up with a cycle_idx of 0 and hence have bbr_advance_cycle_phase() set a pacing gain above 1.0. In such cases this would result in a pacing rate that is 1.25x higher than intended, potentially resulting in a high loss rate for a little while until we stop using the lt_bw a bit later. This commit is a stable candidate for kernels back as far as 4.9. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Reported-by: Beyers Cronje Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 8322f26e770e..25c5a0b60cfc 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -481,7 +481,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -490,8 +491,7 @@ static void bbr_update_cycle_phase(struct sock *sk, { struct bbr *bbr = inet_csk_ca(sk); - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && - bbr_is_next_cycle_phase(sk, rs)) + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) bbr_advance_cycle_phase(sk); } From 456add4c9bd3bd97a6ab34fc9a5866358ca18d25 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Jan 2018 23:15:27 -0800 Subject: [PATCH 237/770] ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only [ Upstream commit 7ece54a60ee2ba7a386308cae73c790bd580589c ] If a sk_v6_rcv_saddr is !IPV6_ADDR_ANY and !IPV6_ADDR_MAPPED, it implicitly implies it is an ipv6only socket. However, in inet6_bind(), this addr_type checking and setting sk->sk_ipv6only to 1 are only done after sk->sk_prot->get_port(sk, snum) has been completed successfully. This inconsistency between sk_v6_rcv_saddr and sk_ipv6only confuses the 'get_port()'. In particular, when binding SO_REUSEPORT UDP sockets, udp_reuseport_add_sock(sk,...) is called. udp_reuseport_add_sock() checks "ipv6_only_sock(sk2) == ipv6_only_sock(sk)" before adding sk to sk2->sk_reuseport_cb. In this case, ipv6_only_sock(sk2) could be 1 while ipv6_only_sock(sk) is still 0 here. The end result is, reuseport_alloc(sk) is called instead of adding sk to the existing sk2->sk_reuseport_cb. It can be reproduced by binding two SO_REUSEPORT UDP sockets on an IPv6 address (!ANY and !MAPPED). Only one of the socket will receive packet. The fix is to set the implicit sk_ipv6only before calling get_port(). The original sk_ipv6only has to be saved such that it can be restored in case get_port() failed. The situation is similar to the inet_reset_saddr(sk) after get_port() has failed. Thanks to Calvin Owens who created an easy reproduction which leads to a fix. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/af_inet6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bcbd5f3bf8bd..9ccbf74deb99 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; + bool saved_ipv6only; int addr_type = 0; int err = 0; @@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; + saved_ipv6only = sk->sk_ipv6only; + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) + sk->sk_ipv6only = 1; + /* Make sure we are allowed to bind here. */ if ((snum || !inet->bind_address_no_port) && sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - if (addr_type != IPV6_ADDR_ANY) { + if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (addr_type != IPV6_ADDR_MAPPED) - sk->sk_ipv6only = 1; - } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); From 981f20bc75952ffa1ebe5b1bb2b9f35ed490a354 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2018 10:27:27 -0800 Subject: [PATCH 238/770] soreuseport: fix mem leak in reuseport_add_sock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4db428a7c9ab07e08783e0fcdc4ca0f555da0567 ] reuseport_add_sock() needs to deal with attaching a socket having its own sk_reuseport_cb, after a prior setsockopt(SO_ATTACH_REUSEPORT_?BPF) Without this fix, not only a WARN_ONCE() was issued, but we were also leaking memory. Thanks to sysbot and Eric Biggers for providing us nice C repros. ------------[ cut here ]------------ socket already in reuseport group WARNING: CPU: 0 PID: 3496 at net/core/sock_reuseport.c:119   reuseport_add_sock+0x742/0x9b0 net/core/sock_reuseport.c:117 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 3496 Comm: syzkaller869503 Not tainted 4.15.0-rc6+ #245 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS   Google 01/01/2011 Call Trace:   __dump_stack lib/dump_stack.c:17 [inline]   dump_stack+0x194/0x257 lib/dump_stack.c:53   panic+0x1e4/0x41c kernel/panic.c:183   __warn+0x1dc/0x200 kernel/panic.c:547   report_bug+0x211/0x2d0 lib/bug.c:184   fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178   fixup_bug arch/x86/kernel/traps.c:247 [inline]   do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296   do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315   invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079 Fixes: ef456144da8e ("soreuseport: define reuseport groups") Signed-off-by: Eric Dumazet Reported-by: syzbot+c0ea2226f77a42936bf7@syzkaller.appspotmail.com Acked-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_reuseport.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 5eeb1d20cc38..676092d7bd81 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -94,6 +94,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) return more_reuse; } +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + /** * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. @@ -102,7 +112,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) */ int reuseport_add_sock(struct sock *sk, struct sock *sk2) { - struct sock_reuseport *reuse; + struct sock_reuseport *old_reuse, *reuse; if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { int err = reuseport_alloc(sk2); @@ -113,10 +123,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_lock_bh(&reuseport_lock); reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "socket already in reuseport group"); + lockdep_is_held(&reuseport_lock)); + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + if (old_reuse && old_reuse->num_socks != 1) { + spin_unlock_bh(&reuseport_lock); + return -EBUSY; + } if (reuse->num_socks == reuse->max_socks) { reuse = reuseport_grow(reuse); @@ -134,19 +147,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) spin_unlock_bh(&reuseport_lock); + if (old_reuse) + call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } -static void reuseport_free_rcu(struct rcu_head *head) -{ - struct sock_reuseport *reuse; - - reuse = container_of(head, struct sock_reuseport, rcu); - if (reuse->prog) - bpf_prog_destroy(reuse->prog); - kfree(reuse); -} - void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; From 1bf81cff1bad67632e4ba68cbe0c843541439de8 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 02:47:54 -0500 Subject: [PATCH 239/770] media: mtk-vcodec: add missing MODULE_LICENSE/DESCRIPTION commit ccbc1e3876d5476fc23429690a683294ef0f755b upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/mtk-vcodec/mtk-vcodec-common.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION is also added. Signed-off-by: Jesse Chan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c index 46768c056193..0c28d0b995cc 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_util.c @@ -115,3 +115,6 @@ struct mtk_vcodec_ctx *mtk_vcodec_get_curr_ctx(struct mtk_vcodec_dev *dev) return ctx; } EXPORT_SYMBOL(mtk_vcodec_get_curr_ctx); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Mediatek video codec driver"); From 68a9f19264fd23ec78c18724d56fd71dbe5f23b1 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 15:56:28 -0500 Subject: [PATCH 240/770] media: soc_camera: soc_scale_crop: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE commit 5331aec1bf9c9da557668174e0a4bfcee39f1121 upstream. This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/platform/soc_camera/soc_scale_crop.o see include/linux/module.h for more information This adds the license as "GPL", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/soc_camera/soc_scale_crop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c index 0116097c0c0f..092c73f24589 100644 --- a/drivers/media/platform/soc_camera/soc_scale_crop.c +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c @@ -419,3 +419,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); } EXPORT_SYMBOL(soc_camera_calc_client_output); + +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_LICENSE("GPL"); From 3a570cfe785a1d031ee56e3c4a3534cb36e26b65 Mon Sep 17 00:00:00 2001 From: Robert Baronescu Date: Tue, 10 Oct 2017 13:21:59 +0300 Subject: [PATCH 241/770] crypto: tcrypt - fix S/G table for test_aead_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c6ac1d4f8fbdbed65dbeb8cf149d736409d16a1 upstream. In case buffer length is a multiple of PAGE_SIZE, the S/G table is incorrectly generated. Fix this by handling buflen = k * PAGE_SIZE separately. Signed-off-by: Robert Baronescu Signed-off-by: Herbert Xu Signed-off-by: Horia Geantă Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f5f58a6eee5d..e339960dcac7 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -221,11 +221,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], } sg_init_table(sg, np + 1); - np--; + if (rem) + np--; for (k = 0; k < np; k++) sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); - sg_set_buf(&sg[k + 1], xbuf[k], rem); + if (rem) + sg_set_buf(&sg[k + 1], xbuf[k], rem); } static void test_aead_speed(const char *algo, int enc, unsigned int secs, From c5c91d8305074ce1ae72df29c7446850fc927a4f Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Dec 2017 15:33:19 -0800 Subject: [PATCH 242/770] arch: define weak abort() commit 7c2c11b208be09c156573fc0076b7b3646e05219 upstream. gcc toggle -fisolate-erroneous-paths-dereference (default at -O2 onwards) isolates faulty code paths such as null pointer access, divide by zero etc. If gcc port doesnt implement __builtin_trap, an abort() is generated which causes kernel link error. In this case, gcc is generating abort due to 'divide by zero' in lib/mpi/mpih-div.c. Currently 'frv' and 'arc' are failing. Previously other arch was also broken like m32r was fixed by commit d22e3d69ee1a ("m32r: fix build failure"). Let's define this weak function which is common for all arch and fix the problem permanently. We can even remove the arch specific 'abort' after this is done. Link: http://lkml.kernel.org/r/1513118956-8718-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Alexey Brodkin Cc: Vineet Gupta Cc: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Evgeniy Didin Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index f6cad39f35df..5a0c4791473c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1755,3 +1755,11 @@ Efault: return -EFAULT; } #endif + +__weak void abort(void) +{ + BUG(); + + /* if that doesn't kill us, halt */ + panic("Oops failed to kill thread"); +} From f18046f7a50accf76dab9c6ab274a7e38143a0dc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 4 Jan 2018 16:17:56 -0800 Subject: [PATCH 243/770] kernel/exit.c: export abort() to modules commit dc8635b78cd8669c37e230058d18c33af7451ab1 upstream. gcc -fisolate-erroneous-paths-dereference can generate calls to abort() from modular code too. [arnd@arndb.de: drop duplicate exports of abort()] Link: http://lkml.kernel.org/r/20180102103311.706364-1-arnd@arndb.de Reported-by: Vineet Gupta Cc: Sudip Mukherjee Cc: Arnd Bergmann Cc: Alexey Brodkin Cc: Russell King Cc: Jose Abreu Signed-off-by: Andrew Morton Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds Cc: Evgeniy Didin Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/traps.c | 1 - arch/m32r/kernel/traps.c | 1 - arch/unicore32/kernel/traps.c | 1 - kernel/exit.c | 1 + 4 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 0fcd82f01388..b8dc3b516f93 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -790,7 +790,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index cb79fba79d43..b88a8dd14933 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -122,7 +122,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 5f25b39f04d4..c4ac6043ebb0 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -298,7 +298,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/kernel/exit.c b/kernel/exit.c index 5a0c4791473c..e3a08761eb40 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1763,3 +1763,4 @@ __weak void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } +EXPORT_SYMBOL(abort); From 22f16a74c4744f8b0d83fd0d031be2137e764cbf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Jan 2018 13:40:22 +0300 Subject: [PATCH 244/770] scsi: storvsc: missing error code in storvsc_probe() commit ca8dc694045e9aa248e9916e0f614deb0494cb3d upstream. We should set the error code if fc_remote_port_add() fails. Cc: #v4.12+ Fixes: daf0cd445a21 ("scsi: storvsc: Add support for FC rport.") Signed-off-by: Dan Carpenter Reviewed-by: Cathy Avery Acked-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Long Li --- drivers/scsi/storvsc_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5e7200f05873..c17ccb913fde 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1826,8 +1826,10 @@ static int storvsc_probe(struct hv_device *device, fc_host_node_name(host) = stor_device->node_name; fc_host_port_name(host) = stor_device->port_name; stor_device->rport = fc_remote_port_add(host, 0, &ids); - if (!stor_device->rport) + if (!stor_device->rport) { + ret = -ENOMEM; goto err_out3; + } } #endif return 0; From d2aeb4e2bd8db42589fd7624cb9453894ab455ec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Feb 2018 10:14:14 +0100 Subject: [PATCH 245/770] Revert "x86/alternative: Print unadorned pointers" This reverts commit 67eb59b8ecfb319438706cee2cb67a3045b54494. It's not needed in 4.14.y and only causes messy debugging messages, if anyone actually cares about these random debug messages in the first place (doubtful). Reported-by: Kees Cook Acked-by: Borislav Petkov Cc: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 21be0193d9dc..b034826a0b3b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -287,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) tgt_rip = next_rip + o_dspl; n_dspl = tgt_rip - orig_insn; - DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); + DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); if (tgt_rip - orig_insn >= 0) { if (n_dspl - 2 <= 127) @@ -344,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins add_nops(instr + (a->instrlen - a->padlen), a->padlen); local_irq_restore(flags); - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", + DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); } @@ -365,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insnbuf[MAX_PATCH_LEN]; - DPRINTK("alt table %px, -> %px", start, end); + DPRINTK("alt table %p -> %p", start, end); /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -389,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, continue; } - DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", + DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", a->cpuid >> 5, a->cpuid & 0x1f, instr, a->instrlen, replacement, a->replacementlen, a->padlen); - DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); - DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); + DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; @@ -422,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, a->instrlen - a->replacementlen); insnbuf_sz += a->instrlen - a->replacementlen; } - DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); + DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); text_poke_early(instr, insnbuf, insnbuf_sz); } From 1722fe3727845178e5d7fe3fdf35b8a5de230fdf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Feb 2018 10:19:50 +0100 Subject: [PATCH 246/770] Linux 4.14.19 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a69e5da9ed86..76a0b13623f4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 18 +SUBLEVEL = 19 EXTRAVERSION = NAME = Petit Gorille From 8caab9edccf57540770352825f9d3fa2d8343ae2 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 14 Nov 2017 10:52:54 +0000 Subject: [PATCH 247/770] watchdog: indydog: Add dependency on SGI_HAS_INDYDOG commit 24f8d233074badd4c18e4dafd2fb97d65838afed upstream. Commit da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") enabled building the Indy watchdog driver when COMPILE_TEST is enabled. However, the driver makes reference to symbols that are only defined for certain platforms are selected in the config. These platforms select SGI_HAS_INDYDOG. Without this, link time errors result, for example when building a MIPS allyesconfig. drivers/watchdog/indydog.o: In function `indydog_write': indydog.c:(.text+0x18): undefined reference to `sgimc' indydog.c:(.text+0x1c): undefined reference to `sgimc' drivers/watchdog/indydog.o: In function `indydog_start': indydog.c:(.text+0x54): undefined reference to `sgimc' indydog.c:(.text+0x58): undefined reference to `sgimc' drivers/watchdog/indydog.o: In function `indydog_stop': indydog.c:(.text+0xa4): undefined reference to `sgimc' drivers/watchdog/indydog.o:indydog.c:(.text+0xa8): more undefined references to `sgimc' follow make: *** [Makefile:1005: vmlinux] Error 1 Fix this by ensuring that CONFIG_INDIDOG can only be selected when the necessary dependent platform symbols are built in. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Matt Redfearn Signed-off-by: Ralf Baechle Suggested-by: James Hogan Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c722cbfdc7e6..3ece1335ba84 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1451,7 +1451,7 @@ config RC32434_WDT config INDYDOG tristate "Indy/I2 Hardware Watchdog" - depends on SGI_HAS_INDYDOG || (MIPS && COMPILE_TEST) + depends on SGI_HAS_INDYDOG help Hardware driver for the Indy's/I2's watchdog. This is a watchdog timer that will reboot the machine after a 60 second From b4a9ffad970244d0a349e4e06e5bb332790d7715 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 15 Jan 2018 14:30:03 +0100 Subject: [PATCH 248/770] powerpc/pseries: include linux/types.h in asm/hvcall.h commit 1b689a95ce7427075f9ac9fb4aea1af530742b7f upstream. Commit 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") uses u64 in asm/hvcall.h without including linux/types.h This breaks hvcall.h users that do not include the header themselves. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index f0461618bf7b..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -353,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments From 061df7705a53179ecb7f73a6b995bf7170a794d8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 15 Dec 2017 12:48:32 -0800 Subject: [PATCH 249/770] cifs: Fix missing put_xid in cifs_file_strict_mmap commit f04a703c3d613845ae3141bfaf223489de8ab3eb upstream. If cifs_zap_mapping() returned an error, we would return without putting the xid that we got earlier. Restructure cifs_file_strict_mmap() and cifs_file_mmap() to be more similar to each other and have a single point of return that always puts the xid. Signed-off-by: Matthew Wilcox Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 92fdf9c35de2..7d6539a04fac 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3488,20 +3488,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { - int rc, xid; + int xid, rc = 0; struct inode *inode = file_inode(file); xid = get_xid(); - if (!CIFS_CACHE_READ(CIFS_I(inode))) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); - if (rc) - return rc; - } - - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } @@ -3511,16 +3509,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; xid = get_xid(); + rc = cifs_revalidate_file(file); - if (rc) { + if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); - free_xid(xid); - return rc; - } - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } From be6874b4d480d12ec120e195f35091d831ca97bc Mon Sep 17 00:00:00 2001 From: Daniel N Pettersson Date: Thu, 11 Jan 2018 16:00:12 +0100 Subject: [PATCH 250/770] cifs: Fix autonegotiate security settings mismatch commit 9aca7e454415f7878b28524e76bebe1170911a88 upstream. Autonegotiation gives a security settings mismatch error if the SMB server selects an SMBv3 dialect that isn't SMB3.02. The exact error is "protocol revalidation - security settings mismatch". This can be tested using Samba v4.2 or by setting the global Samba setting max protocol = SMB3_00. The check that fails in smb3_validate_negotiate is the dialect verification of the negotiate info response. This is because it tries to verify against the protocol_id in the global smbdefault_values. The protocol_id in smbdefault_values is SMB3.02. In SMB2_negotiate the protocol_id in smbdefault_values isn't updated, it is global so it probably shouldn't be, but server->dialect is. This patch changes the check in smb3_validate_negotiate to use server->dialect instead of server->vals->protocol_id. The patch works with autonegotiate and when using a specific version in the vers mount option. Signed-off-by: Daniel N Pettersson Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 01346b8b6edb..66af1f8a13cc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -733,8 +733,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) + if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect)) goto vneg_out; if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) From 4126cdb73193eb0778ae05d2162cbb3bcd778331 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 25 Jan 2018 15:59:39 +0100 Subject: [PATCH 251/770] CIFS: zero sensitive data when freeing commit 97f4b7276b829a8927ac903a119bef2f963ccc58 upstream. also replaces memset()+kfree() by kzfree(). Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/connect.c | 6 +++--- fs/cifs/misc.c | 14 ++++---------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 68abbb0db608..f2b0a7f124da 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -325,9 +325,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, { int i; int rc; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); if (password) strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0bfc2280436d..f7db2fedfa8c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1707,7 +1707,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. Set the password to NULL */ - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; break; } @@ -1745,7 +1745,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } - kfree(vol->password); + kzfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -4235,7 +4235,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); - kfree(vol_info->password); + kzfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index eea93ac15ef0..a0dbced4a45c 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -98,14 +98,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); - kfree(buf_to_free->auth_key.response); - kfree(buf_to_free); + kzfree(buf_to_free->auth_key.response); + kzfree(buf_to_free); } struct cifs_tcon * @@ -136,10 +133,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) } atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free); } From 7adf1d63ef6567924695e3965418d0ded8c896c8 Mon Sep 17 00:00:00 2001 From: Andrew-sh Cheng Date: Fri, 8 Dec 2017 14:07:56 +0800 Subject: [PATCH 252/770] cpufreq: mediatek: add mediatek related projects into blacklist commit 6066998cbd2b1012a8d5bc9a2957cfd0ad53150e upstream. mediatek projects will use mediate-cpufreq.c as cpufreq driver, instead of using cpufreq_dt.c Add mediatek related projects into cpufreq-dt blacklist Signed-off-by: Andrew-sh Cheng Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sean Wang Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq-dt-platdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index a753c50e9e41..9e0aa767bbbe 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -111,6 +111,14 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "marvell,armadaxp", }, + { .compatible = "mediatek,mt2701", }, + { .compatible = "mediatek,mt2712", }, + { .compatible = "mediatek,mt7622", }, + { .compatible = "mediatek,mt7623", }, + { .compatible = "mediatek,mt817x", }, + { .compatible = "mediatek,mt8173", }, + { .compatible = "mediatek,mt8176", }, + { .compatible = "nvidia,tegra124", }, { .compatible = "st,stih407", }, From 69373cdc4ae4eacac2eb19da03eb0ad2bb0eda7f Mon Sep 17 00:00:00 2001 From: Yang Shunyong Date: Mon, 29 Jan 2018 14:40:11 +0800 Subject: [PATCH 253/770] dmaengine: dmatest: fix container_of member in dmatest_callback commit 66b3bd2356e0a1531c71a3dcf96944621e25c17c upstream. The type of arg passed to dmatest_callback is struct dmatest_done. It refers to test_done in struct dmatest_thread, not done_wait. Fixes: 6f6a23a213be ("dmaengine: dmatest: move callback wait ...") Signed-off-by: Yang Shunyong Acked-by: Adam Wallis Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index ec5f9d2bc820..80cc2be6483c 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -355,7 +355,7 @@ static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; struct dmatest_thread *thread = - container_of(arg, struct dmatest_thread, done_wait); + container_of(done, struct dmatest_thread, test_done); if (!thread->done) { done->done = true; wake_up_all(done->wait); From e94a7de2a3d2da42f0d124c7501195f1cdffa232 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 5 Dec 2017 23:15:31 -0800 Subject: [PATCH 254/770] sched/wait: Fix add_wait_queue() behavioral change commit c6b9d9a33029014446bd9ed84c1688f6d3d4eab9 upstream. The following cleanup commit: 50816c48997a ("sched/wait: Standardize internal naming of wait-queue entries") ... unintentionally changed the behavior of add_wait_queue() from inserting the wait entry at the head of the wait queue to the tail of the wait queue. Beyond a negative performance impact this change in behavior theoretically also breaks wait queues which mix exclusive and non-exclusive waiters, as non-exclusive waiters will not be woken up if they are queued behind enough exclusive waiters. Signed-off-by: Omar Sandoval Reviewed-by: Jens Axboe Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Cc: kernel-team@fb.com Fixes: ("sched/wait: Standardize internal naming of wait-queue entries") Link: http://lkml.kernel.org/r/a16c8ccffd39bd08fdaa45a5192294c784b803a7.1512544324.git.osandov@fb.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/wait.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 98feab7933c7..929ecb7d6b78 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -27,7 +27,7 @@ void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&wq_head->lock, flags); - __add_wait_queue_entry_tail(wq_head, wq_entry); + __add_wait_queue(wq_head, wq_entry); spin_unlock_irqrestore(&wq_head->lock, flags); } EXPORT_SYMBOL(add_wait_queue); From fd7467f82e58e7b6579cac0c9e883495df15f163 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 9 Nov 2017 14:39:55 +0100 Subject: [PATCH 255/770] watchdog: gpio_wdt: set WDOG_HW_RUNNING in gpio_wdt_stop commit bc137dfdbec27c0ec5731a89002daded4a4aa1ea upstream. The first patch above (https://patchwork.kernel.org/patch/9970181/) makes the oops go away, but it just papers over the problem. The real problem is that the watchdog core clears WDOG_HW_RUNNING in watchdog_stop, and the gpio driver fails to set it in its stop function when it doesn't actually stop it. This means that the core doesn't know that it now has responsibility for petting the device, in turn causing the device to reset the system (I hadn't noticed this because the board I'm working on has that reset logic disabled). How about this (other drivers may of course have the same problem, I haven't checked). One might say that ->stop should return an error when the device can't be stopped, but OTOH this brings parity between a device without a ->stop method and a GPIO wd that has always-running set. IOW, I think ->stop should only return an error when an actual attempt to stop the hardware failed. From: Rasmus Villemoes The watchdog framework clears WDOG_HW_RUNNING before calling ->stop. If the driver is unable to stop the device, it is supposed to set that bit again so that the watchdog core takes care of sending heart-beats while the device is not open from user-space. Update the gpio_wdt driver to honour that contract (and get rid of the redundant clearing of WDOG_HW_RUNNING). Fixes: 3c10bbde10 ("watchdog: core: Clear WDOG_HW_RUNNING before calling the stop function") Signed-off-by: Rasmus Villemoes Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/gpio_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index cb66c2f99ff1..7a6279daa8b9 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -80,7 +80,8 @@ static int gpio_wdt_stop(struct watchdog_device *wdd) if (!priv->always_running) { gpio_wdt_disable(priv); - clear_bit(WDOG_HW_RUNNING, &wdd->status); + } else { + set_bit(WDOG_HW_RUNNING, &wdd->status); } return 0; From 0b69ec336d3d5c50710e19aaf0ca5f738fc20bc3 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Dec 2017 16:42:31 -0600 Subject: [PATCH 256/770] arm64: Define cputype macros for Falkor CPU commit c622cc013cece073722592cff1ac6643a33b1622 upstream. Add cputype definition macros for Qualcomm Datacenter Technologies Falkor CPU in cputype.h. It's unfortunate that the first revision of the Falkor CPU used the wrong part number 0x800, got fixed in v2 chip with part number 0xC00, and would be used the same value for future revisions. Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon Cc: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 235e77d98261..cbf08d7cbf30 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -91,6 +91,7 @@ #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_FALKOR 0xC00 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -99,6 +100,7 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #ifndef __ASSEMBLY__ From 4c54eab372f8cd3f3b1f0a07bc484b06eed3c5a0 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Dec 2017 16:42:32 -0600 Subject: [PATCH 257/770] arm64: Add software workaround for Falkor erratum 1041 commit 932b50c7c1c65e6f23002e075b97ee083c4a9e71 upstream. The ARM architecture defines the memory locations that are permitted to be accessed as the result of a speculative instruction fetch from an exception level for which all stages of translation are disabled. Specifically, the core is permitted to speculatively fetch from the 4KB region containing the current program counter 4K and next 4K. When translation is changed from enabled to disabled for the running exception level (SCTLR_ELn[M] changed from a value of 1 to 0), the Falkor core may errantly speculatively access memory locations outside of the 4KB region permitted by the architecture. The errant memory access may lead to one of the following unexpected behaviors. 1) A System Error Interrupt (SEI) being raised by the Falkor core due to the errant memory access attempting to access a region of memory that is protected by a slave-side memory protection unit. 2) Unpredictable device behavior due to a speculative read from device memory. This behavior may only occur if the instruction cache is disabled prior to or coincident with translation being changed from enabled to disabled. The conditions leading to this erratum will not occur when either of the following occur: 1) A higher exception level disables translation of a lower exception level (e.g. EL2 changing SCTLR_EL1[M] from a value of 1 to 0). 2) An exception level disabling its stage-1 translation if its stage-2 translation is enabled (e.g. EL1 changing SCTLR_EL1[M] from a value of 1 to 0 when HCR_EL2[VM] has a value of 1). To avoid the errant behavior, software must execute an ISB immediately prior to executing the MSR that will change SCTLR_ELn[M] from 1 to 0. Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon Cc: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 10 ++++++++++ arch/arm64/include/asm/assembler.h | 10 ++++++++++ arch/arm64/kernel/cpu-reset.S | 1 + arch/arm64/kernel/efi-entry.S | 2 ++ arch/arm64/kernel/head.S | 1 + arch/arm64/kernel/relocate_kernel.S | 1 + arch/arm64/kvm/hyp-init.S | 1 + 8 files changed, 27 insertions(+) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 66e8ce14d23d..704770c0edf2 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -74,3 +74,4 @@ stable kernels. | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | +| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0df64a6a56d4..8f73eaccf50a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -539,6 +539,16 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. +config QCOM_FALKOR_ERRATUM_E1041 + bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" + default y + help + Falkor CPU may speculatively fetch instructions from an improper + memory location when MMU translation is changed from SCTLR_ELn[M]=1 + to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d58a6253c6ab..7896728f3b22 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -499,4 +499,14 @@ alternative_else_nop_endif #endif .endm +/** + * Errata workaround prior to disable MMU. Insert an ISB immediately prior + * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. + */ + .macro pre_disable_mmu_workaround +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041 + isb +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 65f42d257414..2a752cb2a0f3 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart) mrs x12, sctlr_el1 ldr x13, =SCTLR_ELx_FLAGS bic x12, x12, x13 + pre_disable_mmu_workaround msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 4e6ad355bd05..6b9736c3fb56 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -96,6 +96,7 @@ ENTRY(entry) mrs x0, sctlr_el2 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el2, x0 isb b 2f @@ -103,6 +104,7 @@ ENTRY(entry) mrs x0, sctlr_el1 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el1, x0 isb 2: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b243ecaf7ac..6657ae4f3c7a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -732,6 +732,7 @@ __primary_switch: * to take into account by discarding the current kernel mapping and * creating a new one. */ + pre_disable_mmu_workaround msr sctlr_el1, x20 // disable the MMU isb bl __create_page_tables // recreate kernel mapping diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index ce704a4aeadd..f407e422a720 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel) mrs x0, sctlr_el2 ldr x1, =SCTLR_ELx_FLAGS bic x0, x0, x1 + pre_disable_mmu_workaround msr sctlr_el2, x0 isb 1: diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377..870828c364c5 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -151,6 +151,7 @@ reset: mrs x5, sctlr_el2 ldr x6, =SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround msr sctlr_el2, x5 isb From fea5349eb88bf8e95b0574a7c7d30040c2f07c9c Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Wed, 10 Jan 2018 21:44:42 +0800 Subject: [PATCH 258/770] KVM MMU: check pending exception before injecting APF commit 2a266f23550be997d783f27e704b9b40c4010292 upstream. For example, when two APF's for page ready happen after one exit and the first one becomes pending, the second one will result in #DF. Instead, just handle the second page fault synchronously. Reported-by: Ross Zwisler Message-ID: Reported-by: Alec Blayne Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0fce8d73403c..beb7f8795bc1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3784,7 +3784,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) { if (unlikely(!lapic_in_kernel(vcpu) || - kvm_event_needs_reinjection(vcpu))) + kvm_event_needs_reinjection(vcpu) || + vcpu->arch.exception.pending)) return false; if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) From 9c41a8453c826b58e470d23302d0e10aba3fd168 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:37 -0500 Subject: [PATCH 259/770] sched/rt: Use container_of() to get root domain in rto_push_irq_work_func() commit ad0f1d9d65938aec72a698116cd73a980916895e upstream. When the rto_push_irq_work_func() is called, it looks at the RT overloaded bitmask in the root domain via the runqueue (rq->rd). The problem is that during CPU up and down, nothing here stops rq->rd from changing between taking the rq->rd->rto_lock and releasing it. That means the lock that is released is not the same lock that was taken. Instead of using this_rq()->rd to get the root domain, as the irq work is part of the root domain, we can simply get the root domain from the irq work that is passed to the routine: container_of(work, struct root_domain, rto_push_work) This keeps the root domain consistent. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7464c5c4de46..6f42703edc47 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1907,9 +1907,8 @@ static void push_rt_tasks(struct rq *rq) * the rt_loop_next will cause the iterator to perform another scan. * */ -static int rto_next_cpu(struct rq *rq) +static int rto_next_cpu(struct root_domain *rd) { - struct root_domain *rd = rq->rd; int next; int cpu; @@ -1985,7 +1984,7 @@ static void tell_cpu_to_push(struct rq *rq) * Otherwise it is finishing up and an ipi needs to be sent. */ if (rq->rd->rto_cpu < 0) - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rq->rd); raw_spin_unlock(&rq->rd->rto_lock); @@ -1998,6 +1997,8 @@ static void tell_cpu_to_push(struct rq *rq) /* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work) { + struct root_domain *rd = + container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu; @@ -2013,18 +2014,18 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rq->lock); } - raw_spin_lock(&rq->rd->rto_lock); + raw_spin_lock(&rd->rto_lock); /* Pass the IPI to the next rt overloaded queue */ - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rd); - raw_spin_unlock(&rq->rd->rto_lock); + raw_spin_unlock(&rd->rto_lock); if (cpu < 0) return; /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rq->rd->rto_push_work, cpu); + irq_work_queue_on(&rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ From d9c3131f2ab45eacb6641b29ea6ee69039c727a7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 20:45:38 -0500 Subject: [PATCH 260/770] sched/rt: Up the root domain ref count when passing it around via IPIs commit 364f56653708ba8bcdefd4f0da2a42904baa8eeb upstream. When issuing an IPI RT push, where an IPI is sent to each CPU that has more than one RT task scheduled on it, it references the root domain's rto_mask, that contains all the CPUs within the root domain that has more than one RT task in the runable state. The problem is, after the IPIs are initiated, the rq->lock is released. This means that the root domain that is associated to the run queue could be freed while the IPIs are going around. Add a sched_get_rd() and a sched_put_rd() that will increment and decrement the root domain's ref count respectively. This way when initiating the IPIs, the scheduler will up the root domain's ref count before releasing the rq->lock, ensuring that the root domain does not go away until the IPI round is complete. Reported-by: Pavan Kondeti Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 9 +++++++-- kernel/sched/sched.h | 2 ++ kernel/sched/topology.c | 13 +++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 6f42703edc47..298f62b8662d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1990,8 +1990,11 @@ static void tell_cpu_to_push(struct rq *rq) rto_start_unlock(&rq->rd->rto_loop_start); - if (cpu >= 0) + if (cpu >= 0) { + /* Make sure the rd does not get freed while pushing */ + sched_get_rd(rq->rd); irq_work_queue_on(&rq->rd->rto_push_work, cpu); + } } /* Called from hardirq context */ @@ -2021,8 +2024,10 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rd->rto_lock); - if (cpu < 0) + if (cpu < 0) { + sched_put_rd(rd); return; + } /* Try the next RT overloaded CPU */ irq_work_queue_on(&rd->rto_push_work, cpu); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b732e779fe7d..307c35d33660 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -661,6 +661,8 @@ extern struct mutex sched_domains_mutex; extern void init_defrootdomain(void); extern int sched_init_domains(const struct cpumask *cpu_map); extern void rq_attach_root(struct rq *rq, struct root_domain *rd); +extern void sched_get_rd(struct root_domain *rd); +extern void sched_put_rd(struct root_domain *rd); #ifdef HAVE_RT_PUSH_IPI extern void rto_push_irq_work_func(struct irq_work *work); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 093f2ceba2e2..659e075ef70b 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -258,6 +258,19 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); } +void sched_get_rd(struct root_domain *rd) +{ + atomic_inc(&rd->refcount); +} + +void sched_put_rd(struct root_domain *rd) +{ + if (!atomic_dec_and_test(&rd->refcount)) + return; + + call_rcu_sched(&rd->rcu, free_rootdomain); +} + static int init_rootdomain(struct root_domain *rd) { if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) From 19d8e5122fefc3e424050a78063f0d385433e191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Nov 2017 21:19:08 +0200 Subject: [PATCH 261/770] drm/i915: Add .get_hw_state() method for planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d87ce76402950b8e4d5117276d44465658e886a4 upstream. Add a .get_hw_state() method for planes, returning true or false depending on whether the plane is enabled. Use it to rewrite the plane enabled/disabled asserts in platform agnostic fashion. We do lose the pre-gen4 plane<->pipe mapping checks, but since we're supposed sanitize that anyway it doesn't really matter. v2: Reoder patches to not depend on enum old_plane_id Just call assert_plane_disabled() from assert_planes_disabled() v3: Deal with disabled power wells in .get_hw_state() v4: Rebase due skl primary plane code removal Cc: Thierry Reding Cc: Alex Villacís Lasso Reviewed-by: Daniel Vetter #v2 Tested-by: Thierry Reding #v2 Link: https://patchwork.freedesktop.org/patch/msgid/20171117191917.11506-2-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 51f5a096398433a881e845d3685a2c1dac756019) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 188 +++++++++++++-------------- drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_sprite.c | 83 ++++++++++++ 3 files changed, 175 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 095a2240af4f..47094d4a8f0e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1211,23 +1211,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) pipe_name(pipe)); } -static void assert_cursor(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) -{ - bool cur_state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - I915_STATE_WARN(cur_state != state, - "cursor on pipe %c assertion failure (expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); -} -#define assert_cursor_enabled(d, p) assert_cursor(d, p, true) -#define assert_cursor_disabled(d, p) assert_cursor(d, p, false) - void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { @@ -1255,77 +1238,25 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe_name(pipe), onoff(state), onoff(cur_state)); } -static void assert_plane(struct drm_i915_private *dev_priv, - enum plane plane, bool state) +static void assert_plane(struct intel_plane *plane, bool state) { - u32 val; - bool cur_state; + bool cur_state = plane->get_hw_state(plane); - val = I915_READ(DSPCNTR(plane)); - cur_state = !!(val & DISPLAY_PLANE_ENABLE); I915_STATE_WARN(cur_state != state, - "plane %c assertion failure (expected %s, current %s)\n", - plane_name(plane), onoff(state), onoff(cur_state)); + "%s assertion failure (expected %s, current %s)\n", + plane->base.name, onoff(state), onoff(cur_state)); } -#define assert_plane_enabled(d, p) assert_plane(d, p, true) -#define assert_plane_disabled(d, p) assert_plane(d, p, false) +#define assert_plane_enabled(p) assert_plane(p, true) +#define assert_plane_disabled(p) assert_plane(p, false) -static void assert_planes_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void assert_planes_disabled(struct intel_crtc *crtc) { - int i; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; - /* Primary planes are fixed to pipes on gen4+ */ - if (INTEL_GEN(dev_priv) >= 4) { - u32 val = I915_READ(DSPCNTR(pipe)); - I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE, - "plane %c assertion failure, should be disabled but not\n", - plane_name(pipe)); - return; - } - - /* Need to check both planes against the pipe */ - for_each_pipe(dev_priv, i) { - u32 val = I915_READ(DSPCNTR(i)); - enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> - DISPPLANE_SEL_PIPE_SHIFT; - I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, - "plane %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(i), pipe_name(pipe)); - } -} - -static void assert_sprites_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - int sprite; - - if (INTEL_GEN(dev_priv) >= 9) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(PLANE_CTL(pipe, sprite)); - I915_STATE_WARN(val & PLANE_CTL_ENABLE, - "plane %d assertion failure, should be off on pipe %c but is still active\n", - sprite, pipe_name(pipe)); - } - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite)); - I915_STATE_WARN(val & SP_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, sprite), pipe_name(pipe)); - } - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 val = I915_READ(SPRCTL(pipe)); - I915_STATE_WARN(val & SPRITE_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - u32 val = I915_READ(DVSCNTR(pipe)); - I915_STATE_WARN(val & DVS_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + assert_plane_disabled(plane); } static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -1926,9 +1857,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc) DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe)); - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); /* * A pipe without a PLL won't actually be able to drive bits from @@ -1997,9 +1926,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. */ - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -3397,6 +3324,31 @@ static void i9xx_disable_primary_plane(struct intel_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool i9xx_plane_get_hw_state(struct intel_plane *primary) +{ + + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum intel_display_power_domain power_domain; + enum plane plane = primary->plane; + enum pipe pipe = primary->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-4 which don't have any + * display power wells. + */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -4973,7 +4925,8 @@ void hsw_enable_ips(struct intel_crtc *crtc) * a vblank wait. */ - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->rps.hw_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0xc0000000)); @@ -5005,7 +4958,8 @@ void hsw_disable_ips(struct intel_crtc *crtc) if (!crtc->config->ips_enabled) return; - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->rps.hw_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); @@ -9577,6 +9531,23 @@ static void i845_disable_cursor(struct intel_plane *plane, i845_update_cursor(plane, NULL, NULL); } +static bool i845_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(PIPE_A); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9770,6 +9741,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } +static bool i9xx_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-3 which don't have any + * display power wells. + */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} /* VESA 640x480x72Hz mode to set on the pipe */ static struct drm_display_mode load_detect_mode = { @@ -13240,6 +13233,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skylake_update_primary_plane; primary->disable_plane = skylake_disable_primary_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13250,6 +13244,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skylake_update_primary_plane; primary->disable_plane = skylake_disable_primary_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -13257,6 +13252,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } else { intel_primary_formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); @@ -13264,6 +13260,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } if (INTEL_GEN(dev_priv) >= 9) @@ -13353,10 +13350,12 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->get_hw_state = i845_cursor_get_hw_state; cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->get_hw_state = i9xx_cursor_get_hw_state; cursor->check_plane = i9xx_check_cursor; } @@ -14704,8 +14703,8 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_plane_disabled(dev_priv, PLANE_A); - assert_plane_disabled(dev_priv, PLANE_B); + assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_A)); + assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_B)); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); @@ -14918,20 +14917,13 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); } -static bool primary_get_hw_state(struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - - return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE; -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { struct intel_plane *primary = to_intel_plane(crtc->base.primary); bool visible; - visible = crtc->active && primary_get_hw_state(primary); + visible = crtc->active && primary->get_hw_state(primary); intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), to_intel_plane_state(primary->base.state), diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79fbaf78f604..10ae9681f02d 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -863,6 +863,7 @@ struct intel_plane { const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, struct intel_crtc *crtc); + bool (*get_hw_state)(struct intel_plane *plane); int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); @@ -1885,6 +1886,7 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); void intel_pipe_update_start(struct intel_crtc *crtc); void intel_pipe_update_end(struct intel_crtc *crtc); +bool skl_plane_get_hw_state(struct intel_plane *plane); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 524933b01483..f8ebeb5ffb96 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -324,6 +324,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +bool +skl_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static void chv_update_csc(struct intel_plane *plane, uint32_t format) { @@ -501,6 +521,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +vlv_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -641,6 +681,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +ivb_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -772,6 +831,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +g4x_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static int intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, @@ -1227,6 +1305,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1237,6 +1316,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1247,6 +1327,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = vlv_update_plane; intel_plane->disable_plane = vlv_disable_plane; + intel_plane->get_hw_state = vlv_plane_get_hw_state; plane_formats = vlv_plane_formats; num_plane_formats = ARRAY_SIZE(vlv_plane_formats); @@ -1262,6 +1343,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = ivb_update_plane; intel_plane->disable_plane = ivb_disable_plane; + intel_plane->get_hw_state = ivb_plane_get_hw_state; plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); @@ -1272,6 +1354,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = g4x_update_plane; intel_plane->disable_plane = g4x_disable_plane; + intel_plane->get_hw_state = g4x_plane_get_hw_state; modifiers = i9xx_plane_format_modifiers; if (IS_GEN6(dev_priv)) { From 50018d09843cc4813f1c2b89ff8fbcdf883ab0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Nov 2017 21:19:09 +0200 Subject: [PATCH 262/770] drm/i915: Redo plane sanitation during readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 23ac12732825901b3fc6ac720958d8bff9a0d6ec upstream. Unify the plane disabling during state readout by pulling the code into a new helper intel_plane_disable_noatomic(). We'll also read out the state of all planes, so that we know which planes really need to be diabled. Additonally we change the plane<->pipe mapping sanitation to work by simply disabling the offending planes instead of entire pipes. And we do it before we otherwise sanitize the crtcs, which means we don't have to worry about misassigned planes during crtc sanitation anymore. v2: Reoder patches to not depend on enum old_plane_id v3: s/for_each_pipe/for_each_intel_crtc/ Cc: Thierry Reding Cc: Alex Villacís Lasso Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103223 Reviewed-by: Daniel Vetter Tested-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20171117191917.11506-3-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit b1e01595a66dc206a2c75401ec4c285740537f3f) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 114 +++++++++++++++------------ 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 47094d4a8f0e..751b0f0582bc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2756,6 +2756,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, crtc_state->active_planes); } +static void intel_plane_disable_noatomic(struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + intel_set_plane_visible(crtc_state, plane_state, false); + + if (plane->id == PLANE_PRIMARY) + intel_pre_disable_primary_noatomic(&crtc->base); + + trace_intel_disable_plane(&plane->base, crtc); + plane->disable_plane(plane, crtc); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2813,12 +2830,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - intel_set_plane_visible(to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state), - false); - intel_pre_disable_primary_noatomic(&intel_crtc->base); - trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(intel_plane, intel_crtc); + intel_plane_disable_noatomic(intel_crtc, intel_plane); return; @@ -5954,6 +5966,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; + struct intel_plane *plane; u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; @@ -5962,11 +5975,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, if (!intel_crtc->active) return; - if (crtc->primary->state->visible) { - intel_pre_disable_primary_noatomic(crtc); + for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; + if (plane_state->base.visible) + intel_plane_disable_noatomic(intel_crtc, plane); } state = drm_atomic_state_alloc(crtc->dev); @@ -14715,22 +14729,36 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) POSTING_READ(DPLL(pipe)); } -static bool -intel_check_plane_mapping(struct intel_crtc *crtc) +static bool intel_plane_mapping_ok(struct intel_crtc *crtc, + struct intel_plane *primary) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val; + enum plane plane = primary->plane; + u32 val = I915_READ(DSPCNTR(plane)); - if (INTEL_INFO(dev_priv)->num_pipes == 1) - return true; + return (val & DISPLAY_PLANE_ENABLE) == 0 || + (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe); +} - val = I915_READ(DSPCNTR(!crtc->plane)); +static void +intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; - if ((val & DISPLAY_PLANE_ENABLE) && - (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) - return false; + if (INTEL_GEN(dev_priv) >= 4) + return; - return true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + + if (intel_plane_mapping_ok(crtc, plane)) + continue; + + DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n", + plane->base.name); + intel_plane_disable_noatomic(crtc, plane); + } } static bool intel_crtc_has_encoders(struct intel_crtc *crtc) @@ -14786,33 +14814,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { - if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) - continue; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(plane, crtc); + if (plane_state->base.visible && + plane->base.type != DRM_PLANE_TYPE_PRIMARY) + intel_plane_disable_noatomic(crtc, plane); } } - /* We need to sanitize the plane -> pipe mapping first because this will - * disable the crtc (and hence change the state) if it is wrong. Note - * that gen4+ has a fixed plane -> pipe mapping. */ - if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) { - bool plane; - - DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n", - crtc->base.base.id, crtc->base.name); - - /* Pipe has the wrong plane attached and the plane is active. - * Temporarily change the plane mapping and disable everything - * ... */ - plane = crtc->plane; - crtc->base.primary->state->visible = true; - crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base, ctx); - crtc->plane = plane; - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) @@ -14920,14 +14930,18 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct intel_plane *primary = to_intel_plane(crtc->base.primary); - bool visible; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane; - visible = crtc->active && primary->get_hw_state(primary); + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + bool visible = plane->get_hw_state(plane); - intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), - to_intel_plane_state(primary->base.state), - visible); + intel_set_plane_visible(crtc_state, plane_state, visible); + } } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -15129,6 +15143,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } From 050b86b5bf2040a4be87f93510ffd780113c2065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Nov 2017 14:54:11 +0200 Subject: [PATCH 263/770] drm/i915: Fix deadlock in i830_disable_pipe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4488496d58200c7511842e049a4cc891d928da56 upstream. i830_disable_pipe() gets called from the power well code, and thus we're already holding the power domain mutex. That means we can't call plane->get_hw_state() as it will also try to grab the same mutex and will thus deadlock. Replace the assert_plane() calls (which calls ->get_hw_state()) with just raw register reads in i830_disable_pipe(). As a bonus we can now get a warning if plane C is enabled even though we don't even expose it as a drm plane. v2: Do a separate WARN_ON() for each plane (Chris) Cc: Chris Wilson Reviewed-by: Chris Wilson Fixes: d87ce7640295 ("drm/i915: Add .get_hw_state() method for planes") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171129125411.29055-1-ville.syrjala@linux.intel.com (cherry picked from commit 5816d9cbc0a0fbf232fe297cefcb85361a3cde90) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 751b0f0582bc..46485692db48 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14717,8 +14717,11 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_A)); - assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_B)); + WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE); + WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); From e5e1e153ecd40c2b09c23d2b2c161a6c8ddbba07 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 5 Dec 2017 20:58:35 +0000 Subject: [PATCH 264/770] dccp: CVE-2017-8824: use-after-free in DCCP code commit 69c64866ce072dea1d1e59a0d61e0f66c0dffb76 upstream. Whenever the sock object is in DCCP_CLOSED state, dccp_disconnect() must free dccps_hc_tx_ccid and dccps_hc_rx_ccid and set to NULL. Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/proto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06a..9d43c1f40274 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); From fd31a38d268f50afe9c5cd3d4beafa020ad39e90 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:20 -0400 Subject: [PATCH 265/770] media: dvb-usb-v2: lmedm04: Improve logic checking of warm start commit 3d932ee27e852e4904647f15b64dedca51187ad7 upstream. Warm start has no check as whether a genuine device has connected and proceeds to next execution path. Check device should read 0x47 at offset of 2 on USB descriptor read and it is the amount requested of 6 bytes. Fix for kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access as Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 5e320fa4a795..992f2011a6ba 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -494,18 +494,23 @@ static int lme2510_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, static int lme2510_return_status(struct dvb_usb_device *d) { - int ret = 0; + int ret; u8 *data; - data = kzalloc(10, GFP_KERNEL); + data = kzalloc(6, GFP_KERNEL); if (!data) return -ENOMEM; - ret |= usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), - 0x06, 0x80, 0x0302, 0x00, data, 0x0006, 200); - info("Firmware Status: %x (%x)", ret , data[2]); + ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), + 0x06, 0x80, 0x0302, 0x00, + data, 0x6, 200); + if (ret != 6) + ret = -EINVAL; + else + ret = data[2]; + + info("Firmware Status: %6ph", data); - ret = (ret < 0) ? -ENODEV : data[2]; kfree(data); return ret; } @@ -1189,6 +1194,7 @@ static int lme2510_get_adapter_count(struct dvb_usb_device *d) static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) { struct lme2510_state *st = d->priv; + int status; usb_reset_configuration(d->udev); @@ -1197,12 +1203,16 @@ static int lme2510_identify_state(struct dvb_usb_device *d, const char **name) st->dvb_usb_lme2510_firmware = dvb_usb_lme2510_firmware; - if (lme2510_return_status(d) == 0x44) { + status = lme2510_return_status(d); + if (status == 0x44) { *name = lme_firmware_switch(d, 0); return COLD; } - return 0; + if (status != 0x47) + return -EINVAL; + + return WARM; } static int lme2510_get_stream_config(struct dvb_frontend *fe, u8 *ts_type, From 2bd6279a4a8cd6c8d108b694c5ddb9cb53f6e35c Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 26 Sep 2017 17:10:21 -0400 Subject: [PATCH 266/770] media: dvb-usb-v2: lmedm04: move ts2020 attach to dm04_lme2510_tuner commit 7bf7a7116ed313c601307f7e585419369926ab05 upstream. When the tuner was split from m88rs2000 the attach function is in wrong place. Move to dm04_lme2510_tuner to trap errors on failure and removing a call to lme_coldreset. Prevents driver starting up without any tuner connected. Fixes to trap for ts2020 fail. LME2510(C): FE Found M88RS2000 ts2020: probe of 0-0060 failed with error -11 ... LME2510(C): TUN Found RS2000 tuner kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Reported-by: Andrey Konovalov Signed-off-by: Malcolm Priestley Tested-by: Andrey Konovalov Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/lmedm04.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 992f2011a6ba..be26c029546b 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -1076,8 +1076,6 @@ static int dm04_lme2510_frontend_attach(struct dvb_usb_adapter *adap) if (adap->fe[0]) { info("FE Found M88RS2000"); - dvb_attach(ts2020_attach, adap->fe[0], &ts2020_config, - &d->i2c_adap); st->i2c_tuner_gate_w = 5; st->i2c_tuner_gate_r = 5; st->i2c_tuner_addr = 0x60; @@ -1143,17 +1141,18 @@ static int dm04_lme2510_tuner(struct dvb_usb_adapter *adap) ret = st->tuner_config; break; case TUNER_RS2000: - ret = st->tuner_config; + if (dvb_attach(ts2020_attach, adap->fe[0], + &ts2020_config, &d->i2c_adap)) + ret = st->tuner_config; break; default: break; } - if (ret) + if (ret) { info("TUN Found %s tuner", tun_msg[ret]); - else { - info("TUN No tuner found --- resetting device"); - lme_coldreset(d); + } else { + info("TUN No tuner found"); return -ENODEV; } From dd24d173b50af3aadb1ea2e04edaa8d68ca59b23 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 22 Sep 2017 09:07:06 -0400 Subject: [PATCH 267/770] media: hdpvr: Fix an error handling path in hdpvr_probe() commit c0f71bbb810237a38734607ca4599632f7f5d47f upstream. Here, hdpvr_register_videodev() is responsible for setup and register a video device. Also defining and initializing a worker. hdpvr_register_videodev() is calling by hdpvr_probe at last. So no need to flush any work here. Unregister v4l2, free buffers and memory. If hdpvr_probe() will fail. Signed-off-by: Arvind Yadav Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/hdpvr/hdpvr-core.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index dbe29c6c4d8b..1e8cbaf36896 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -292,7 +292,7 @@ static int hdpvr_probe(struct usb_interface *interface, /* register v4l2_device early so it can be used for printks */ if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) { dev_err(&interface->dev, "v4l2_device_register failed\n"); - goto error; + goto error_free_dev; } mutex_init(&dev->io_mutex); @@ -301,7 +301,7 @@ static int hdpvr_probe(struct usb_interface *interface, dev->usbc_buf = kmalloc(64, GFP_KERNEL); if (!dev->usbc_buf) { v4l2_err(&dev->v4l2_dev, "Out of memory\n"); - goto error; + goto error_v4l2_unregister; } init_waitqueue_head(&dev->wait_buffer); @@ -339,13 +339,13 @@ static int hdpvr_probe(struct usb_interface *interface, } if (!dev->bulk_in_endpointAddr) { v4l2_err(&dev->v4l2_dev, "Could not find bulk-in endpoint\n"); - goto error; + goto error_put_usb; } /* init the device */ if (hdpvr_device_init(dev)) { v4l2_err(&dev->v4l2_dev, "device init failed\n"); - goto error; + goto error_put_usb; } mutex_lock(&dev->io_mutex); @@ -353,7 +353,7 @@ static int hdpvr_probe(struct usb_interface *interface, mutex_unlock(&dev->io_mutex); v4l2_err(&dev->v4l2_dev, "allocating transfer buffers failed\n"); - goto error; + goto error_put_usb; } mutex_unlock(&dev->io_mutex); @@ -361,7 +361,7 @@ static int hdpvr_probe(struct usb_interface *interface, retval = hdpvr_register_i2c_adapter(dev); if (retval < 0) { v4l2_err(&dev->v4l2_dev, "i2c adapter register failed\n"); - goto error; + goto error_free_buffers; } client = hdpvr_register_ir_rx_i2c(dev); @@ -394,13 +394,17 @@ static int hdpvr_probe(struct usb_interface *interface, reg_fail: #if IS_ENABLED(CONFIG_I2C) i2c_del_adapter(&dev->i2c_adapter); +error_free_buffers: #endif + hdpvr_free_buffers(dev); +error_put_usb: + usb_put_dev(dev->udev); + kfree(dev->usbc_buf); +error_v4l2_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +error_free_dev: + kfree(dev); error: - if (dev) { - flush_work(&dev->worker); - /* this frees allocated memory */ - hdpvr_delete(dev); - } return retval; } From 63380839c575e5ea13c8005332665e31897b2740 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 31 Aug 2017 11:30:50 +0300 Subject: [PATCH 268/770] arm64: move TASK_* definitions to Commit eef94a3d09aa upstream. ILP32 series [1] introduces the dependency on for TASK_SIZE macro. Which in turn requires , and include , giving a circular dependency, because TASK_SIZE is currently located in . In other architectures, TASK_SIZE is defined in , and moving TASK_SIZE there fixes the problem. Discussion: https://patchwork.kernel.org/patch/9929107/ [1] https://github.com/norov/linux/tree/ilp32-next CC: Will Deacon CC: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Suggested-by: Mark Rutland Signed-off-by: Yury Norov Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/memory.h | 15 --------------- arch/arm64/include/asm/processor.h | 21 +++++++++++++++++++++ arch/arm64/kernel/entry.S | 2 +- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f7c4d2146aed..d4bae7d6e0d8 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -61,8 +61,6 @@ * KIMAGE_VADDR - the virtual address of the start of the kernel image * VA_BITS - the maximum number of bits for virtual addresses. * VA_START - the first kernel virtual address. - * TASK_SIZE - the maximum size of a user space task. - * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) - \ @@ -77,19 +75,6 @@ #define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) -#define TASK_SIZE_64 (UL(1) << VA_BITS) - -#ifdef CONFIG_COMPAT -#define TASK_SIZE_32 UL(0x100000000) -#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ - TASK_SIZE_32 : TASK_SIZE_64) -#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ - TASK_SIZE_32 : TASK_SIZE_64) -#else -#define TASK_SIZE TASK_SIZE_64 -#endif /* CONFIG_COMPAT */ - -#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) #define KERNEL_START _text #define KERNEL_END _end diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 29adab8138c3..7dddca21fc64 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -19,6 +19,10 @@ #ifndef __ASM_PROCESSOR_H #define __ASM_PROCESSOR_H +#define TASK_SIZE_64 (UL(1) << VA_BITS) + +#ifndef __ASSEMBLY__ + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -37,6 +41,22 @@ #include #include +/* + * TASK_SIZE - the maximum size of a user space task. + * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. + */ +#ifdef CONFIG_COMPAT +#define TASK_SIZE_32 UL(0x100000000) +#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ + TASK_SIZE_32 : TASK_SIZE_64) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ + TASK_SIZE_32 : TASK_SIZE_64) +#else +#define TASK_SIZE TASK_SIZE_64 +#endif /* CONFIG_COMPAT */ + +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) + #define STACK_TOP_MAX TASK_SIZE_64 #ifdef CONFIG_COMPAT #define AARCH32_VECTORS_BASE 0xffff0000 @@ -194,4 +214,5 @@ static inline void spin_lock_prefetch(const void *ptr) int cpu_enable_pan(void *__unused); int cpu_enable_cache_maint_trap(void *__unused); +#endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e1c59d4008a8..f5e851eeda4b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include From 78299fafcd78bec4f4ce145decefff5b7f664d80 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 12:56:18 +0100 Subject: [PATCH 269/770] arm64: mm: Use non-global mappings for kernel space Commit e046eb0c9bf2 upstream. In preparation for unmapping the kernel whilst running in userspace, make the kernel mappings non-global so we can avoid expensive TLB invalidation on kernel exit to userspace. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kernel-pgtable.h | 12 ++++++++++-- arch/arm64/include/asm/pgtable-prot.h | 21 +++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 7803343e5881..77a27af01371 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -78,8 +78,16 @@ /* * Initial memory map attributes. */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG) +#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG) +#else +#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS +#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS +#endif #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 0a5635fb0ef9..22a926825e3f 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -34,8 +34,16 @@ #include -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG) +#else +#define PROT_DEFAULT _PROT_DEFAULT +#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -48,6 +56,7 @@ #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) @@ -55,15 +64,15 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) +#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) From 59f47f9dcdcde20e0a20c434f18bcc50c7c824e4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:04:48 +0100 Subject: [PATCH 270/770] arm64: mm: Temporarily disable ARM64_SW_TTBR0_PAN Commit 376133b7edc2 upstream. We're about to rework the way ASIDs are allocated, switch_mm is implemented and low-level kernel entry/exit is handled, so keep the ARM64_SW_TTBR0_PAN code out of the way whilst we do the heavy lifting. It will be re-enabled in a subsequent patch. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8f73eaccf50a..63ff04fe03e1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -882,6 +882,7 @@ endif config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + depends on BROKEN # Temporary while switch_mm is reworked help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved From d26f0a5dc6a6d8237b024a150de6be30fce7b467 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:19:09 +0100 Subject: [PATCH 271/770] arm64: mm: Move ASID from TTBR0 to TTBR1 Commit 7655abb95386 upstream. In preparation for mapping kernelspace and userspace with different ASIDs, move the ASID to TTBR1 and update switch_mm to context-switch TTBR0 via an invalid mapping (the zero page). Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu_context.h | 7 +++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/include/asm/proc-fns.h | 6 ------ arch/arm64/mm/proc.S | 9 ++++++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 9d155fa9a507..aa39c126c0d0 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -57,6 +57,13 @@ static inline void cpu_set_reserved_ttbr0(void) isb(); } +static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) +{ + BUG_ON(pgd == swapper_pg_dir); + cpu_set_reserved_ttbr0(); + cpu_do_switch_mm(virt_to_phys(pgd),mm); +} + /* * TCR.T0SZ value to use when the ID map is active. Usually equals * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index eb0c2bd90de9..8df4cb6ac6f7 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -272,6 +272,7 @@ #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) +#define TCR_A1 (UL(1) << 22) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 14ad6e4e87d1..16cef2e8449e 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include -#define cpu_switch_mm(pgd,mm) \ -do { \ - BUG_ON(pgd == swapper_pg_dir); \ - cpu_do_switch_mm(virt_to_phys(pgd),mm); \ -} while (0) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 877d42fb0df6..0bd7550b7230 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,9 +139,12 @@ ENDPROC(cpu_do_resume) */ ENTRY(cpu_do_switch_mm) pre_ttbr0_update_workaround x0, x2, x3 + mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id - bfi x0, x1, #48, #16 // set the ASID - msr ttbr0_el1, x0 // set TTBR0 + bfi x2, x1, #48, #16 // set the ASID + msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) + isb + msr ttbr0_el1, x0 // now update TTBR0 isb post_ttbr0_update_workaround ret @@ -225,7 +228,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 tcr_set_idmap_t0sz x10, x9 /* From 4445cb04410fb5ea845e2991144f77d00a78ba0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:29:06 +0100 Subject: [PATCH 272/770] arm64: mm: Remove pre_ttbr0_update_workaround for Falkor erratum #E1003 Commit 85d13c001497 upstream. The pre_ttbr0_update_workaround hook is called prior to context-switching TTBR0 because Falkor erratum E1003 can cause TLB allocation with the wrong ASID if both the ASID and the base address of the TTBR are updated at the same time. With the ASID sitting safely in TTBR1, we no longer update things atomically, so we can remove the pre_ttbr0_update_workaround macro as it's no longer required. The erratum infrastructure and documentation is left around for #E1003, as it will be required by the entry trampoline code in a future patch. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 22 ---------------------- arch/arm64/include/asm/mmu_context.h | 2 -- arch/arm64/mm/context.c | 11 ----------- arch/arm64/mm/proc.S | 1 - 4 files changed, 36 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7896728f3b22..b52ac54fd8a4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -25,7 +25,6 @@ #include #include -#include #include #include #include @@ -465,27 +464,6 @@ alternative_endif .endm /* - * Errata workaround prior to TTBR0_EL1 update - * - * val: TTBR value with new BADDR, preserved - * tmp0: temporary register, clobbered - * tmp1: other temporary register, clobbered - */ - .macro pre_ttbr0_update_workaround, val, tmp0, tmp1 -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 - mrs \tmp0, ttbr0_el1 - mov \tmp1, #FALKOR_RESERVED_ASID - bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR - msr ttbr0_el1, \tmp0 - isb - bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR - msr ttbr0_el1, \tmp0 - isb -alternative_else_nop_endif -#endif - .endm - /* * Errata workaround post TTBR0_EL1 update. */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index aa39c126c0d0..da29766a181c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -19,8 +19,6 @@ #ifndef __ASM_MMU_CONTEXT_H #define __ASM_MMU_CONTEXT_H -#define FALKOR_RESERVED_ASID 1 - #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..78816e476491 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -79,13 +79,6 @@ void verify_cpu_asid_bits(void) } } -static void set_reserved_asid_bits(void) -{ - if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) && - cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003)) - __set_bit(FALKOR_RESERVED_ASID, asid_map); -} - static void flush_context(unsigned int cpu) { int i; @@ -94,8 +87,6 @@ static void flush_context(unsigned int cpu) /* Update the list of reserved ASIDs and the ASID bitmap. */ bitmap_clear(asid_map, 0, NUM_USER_ASIDS); - set_reserved_asid_bits(); - /* * Ensure the generation bump is observed before we xchg the * active_asids. @@ -250,8 +241,6 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); - set_reserved_asid_bits(); - pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0bd7550b7230..1623150ed0a6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -138,7 +138,6 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - pre_ttbr0_update_workaround x0, x2, x3 mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id bfi x2, x1, #48, #16 // set the ASID From 95ce0d51f9a8ee5417a3d1d8699d6f654dd90a62 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:34:30 +0100 Subject: [PATCH 273/770] arm64: mm: Rename post_ttbr0_update_workaround Commit 158d495899ce upstream. The post_ttbr0_update_workaround hook applies to any change to TTBRx_EL1. Since we're using TTBR1 for the ASID, rename the hook to make it clearer as to what it's doing. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 5 ++--- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/proc.S | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b52ac54fd8a4..a1e5c1eb3c85 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -464,10 +464,9 @@ alternative_endif .endm /* -/* - * Errata workaround post TTBR0_EL1 update. + * Errata workaround post TTBRx_EL1 update. */ - .macro post_ttbr0_update_workaround + .macro post_ttbr_update_workaround #ifdef CONFIG_CAVIUM_ERRATUM_27456 alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f5e851eeda4b..ae2c9a9380ff 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -255,7 +255,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). */ - post_ttbr0_update_workaround + post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 1623150ed0a6..447537c1699d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -145,7 +145,7 @@ ENTRY(cpu_do_switch_mm) isb msr ttbr0_el1, x0 // now update TTBR0 isb - post_ttbr0_update_workaround + post_ttbr_update_workaround ret ENDPROC(cpu_do_switch_mm) From 00ff7de671f87cf7b330b563c16ee6fb8f5e3cab Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:58:16 +0100 Subject: [PATCH 274/770] arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN Commit 27a921e75711 upstream. With the ASID now installed in TTBR1, we can re-enable ARM64_SW_TTBR0_PAN by ensuring that we switch to a reserved ASID of zero when disabling user access and restore the active user ASID on the uaccess enable path. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/asm-uaccess.h | 25 +++++++++++++++++-------- arch/arm64/include/asm/uaccess.h | 21 +++++++++++++++++---- arch/arm64/kernel/entry.S | 4 ++-- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- 10 files changed, 42 insertions(+), 21 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 63ff04fe03e1..8f73eaccf50a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -882,7 +882,6 @@ endif config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" - depends on BROKEN # Temporary while switch_mm is reworked help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index b3da6c886835..21b8cf304028 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -16,11 +16,20 @@ add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb + sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE + bic \tmp1, \tmp1, #(0xffff << 48) + msr ttbr1_el1, \tmp1 // set reserved ASID + isb .endm - .macro __uaccess_ttbr0_enable, tmp1 + .macro __uaccess_ttbr0_enable, tmp1, tmp2 get_thread_info \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + mrs \tmp2, ttbr1_el1 + extr \tmp2, \tmp2, \tmp1, #48 + ror \tmp2, \tmp2, #16 + msr ttbr1_el1, \tmp2 // set the active ASID + isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm @@ -31,18 +40,18 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 + save_and_disable_irq \tmp3 // avoid preemption + __uaccess_ttbr0_enable \tmp1, \tmp2 + restore_irq \tmp3 alternative_else_nop_endif .endm #else .macro uaccess_ttbr0_disable, tmp1 .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 .endm #endif @@ -56,8 +65,8 @@ alternative_if ARM64_ALT_PAN_NOT_UAO alternative_else_nop_endif .endm - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 + .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 + uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index fc0f9eb66039..750a3b76a01c 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -107,15 +107,19 @@ static inline void __uaccess_ttbr0_disable(void) { unsigned long ttbr; + ttbr = read_sysreg(ttbr1_el1); /* reserved_ttbr0 placed at the end of swapper_pg_dir */ - ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; - write_sysreg(ttbr, ttbr0_el1); + write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); + isb(); + /* Set reserved ASID */ + ttbr &= ~(0xffffUL << 48); + write_sysreg(ttbr, ttbr1_el1); isb(); } static inline void __uaccess_ttbr0_enable(void) { - unsigned long flags; + unsigned long flags, ttbr0, ttbr1; /* * Disable interrupts to avoid preemption between reading the 'ttbr0' @@ -123,7 +127,16 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. */ local_irq_save(flags); - write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + ttbr0 = current_thread_info()->ttbr0; + + /* Restore active ASID */ + ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 |= ttbr0 & (0xffffUL << 48); + write_sysreg(ttbr1, ttbr1_el1); + isb(); + + /* Restore user page table */ + write_sysreg(ttbr0, ttbr0_el1); isb(); local_irq_restore(flags); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ae2c9a9380ff..acfca2f49a2f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -184,7 +184,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr0_el1 + mrs x21, ttbr1_el1 tst x21, #0xffff << 48 // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled @@ -246,7 +246,7 @@ alternative_else_nop_endif tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set .endif - __uaccess_ttbr0_enable x0 + __uaccess_ttbr0_enable x0, x1 .if \el == 0 /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index e88fb99c1561..8f9c4641e706 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -30,7 +30,7 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) - uaccess_enable_not_uao x2, x3 + uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4b5d826895ff..69d86a80f3e2 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -64,7 +64,7 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index b24a830419ad..e442b531252a 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -65,7 +65,7 @@ end .req x5 ENTRY(raw_copy_in_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 351f0766f7a6..318f15d5c336 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -63,7 +63,7 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7f1dbe962cf5..6cd20a8c0952 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -49,7 +49,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3 + uaccess_ttbr0_enable x2, x3, x4 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 401ceb71540c..acdbd2c9e899 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -101,7 +101,7 @@ ENTRY(privcmd_call) * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation * is enabled (it implies that hardware UAO and PAN disabled). */ - uaccess_ttbr0_enable x6, x7 + uaccess_ttbr0_enable x6, x7, x8 hvc XEN_IMM /* From a8bad38cff23eaed7b962c0bd7b0553fb31dec15 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 14:10:28 +0100 Subject: [PATCH 275/770] arm64: mm: Allocate ASIDs in pairs Commit 0c8ea531b774 upstream. In preparation for separate kernel/user ASIDs, allocate them in pairs for each mm_struct. The bottom bit distinguishes the two: if it is set, then the ASID will map only userspace. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 1 + arch/arm64/mm/context.c | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 0d34bf0a89c7..01bfb184f2a8 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,6 +17,7 @@ #define __ASM_MMU_H #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ +#define USER_ASID_FLAG (UL(1) << 48) typedef struct { atomic64_t id; diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 78816e476491..db28958d9e4f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#define NUM_USER_ASIDS ASID_FIRST_VERSION + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) +#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) +#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) +#else +#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define asid2idx(asid) ((asid) & ~ASID_MASK) +#define idx2asid(idx) asid2idx(idx) +#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -104,7 +113,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(asid & ~ASID_MASK, asid_map); + __set_bit(asid2idx(asid), asid_map); per_cpu(reserved_asids, i) = asid; } @@ -156,16 +165,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * We had a valid ASID in a previous life, so try to re-use * it if possible. */ - asid &= ~ASID_MASK; - if (!__test_and_set_bit(asid, asid_map)) + if (!__test_and_set_bit(asid2idx(asid), asid_map)) return newasid; } /* * Allocate a free ASID. If we can't find one, take a note of the - * currently active ASIDs and mark the TLBs as requiring flushes. - * We always count from ASID #1, as we use ASID #0 when setting a - * reserved TTBR0 for the init_mm. + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid != NUM_USER_ASIDS) @@ -182,7 +191,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) set_asid: __set_bit(asid, asid_map); cur_idx = asid; - return asid | generation; + return idx2asid(asid) | generation; } void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) From 53b13950886c8dd83fdb613cd5b3a150601120e9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 13:58:08 +0000 Subject: [PATCH 276/770] arm64: mm: Add arm64_kernel_unmapped_at_el0 helper Commit fc0e1299da54 upstream. In order for code such as TLB invalidation to operate efficiently when the decision to map the kernel at EL0 is determined at runtime, this patch introduces a helper function, arm64_kernel_unmapped_at_el0, to determine whether or not the kernel is mapped whilst running in userspace. Currently, this just reports the value of CONFIG_UNMAP_KERNEL_AT_EL0, but will later be hooked up to a fake CPU capability using a static key. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/mmu.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 01bfb184f2a8..c07954638658 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,8 @@ #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ #define USER_ASID_FLAG (UL(1) << 48) +#ifndef __ASSEMBLY__ + typedef struct { atomic64_t id; void *vdso; @@ -32,6 +34,11 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) +static inline bool arm64_kernel_unmapped_at_el0(void) +{ + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); +} + extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); @@ -42,4 +49,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#endif /* !__ASSEMBLY__ */ #endif From d6c414013bb4c41c93fcef1ffa90df4f56c4cc53 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 14:13:33 +0100 Subject: [PATCH 277/770] arm64: mm: Invalidate both kernel and user ASIDs when performing TLBI Commit 9b0de864b5bc upstream. Since an mm has both a kernel and a user ASID, we need to ensure that broadcast TLB maintenance targets both address spaces so that things like CoW continue to work with the uaccess primitives in the kernel. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/tlbflush.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index af1c76981911..9e82dd79c7db 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -23,6 +23,7 @@ #include #include +#include /* * Raw TLBI operations. @@ -54,6 +55,11 @@ #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) +#define __tlbi_user(op, arg) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi(op, (arg) | USER_ASID_FLAG); \ +} while (0) + /* * TLB Management * ============== @@ -115,6 +121,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) dsb(ishst); __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); dsb(ish); } @@ -125,6 +132,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ishst); __tlbi(vale1is, addr); + __tlbi_user(vale1is, addr); dsb(ish); } @@ -151,10 +159,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { - if (last_level) + if (last_level) { __tlbi(vale1is, addr); - else + __tlbi_user(vale1is, addr); + } else { __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); + } } dsb(ish); } @@ -194,6 +205,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); __tlbi(vae1is, addr); + __tlbi_user(vae1is, addr); dsb(ish); } From 78422a7b5160954aff02bc3655271adc5d21b04e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:07:40 +0000 Subject: [PATCH 278/770] arm64: entry: Add exception trampoline page for exceptions from EL0 Commit c7b9adaf85f8 upstream. To allow unmapping of the kernel whilst running at EL0, we need to point the exception vectors at an entry trampoline that can map/unmap the kernel on entry/exit respectively. This patch adds the trampoline page, although it is not yet plugged into the vector table and is therefore unused. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 86 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 17 +++++++ 2 files changed, 103 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index acfca2f49a2f..cbef6983b52f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -895,6 +897,90 @@ __ni_sys_trace: .popsection // .entry.text +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + */ + .pushsection ".entry.tramp.text", "ax" + + .macro tramp_map_kernel, tmp + mrs \tmp, ttbr1_el1 + sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bic \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp + .endm + + .macro tramp_unmap_kernel, tmp + mrs \tmp, ttbr1_el1 + add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + orr \tmp, \tmp, #USER_ASID_FLAG + msr ttbr1_el1, \tmp + /* + * We avoid running the post_ttbr_update_workaround here because the + * user and kernel ASIDs don't have conflicting mappings, so any + * "blessing" as described in: + * + * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com + * + * will not hurt correctness. Whilst this may partially defeat the + * point of using split ASIDs in the first place, it avoids + * the hit of invalidating the entire I-cache on every return to + * userspace. + */ + .endm + + .macro tramp_ventry, regsize = 64 + .align 7 +1: + .if \regsize == 64 + msr tpidrro_el0, x30 // Restored in kernel_ventry + .endif + tramp_map_kernel x30 + ldr x30, =vectors + prfm plil1strm, [x30, #(1b - tramp_vectors)] + msr vbar_el1, x30 + add x30, x30, #(1b - tramp_vectors) + isb + br x30 + .endm + + .macro tramp_exit, regsize = 64 + adr x30, tramp_vectors + msr vbar_el1, x30 + tramp_unmap_kernel x30 + .if \regsize == 64 + mrs x30, far_el1 + .endif + eret + .endm + + .align 11 +ENTRY(tramp_vectors) + .space 0x400 + + tramp_ventry + tramp_ventry + tramp_ventry + tramp_ventry + + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 + tramp_ventry 32 +END(tramp_vectors) + +ENTRY(tramp_exit_native) + tramp_exit +END(tramp_exit_native) + +ENTRY(tramp_exit_compat) + tramp_exit 32 +END(tramp_exit_compat) + + .ltorg + .popsection // .entry.tramp.text +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7da3e5c366a0..6b4260f22aab 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,6 +57,17 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_TEXT \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + *(.entry.tramp.text) \ + . = ALIGN(PAGE_SIZE); \ + VMLINUX_SYMBOL(__entry_tramp_text_end) = .; +#else +#define TRAMP_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -113,6 +124,7 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + TRAMP_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -214,6 +226,11 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; From 63d13760abd7a819b6e95793c795cdc155bfff44 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:14:17 +0000 Subject: [PATCH 279/770] arm64: mm: Map entry trampoline into trampoline and kernel page tables Commit 51a0048beb44 upstream. The exception entry trampoline needs to be mapped at the same virtual address in both the trampoline page table (which maps nothing else) and also the kernel page table, so that we can swizzle TTBR1_EL1 on exceptions from and return to EL0. This patch maps the trampoline at a fixed virtual address in the fixmap area of the kernel virtual address space, which allows the kernel proper to be randomized with respect to the trampoline when KASLR is enabled. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/fixmap.h | 4 ++++ arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/asm-offsets.c | 6 +++++- arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4052ec39e8db..8119b49be98d 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -58,6 +58,10 @@ enum fixed_addresses { FIX_APEI_GHES_NMI, #endif /* CONFIG_ACPI_APEI_GHES */ +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT, +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 960d05c8816a..aafea648a30f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -684,6 +684,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 71bf088f1e4b..af247d10252f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -148,11 +149,14 @@ int main(void) DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); - BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); + BLANK(); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + DEFINE(TRAMP_VALIAS, TRAMP_VALIAS); +#endif return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f1eb15e0e864..3b9af178f791 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -525,6 +525,29 @@ static int __init parse_rodata(char *arg) } early_param("rodata", parse_rodata); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + extern char __entry_tramp_text_start[]; + + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, + prot, pgd_pgtable_alloc, 0); + + /* ...as well as the kernel page table */ + __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + return 0; +} +core_initcall(map_entry_trampoline); +#endif + /* * Create fine-grained mappings for the kernel. */ From 86200f218f98704fa4b78734d574a38519d5ef3c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:20:21 +0000 Subject: [PATCH 280/770] arm64: entry: Explicitly pass exception level to kernel_ventry macro Commit 5b1f7fe41909 upstream. We will need to treat exceptions from EL0 differently in kernel_ventry, so rework the macro to take the exception level as an argument and construct the branch target using that. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index cbef6983b52f..fa924eb6b4c2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -71,7 +71,7 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_ventry label + .macro kernel_ventry, el, label, regsize = 64 .align 7 sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -84,7 +84,7 @@ tbnz x0, #THREAD_SHIFT, 0f sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - b \label + b el\()\el\()_\label 0: /* @@ -116,7 +116,7 @@ sub sp, sp, x0 mrs x0, tpidrro_el0 #endif - b \label + b el\()\el\()_\label .endm .macro kernel_entry, el, regsize = 64 @@ -367,31 +367,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - kernel_ventry el1_sync_invalid // Synchronous EL1t - kernel_ventry el1_irq_invalid // IRQ EL1t - kernel_ventry el1_fiq_invalid // FIQ EL1t - kernel_ventry el1_error_invalid // Error EL1t + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t - kernel_ventry el1_sync // Synchronous EL1h - kernel_ventry el1_irq // IRQ EL1h - kernel_ventry el1_fiq_invalid // FIQ EL1h - kernel_ventry el1_error_invalid // Error EL1h + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error_invalid // Error EL1h - kernel_ventry el0_sync // Synchronous 64-bit EL0 - kernel_ventry el0_irq // IRQ 64-bit EL0 - kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 - kernel_ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry 0, sync // Synchronous 64-bit EL0 + kernel_ventry 0, irq // IRQ 64-bit EL0 + kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 - kernel_ventry el0_irq_compat // IRQ 32-bit EL0 - kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - kernel_ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid_compat, 32 // Error 32-bit EL0 #else - kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 - kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 - kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0 - kernel_ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 + kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 + kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 + kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 #endif END(vectors) From 6472f1a3a54f061864a5091788778793661aa7c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:24:29 +0000 Subject: [PATCH 281/770] arm64: entry: Hook up entry trampoline to exception vectors Commit 4bf3286d29f3 upstream. Hook up the entry trampoline to our exception vectors so that all exceptions from and returns to EL0 go via the trampoline, which swizzles the vector base register accordingly. Transitioning to and from the kernel clobbers x30, so we use tpidrro_el0 and far_el1 as scratch registers for native tasks. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index fa924eb6b4c2..f66ab8eb45b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -73,6 +73,17 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + .if \el == 0 + .if \regsize == 64 + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr + .else + mov x30, xzr + .endif + .endif +#endif + sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK /* @@ -119,6 +130,11 @@ b el\()\el\()_\label .endm + .macro tramp_alias, dst, sym + mov_q \dst, TRAMP_VALIAS + add \dst, \dst, #(\sym - .entry.tramp.text) + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -269,18 +285,20 @@ alternative_else_nop_endif .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 + tst x22, #PSR_MODE32_BIT // native task? + b.eq 3f + #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 - tbz x22, #4, 1f #ifdef CONFIG_PID_IN_CONTEXTIDR mrs x29, contextidr_el1 msr contextidr_el1, x29 #else msr contextidr_el1, xzr #endif -1: alternative_else_nop_endif #endif +3: .endif msr elr_el1, x21 // set up the return data @@ -302,7 +320,22 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp - eret // return to kernel + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 + eret +#else + .if \el == 0 + bne 4f + msr far_el1, x30 + tramp_alias x30, tramp_exit_native + br x30 +4: + tramp_alias x30, tramp_exit_compat + br x30 + .else + eret + .endif +#endif .endm .macro irq_stack_entry From 15a511c0cbd1b1c8f800ce8c8921fb2390fb2d05 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:29:19 +0000 Subject: [PATCH 282/770] arm64: erratum: Work around Falkor erratum #E1003 in trampoline code Commit d1777e686ad1 upstream. We rely on an atomic swizzling of TTBR1 when transitioning from the entry trampoline to the kernel proper on an exception. We can't rely on this atomicity in the face of Falkor erratum #E1003, so on affected cores we can issue a TLB invalidation to invalidate the walk cache prior to jumping into the kernel. There is still the possibility of a TLB conflict here due to conflicting walk cache entries prior to the invalidation, but this doesn't appear to be the case on these CPUs in practice. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 17 +++++------------ arch/arm64/kernel/entry.S | 12 ++++++++++++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8f73eaccf50a..3cec78c81b33 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -504,20 +504,13 @@ config CAVIUM_ERRATUM_30115 config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y - select ARM64_PAN if ARM64_SW_TTBR0_PAN help On Falkor v1, an incorrect ASID may be cached in the TLB when ASID - and BADDR are changed together in TTBRx_EL1. The workaround for this - issue is to use a reserved ASID in cpu_do_switch_mm() before - switching to the new ASID. Saying Y here selects ARM64_PAN if - ARM64_SW_TTBR0_PAN is selected. This is done because implementing and - maintaining the E1003 workaround in the software PAN emulation code - would be an unnecessary complication. The affected Falkor v1 CPU - implements ARMv8.1 hardware PAN support and using hardware PAN - support versus software PAN emulation is mutually exclusive at - runtime. - - If unsure, say Y. + and BADDR are changed together in TTBRx_EL1. Since we keep the ASID + in TTBR1_EL1, this situation only occurs in the entry trampoline and + then only for entries in the walk cache, since the leaf translation + is unchanged. Work around the erratum by invalidating the walk cache + entries for the trampoline before entering the kernel proper. config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f66ab8eb45b9..edf7681f0d37 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -941,6 +941,18 @@ __ni_sys_trace: sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 + /* ASID already in \tmp[63:48] */ + movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) + movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) + /* 2MB boundary containing the vectors, so we nobble the walk cache */ + movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) + isb + tlbi vae1, \tmp + dsb nsh +alternative_else_nop_endif +#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ .endm .macro tramp_unmap_kernel, tmp From 4e29f25b2bc1e3a5f545b2ce842911c030277309 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 13 Dec 2017 14:19:37 -0800 Subject: [PATCH 283/770] arm64: cpu_errata: Add Kryo to Falkor 1003 errata Commit bb48711800e6 upstream. The Kryo CPUs are also affected by the Falkor 1003 errata, so we need to do the same workaround on Kryo CPUs. The MIDR is slightly more complicated here, where the PART number is not always the same when looking at all the bits from 15 to 4. Drop the lower 8 bits and just look at the top 4 to see if it's '2' and then consider those as Kryo CPUs. This covers all the combinations without having to list them all out. Fixes: 38fd94b0275c ("arm64: Work around Falkor erratum 1003") Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- Documentation/arm64/silicon-errata.txt | 2 +- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 704770c0edf2..f3d0d316d5f1 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -71,7 +71,7 @@ stable kernels. | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | | Hisilicon | Hip0{6,7} | #161010701 | N/A | | | | | | -| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | | Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cbf08d7cbf30..da036fe68573 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -92,6 +92,7 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 +#define QCOM_CPU_PART_KRYO 0x200 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -101,6 +102,7 @@ #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) +#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0e27f86ee709..e4c78630a730 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -30,6 +30,20 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) entry->midr_range_max); } +static bool __maybe_unused +is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) +{ + u32 model; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + model = read_cpuid_id(); + model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | + MIDR_ARCHITECTURE_MASK; + + return model == entry->midr_model; +} + static bool has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, int scope) @@ -169,6 +183,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(0, 0)), }, + { + .desc = "Qualcomm Technologies Kryo erratum 1003", + .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + .def_scope = SCOPE_LOCAL_CPU, + .midr_model = MIDR_QCOM_KRYO, + .matches = is_kryo_midr, + }, #endif #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 { From 2843ade2dad2249900f508f61068d245921ce9f1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:33:28 +0000 Subject: [PATCH 284/770] arm64: tls: Avoid unconditional zeroing of tpidrro_el0 for native tasks Commit 18011eac28c7 upstream. When unmapping the kernel at EL0, we use tpidrro_el0 as a scratch register during exception entry from native tasks and subsequently zero it in the kernel_ventry macro. We can therefore avoid zeroing tpidrro_el0 in the context-switch path for native tasks using the entry trampoline. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/process.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index bcd22d7ee590..9e773732520c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -314,16 +314,14 @@ void tls_preserve_current_state(void) static void tls_thread_switch(struct task_struct *next) { - unsigned long tpidr, tpidrro; - tls_preserve_current_state(); - tpidr = *task_user_tls(next); - tpidrro = is_compat_thread(task_thread_info(next)) ? - next->thread.tp_value : 0; + if (is_compat_thread(task_thread_info(next))) + write_sysreg(next->thread.tp_value, tpidrro_el0); + else if (!arm64_kernel_unmapped_at_el0()) + write_sysreg(0, tpidrro_el0); - write_sysreg(tpidr, tpidr_el0); - write_sysreg(tpidrro, tpidrro_el0); + write_sysreg(*task_user_tls(next), tpidr_el0); } /* Restore the UAO state depending on next's addr_limit */ From 7fafcbb04f94d4709a5c8b7fb2c196b5bdf4fd7b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:38:19 +0000 Subject: [PATCH 285/770] arm64: entry: Add fake CPU feature for unmapping the kernel at EL0 Commit ea1e3de85e94 upstream. Allow explicit disabling of the entry trampoline on the kernel command line (kpti=off) by adding a fake CPU feature (ARM64_UNMAP_KERNEL_AT_EL0) that can be used to toggle the alternative sequences in our entry code and avoid use of the trampoline altogether if desired. This also allows us to make use of a static key in arm64_kernel_unmapped_at_el0(). Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/mmu.h | 3 ++- arch/arm64/kernel/cpufeature.c | 41 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 9 +++---- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8da621627d7c..6835a48b31d4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -40,7 +40,8 @@ #define ARM64_WORKAROUND_858921 19 #define ARM64_WORKAROUND_CAVIUM_30115 20 #define ARM64_HAS_DCPOP 21 +#define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_NCAPS 22 +#define ARM64_NCAPS 24 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c07954638658..da6f12e40714 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -36,7 +36,8 @@ typedef struct { static inline bool arm64_kernel_unmapped_at_el0(void) { - return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); + return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && + cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } extern void paging_init(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 21e2c95d24e7..1a6ec079c541 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -796,6 +796,40 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus ID_AA64PFR0_FP_SHIFT) < 0; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ + +static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + /* Forced on command line? */ + if (__kpti_forced) { + pr_info_once("kernel page table isolation forced %s by command line option\n", + __kpti_forced > 0 ? "ON" : "OFF"); + return __kpti_forced > 0; + } + + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return true; + + return false; +} + +static int __init parse_kpti(char *str) +{ + bool enabled; + int ret = strtobool(str, &enabled); + + if (ret) + return ret; + + __kpti_forced = enabled ? 1 : -1; + return 0; +} +__setup("kpti=", parse_kpti); +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -882,6 +916,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .def_scope = SCOPE_SYSTEM, .matches = hyp_offset_low, }, +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + { + .capability = ARM64_UNMAP_KERNEL_AT_EL0, + .def_scope = SCOPE_SYSTEM, + .matches = unmap_kernel_at_el0, + }, +#endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index edf7681f0d37..b431a6de1b4f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -74,6 +74,7 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 .if \regsize == 64 mrs x30, tpidrro_el0 @@ -82,6 +83,7 @@ mov x30, xzr .endif .endif +alternative_else_nop_endif #endif sub sp, sp, #S_FRAME_SIZE @@ -321,10 +323,9 @@ alternative_else_nop_endif ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp -#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 - eret -#else .if \el == 0 +alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f msr far_el1, x30 tramp_alias x30, tramp_exit_native @@ -332,10 +333,10 @@ alternative_else_nop_endif 4: tramp_alias x30, tramp_exit_compat br x30 +#endif .else eret .endif -#endif .endm .macro irq_stack_entry From 3882b5f63f3cfff225cee691dcb8357bf6714d07 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Dec 2017 11:24:02 +0000 Subject: [PATCH 286/770] arm64: kaslr: Put kernel vectors address in separate data page Commit 6c27c4082f4f upstream. The literal pool entry for identifying the vectors base is the only piece of information in the trampoline page that identifies the true location of the kernel. This patch moves it into a page-aligned region of the .rodata section and maps this adjacent to the trampoline text via an additional fixmap entry, which protects against any accidental leakage of the trampoline contents. Suggested-by: Ard Biesheuvel Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/kernel/entry.S | 14 ++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 5 ++++- arch/arm64/mm/mmu.c | 10 +++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 8119b49be98d..ec1e6d6fa14c 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -59,6 +59,7 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_DATA, FIX_ENTRY_TRAMP_TEXT, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b431a6de1b4f..d3551e5025c0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -982,7 +982,13 @@ alternative_else_nop_endif msr tpidrro_el0, x30 // Restored in kernel_ventry .endif tramp_map_kernel x30 +#ifdef CONFIG_RANDOMIZE_BASE + adr x30, tramp_vectors + PAGE_SIZE +alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 + ldr x30, [x30] +#else ldr x30, =vectors +#endif prfm plil1strm, [x30, #(1b - tramp_vectors)] msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) @@ -1025,6 +1031,14 @@ END(tramp_exit_compat) .ltorg .popsection // .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE + .pushsection ".rodata", "a" + .align PAGE_SHIFT + .globl __entry_tramp_data_start +__entry_tramp_data_start: + .quad vectors + .popsection // .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6b4260f22aab..ddfd3c0942f7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -251,7 +251,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif - +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, + "Entry trampoline text too big") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3b9af178f791..606c3654186f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -541,8 +541,16 @@ static int __init map_entry_trampoline(void) __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, prot, pgd_pgtable_alloc, 0); - /* ...as well as the kernel page table */ + /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern char __entry_tramp_data_start[]; + + __set_fixmap(FIX_ENTRY_TRAMP_DATA, + __pa_symbol(__entry_tramp_data_start), + PAGE_KERNEL_RO); + } + return 0; } core_initcall(map_entry_trampoline); From 68a65ce7039aa247b911e00ed5c743db35167781 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 16:15:59 +0000 Subject: [PATCH 287/770] arm64: use RET instruction for exiting the trampoline Commit be04a6d1126b upstream. Speculation attacks against the entry trampoline can potentially resteer the speculative instruction stream through the indirect branch and into arbitrary gadgets within the kernel. This patch defends against these attacks by forcing a misprediction through the return stack: a dummy BL instruction loads an entry into the stack, so that the predicted program flow of the subsequent RET instruction is to a branch-to-self instruction which is finally resolved as a branch to the kernel vectors with speculation suppressed. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d3551e5025c0..b32ba0332ccb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -981,6 +981,14 @@ alternative_else_nop_endif .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + /* + * Defend against branch aliasing attacks by pushing a dummy + * entry onto the return stack and using a RET instruction to + * enter the full-fat kernel vectors. + */ + bl 2f + b . +2: tramp_map_kernel x30 #ifdef CONFIG_RANDOMIZE_BASE adr x30, tramp_vectors + PAGE_SIZE @@ -993,7 +1001,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) isb - br x30 + ret .endm .macro tramp_exit, regsize = 64 From 332e0288200f6afe070e2c126680130195cec3d9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:41:01 +0000 Subject: [PATCH 288/770] arm64: Kconfig: Add CONFIG_UNMAP_KERNEL_AT_EL0 Commit 084eb77cd3a8 upstream. Add a Kconfig entry to control use of the entry trampoline, which allows us to unmap the kernel whilst running in userspace and improve the robustness of KASLR. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3cec78c81b33..5063d10f3cd5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -806,6 +806,19 @@ config FORCE_MAX_ZONEORDER However for 4K, we choose a higher default value, 11 as opposed to 10, giving us 4M allocations matching the default size used by generic code. +config UNMAP_KERNEL_AT_EL0 + bool "Unmap kernel when running in userspace (aka \"KAISER\")" + default y + help + Some attacks against KASLR make use of the timing difference between + a permission fault which could arise from a page table entry that is + present in the TLB, and a translation fault which always requires a + page table walk. This option defends against these attacks by unmapping + the kernel whilst running in userspace, therefore forcing translation + faults for all of kernel space. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT From 5e319f4533bfa8df7f03df501d22a169b2f03719 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 16:19:39 +0000 Subject: [PATCH 289/770] arm64: Kconfig: Reword UNMAP_KERNEL_AT_EL0 kconfig entry Commit 0617052ddde3 upstream. Although CONFIG_UNMAP_KERNEL_AT_EL0 does make KASLR more robust, it's actually more useful as a mitigation against speculation attacks that can leak arbitrary kernel data to userspace through speculation. Reword the Kconfig help message to reflect this, and make the option depend on EXPERT so that it is on by default for the majority of users. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5063d10f3cd5..0b6b59d93cf8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -807,15 +807,14 @@ config FORCE_MAX_ZONEORDER 4M allocations matching the default size used by generic code. config UNMAP_KERNEL_AT_EL0 - bool "Unmap kernel when running in userspace (aka \"KAISER\")" + bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT default y help - Some attacks against KASLR make use of the timing difference between - a permission fault which could arise from a page table entry that is - present in the TLB, and a translation fault which always requires a - page table walk. This option defends against these attacks by unmapping - the kernel whilst running in userspace, therefore forcing translation - faults for all of kernel space. + Speculation attacks against some high-performance processors can + be used to bypass MMU permission checks and leak kernel data to + userspace. This can be defended against by unmapping the kernel + when running in userspace, mapping it back in on exception entry + via a trampoline page in the vector table. If unsure, say Y. From 83ae3355bdfbd44a594f18946d3a37a74529a801 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Nov 2017 18:29:30 +0000 Subject: [PATCH 290/770] arm64: Take into account ID_AA64PFR0_EL1.CSV3 Commit 179a56f6f9fb upstream. For non-KASLR kernels where the KPTI behaviour has not been overridden on the command line we can use ID_AA64PFR0_EL1.CSV3 to determine whether or not we should unmap the kernel whilst running at EL0. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f707fed5886f..1efbe29c8b24 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -332,6 +332,7 @@ #define ID_AA64ISAR1_DPB_SHIFT 0 /* id_aa64pfr0 */ +#define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 #define ID_AA64PFR0_FP_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1a6ec079c541..378661ce1f8b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -125,6 +125,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), @@ -802,6 +803,8 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int __unused) { + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + /* Forced on command line? */ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by command line option\n", @@ -813,7 +816,9 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) return true; - return false; + /* Defer to CPU feature registers */ + return !cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV3_SHIFT); } static int __init parse_kpti(char *str) @@ -918,6 +923,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { + .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, .def_scope = SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, From 5385e5fe9724479b3b9d7f1b6466b4d85f4eafa9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 9 Jan 2018 16:12:18 +0000 Subject: [PATCH 291/770] arm64: capabilities: Handle duplicate entries for a capability Commit 67948af41f2e upstream. Sometimes a single capability could be listed multiple times with differing matches(), e.g, CPU errata for different MIDR versions. This breaks verify_local_cpu_feature() and this_cpu_has_cap() as we stop checking for a capability on a CPU with the first entry in the given table, which is not sufficient. Make sure we run the checks for all entries of the same capability. We do this by fixing __this_cpu_has_cap() to run through all the entries in the given table for a match and reuse it for verify_local_cpu_feature(). Cc: Mark Rutland Cc: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 44 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 378661ce1f8b..c4e842f45b7c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1047,6 +1047,26 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) cap_set_elf_hwcap(hwcaps); } +/* + * Check if the current CPU has a given feature capability. + * Should be called from non-preemptible context. + */ +static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, + unsigned int cap) +{ + const struct arm64_cpu_capabilities *caps; + + if (WARN_ON(preemptible())) + return false; + + for (caps = cap_array; caps->desc; caps++) + if (caps->capability == cap && + caps->matches && + caps->matches(caps, SCOPE_LOCAL_CPU)) + return true; + return false; +} + void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { @@ -1125,8 +1145,9 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) +verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) { + const struct arm64_cpu_capabilities *caps = caps_list; for (; caps->matches; caps++) { if (!cpus_have_cap(caps->capability)) continue; @@ -1134,7 +1155,7 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) * If the new CPU misses an advertised feature, we cannot proceed * further, park the cpu. */ - if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { + if (!__this_cpu_has_cap(caps_list, caps->capability)) { pr_crit("CPU%d: missing feature: %s\n", smp_processor_id(), caps->desc); cpu_die_early(); @@ -1195,25 +1216,6 @@ static void __init mark_const_caps_ready(void) static_branch_enable(&arm64_const_caps_ready); } -/* - * Check if the current CPU has a given feature capability. - * Should be called from non-preemptible context. - */ -static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, - unsigned int cap) -{ - const struct arm64_cpu_capabilities *caps; - - if (WARN_ON(preemptible())) - return false; - - for (caps = cap_array; caps->desc; caps++) - if (caps->capability == cap && caps->matches) - return caps->matches(caps, SCOPE_LOCAL_CPU); - - return false; -} - extern const struct arm64_cpu_capabilities arm64_errata[]; bool this_cpu_has_cap(unsigned int cap) From 0b2995145ea4fb9f5268f51c13d25213c039486e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Dec 2017 17:33:48 +0000 Subject: [PATCH 292/770] arm64: mm: Introduce TTBR_ASID_MASK for getting at the ASID in the TTBR Commit b519538dfefc upstream. There are now a handful of open-coded masks to extract the ASID from a TTBR value, so introduce a TTBR_ASID_MASK and use that instead. Suggested-by: Mark Rutland Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/asm-uaccess.h | 3 ++- arch/arm64/include/asm/mmu.h | 1 + arch/arm64/include/asm/uaccess.h | 4 ++-- arch/arm64/kernel/entry.S | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 21b8cf304028..f4f234b6155e 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -17,7 +18,7 @@ msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE - bic \tmp1, \tmp1, #(0xffff << 48) + bic \tmp1, \tmp1, #TTBR_ASID_MASK msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index da6f12e40714..6f7bdb89817f 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -18,6 +18,7 @@ #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ #define USER_ASID_FLAG (UL(1) << 48) +#define TTBR_ASID_MASK (UL(0xffff) << 48) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 750a3b76a01c..6eadf55ebaf0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -112,7 +112,7 @@ static inline void __uaccess_ttbr0_disable(void) write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ - ttbr &= ~(0xffffUL << 48); + ttbr &= ~TTBR_ASID_MASK; write_sysreg(ttbr, ttbr1_el1); isb(); } @@ -131,7 +131,7 @@ static inline void __uaccess_ttbr0_enable(void) /* Restore active ASID */ ttbr1 = read_sysreg(ttbr1_el1); - ttbr1 |= ttbr0 & (0xffffUL << 48); + ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); isb(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b32ba0332ccb..67b6895fb628 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -205,7 +205,7 @@ alternative_else_nop_endif .if \el != 0 mrs x21, ttbr1_el1 - tst x21, #0xffff << 48 // Check for the reserved ASID + tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR From fedf5a743cf2e024c5b557abfb1a45f3c06b3c81 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 10 Jan 2018 13:18:30 +0000 Subject: [PATCH 293/770] arm64: kpti: Fix the interaction between ASID switching and software PAN Commit 6b88a32c7af6 upstream. With ARM64_SW_TTBR0_PAN enabled, the exception entry code checks the active ASID to decide whether user access was enabled (non-zero ASID) when the exception was taken. On return from exception, if user access was previously disabled, it re-instates TTBR0_EL1 from the per-thread saved value (updated in switch_mm() or efi_set_pgd()). Commit 7655abb95386 ("arm64: mm: Move ASID from TTBR0 to TTBR1") makes a TTBR0_EL1 + ASID switching non-atomic. Subsequently, commit 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") changes the __uaccess_ttbr0_disable() function and asm macro to first write the reserved TTBR0_EL1 followed by the ASID=0 update in TTBR1_EL1. If an exception occurs between these two, the exception return code will re-instate a valid TTBR0_EL1. Similar scenario can happen in cpu_switch_mm() between setting the reserved TTBR0_EL1 and the ASID update in cpu_do_switch_mm(). This patch reverts the entry.S check for ASID == 0 to TTBR0_EL1 and disables the interrupts around the TTBR0_EL1 and ASID switching code in __uaccess_ttbr0_disable(). It also ensures that, when returning from the EFI runtime services, efi_set_pgd() doesn't leave a non-zero ASID in TTBR1_EL1 by using uaccess_ttbr0_{enable,disable}. The accesses to current_thread_info()->ttbr0 are updated to use READ_ONCE/WRITE_ONCE. As a safety measure, __uaccess_ttbr0_enable() always masks out any existing non-zero ASID TTBR1_EL1 before writing in the new ASID. Fixes: 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") Acked-by: Will Deacon Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Reviewed-by: James Morse Tested-by: James Morse Co-developed-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/asm-uaccess.h | 12 +++++++----- arch/arm64/include/asm/efi.h | 12 +++++++----- arch/arm64/include/asm/mmu_context.h | 3 ++- arch/arm64/include/asm/uaccess.h | 9 ++++++--- arch/arm64/kernel/entry.S | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/mm/proc.S | 3 +++ arch/arm64/xen/hypercall.S | 2 +- 12 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f4f234b6155e..dd49c3567f20 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -14,11 +14,11 @@ #ifdef CONFIG_ARM64_SW_TTBR0_PAN .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir + bic \tmp1, \tmp1, #TTBR_ASID_MASK add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE - bic \tmp1, \tmp1, #TTBR_ASID_MASK msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm @@ -35,9 +35,11 @@ isb .endm - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption __uaccess_ttbr0_disable \tmp1 + restore_irq \tmp2 alternative_else_nop_endif .endm @@ -49,7 +51,7 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm #else - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 .endm .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 @@ -59,8 +61,8 @@ alternative_else_nop_endif /* * These macros are no-ops when UAO is present. */ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 + .macro uaccess_disable_not_uao, tmp1, tmp2 + uaccess_ttbr0_disable \tmp1, \tmp2 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(1) alternative_else_nop_endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index c4cd5081d78b..8389050328bb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -121,19 +121,21 @@ static inline void efi_set_pgd(struct mm_struct *mm) if (mm != current->active_mm) { /* * Update the current thread's saved ttbr0 since it is - * restored as part of a return from exception. Set - * the hardware TTBR0_EL1 using cpu_switch_mm() - * directly to enable potential errata workarounds. + * restored as part of a return from exception. Enable + * access to the valid TTBR0_EL1 and invoke the errata + * workaround directly since there is no return from + * exception when invoking the EFI run-time services. */ update_saved_ttbr0(current, mm); - cpu_switch_mm(mm->pgd, mm); + uaccess_ttbr0_enable(); + post_ttbr_update_workaround(); } else { /* * Defer the switch to the current thread's TTBR0_EL1 * until uaccess_enable(). Restore the current * thread's saved ttbr0 corresponding to its active_mm */ - cpu_set_reserved_ttbr0(); + uaccess_ttbr0_disable(); update_saved_ttbr0(current, current->active_mm); } } diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index da29766a181c..779d7a2ec5ec 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -175,7 +175,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, else ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; - task_thread_info(tsk)->ttbr0 = ttbr; + WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); } #else static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -230,6 +230,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev,next) switch_mm(prev, next, current) void verify_cpu_asid_bits(void); +void post_ttbr_update_workaround(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6eadf55ebaf0..335dfe02a846 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -105,16 +105,18 @@ static inline void set_fs(mm_segment_t fs) #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void __uaccess_ttbr0_disable(void) { - unsigned long ttbr; + unsigned long flags, ttbr; + local_irq_save(flags); ttbr = read_sysreg(ttbr1_el1); + ttbr &= ~TTBR_ASID_MASK; /* reserved_ttbr0 placed at the end of swapper_pg_dir */ write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ - ttbr &= ~TTBR_ASID_MASK; write_sysreg(ttbr, ttbr1_el1); isb(); + local_irq_restore(flags); } static inline void __uaccess_ttbr0_enable(void) @@ -127,10 +129,11 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. */ local_irq_save(flags); - ttbr0 = current_thread_info()->ttbr0; + ttbr0 = READ_ONCE(current_thread_info()->ttbr0); /* Restore active ASID */ ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); isb(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 67b6895fb628..8d7112aa8afe 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -204,7 +204,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr1_el1 + mrs x21, ttbr0_el1 tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 8f9c4641e706..3d69a8d41fa5 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2 + uaccess_disable_not_uao x2, x3 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 69d86a80f3e2..20305d485046 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -67,7 +67,7 @@ ENTRY(__arch_copy_from_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index e442b531252a..fbb090f431a5 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,7 +68,7 @@ ENTRY(raw_copy_in_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(raw_copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 318f15d5c336..fda6172d6b88 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -66,7 +66,7 @@ ENTRY(__arch_copy_to_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 6cd20a8c0952..91464e7f77cc 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU isb mov x0, #0 1: - uaccess_ttbr0_disable x1 + uaccess_ttbr0_disable x1, x2 ret 9: mov x0, #-EFAULT diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 447537c1699d..2d406b4b522f 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,6 +140,9 @@ ENDPROC(cpu_do_resume) ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + bfi x0, x1, #48, #16 // set the ASID field in TTBR0 +#endif bfi x2, x1, #48, #16 // set the ASID msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) isb diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index acdbd2c9e899..c5f05c4a4d00 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -107,6 +107,6 @@ ENTRY(privcmd_call) /* * Disable userspace access from kernel once the hyp call completed. */ - uaccess_ttbr0_disable x6 + uaccess_ttbr0_disable x6, x7 ret ENDPROC(privcmd_call); From 7aca19ea5a45e575de769578d5d78ab7a540ebcc Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Sun, 7 Jan 2018 22:53:35 -0800 Subject: [PATCH 294/770] arm64: cputype: Add MIDR values for Cavium ThunderX2 CPUs Commit 0d90718871fe upstream. Add the older Broadcom ID as well as the new Cavium ID for ThunderX2 CPUs. Signed-off-by: Jayachandran C Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index da036fe68573..157afb9f7a35 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -87,6 +87,7 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 +#define CAVIUM_CPU_PART_THUNDERX2 0x0AF #define BRCM_CPU_PART_VULCAN 0x516 @@ -100,6 +101,8 @@ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) From c98c8c23585643cbf255415b02cb32c95baac82e Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Fri, 19 Jan 2018 04:22:48 -0800 Subject: [PATCH 295/770] arm64: Turn on KPTI only on CPUs that need it Commit 0ba2e29c7fc1 upstream. Whitelist Broadcom Vulcan/Cavium ThunderX2 processors in unmap_kernel_at_el0(). These CPUs are not vulnerable to CVE-2017-5754 and do not need KPTI when KASLR is off. Acked-by: Will Deacon Signed-off-by: Jayachandran C Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c4e842f45b7c..46b1507ea605 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -816,6 +816,13 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) return true; + /* Don't force KPTI for CPUs that are not vulnerable */ + switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { + case MIDR_CAVIUM_THUNDERX2: + case MIDR_BRCM_VULCAN: + return false; + } + /* Defer to CPU feature registers */ return !cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV3_SHIFT); From 69288201803aecffd7f17ffa9c0c5043ffb8367c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jan 2018 11:59:53 +0000 Subject: [PATCH 296/770] arm64: kpti: Make use of nG dependent on arm64_kernel_unmapped_at_el0() Commit 41acec624087 upstream. To allow systems which do not require kpti to continue running with global kernel mappings (which appears to be a requirement for Cavium ThunderX due to a CPU erratum), make the use of nG in the kernel page tables dependent on arm64_kernel_unmapped_at_el0(), which is resolved at runtime. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kernel-pgtable.h | 12 ++-------- arch/arm64/include/asm/pgtable-prot.h | 30 ++++++++++++------------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 77a27af01371..7803343e5881 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -78,16 +78,8 @@ /* * Initial memory map attributes. */ -#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG) -#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG) -#else -#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS -#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS -#endif +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 22a926825e3f..2db84df5eb42 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -37,13 +37,11 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG) -#else -#define PROT_DEFAULT _PROT_DEFAULT -#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) + +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -55,22 +53,22 @@ #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG) +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT -#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) +#define PAGE_KERNEL __pgprot(PROT_NORMAL) +#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) +#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) -#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) +#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) From ee28fed5ccc66fbdef4119451aa6754501705a7e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jan 2018 11:59:54 +0000 Subject: [PATCH 297/770] arm64: mm: Permit transitioning from Global to Non-Global without BBM Commit 4e6020565596 upstream. Break-before-make is not needed when transitioning from Global to Non-Global mappings, provided that the contiguous hint is not being used. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 606c3654186f..fa20124c19d5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -117,6 +117,10 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if ((old | new) & PTE_CONT) return false; + /* Transitioning from Global to Non-Global is safe */ + if (((old ^ new) == PTE_NG) && (new & PTE_NG)) + return true; + return ((old ^ new) & ~mask) == 0; } From 2feb36ebe4503551927d9c798cd454b7f01bd442 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Feb 2018 22:22:50 +0000 Subject: [PATCH 298/770] arm64: kpti: Add ->enable callback to remap swapper using nG mappings Commit f992b4dfd58b upstream. Defaulting to global mappings for kernel space is generally good for performance and appears to be necessary for Cavium ThunderX. If we subsequently decide that we need to enable kpti, then we need to rewrite our existing page table entries to be non-global. This is fiddly, and made worse by the possible use of contiguous mappings, which require a strict break-before-make sequence. Since the enable callback runs on each online CPU from stop_machine context, we can have all CPUs enter the idmap, where secondaries can wait for the primary CPU to rewrite swapper with its MMU off. It's all fairly horrible, but at least it only runs once. Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 4 + arch/arm64/kernel/cpufeature.c | 25 ++++ arch/arm64/mm/proc.S | 201 ++++++++++++++++++++++++++++- 3 files changed, 223 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index a1e5c1eb3c85..c8ec722cc855 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -486,4 +486,8 @@ alternative_else_nop_endif #endif .endm + .macro pte_to_phys, phys, pte + and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 46b1507ea605..194dd5dbf328 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -828,6 +828,30 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, ID_AA64PFR0_CSV3_SHIFT); } +static int kpti_install_ng_mappings(void *__unused) +{ + typedef void (kpti_remap_fn)(int, int, phys_addr_t); + extern kpti_remap_fn idmap_kpti_install_ng_mappings; + kpti_remap_fn *remap_fn; + + static bool kpti_applied = false; + int cpu = smp_processor_id(); + + if (kpti_applied) + return 0; + + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + + cpu_install_idmap(); + remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + cpu_uninstall_idmap(); + + if (!cpu) + kpti_applied = true; + + return 0; +} + static int __init parse_kpti(char *str) { bool enabled; @@ -934,6 +958,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_UNMAP_KERNEL_AT_EL0, .def_scope = SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, + .enable = kpti_install_ng_mappings, }, #endif { diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 2d406b4b522f..68eabe878dad 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -153,6 +153,16 @@ ENTRY(cpu_do_switch_mm) ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" + +.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 + adrp \tmp1, empty_zero_page + msr ttbr1_el1, \tmp2 + isb + tlbi vmalle1 + dsb nsh + isb +.endm + /* * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) * @@ -163,13 +173,7 @@ ENTRY(idmap_cpu_replace_ttbr1) mrs x2, daif msr daifset, #0xf - adrp x1, empty_zero_page - msr ttbr1_el1, x1 - isb - - tlbi vmalle1 - dsb nsh - isb + __idmap_cpu_set_reserved_ttbr1 x1, x3 msr ttbr1_el1, x0 isb @@ -180,6 +184,189 @@ ENTRY(idmap_cpu_replace_ttbr1) ENDPROC(idmap_cpu_replace_ttbr1) .popsection +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + .pushsection ".idmap.text", "ax" + + .macro __idmap_kpti_get_pgtable_ent, type + dc cvac, cur_\()\type\()p // Ensure any existing dirty + dmb sy // lines are written back before + ldr \type, [cur_\()\type\()p] // loading the entry + tbz \type, #0, next_\()\type // Skip invalid entries + .endm + + .macro __idmap_kpti_put_pgtable_ent_ng, type + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\()\type\()p] // Update the entry and ensure it + dc civac, cur_\()\type\()p // is visible to all CPUs. + .endm + +/* + * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * + * Called exactly once from stop_machine context by each CPU found during boot. + */ +__idmap_kpti_flag: + .long 1 +ENTRY(idmap_kpti_install_ng_mappings) + cpu .req w0 + num_cpus .req w1 + swapper_pa .req x2 + swapper_ttb .req x3 + flag_ptr .req x4 + cur_pgdp .req x5 + end_pgdp .req x6 + pgd .req x7 + cur_pudp .req x8 + end_pudp .req x9 + pud .req x10 + cur_pmdp .req x11 + end_pmdp .req x12 + pmd .req x13 + cur_ptep .req x14 + end_ptep .req x15 + pte .req x16 + + mrs swapper_ttb, ttbr1_el1 + adr flag_ptr, __idmap_kpti_flag + + cbnz cpu, __idmap_kpti_secondary + + /* We're the boot CPU. Wait for the others to catch up */ + sevl +1: wfe + ldaxr w18, [flag_ptr] + eor w18, w18, num_cpus + cbnz w18, 1b + + /* We need to walk swapper, so turn off the MMU. */ + mrs x18, sctlr_el1 + bic x18, x18, #SCTLR_ELx_M + msr sctlr_el1, x18 + isb + + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ + /* PGD */ + mov cur_pgdp, swapper_pa + add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) +do_pgd: __idmap_kpti_get_pgtable_ent pgd + tbnz pgd, #1, walk_puds + __idmap_kpti_put_pgtable_ent_ng pgd +next_pgd: + add cur_pgdp, cur_pgdp, #8 + cmp cur_pgdp, end_pgdp + b.ne do_pgd + + /* Publish the updated tables and nuke all the TLBs */ + dsb sy + tlbi vmalle1is + dsb ish + isb + + /* We're done: fire up the MMU again */ + mrs x18, sctlr_el1 + orr x18, x18, #SCTLR_ELx_M + msr sctlr_el1, x18 + isb + + /* Set the flag to zero to indicate that we're all done */ + str wzr, [flag_ptr] + ret + + /* PUD */ +walk_puds: + .if CONFIG_PGTABLE_LEVELS > 3 + pte_to_phys cur_pudp, pgd + add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) +do_pud: __idmap_kpti_get_pgtable_ent pud + tbnz pud, #1, walk_pmds + __idmap_kpti_put_pgtable_ent_ng pud +next_pud: + add cur_pudp, cur_pudp, 8 + cmp cur_pudp, end_pudp + b.ne do_pud + b next_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + mov pud, pgd + b walk_pmds +next_pud: + b next_pgd + .endif + + /* PMD */ +walk_pmds: + .if CONFIG_PGTABLE_LEVELS > 2 + pte_to_phys cur_pmdp, pud + add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) +do_pmd: __idmap_kpti_get_pgtable_ent pmd + tbnz pmd, #1, walk_ptes + __idmap_kpti_put_pgtable_ent_ng pmd +next_pmd: + add cur_pmdp, cur_pmdp, #8 + cmp cur_pmdp, end_pmdp + b.ne do_pmd + b next_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + mov pmd, pud + b walk_ptes +next_pmd: + b next_pud + .endif + + /* PTE */ +walk_ptes: + pte_to_phys cur_ptep, pmd + add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) +do_pte: __idmap_kpti_get_pgtable_ent pte + __idmap_kpti_put_pgtable_ent_ng pte +next_pte: + add cur_ptep, cur_ptep, #8 + cmp cur_ptep, end_ptep + b.ne do_pte + b next_pmd + + /* Secondary CPUs end up here */ +__idmap_kpti_secondary: + /* Uninstall swapper before surgery begins */ + __idmap_cpu_set_reserved_ttbr1 x18, x17 + + /* Increment the flag to let the boot CPU we're ready */ +1: ldxr w18, [flag_ptr] + add w18, w18, #1 + stxr w17, w18, [flag_ptr] + cbnz w17, 1b + + /* Wait for the boot CPU to finish messing around with swapper */ + sevl +1: wfe + ldxr w18, [flag_ptr] + cbnz w18, 1b + + /* All done, act like nothing happened */ + msr ttbr1_el1, swapper_ttb + isb + ret + + .unreq cpu + .unreq num_cpus + .unreq swapper_pa + .unreq swapper_ttb + .unreq flag_ptr + .unreq cur_pgdp + .unreq end_pgdp + .unreq pgd + .unreq cur_pudp + .unreq end_pudp + .unreq pud + .unreq cur_pmdp + .unreq end_pmdp + .unreq pmd + .unreq cur_ptep + .unreq end_ptep + .unreq pte +ENDPROC(idmap_kpti_install_ng_mappings) + .popsection +#endif + /* * __cpu_setup * From e39247ca17146e8baae3b0703fe5f27298a60189 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Jan 2018 11:59:56 +0000 Subject: [PATCH 299/770] arm64: Force KPTI to be disabled on Cavium ThunderX Commit 6dc52b15c4a4 upstream. Cavium ThunderX's erratum 27456 results in a corruption of icache entries that are loaded from memory that is mapped as non-global (i.e. ASID-tagged). As KPTI is based on memory being mapped non-global, let's prevent it from kicking in if this erratum is detected. Signed-off-by: Marc Zyngier [will: Update comment] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 194dd5dbf328..451b319323c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -803,12 +803,23 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int __unused) { + char const *str = "command line option"; u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - /* Forced on command line? */ + /* + * For reasons that aren't entirely clear, enabling KPTI on Cavium + * ThunderX leads to apparent I-cache corruption of kernel text, which + * ends as well as you might imagine. Don't even try. + */ + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) { + str = "ARM64_WORKAROUND_CAVIUM_27456"; + __kpti_forced = -1; + } + + /* Forced? */ if (__kpti_forced) { - pr_info_once("kernel page table isolation forced %s by command line option\n", - __kpti_forced > 0 ? "ON" : "OFF"); + pr_info_once("kernel page table isolation forced %s by %s\n", + __kpti_forced > 0 ? "ON" : "OFF", str); return __kpti_forced > 0; } From 8703f27d7c5db70ffd793a813160f28e810cc1bc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jan 2018 11:59:58 +0000 Subject: [PATCH 300/770] arm64: entry: Reword comment about post_ttbr_update_workaround Commit f167211a93ac upstream. We don't fully understand the Cavium ThunderX erratum, but it appears that mapping the kernel as nG can lead to horrible consequences such as attempting to execute userspace from kernel context. Since kpti isn't enabled for these CPUs anyway, simplify the comment justifying the lack of post_ttbr_update_workaround in the exception trampoline. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8d7112aa8afe..9766039eef31 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -962,16 +962,9 @@ alternative_else_nop_endif orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* - * We avoid running the post_ttbr_update_workaround here because the - * user and kernel ASIDs don't have conflicting mappings, so any - * "blessing" as described in: - * - * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com - * - * will not hurt correctness. Whilst this may partially defeat the - * point of using split ASIDs in the first place, it avoids - * the hit of invalidating the entire I-cache on every return to - * userspace. + * We avoid running the post_ttbr_update_workaround here because + * it's only needed by Cavium ThunderX, which requires KPTI to be + * disabled. */ .endm From 1449a173a2ee0db4fef6669c1bdc71a5561cb267 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jan 2018 12:00:00 +0000 Subject: [PATCH 301/770] arm64: idmap: Use "awx" flags for .idmap.text .pushsection directives Commit 439e70e27a51 upstream. The identity map is mapped as both writeable and executable by the SWAPPER_MM_MMUFLAGS and this is relied upon by the kpti code to manage a synchronisation flag. Update the .pushsection flags to reflect the actual mapping attributes. Reported-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu-reset.S | 2 +- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/sleep.S | 2 +- arch/arm64/mm/proc.S | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 2a752cb2a0f3..8021b46c9743 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,7 +16,7 @@ #include .text -.pushsection .idmap.text, "ax" +.pushsection .idmap.text, "awx" /* * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6657ae4f3c7a..261f3f88364c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -371,7 +371,7 @@ ENDPROC(__primary_switched) * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region */ - .section ".idmap.text","ax" + .section ".idmap.text","awx" ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 10dd16d7902d..bebec8ef9372 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -96,7 +96,7 @@ ENTRY(__cpu_suspend_enter) ret ENDPROC(__cpu_suspend_enter) - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 68eabe878dad..c25e58bc2910 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -86,7 +86,7 @@ ENDPROC(cpu_do_suspend) * * x0: Address of context pointer */ - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] @@ -152,7 +152,7 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, empty_zero_page @@ -185,7 +185,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" .macro __idmap_kpti_get_pgtable_ent, type dc cvac, cur_\()\type\()p // Ensure any existing dirty @@ -373,7 +373,7 @@ ENDPROC(idmap_kpti_install_ng_mappings) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. */ - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh From 6afdaf109c34acd9cbc85ef687e3e4e991e5fd89 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:16 +0000 Subject: [PATCH 302/770] arm64: barrier: Add CSDB macros to control data-value prediction Commit 669474e772b9 upstream. For CPUs capable of data value prediction, CSDB waits for any outstanding predictions to architecturally resolve before allowing speculative execution to continue. Provide macros to expose it to the arch code. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 7 +++++++ arch/arm64/include/asm/barrier.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c8ec722cc855..20f5845e5295 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -95,6 +95,13 @@ dmb \opt .endm +/* + * Value prediction barrier + */ + .macro csdb + hint #20 + .endm + /* * NOP sequence */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 0fe7e43b7fbc..c68fdc5707ed 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -31,6 +31,8 @@ #define dmb(opt) asm volatile("dmb " #opt : : : "memory") #define dsb(opt) asm volatile("dsb " #opt : : : "memory") +#define csdb() asm volatile("hint #20" : : : "memory") + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) From 0a532ea3ef1481c5af869728abac9179609acba8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2018 15:34:17 +0000 Subject: [PATCH 303/770] arm64: Implement array_index_mask_nospec() Commit 022620eed3d0 upstream. Provide an optimised, assembly implementation of array_index_mask_nospec() for arm64 so that the compiler is not in a position to transform the code in ways which affect its ability to inhibit speculation (e.g. by introducing conditional branches). This is similar to the sequence used by x86, modulo architectural differences in the carry/borrow flags. Reviewed-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/barrier.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index c68fdc5707ed..0b0755c961ac 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -40,6 +40,27 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) +/* + * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz + * and 0 otherwise. + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long idx, + unsigned long sz) +{ + unsigned long mask; + + asm volatile( + " cmp %1, %2\n" + " sbc %0, xzr, xzr\n" + : "=r" (mask) + : "r" (idx), "Ir" (sz) + : "cc"); + + csdb(); + return mask; +} + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) From 535357c9d3e94115b87e11a3014ea29c8a0c26fb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2018 15:34:18 +0000 Subject: [PATCH 304/770] arm64: Make USER_DS an inclusive limit Commit 51369e398d0d upstream. Currently, USER_DS represents an exclusive limit while KERNEL_DS is inclusive. In order to do some clever trickery for speculation-safe masking, we need them both to behave equivalently - there aren't enough bits to make KERNEL_DS exclusive, so we have precisely one option. This also happens to correct a longstanding false negative for a range ending on the very top byte of kernel memory. Mark Rutland points out that we've actually got the semantics of addresses vs. segments muddled up in most of the places we need to amend, so shuffle the {USER,KERNEL}_DS definitions around such that we can correct those properly instead of just pasting "-1"s everywhere. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/processor.h | 3 ++ arch/arm64/include/asm/uaccess.h | 45 +++++++++++++++++------------- arch/arm64/kernel/entry.S | 4 +-- arch/arm64/mm/fault.c | 4 +-- 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 7dddca21fc64..fda6f5812281 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,9 @@ #define TASK_SIZE_64 (UL(1) << VA_BITS) +#define KERNEL_DS UL(-1) +#define USER_DS (TASK_SIZE_64 - 1) + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 335dfe02a846..87a6a52cee65 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -35,10 +35,7 @@ #include #include -#define KERNEL_DS (-1UL) #define get_ds() (KERNEL_DS) - -#define USER_DS TASK_SIZE_64 #define get_fs() (current_thread_info()->addr_limit) static inline void set_fs(mm_segment_t fs) @@ -66,22 +63,32 @@ static inline void set_fs(mm_segment_t fs) * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size <= current->addr_limit - * - * This needs 65-bit arithmetic. + * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 */ -#define __range_ok(addr, size) \ -({ \ - unsigned long __addr = (unsigned long)(addr); \ - unsigned long flag, roksum; \ - __chk_user_ptr(addr); \ - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ - : "=&r" (flag), "=&r" (roksum) \ - : "1" (__addr), "Ir" (size), \ - "r" (current_thread_info()->addr_limit) \ - : "cc"); \ - flag; \ -}) +static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +{ + unsigned long limit = current_thread_info()->addr_limit; + + __chk_user_ptr(addr); + asm volatile( + // A + B <= C + 1 for all A,B,C, in four easy steps: + // 1: X = A + B; X' = X % 2^64 + " adds %0, %0, %2\n" + // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 + " csel %1, xzr, %1, hi\n" + // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' + // to compensate for the carry flag being set in step 4. For + // X > 2^64, X' merely has to remain nonzero, which it does. + " csinv %0, %0, xzr, cc\n" + // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 + // comes from the carry in being clear. Otherwise, we are + // testing X' - C == 0, subject to the previous adjustments. + " sbcs xzr, %0, %1\n" + " cset %0, ls\n" + : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); + + return addr; +} /* * When dealing with data aborts, watchpoints, or instruction traps we may end @@ -90,7 +97,7 @@ static inline void set_fs(mm_segment_t fs) */ #define untagged_addr(addr) sign_extend64(addr, 55) -#define access_ok(type, addr, size) __range_ok(addr, size) +#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9766039eef31..206186e1f17f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,10 +167,10 @@ alternative_else_nop_endif .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk - /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + /* Save the task's original addr_limit and set USER_DS */ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #TASK_SIZE_64 + mov x20, #USER_DS str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b64958b23a7f..2789f4662286 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -242,7 +242,7 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, if (fsc_type == ESR_ELx_FSC_PERM) return true; - if (addr < USER_DS && system_uses_ttbr0_pan()) + if (addr < TASK_SIZE && system_uses_ttbr0_pan()) return fsc_type == ESR_ELx_FSC_FAULT && (regs->pstate & PSR_PAN_BIT); @@ -426,7 +426,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < USER_DS && is_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); From 2e985d2647a07bad21892811e8eb85f5231a1d4a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 5 Feb 2018 15:34:19 +0000 Subject: [PATCH 305/770] arm64: Use pointer masking to limit uaccess speculation Commit 4d8efc2d5ee4 upstream. Similarly to x86, mitigate speculation past an access_ok() check by masking the pointer against the address limit before use. Even if we don't expect speculative writes per se, it is plausible that a CPU may still speculate at least as far as fetching a cache line for writing, hence we also harden put_user() and clear_user() for peace of mind. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 87a6a52cee65..6d0c7179a990 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -215,6 +215,26 @@ static inline void uaccess_enable_not_uao(void) __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); } +/* + * Sanitise a uaccess pointer such that it becomes NULL if above the + * current addr_limit. + */ +#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) +static inline void __user *__uaccess_mask_ptr(const void __user *ptr) +{ + void __user *safe_ptr; + + asm volatile( + " bics xzr, %1, %2\n" + " csel %0, %1, xzr, eq\n" + : "=&r" (safe_ptr) + : "r" (ptr), "r" (current_thread_info()->addr_limit) + : "cc"); + + csdb(); + return safe_ptr; +} + /* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" @@ -285,7 +305,7 @@ do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ - __get_user((x), __p) : \ + __p = uaccess_mask_ptr(__p), __get_user((x), __p) : \ ((x) = 0, -EFAULT); \ }) @@ -349,7 +369,7 @@ do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ - __put_user((x), __p) : \ + __p = uaccess_mask_ptr(__p), __put_user((x), __p) : \ -EFAULT; \ }) @@ -365,7 +385,7 @@ extern unsigned long __must_check __clear_user(void __user *addr, unsigned long static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) - n = __clear_user(to, n); + n = __clear_user(__uaccess_mask_ptr(to), n); return n; } From 7a51d7d2f7f77cea72fe0f73bd40d39852cd0702 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:20 +0000 Subject: [PATCH 306/770] arm64: entry: Ensure branch through syscall table is bounded under speculation Commit 6314d90e6493 upstream. In a similar manner to array_index_mask_nospec, this patch introduces an assembly macro (mask_nospec64) which can be used to bound a value under speculation. This macro is then used to ensure that the indirect branch through the syscall table is bounded under speculation, with out-of-range addresses speculating as calls to sys_io_setup (0). Reviewed-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 11 +++++++++++ arch/arm64/kernel/entry.S | 2 ++ 2 files changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 20f5845e5295..6d951a82e656 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -102,6 +102,17 @@ hint #20 .endm +/* + * Sanitise a 64-bit bounded index wrt speculation, returning zero if out + * of bounds. + */ + .macro mask_nospec64, idx, limit, tmp + sub \tmp, \idx, \limit + bic \tmp, \tmp, \idx + and \idx, \idx, \tmp, asr #63 + csdb + .endm + /* * NOP sequence */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 206186e1f17f..f786e8d3d5be 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -376,6 +376,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 * x7 is reserved for the system call number in 32-bit mode. */ wsc_nr .req w25 // number of system calls +xsc_nr .req x25 // number of system calls (zero-extended) wscno .req w26 // syscall number xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer @@ -884,6 +885,7 @@ el0_svc_naked: // compat entry point b.ne __sys_trace cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys + mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall From 1ccaee9dea60f97e2f64fe17b8c23ff06fe95041 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:21 +0000 Subject: [PATCH 307/770] arm64: uaccess: Prevent speculative use of the current addr_limit Commit c2f0ad4fc089 upstream. A mispredicted conditional call to set_fs could result in the wrong addr_limit being forwarded under speculation to a subsequent access_ok check, potentially forming part of a spectre-v1 attack using uaccess routines. This patch prevents this forwarding from taking place, but putting heavy barriers in set_fs after writing the addr_limit. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6d0c7179a990..2f5dd32f5aef 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -42,6 +42,13 @@ static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + /* + * Prevent a mispredicted conditional call to set_fs from forwarding + * the wrong address limit to access_ok under speculation. + */ + dsb(nsh); + isb(); + /* On user-mode return, check fs is correct */ set_thread_flag(TIF_FSCHECK); From 28d8886d985ccc84e4e8f567c60f491acb1ae51c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:22 +0000 Subject: [PATCH 308/770] arm64: uaccess: Don't bother eliding access_ok checks in __{get, put}_user Commit 84624087dd7e upstream. access_ok isn't an expensive operation once the addr_limit for the current thread has been loaded into the cache. Given that the initial access_ok check preceding a sequence of __{get,put}_user operations will take the brunt of the miss, we can make the __* variants identical to the full-fat versions, which brings with it the benefits of address masking. The likely cost in these sequences will be from toggling PAN/UAO, which we can address later by implementing the *_unsafe versions. Reviewed-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 54 +++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 2f5dd32f5aef..0687bb084719 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -294,28 +294,33 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user(x, ptr) \ +#define __get_user_check(x, ptr, err) \ ({ \ - int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err); \ - __gu_err; \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ + __p = uaccess_mask_ptr(__p); \ + __get_user_err((x), __p, (err)); \ + } else { \ + (x) = 0; (err) = -EFAULT; \ + } \ }) #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x), (ptr), (err)); \ + __get_user_check((x), (ptr), (err)); \ (void)0; \ }) -#define get_user(x, ptr) \ +#define __get_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__p = (ptr); \ - might_fault(); \ - access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ - __p = uaccess_mask_ptr(__p), __get_user((x), __p) : \ - ((x) = 0, -EFAULT); \ + int __gu_err = 0; \ + __get_user_check((x), (ptr), __gu_err); \ + __gu_err; \ }) +#define get_user __get_user + #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ @@ -358,28 +363,33 @@ do { \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user(x, ptr) \ +#define __put_user_check(x, ptr, err) \ ({ \ - int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ - __pu_err; \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ + __p = uaccess_mask_ptr(__p); \ + __put_user_err((x), __p, (err)); \ + } else { \ + (err) = -EFAULT; \ + } \ }) #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), (err)); \ + __put_user_check((x), (ptr), (err)); \ (void)0; \ }) -#define put_user(x, ptr) \ +#define __put_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__p = (ptr); \ - might_fault(); \ - access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ - __p = uaccess_mask_ptr(__p), __put_user((x), __p) : \ - -EFAULT; \ + int __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ }) +#define put_user __put_user + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user __arch_copy_from_user extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); From ba32050d308a8bcd8ea300daf00dc197523d47f8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:23 +0000 Subject: [PATCH 309/770] arm64: uaccess: Mask __user pointers for __arch_{clear, copy_*}_user Commit f71c2ffcb20d upstream. Like we've done for get_user and put_user, ensure that user pointers are masked before invoking the underlying __arch_{clear,copy_*}_user operations. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/uaccess.h | 29 ++++++++++++++++++++++------- arch/arm64/kernel/arm64ksyms.c | 4 ++-- arch/arm64/lib/clear_user.S | 6 +++--- arch/arm64/lib/copy_in_user.S | 5 +++-- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 0687bb084719..fad8c1b2ca3e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -391,20 +391,35 @@ do { \ #define put_user __put_user extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); -#define raw_copy_from_user __arch_copy_from_user +#define raw_copy_from_user(to, from, n) \ +({ \ + __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \ +}) + extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); -#define raw_copy_to_user __arch_copy_to_user -extern unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +#define raw_copy_to_user(to, from, n) \ +({ \ + __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \ +}) + +extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); +#define raw_copy_in_user(to, from, n) \ +({ \ + __arch_copy_in_user(__uaccess_mask_ptr(to), \ + __uaccess_mask_ptr(from), (n)); \ +}) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); +static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) - n = __clear_user(__uaccess_mask_ptr(to), n); + n = __arch_clear_user(__uaccess_mask_ptr(to), n); return n; } +#define clear_user __clear_user extern long strncpy_from_user(char *dest, const char __user *src, long count); @@ -418,7 +433,7 @@ extern unsigned long __must_check __copy_user_flushcache(void *to, const void __ static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); - return __copy_user_flushcache(dst, src, size); + return __copy_user_flushcache(dst, __uaccess_mask_ptr(src), size); } #endif diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 67368c7329c0..66be504edb6c 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page); /* user mem (segment) */ EXPORT_SYMBOL(__arch_copy_from_user); EXPORT_SYMBOL(__arch_copy_to_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(raw_copy_in_user); +EXPORT_SYMBOL(__arch_clear_user); +EXPORT_SYMBOL(__arch_copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 3d69a8d41fa5..21ba0b29621b 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -21,7 +21,7 @@ .text -/* Prototype: int __clear_user(void *addr, size_t sz) +/* Prototype: int __arch_clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear @@ -29,7 +29,7 @@ * * Alignment fixed up by hardware. */ -ENTRY(__clear_user) +ENTRY(__arch_clear_user) uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 @@ -52,7 +52,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 uaccess_disable_not_uao x2, x3 ret -ENDPROC(__clear_user) +ENDPROC(__arch_clear_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index fbb090f431a5..54b75deb1d16 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -64,14 +64,15 @@ .endm end .req x5 -ENTRY(raw_copy_in_user) + +ENTRY(__arch_copy_in_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3, x4 mov x0, #0 ret -ENDPROC(raw_copy_in_user) +ENDPROC(__arch_copy_in_user) .section .fixup,"ax" .align 2 From d9ef050f28953658ab1621a65c4090600e30bfde Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 15:34:24 +0000 Subject: [PATCH 310/770] arm64: futex: Mask __user pointers prior to dereference Commit 91b2d3442f6a upstream. The arm64 futex code has some explicit dereferencing of user pointers where performing atomic operations in response to a futex command. This patch uses masking to limit any speculative futex operations to within the user address space. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/futex.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5bb2fd4674e7..07fe2479d310 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,9 +48,10 @@ do { \ } while (0) static inline int -arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { int oldval = 0, ret, tmp; + u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); @@ -88,15 +89,17 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; u32 val, tmp; + u32 __user *uaddr; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32))) return -EFAULT; + uaddr = __uaccess_mask_ptr(_uaddr); uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" From da1f67921d2ff82ac3dfbf193dc4596da569a5c6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 15 Jan 2018 19:38:54 +0000 Subject: [PATCH 311/770] arm64: cpufeature: __this_cpu_has_cap() shouldn't stop early Commit edf298cfce47 upstream. this_cpu_has_cap() tests caps->desc not caps->matches, so it stops walking the list when it finds a 'silent' feature, instead of walking to the end of the list. Prior to v4.6's 644c2ae198412 ("arm64: cpufeature: Test 'matches' pointer to find the end of the list") we always tested desc to find the end of a capability list. This was changed for dubious things like PAN_NOT_UAO. v4.7's e3661b128e53e ("arm64: Allow a capability to be checked on single CPU") added this_cpu_has_cap() using the old desc style test. CC: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Acked-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 451b319323c1..9249cb728992 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1102,9 +1102,8 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, if (WARN_ON(preemptible())) return false; - for (caps = cap_array; caps->desc; caps++) + for (caps = cap_array; caps->matches; caps++) if (caps->capability == cap && - caps->matches && caps->matches(caps, SCOPE_LOCAL_CPU)) return true; return false; From 9da836a476fe5f90348ca5edfb5af68c2ebb558b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 17 Jan 2018 17:42:20 +0000 Subject: [PATCH 312/770] arm64: Run enable method for errata work arounds on late CPUs Commit 55b35d070c25 upstream. When a CPU is brought up after we have finalised the system wide capabilities (i.e, features and errata), we make sure the new CPU doesn't need a new errata work around which has not been detected already. However we don't run enable() method on the new CPU for the errata work arounds already detected. This could cause the new CPU running without potential work arounds. It is upto the "enable()" method to decide if this CPU should do something about the errata. Fixes: commit 6a6efbb45b7d95c84 ("arm64: Verify CPU errata work arounds on hotplugged CPU") Cc: Will Deacon Cc: Mark Rutland Cc: Andre Przywara Cc: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e4c78630a730..624f8a118029 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -221,15 +221,18 @@ void verify_local_cpu_errata_workarounds(void) { const struct arm64_cpu_capabilities *caps = arm64_errata; - for (; caps->matches; caps++) - if (!cpus_have_cap(caps->capability) && - caps->matches(caps, SCOPE_LOCAL_CPU)) { + for (; caps->matches; caps++) { + if (cpus_have_cap(caps->capability)) { + if (caps->enable) + caps->enable((void *)caps); + } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: Requires work around for %s, not detected" " at boot time\n", smp_processor_id(), caps->desc ? : "an erratum"); cpu_die_early(); } + } } void update_cpu_errata_workarounds(void) From be53742befea9329b034f143e7e932cac54d3609 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jan 2018 21:37:25 +0000 Subject: [PATCH 313/770] arm64: cpufeature: Pass capability structure to ->enable callback Commit 0a0d111d40fd upstream. In order to invoke the CPU capability ->matches callback from the ->enable callback for applying local-CPU workarounds, we need a handle on the capability structure. This patch passes a pointer to the capability structure to the ->enable callback. Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9249cb728992..c284b855315f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1144,7 +1144,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) * uses an IPI, giving us a PSTATE that disappears when * we return. */ - stop_machine(caps->enable, NULL, cpu_online_mask); + stop_machine(caps->enable, (void *)caps, cpu_online_mask); } } } @@ -1203,7 +1203,7 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) cpu_die_early(); } if (caps->enable) - caps->enable(NULL); + caps->enable((void *)caps); } } From f91f190708b25df0a5675800bcabae13bf2aa882 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jan 2018 21:45:41 +0000 Subject: [PATCH 314/770] drivers/firmware: Expose psci_get_version through psci_ops structure Commit d68e3ba5303f upstream. Entry into recent versions of ARM Trusted Firmware will invalidate the CPU branch predictor state in order to protect against aliasing attacks. This patch exposes the PSCI "VERSION" function via psci_ops, so that it can be invoked outside of the PSCI driver where necessary. Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/psci.c | 2 ++ include/linux/psci.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index d687ca3d5049..8b25d31e8401 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -496,6 +496,8 @@ static void __init psci_init_migrate(void) static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_ops.get_version = psci_get_version; + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_FN_NATIVE(0_2, CPU_SUSPEND); psci_ops.cpu_suspend = psci_cpu_suspend; diff --git a/include/linux/psci.h b/include/linux/psci.h index bdea1cb5e1db..6306ab10af18 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -26,6 +26,7 @@ int psci_cpu_init_idle(unsigned int cpu); int psci_cpu_suspend_enter(unsigned long index); struct psci_operations { + u32 (*get_version)(void); int (*cpu_suspend)(u32 state, unsigned long entry_point); int (*cpu_off)(u32 state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); From c10e4aa77814063ac459fab673a5a392b7334b42 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Jan 2018 18:19:39 +0000 Subject: [PATCH 315/770] arm64: Move post_ttbr_update_workaround to C code Commit 95e3de3590e3 upstream. We will soon need to invoke a CPU-specific function pointer after changing page tables, so move post_ttbr_update_workaround out into C code to make this possible. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/assembler.h | 13 ------------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/context.c | 9 +++++++++ arch/arm64/mm/proc.S | 3 +-- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6d951a82e656..463619dcadd4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -481,19 +481,6 @@ alternative_endif mrs \rd, sp_el0 .endm -/* - * Errata workaround post TTBRx_EL1 update. - */ - .macro post_ttbr_update_workaround -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -alternative_if ARM64_WORKAROUND_CAVIUM_27456 - ic iallu - dsb nsh - isb -alternative_else_nop_endif -#endif - .endm - /** * Errata workaround prior to disable MMU. Insert an ISB immediately prior * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f786e8d3d5be..185c87a53fe3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -275,7 +275,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). */ - post_ttbr_update_workaround + bl post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index db28958d9e4f..23498d032c82 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -235,6 +235,15 @@ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } +/* Errata workaround post TTBRx_EL1 update. */ +asmlinkage void post_ttbr_update_workaround(void) +{ + asm(ALTERNATIVE("nop; nop; nop", + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); +} + static int asids_init(void) { asid_bits = get_cpu_asid_bits(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c25e58bc2910..27058f3fd132 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -148,8 +148,7 @@ ENTRY(cpu_do_switch_mm) isb msr ttbr0_el1, x0 // now update TTBR0 isb - post_ttbr_update_workaround - ret + b post_ttbr_update_workaround // Back to C code... ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "awx" From 5bee81c980297f3f5486539881ab4241c5f0dea3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Jan 2018 11:17:58 +0000 Subject: [PATCH 316/770] arm64: Add skeleton to harden the branch predictor against aliasing attacks Commit 0f15adbb2861 upstream. Aliasing attacks against CPU branch predictors can allow an attacker to redirect speculative control flow on some CPUs and potentially divulge information from one context to another. This patch adds initial skeleton code behind a new Kconfig option to enable implementation-specific mitigations against these attacks for CPUs that are affected. Co-developed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 17 ++++++++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/mmu.h | 37 ++++++++++++++++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/Makefile | 4 ++ arch/arm64/kernel/bpi.S | 55 ++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 74 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/entry.S | 8 ++-- arch/arm64/mm/context.c | 2 + arch/arm64/mm/fault.c | 17 ++++++++ 11 files changed, 215 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/kernel/bpi.S diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b6b59d93cf8..c2abb4e88ff2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -818,6 +818,23 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. +config HARDEN_BRANCH_PREDICTOR + bool "Harden the branch predictor against aliasing attacks" if EXPERT + default y + help + Speculation attacks against some high-performance processors rely on + being able to manipulate the branch predictor for a victim context by + executing aliasing branches in the attacker context. Such attacks + can be partially mitigated against by clearing internal branch + predictor state and limiting the prediction logic in some situations. + + This config option will take CPU-specific actions to harden the + branch predictor against aliasing attacks and may rely on specific + instruction sequences or control bits being set by the system + firmware. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 6835a48b31d4..8498eb4ccfc2 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -41,7 +41,8 @@ #define ARM64_WORKAROUND_CAVIUM_30115 20 #define ARM64_HAS_DCPOP 21 #define ARM64_UNMAP_KERNEL_AT_EL0 23 +#define ARM64_HARDEN_BRANCH_PREDICTOR 24 -#define ARM64_NCAPS 24 +#define ARM64_NCAPS 25 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 6f7bdb89817f..6dd83d75b82a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -41,6 +41,43 @@ static inline bool arm64_kernel_unmapped_at_el0(void) cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + int hyp_vectors_slot; + bp_hardening_cb_t fn; +}; + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) +{ + return this_cpu_ptr(&bp_hardening_data); +} + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) + return; + + d = arm64_get_bp_hardening_data(); + if (d->fn) + d->fn(); +} +#else +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) +{ + return NULL; +} + +static inline void arm64_apply_bp_hardening(void) { } +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1efbe29c8b24..ede80d47d0ef 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -333,6 +333,7 @@ /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 +#define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 #define ID_AA64PFR0_FP_SHIFT 16 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2f5ff2a65db3..def8d5623fd1 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -55,6 +55,10 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +ifeq ($(CONFIG_KVM),y) +arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o +endif + obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S new file mode 100644 index 000000000000..06a931eb2673 --- /dev/null +++ b/arch/arm64/kernel/bpi.S @@ -0,0 +1,55 @@ +/* + * Contains CPU specific branch predictor invalidation sequences + * + * Copyright (C) 2018 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +.macro ventry target + .rept 31 + nop + .endr + b \target +.endm + +.macro vectors target + ventry \target + 0x000 + ventry \target + 0x080 + ventry \target + 0x100 + ventry \target + 0x180 + + ventry \target + 0x200 + ventry \target + 0x280 + ventry \target + 0x300 + ventry \target + 0x380 + + ventry \target + 0x400 + ventry \target + 0x480 + ventry \target + 0x500 + ventry \target + 0x580 + + ventry \target + 0x600 + ventry \target + 0x680 + ventry \target + 0x700 + ventry \target + 0x780 +.endm + + .align 11 +ENTRY(__bp_harden_hyp_vecs_start) + .rept 4 + vectors __kvm_hyp_vector + .endr +ENTRY(__bp_harden_hyp_vecs_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 624f8a118029..3f2fee9d4590 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -60,6 +60,80 @@ static int cpu_enable_trap_ctr_access(void *__unused) return 0; } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include +#include + +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +#ifdef CONFIG_KVM +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K); + int i; + + for (i = 0; i < SZ_2K; i += 0x80) + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); + + flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); +} + +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + static int last_slot = -1; + static DEFINE_SPINLOCK(bp_lock); + int cpu, slot = -1; + + spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.fn, cpu) == fn) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + last_slot++; + BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) + / SZ_2K) <= last_slot); + slot = last_slot; + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.fn, fn); + spin_unlock(&bp_lock); +} +#else +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + __this_cpu_write(bp_hardening_data.fn, fn); +} +#endif /* CONFIG_KVM */ + +static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, + bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + u64 pfr0; + + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) + return; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) + return; + + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); +} +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c284b855315f..582142ae92e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -126,6 +126,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 185c87a53fe3..a035a981b557 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -724,13 +724,15 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 - // enable interrupts before calling the main handler - enable_dbg_and_irq + enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp - bl do_mem_abort + bl do_el0_ia_bp_hardening b ret_to_user el0_fpsimd_acc: /* diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 23498d032c82..abc56dc31ae0 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -242,6 +242,8 @@ asmlinkage void post_ttbr_update_workaround(void) "ic iallu; dsb nsh; isb", ARM64_WORKAROUND_CAVIUM_27456, CONFIG_CAVIUM_ERRATUM_27456)); + + arm64_apply_bp_hardening(); } static int asids_init(void) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2789f4662286..4985bd1175ef 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -751,6 +751,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die("", regs, &info, esr); } +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption. + */ + if (addr > TASK_SIZE) + arm64_apply_bp_hardening(); + + local_irq_enable(); + do_mem_abort(addr, esr, regs); +} + + /* * Handle stack alignment exceptions. */ From 9107ac4ea3da68af722d1b6820f90cf0c119b134 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Jan 2018 15:42:09 +0000 Subject: [PATCH 317/770] arm64: Move BP hardening to check_and_switch_context Commit a8e4c0a919ae upstream. We call arm64_apply_bp_hardening() from post_ttbr_update_workaround, which has the unexpected consequence of being triggered on every exception return to userspace when ARM64_SW_TTBR0_PAN is selected, even if no context switch actually occured. This is a bit suboptimal, and it would be more logical to only invalidate the branch predictor when we actually switch to a different mm. In order to solve this, move the call to arm64_apply_bp_hardening() into check_and_switch_context(), where we're guaranteed to pick a different mm context. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/context.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index abc56dc31ae0..9284788733d6 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -227,6 +227,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: + + arm64_apply_bp_hardening(); + /* * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when * emulating PAN. @@ -242,8 +245,6 @@ asmlinkage void post_ttbr_update_workaround(void) "ic iallu; dsb nsh; isb", ARM64_WORKAROUND_CAVIUM_27456, CONFIG_CAVIUM_ERRATUM_27456)); - - arm64_apply_bp_hardening(); } static int asids_init(void) From aab3306701f10c5dc35d8da74431cde6249baf0b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Jan 2018 16:38:35 +0000 Subject: [PATCH 318/770] arm64: KVM: Use per-CPU vector when BP hardening is enabled Commit 6840bdd73d07 upstream. Now that we have per-CPU vectors, let's plug then in the KVM/arm64 code. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 10 +++++++++ arch/arm64/include/asm/kvm_mmu.h | 38 ++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/switch.c | 2 +- virt/kvm/arm/arm.c | 8 ++++++- 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index fa6f2174276b..eb46fc81a440 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -221,6 +221,16 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +static inline void *kvm_get_hyp_vector(void) +{ + return kvm_ksym_ref(__kvm_hyp_vector); +} + +static inline int kvm_map_vectors(void) +{ + return 0; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 672c8684d5c2..2d6d4bd9de52 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -309,5 +309,43 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include + +static inline void *kvm_get_hyp_vector(void) +{ + struct bp_hardening_data *data = arm64_get_bp_hardening_data(); + void *vect = kvm_ksym_ref(__kvm_hyp_vector); + + if (data->fn) { + vect = __bp_harden_hyp_vecs_start + + data->hyp_vectors_slot * SZ_2K; + + if (!has_vhe()) + vect = lm_alias(vect); + } + + return vect; +} + +static inline int kvm_map_vectors(void) +{ + return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), + kvm_ksym_ref(__bp_harden_hyp_vecs_end), + PAGE_HYP_EXEC); +} + +#else +static inline void *kvm_get_hyp_vector(void) +{ + return kvm_ksym_ref(__kvm_hyp_vector); +} + +static inline int kvm_map_vectors(void) +{ + return 0; +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 945e79c641c4..9b2a76d783eb 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -51,7 +51,7 @@ static void __hyp_text __activate_traps_vhe(void) val &= ~CPACR_EL1_FPEN; write_sysreg(val, cpacr_el1); - write_sysreg(__kvm_hyp_vector, vbar_el1); + write_sysreg(kvm_get_hyp_vector(), vbar_el1); } static void __hyp_text __activate_traps_nvhe(void) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9a07ee94a230..fc27a1528811 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1139,7 +1139,7 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); + vector_ptr = (unsigned long)kvm_get_hyp_vector(); __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); @@ -1384,6 +1384,12 @@ static int init_hyp_mode(void) goto out_err; } + err = kvm_map_vectors(); + if (err) { + kvm_err("Cannot map vectors\n"); + goto out_err; + } + /* * Map the Hyp stack pages */ From 6b47a8256a56c261ab008863a548f245fdcc8b11 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 Feb 2018 17:31:39 +0000 Subject: [PATCH 319/770] arm64: entry: Apply BP hardening for high-priority synchronous exceptions Commit 5dfc6ed27710 upstream. Software-step and PC alignment fault exceptions have higher priority than instruction abort exceptions, so apply the BP hardening hooks there too if the user PC appears to reside in kernel space. Reported-by: Dan Hettena Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 6 ++++-- arch/arm64/mm/fault.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a035a981b557..ac7df45b296b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -759,8 +759,10 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 - // enable interrupts before calling the main handler - enable_dbg_and_irq + enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ct_user_exit mov x0, x26 mov x1, x25 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4985bd1175ef..58992467ebb9 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -778,6 +778,12 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, struct siginfo info; struct task_struct *tsk = current; + if (user_mode(regs)) { + if (instruction_pointer(regs) > TASK_SIZE) + arm64_apply_bp_hardening(); + local_irq_enable(); + } + if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS)) pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n", tsk->comm, task_pid_nr(tsk), @@ -837,6 +843,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (interrupts_enabled(regs)) trace_hardirqs_off(); + if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE) + arm64_apply_bp_hardening(); + if (!inf->fn(addr, esr, regs)) { rv = 1; } else { From 48c3538c35780838da19773b615ede148e6af2b0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 Feb 2018 17:31:40 +0000 Subject: [PATCH 320/770] arm64: entry: Apply BP hardening for suspicious interrupts from EL0 Commit 30d88c0e3ace upstream. It is possible to take an IRQ from EL0 following a branch to a kernel address in such a way that the IRQ is prioritised over the instruction abort. Whilst an attacker would need to get the stars to align here, it might be sufficient with enough calibration so perform BP hardening in the rare case that we see a kernel address in the ELR when handling an IRQ from EL0. Reported-by: Dan Hettena Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 5 +++++ arch/arm64/mm/fault.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ac7df45b296b..93958d1341bb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -821,6 +821,11 @@ el0_irq_naked: #endif ct_user_exit +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: +#endif irq_handler #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 58992467ebb9..5edb706aacb0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -751,6 +751,12 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die("", regs, &info, esr); } +asmlinkage void __exception do_el0_irq_bp_hardening(void) +{ + /* PC has already been checked in entry.S */ + arm64_apply_bp_hardening(); +} + asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, struct pt_regs *regs) From 3317097b2b4affdd0b600ef0eb558a3e82964dc6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Jan 2018 11:19:34 +0000 Subject: [PATCH 321/770] arm64: cputype: Add missing MIDR values for Cortex-A72 and Cortex-A75 Commit a65d219fe5dc upstream. Hook up MIDR values for the Cortex-A72 and Cortex-A75 CPUs, since they will soon need MIDR matches for hardening the branch predictor. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 157afb9f7a35..be7bd19c87ec 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,8 +79,10 @@ #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 #define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A72 0xD08 #define ARM_CPU_PART_CORTEX_A53 0xD03 #define ARM_CPU_PART_CORTEX_A73 0xD09 +#define ARM_CPU_PART_CORTEX_A75 0xD0A #define APM_CPU_PART_POTENZA 0x000 @@ -97,7 +99,9 @@ #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) From 48993dfa1af8c719576a18c0e2ca1d611297e34e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Jan 2018 12:46:21 +0000 Subject: [PATCH 322/770] arm64: Implement branch predictor hardening for affected Cortex-A CPUs Commit aa6acde65e03 upstream. Cortex-A57, A72, A73 and A75 are susceptible to branch predictor aliasing and can theoretically be attacked by malicious code. This patch implements a PSCI-based mitigation for these CPUs when available. The call into firmware will invalidate the branch predictor state, preventing any malicious entries from affecting other victim contexts. Co-developed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/bpi.S | 24 +++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index 06a931eb2673..dec95bd82e31 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -53,3 +53,27 @@ ENTRY(__bp_harden_hyp_vecs_start) vectors __kvm_hyp_vector .endr ENTRY(__bp_harden_hyp_vecs_end) +ENTRY(__psci_hyp_bp_inval_start) + sub sp, sp, #(8 * 18) + stp x16, x17, [sp, #(16 * 0)] + stp x14, x15, [sp, #(16 * 1)] + stp x12, x13, [sp, #(16 * 2)] + stp x10, x11, [sp, #(16 * 3)] + stp x8, x9, [sp, #(16 * 4)] + stp x6, x7, [sp, #(16 * 5)] + stp x4, x5, [sp, #(16 * 6)] + stp x2, x3, [sp, #(16 * 7)] + stp x0, x1, [sp, #(16 * 8)] + mov x0, #0x84000000 + smc #0 + ldp x16, x17, [sp, #(16 * 0)] + ldp x14, x15, [sp, #(16 * 1)] + ldp x12, x13, [sp, #(16 * 2)] + ldp x10, x11, [sp, #(16 * 3)] + ldp x8, x9, [sp, #(16 * 4)] + ldp x6, x7, [sp, #(16 * 5)] + ldp x4, x5, [sp, #(16 * 6)] + ldp x2, x3, [sp, #(16 * 7)] + ldp x0, x1, [sp, #(16 * 8)] + add sp, sp, #(8 * 18) +ENTRY(__psci_hyp_bp_inval_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3f2fee9d4590..e09027094f31 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -67,6 +67,8 @@ static int cpu_enable_trap_ctr_access(void *__unused) DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM +extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; + static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) { @@ -108,6 +110,9 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else +#define __psci_hyp_bp_inval_start NULL +#define __psci_hyp_bp_inval_end NULL + static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -132,6 +137,21 @@ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); } + +#include + +static int enable_psci_bp_hardening(void *data) +{ + const struct arm64_cpu_capabilities *entry = data; + + if (psci_ops.get_version) + install_bp_hardening_cb(entry, + (bp_hardening_cb_t)psci_ops.get_version, + __psci_hyp_bp_inval_start, + __psci_hyp_bp_inval_end); + + return 0; +} #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define MIDR_RANGE(model, min, max) \ @@ -281,6 +301,28 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_858921, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, +#endif +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + .enable = enable_psci_bp_hardening, + }, #endif { } From 9b26a45c34e40575e53287365be7571b9cda21ab Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 5 Jan 2018 14:28:59 -0600 Subject: [PATCH 323/770] arm64: Implement branch predictor hardening for Falkor Commit ec82b567a74f upstream. Falkor is susceptible to branch predictor aliasing and can theoretically be attacked by malicious code. This patch implements a mitigation for these attacks, preventing any malicious entries from affecting other victim contexts. Signed-off-by: Shanker Donthineni [will: fix label name when !CONFIG_KVM and remove references to MIDR_FALKOR] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/kernel/bpi.S | 8 +++++++ arch/arm64/kernel/cpu_errata.c | 40 ++++++++++++++++++++++++++++++-- arch/arm64/kvm/hyp/entry.S | 12 ++++++++++ arch/arm64/kvm/hyp/switch.c | 8 +++++++ 6 files changed, 70 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8498eb4ccfc2..2e7b236bc596 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -42,7 +42,8 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_UNMAP_KERNEL_AT_EL0 23 #define ARM64_HARDEN_BRANCH_PREDICTOR 24 +#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 -#define ARM64_NCAPS 25 +#define ARM64_NCAPS 26 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 26a64d0f9ab9..a7ef5a051911 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -66,6 +66,8 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); +extern void __qcom_hyp_sanitize_btac_predictors(void); + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index dec95bd82e31..76225c2611ea 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -77,3 +77,11 @@ ENTRY(__psci_hyp_bp_inval_start) ldp x0, x1, [sp, #(16 * 8)] add sp, sp, #(8 * 18) ENTRY(__psci_hyp_bp_inval_end) + +ENTRY(__qcom_hyp_sanitize_link_stack_start) + stp x29, x30, [sp, #-16]! + .rept 16 + bl . + 4 + .endr + ldp x29, x30, [sp], #16 +ENTRY(__qcom_hyp_sanitize_link_stack_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e09027094f31..54e41dfe41f6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,6 +68,8 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; +extern char __qcom_hyp_sanitize_link_stack_start[]; +extern char __qcom_hyp_sanitize_link_stack_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -110,8 +112,10 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __psci_hyp_bp_inval_start NULL -#define __psci_hyp_bp_inval_end NULL +#define __psci_hyp_bp_inval_start NULL +#define __psci_hyp_bp_inval_end NULL +#define __qcom_hyp_sanitize_link_stack_start NULL +#define __qcom_hyp_sanitize_link_stack_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -152,6 +156,29 @@ static int enable_psci_bp_hardening(void *data) return 0; } + +static void qcom_link_stack_sanitization(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + +static int qcom_enable_link_stack_sanitization(void *data) +{ + const struct arm64_cpu_capabilities *entry = data; + + install_bp_hardening_cb(entry, qcom_link_stack_sanitization, + __qcom_hyp_sanitize_link_stack_start, + __qcom_hyp_sanitize_link_stack_end); + + return 0; +} #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define MIDR_RANGE(model, min, max) \ @@ -323,6 +350,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), .enable = enable_psci_bp_hardening, }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + .enable = qcom_enable_link_stack_sanitization, + }, + { + .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + }, #endif { } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 12ee62d6d410..9c45c6af1f58 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -196,3 +196,15 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) + +ENTRY(__qcom_hyp_sanitize_btac_predictors) + /** + * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) + * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls + * b15-b0: contains SiP functionID + */ + movz x0, #0x1700 + movk x0, #0xc200, lsl #16 + smc #0 + ret +ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 9b2a76d783eb..0d3d3f1e348c 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -364,6 +364,14 @@ again: /* 0 falls through to be handled out of EL2 */ } + if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { + u32 midr = read_cpuid_id(); + + /* Apply BTAC predictors mitigation to all Falkor chips */ + if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1) + __qcom_hyp_sanitize_btac_predictors(); + } + fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); From 402aeac58753ad5c3582eb8175a336f8e1fb62f4 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Fri, 19 Jan 2018 04:22:47 -0800 Subject: [PATCH 324/770] arm64: Branch predictor hardening for Cavium ThunderX2 Commit f3d795d9b360 upstream. Use PSCI based mitigation for speculative execution attacks targeting the branch predictor. We use the same mechanism as the one used for Cortex-A CPUs, we expect the PSCI version call to have a side effect of clearing the BTBs. Acked-by: Will Deacon Signed-off-by: Jayachandran C Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 54e41dfe41f6..ed6881882231 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -359,6 +359,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + .enable = enable_psci_bp_hardening, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + .enable = enable_psci_bp_hardening, + }, #endif { } From 0b3512fa7b0a4f1f187a5e38112c5bebaea87fc1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:07 +0000 Subject: [PATCH 325/770] arm64: KVM: Increment PC after handling an SMC trap Commit f5115e8869e1 upstream. When handling an SMC trap, the "preferred return address" is set to that of the SMC, and not the next PC (which is a departure from the behaviour of an SMC that isn't trapped). Increment PC in the handler, as the guest is otherwise forever stuck... Cc: stable@vger.kernel.org Fixes: acfb3b883f6d ("arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls") Reviewed-by: Christoffer Dall Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/handle_exit.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 380261e258ef..7747f937c250 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -53,7 +53,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } From 591862b560003518dfc369d44d1e177d96f7104c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:08 +0000 Subject: [PATCH 326/770] arm/arm64: KVM: Consolidate the PSCI include files Commit 1a2fb94e6a77 upstream. As we're about to update the PSCI support, and because I'm lazy, let's move the PSCI include file to include/kvm so that both ARM architectures can find it. Acked-by: Christoffer Dall Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_psci.h | 27 ------------------- arch/arm/kvm/handle_exit.c | 2 +- arch/arm64/kvm/handle_exit.c | 3 ++- .../asm/kvm_psci.h => include/kvm/arm_psci.h | 6 ++--- virt/kvm/arm/arm.c | 2 +- virt/kvm/arm/psci.c | 3 ++- 6 files changed, 9 insertions(+), 34 deletions(-) delete mode 100644 arch/arm/include/asm/kvm_psci.h rename arch/arm64/include/asm/kvm_psci.h => include/kvm/arm_psci.h (89%) diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h deleted file mode 100644 index 6bda945d31fa..000000000000 --- a/arch/arm/include/asm/kvm_psci.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __ARM_KVM_PSCI_H__ -#define __ARM_KVM_PSCI_H__ - -#define KVM_ARM_PSCI_0_1 1 -#define KVM_ARM_PSCI_0_2 2 - -int kvm_psci_version(struct kvm_vcpu *vcpu); -int kvm_psci_call(struct kvm_vcpu *vcpu); - -#endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index cf8bf6bf87c4..61fb27cdd9c1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7747f937c250..ba2ade75d97d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -22,12 +22,13 @@ #include #include +#include + #include #include #include #include #include -#include #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/arch/arm64/include/asm/kvm_psci.h b/include/kvm/arm_psci.h similarity index 89% rename from arch/arm64/include/asm/kvm_psci.h rename to include/kvm/arm_psci.h index bc39e557c56c..2042bb909474 100644 --- a/arch/arm64/include/asm/kvm_psci.h +++ b/include/kvm/arm_psci.h @@ -15,8 +15,8 @@ * along with this program. If not, see . */ -#ifndef __ARM64_KVM_PSCI_H__ -#define __ARM64_KVM_PSCI_H__ +#ifndef __KVM_ARM_PSCI_H__ +#define __KVM_ARM_PSCI_H__ #define KVM_ARM_PSCI_0_1 1 #define KVM_ARM_PSCI_0_2 2 @@ -24,4 +24,4 @@ int kvm_psci_version(struct kvm_vcpu *vcpu); int kvm_psci_call(struct kvm_vcpu *vcpu); -#endif /* __ARM64_KVM_PSCI_H__ */ +#endif /* __KVM_ARM_PSCI_H__ */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index fc27a1528811..385ba604a9b2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -29,6 +29,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -44,7 +45,6 @@ #include #include #include -#include #include #ifdef REQUIRES_VIRT diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index f1e363bab5e8..b322e46fd142 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -21,9 +21,10 @@ #include #include -#include #include +#include + #include /* From 4efa1a863a1218d4b884a67178735950aab85c2e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:09 +0000 Subject: [PATCH 327/770] arm/arm64: KVM: Add PSCI_VERSION helper Commit d0a144f12a7c upstream. As we're about to trigger a PSCI version explosion, it doesn't hurt to introduce a PSCI_VERSION helper that is going to be used everywhere. Reviewed-by: Christoffer Dall Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/kvm/arm_psci.h | 6 ++++-- include/uapi/linux/psci.h | 3 +++ virt/kvm/arm/psci.c | 4 +--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 2042bb909474..5659343580a3 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -18,8 +18,10 @@ #ifndef __KVM_ARM_PSCI_H__ #define __KVM_ARM_PSCI_H__ -#define KVM_ARM_PSCI_0_1 1 -#define KVM_ARM_PSCI_0_2 2 +#include + +#define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) +#define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) int kvm_psci_version(struct kvm_vcpu *vcpu); int kvm_psci_call(struct kvm_vcpu *vcpu); diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 760e52a9640f..b3bcabe380da 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -88,6 +88,9 @@ (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) #define PSCI_VERSION_MINOR(ver) \ ((ver) & PSCI_VERSION_MINOR_MASK) +#define PSCI_VERSION(maj, min) \ + ((((maj) << PSCI_VERSION_MAJOR_SHIFT) & PSCI_VERSION_MAJOR_MASK) | \ + ((min) & PSCI_VERSION_MINOR_MASK)) /* PSCI features decoding (>=1.0) */ #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1 diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index b322e46fd142..999f94d6bb98 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -25,8 +25,6 @@ #include -#include - /* * This is an implementation of the Power State Coordination Interface * as described in ARM document number ARM DEN 0022A. @@ -222,7 +220,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) * Bits[31:16] = Major Version = 0 * Bits[15:0] = Minor Version = 2 */ - val = 2; + val = KVM_ARM_PSCI_0_2; break; case PSCI_0_2_FN_CPU_SUSPEND: case PSCI_0_2_FN64_CPU_SUSPEND: From ce15f32d48840bbaf49bf5b9ba540befb59548cb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:10 +0000 Subject: [PATCH 328/770] arm/arm64: KVM: Add smccc accessors to PSCI code Commit 84684fecd7ea upstream. Instead of open coding the accesses to the various registers, let's add explicit SMCCC accessors. Reviewed-by: Christoffer Dall Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/psci.c | 52 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 999f94d6bb98..c41553d35110 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -32,6 +32,38 @@ #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) +static u32 smccc_get_function(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 0); +} + +static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 1); +} + +static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 2); +} + +static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) +{ + return vcpu_get_reg(vcpu, 3); +} + +static void smccc_set_retval(struct kvm_vcpu *vcpu, + unsigned long a0, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + vcpu_set_reg(vcpu, 0, a0); + vcpu_set_reg(vcpu, 1, a1); + vcpu_set_reg(vcpu, 2, a2); + vcpu_set_reg(vcpu, 3, a3); +} + static unsigned long psci_affinity_mask(unsigned long affinity_level) { if (affinity_level <= 3) @@ -77,7 +109,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) unsigned long context_id; phys_addr_t target_pc; - cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; + cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); @@ -96,8 +128,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_INVALID_PARAMS; } - target_pc = vcpu_get_reg(source_vcpu, 2); - context_id = vcpu_get_reg(source_vcpu, 3); + target_pc = smccc_get_arg2(source_vcpu); + context_id = smccc_get_arg3(source_vcpu); kvm_reset_vcpu(vcpu); @@ -116,7 +148,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * NOTE: We always update r0 (or x0) because for PSCI v0.1 * the general puspose registers are undefined upon CPU_ON. */ - vcpu_set_reg(vcpu, 0, context_id); + smccc_set_retval(vcpu, context_id, 0, 0, 0); vcpu->arch.power_off = false; smp_mb(); /* Make sure the above is visible */ @@ -136,8 +168,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tmp; - target_affinity = vcpu_get_reg(vcpu, 1); - lowest_affinity_level = vcpu_get_reg(vcpu, 2); + target_affinity = smccc_get_arg1(vcpu); + lowest_affinity_level = smccc_get_arg2(vcpu); /* Determine target affinity mask */ target_affinity_mask = psci_affinity_mask(lowest_affinity_level); @@ -210,7 +242,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); + u32 psci_fn = smccc_get_function(vcpu); unsigned long val; int ret = 1; @@ -277,14 +309,14 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; } - vcpu_set_reg(vcpu, 0, val); + smccc_set_retval(vcpu, val, 0, 0, 0); return ret; } static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); + u32 psci_fn = smccc_get_function(vcpu); unsigned long val; switch (psci_fn) { @@ -302,7 +334,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) break; } - vcpu_set_reg(vcpu, 0, val); + smccc_set_retval(vcpu, val, 0, 0, 0); return 1; } From 4ba100aa94a0f8c44de187c46fee4917ce1e06aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:11 +0000 Subject: [PATCH 329/770] arm/arm64: KVM: Implement PSCI 1.0 support Commit 58e0b2239a4d upstream. PSCI 1.0 can be trivially implemented by providing the FEATURES call on top of PSCI 0.2 and returning 1.0 as the PSCI version. We happily ignore everything else, as they are either optional or are clarifications that do not require any additional change. PSCI 1.0 is now the default until we decide to add a userspace selection API. Reviewed-by: Christoffer Dall Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/kvm/arm_psci.h | 3 +++ virt/kvm/arm/psci.c | 45 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 5659343580a3..32360432cff5 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -22,6 +22,9 @@ #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) +#define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) + +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 int kvm_psci_version(struct kvm_vcpu *vcpu); int kvm_psci_call(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index c41553d35110..3e7c63e15f04 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -234,7 +234,7 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) int kvm_psci_version(struct kvm_vcpu *vcpu) { if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) - return KVM_ARM_PSCI_0_2; + return KVM_ARM_PSCI_LATEST; return KVM_ARM_PSCI_0_1; } @@ -313,6 +313,47 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) return ret; } +static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) +{ + u32 psci_fn = smccc_get_function(vcpu); + u32 feature; + unsigned long val; + int ret = 1; + + switch(psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + val = KVM_ARM_PSCI_1_0; + break; + case PSCI_1_0_FN_PSCI_FEATURES: + feature = smccc_get_arg1(vcpu); + switch(feature) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_1_0_FN_PSCI_FEATURES: + val = 0; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + break; + default: + return kvm_psci_0_2_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; +} + static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; @@ -355,6 +396,8 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_1_0: + return kvm_psci_1_0_call(vcpu); case KVM_ARM_PSCI_0_2: return kvm_psci_0_2_call(vcpu); case KVM_ARM_PSCI_0_1: From 45e2061147c329fc08f81a7e2a551b92bcc2a6a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:12 +0000 Subject: [PATCH 330/770] arm/arm64: KVM: Advertise SMCCC v1.1 Commit 09e6be12effd upstream. The new SMC Calling Convention (v1.1) allows for a reduced overhead when calling into the firmware, and provides a new feature discovery mechanism. Make it visible to KVM guests. Tested-by: Ard Biesheuvel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/handle_exit.c | 2 +- arch/arm64/kvm/handle_exit.c | 2 +- include/kvm/arm_psci.h | 2 +- include/linux/arm-smccc.h | 13 +++++++++++++ virt/kvm/arm/psci.c | 24 +++++++++++++++++++++++- 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 61fb27cdd9c1..76be5701f9d6 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -36,7 +36,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; - ret = kvm_psci_call(vcpu); + ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { kvm_inject_undefined(vcpu); return 1; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index ba2ade75d97d..ab48c5ed3943 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -43,7 +43,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; - ret = kvm_psci_call(vcpu); + ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { vcpu_set_reg(vcpu, 0, ~0UL); return 1; diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 32360432cff5..ed1dd8088f1c 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -27,6 +27,6 @@ #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 int kvm_psci_version(struct kvm_vcpu *vcpu); -int kvm_psci_call(struct kvm_vcpu *vcpu); +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); #endif /* __KVM_ARM_PSCI_H__ */ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 4c5bca38c653..dc68aa5a7261 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -60,6 +60,19 @@ #define ARM_SMCCC_QUIRK_NONE 0 #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ +#define ARM_SMCCC_VERSION_1_0 0x10000 +#define ARM_SMCCC_VERSION_1_1 0x10001 + +#define ARM_SMCCC_VERSION_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0) + +#define ARM_SMCCC_ARCH_FEATURES_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 1) + #ifndef __ASSEMBLY__ #include diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 3e7c63e15f04..46a98fee3ef5 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -339,6 +340,7 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN_SYSTEM_OFF: case PSCI_0_2_FN_SYSTEM_RESET: case PSCI_1_0_FN_PSCI_FEATURES: + case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; default: @@ -393,7 +395,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) * Errors: * -EINVAL: Unrecognized PSCI function */ -int kvm_psci_call(struct kvm_vcpu *vcpu) +static int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_1_0: @@ -406,3 +408,23 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) return -EINVAL; }; } + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + u32 val = PSCI_RET_NOT_SUPPORTED; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + /* Nothing supported yet */ + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} From 98be7165d9f71f5df9802548b7d8ab2cf6ccb211 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Jan 2018 16:38:37 +0000 Subject: [PATCH 331/770] arm64: KVM: Make PSCI_VERSION a fast path Commit 90348689d500 upstream. For those CPUs that require PSCI to perform a BP invalidation, going all the way to the PSCI code for not much is a waste of precious cycles. Let's terminate that call as early as possible. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/switch.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 0d3d3f1e348c..67f0ee24a406 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -322,6 +323,18 @@ again: if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) goto again; + if (exit_code == ARM_EXCEPTION_TRAP && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && + vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { + u64 val = PSCI_RET_NOT_SUPPORTED; + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + val = 2; + + vcpu_set_reg(vcpu, 0, val); + goto again; + } + if (static_branch_unlikely(&vgic_v2_cpuif_trap) && exit_code == ARM_EXCEPTION_TRAP) { bool valid; From 2cfe8929f6247dbee8def7e94f334909fe5ac084 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:13 +0000 Subject: [PATCH 332/770] arm/arm64: KVM: Turn kvm_psci_version into a static inline Commit a4097b351118 upstream. We're about to need kvm_psci_version in HYP too. So let's turn it into a static inline, and pass the kvm structure as a second parameter (so that HYP can do a kern_hyp_va on it). Tested-by: Ard Biesheuvel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/switch.c | 18 +++++++++++------- include/kvm/arm_psci.h | 21 ++++++++++++++++++++- virt/kvm/arm/psci.c | 12 ++---------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 67f0ee24a406..6568656147c0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -19,6 +19,8 @@ #include #include +#include + #include #include #include @@ -325,14 +327,16 @@ again: if (exit_code == ARM_EXCEPTION_TRAP && (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && - vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { - u64 val = PSCI_RET_NOT_SUPPORTED; - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) - val = 2; + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32)) { + u32 val = vcpu_get_reg(vcpu, 0); - vcpu_set_reg(vcpu, 0, val); - goto again; + if (val == PSCI_0_2_FN_PSCI_VERSION) { + val = kvm_psci_version(vcpu, kern_hyp_va(vcpu->kvm)); + if (unlikely(val == KVM_ARM_PSCI_0_1)) + val = PSCI_RET_NOT_SUPPORTED; + vcpu_set_reg(vcpu, 0, val); + goto again; + } } if (static_branch_unlikely(&vgic_v2_cpuif_trap) && diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index ed1dd8088f1c..e518e4e3dfb5 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -18,6 +18,7 @@ #ifndef __KVM_ARM_PSCI_H__ #define __KVM_ARM_PSCI_H__ +#include #include #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) @@ -26,7 +27,25 @@ #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 -int kvm_psci_version(struct kvm_vcpu *vcpu); +/* + * We need the KVM pointer independently from the vcpu as we can call + * this from HYP, and need to apply kern_hyp_va on it... + */ +static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) +{ + /* + * Our PSCI implementation stays the same across versions from + * v0.2 onward, only adding the few mandatory functions (such + * as FEATURES with 1.0) that are required by newer + * revisions. It is thus safe to return the latest. + */ + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) + return KVM_ARM_PSCI_LATEST; + + return KVM_ARM_PSCI_0_1; +} + + int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); #endif /* __KVM_ARM_PSCI_H__ */ diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 46a98fee3ef5..e105c1153794 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -123,7 +123,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) if (!vcpu) return PSCI_RET_INVALID_PARAMS; if (!vcpu->arch.power_off) { - if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; else return PSCI_RET_INVALID_PARAMS; @@ -232,14 +232,6 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); } -int kvm_psci_version(struct kvm_vcpu *vcpu) -{ - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) - return KVM_ARM_PSCI_LATEST; - - return KVM_ARM_PSCI_0_1; -} - static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; @@ -397,7 +389,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) */ static int kvm_psci_call(struct kvm_vcpu *vcpu) { - switch (kvm_psci_version(vcpu)) { + switch (kvm_psci_version(vcpu, vcpu->kvm)) { case KVM_ARM_PSCI_1_0: return kvm_psci_1_0_call(vcpu); case KVM_ARM_PSCI_0_2: From e47273d086236d06c3c2851fa8599971086b2a73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:14 +0000 Subject: [PATCH 333/770] arm64: KVM: Report SMCCC_ARCH_WORKAROUND_1 BP hardening support Commit 6167ec5c9145 upstream. A new feature of SMCCC 1.1 is that it offers firmware-based CPU workarounds. In particular, SMCCC_ARCH_WORKAROUND_1 provides BP hardening for CVE-2017-5715. If the host has some mitigation for this issue, report that we deal with it using SMCCC_ARCH_WORKAROUND_1, as we apply the host workaround on every guest exit. Tested-by: Ard Biesheuvel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_host.h | 6 ++++++ arch/arm64/include/asm/kvm_host.h | 5 +++++ include/linux/arm-smccc.h | 5 +++++ virt/kvm/arm/psci.c | 9 ++++++++- 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 4a879f6ff13b..31fbb9285f62 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -293,4 +293,10 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +static inline bool kvm_arm_harden_branch_predictor(void) +{ + /* No way to detect it yet, pretend it is not there. */ + return false; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e923b58606e2..8ad208cb866c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -384,4 +384,9 @@ static inline void __cpu_init_stage2(void) "PARange is %d bits, unsupported configuration!", parange); } +static inline bool kvm_arm_harden_branch_predictor(void) +{ + return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index dc68aa5a7261..e1ef944ef1da 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -73,6 +73,11 @@ ARM_SMCCC_SMC_32, \ 0, 1) +#define ARM_SMCCC_ARCH_WORKAROUND_1 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x8000) + #ifndef __ASSEMBLY__ #include diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index e105c1153794..6919352cbf15 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -405,13 +405,20 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { u32 func_id = smccc_get_function(vcpu); u32 val = PSCI_RET_NOT_SUPPORTED; + u32 feature; switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: val = ARM_SMCCC_VERSION_1_1; break; case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: - /* Nothing supported yet */ + feature = smccc_get_arg1(vcpu); + switch(feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + if (kvm_arm_harden_branch_predictor()) + val = 0; + break; + } break; default: return kvm_psci_call(vcpu); From 6db26ad1dc4685843ab3c4d655b51777e04d131e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:15 +0000 Subject: [PATCH 334/770] arm64: KVM: Add SMCCC_ARCH_WORKAROUND_1 fast handling Commit f72af90c3783 upstream. We want SMCCC_ARCH_WORKAROUND_1 to be fast. As fast as possible. So let's intercept it as early as we can by testing for the function call number as soon as we've identified a HVC call coming from the guest. Tested-by: Ard Biesheuvel Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/hyp-entry.S | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5170ce1021da..f49b53331d28 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include @@ -64,10 +65,11 @@ alternative_endif lsr x0, x1, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 + ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap - mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x1, el1_trap // called HVC + mrs x1, vttbr_el2 // If vttbr is valid, the guest + cbnz x1, el1_hvc_guest // called HVC /* Here, we're pretty sure the host called HVC. */ ldp x0, x1, [sp], #16 @@ -100,6 +102,20 @@ alternative_endif eret +el1_hvc_guest: + /* + * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. + * The workaround has already been applied on the host, + * so let's quickly get back to the guest. We don't bother + * restoring x1, as it can be clobbered anyway. + */ + ldr x1, [sp] // Guest's x0 + eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 + cbnz w1, el1_trap + mov x0, x1 + add sp, sp, #16 + eret + el1_trap: /* * x0: ESR_EC From 906a9f396cc8005807f6741e881da0ad317c4091 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:16 +0000 Subject: [PATCH 335/770] firmware/psci: Expose PSCI conduit Commit 09a8d6d48499 upstream. In order to call into the firmware to apply workarounds, it is useful to find out whether we're using HVC or SMC. Let's expose this through the psci_ops. Acked-by: Lorenzo Pieralisi Reviewed-by: Robin Murphy Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/psci.c | 28 +++++++++++++++++++++++----- include/linux/psci.h | 7 +++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 8b25d31e8401..e9493da2b111 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -59,7 +59,9 @@ bool psci_tos_resident_on(int cpu) return cpu == resident_cpu; } -struct psci_operations psci_ops; +struct psci_operations psci_ops = { + .conduit = PSCI_CONDUIT_NONE, +}; typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); @@ -210,6 +212,22 @@ static unsigned long psci_migrate_info_up_cpu(void) 0, 0, 0); } +static void set_conduit(enum psci_conduit conduit) +{ + switch (conduit) { + case PSCI_CONDUIT_HVC: + invoke_psci_fn = __invoke_psci_fn_hvc; + break; + case PSCI_CONDUIT_SMC: + invoke_psci_fn = __invoke_psci_fn_smc; + break; + default: + WARN(1, "Unexpected PSCI conduit %d\n", conduit); + } + + psci_ops.conduit = conduit; +} + static int get_set_conduit_method(struct device_node *np) { const char *method; @@ -222,9 +240,9 @@ static int get_set_conduit_method(struct device_node *np) } if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; + set_conduit(PSCI_CONDUIT_HVC); } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; + set_conduit(PSCI_CONDUIT_SMC); } else { pr_warn("invalid \"method\" property: %s\n", method); return -EINVAL; @@ -654,9 +672,9 @@ int __init psci_acpi_init(void) pr_info("probing for conduit method from ACPI.\n"); if (acpi_psci_use_hvc()) - invoke_psci_fn = __invoke_psci_fn_hvc; + set_conduit(PSCI_CONDUIT_HVC); else - invoke_psci_fn = __invoke_psci_fn_smc; + set_conduit(PSCI_CONDUIT_SMC); return psci_probe(); } diff --git a/include/linux/psci.h b/include/linux/psci.h index 6306ab10af18..66ff54787d3e 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -25,6 +25,12 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_init_idle(unsigned int cpu); int psci_cpu_suspend_enter(unsigned long index); +enum psci_conduit { + PSCI_CONDUIT_NONE, + PSCI_CONDUIT_SMC, + PSCI_CONDUIT_HVC, +}; + struct psci_operations { u32 (*get_version)(void); int (*cpu_suspend)(u32 state, unsigned long entry_point); @@ -34,6 +40,7 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); + enum psci_conduit conduit; }; extern struct psci_operations psci_ops; From 908ad7a1484d78228bc88d242121574f86eb35e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:17 +0000 Subject: [PATCH 336/770] firmware/psci: Expose SMCCC version through psci_ops Commit e78eef554a91 upstream. Since PSCI 1.0 allows the SMCCC version to be (indirectly) probed, let's do that at boot time, and expose the version of the calling convention as part of the psci_ops structure. Acked-by: Lorenzo Pieralisi Reviewed-by: Robin Murphy Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/psci.c | 27 +++++++++++++++++++++++++++ include/linux/psci.h | 6 ++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index e9493da2b111..c80ec1d03274 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -61,6 +61,7 @@ bool psci_tos_resident_on(int cpu) struct psci_operations psci_ops = { .conduit = PSCI_CONDUIT_NONE, + .smccc_version = SMCCC_VERSION_1_0, }; typedef unsigned long (psci_fn)(unsigned long, unsigned long, @@ -511,6 +512,31 @@ static void __init psci_init_migrate(void) pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid); } +static void __init psci_init_smccc(void) +{ + u32 ver = ARM_SMCCC_VERSION_1_0; + int feature; + + feature = psci_features(ARM_SMCCC_VERSION_FUNC_ID); + + if (feature != PSCI_RET_NOT_SUPPORTED) { + u32 ret; + ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0); + if (ret == ARM_SMCCC_VERSION_1_1) { + psci_ops.smccc_version = SMCCC_VERSION_1_1; + ver = ret; + } + } + + /* + * Conveniently, the SMCCC and PSCI versions are encoded the + * same way. No, this isn't accidental. + */ + pr_info("SMC Calling Convention v%d.%d\n", + PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver)); + +} + static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); @@ -559,6 +585,7 @@ static int __init psci_probe(void) psci_init_migrate(); if (PSCI_VERSION_MAJOR(ver) >= 1) { + psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); } diff --git a/include/linux/psci.h b/include/linux/psci.h index 66ff54787d3e..347077cf19c6 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -31,6 +31,11 @@ enum psci_conduit { PSCI_CONDUIT_HVC, }; +enum smccc_version { + SMCCC_VERSION_1_0, + SMCCC_VERSION_1_1, +}; + struct psci_operations { u32 (*get_version)(void); int (*cpu_suspend)(u32 state, unsigned long entry_point); @@ -41,6 +46,7 @@ struct psci_operations { unsigned long lowest_affinity_level); int (*migrate_info_type)(void); enum psci_conduit conduit; + enum smccc_version smccc_version; }; extern struct psci_operations psci_ops; From 37dc3e6c117eced753d6ce6cce85535cec3ad013 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:18 +0000 Subject: [PATCH 337/770] arm/arm64: smccc: Make function identifiers an unsigned quantity Commit ded4c39e93f3 upstream. Function identifiers are a 32bit, unsigned quantity. But we never tell so to the compiler, resulting in the following: 4ac: b26187e0 mov x0, #0xffffffff80000001 We thus rely on the firmware narrowing it for us, which is not always a reasonable expectation. Cc: stable@vger.kernel.org Reported-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Reviewed-by: Robin Murphy Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/linux/arm-smccc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index e1ef944ef1da..dd44d8458c04 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -14,14 +14,16 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H +#include + /* * This file provides common defines for ARM SMC Calling Convention as * specified in * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html */ -#define ARM_SMCCC_STD_CALL 0 -#define ARM_SMCCC_FAST_CALL 1 +#define ARM_SMCCC_STD_CALL _AC(0,U) +#define ARM_SMCCC_FAST_CALL _AC(1,U) #define ARM_SMCCC_TYPE_SHIFT 31 #define ARM_SMCCC_SMC_32 0 From ac63fdb4a2b229bdd7ad8449a88791ad5da5f572 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:19 +0000 Subject: [PATCH 338/770] arm/arm64: smccc: Implement SMCCC v1.1 inline primitive Commit f2d3b2e8759a upstream. One of the major improvement of SMCCC v1.1 is that it only clobbers the first 4 registers, both on 32 and 64bit. This means that it becomes very easy to provide an inline version of the SMC call primitive, and avoid performing a function call to stash the registers that would otherwise be clobbered by SMCCC v1.0. Reviewed-by: Robin Murphy Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/linux/arm-smccc.h | 141 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index dd44d8458c04..a031897fca76 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -150,5 +150,146 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) +/* SMCCC v1.1 implementation madness follows */ +#ifdef CONFIG_ARM64 + +#define SMCCC_SMC_INST "smc #0" +#define SMCCC_HVC_INST "hvc #0" + +#elif defined(CONFIG_ARM) +#include +#include + +#define SMCCC_SMC_INST __SMC(0) +#define SMCCC_HVC_INST __HVC(0) + +#endif + +#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x + +#define __count_args(...) \ + ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __constraint_write_0 \ + "+r" (r0), "=&r" (r1), "=&r" (r2), "=&r" (r3) +#define __constraint_write_1 \ + "+r" (r0), "+r" (r1), "=&r" (r2), "=&r" (r3) +#define __constraint_write_2 \ + "+r" (r0), "+r" (r1), "+r" (r2), "=&r" (r3) +#define __constraint_write_3 \ + "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3) +#define __constraint_write_4 __constraint_write_3 +#define __constraint_write_5 __constraint_write_4 +#define __constraint_write_6 __constraint_write_5 +#define __constraint_write_7 __constraint_write_6 + +#define __constraint_read_0 +#define __constraint_read_1 +#define __constraint_read_2 +#define __constraint_read_3 +#define __constraint_read_4 "r" (r4) +#define __constraint_read_5 __constraint_read_4, "r" (r5) +#define __constraint_read_6 __constraint_read_5, "r" (r6) +#define __constraint_read_7 __constraint_read_6, "r" (r7) + +#define __declare_arg_0(a0, res) \ + struct arm_smccc_res *___res = res; \ + register u32 r0 asm("r0") = a0; \ + register unsigned long r1 asm("r1"); \ + register unsigned long r2 asm("r2"); \ + register unsigned long r3 asm("r3") + +#define __declare_arg_1(a0, a1, res) \ + struct arm_smccc_res *___res = res; \ + register u32 r0 asm("r0") = a0; \ + register typeof(a1) r1 asm("r1") = a1; \ + register unsigned long r2 asm("r2"); \ + register unsigned long r3 asm("r3") + +#define __declare_arg_2(a0, a1, a2, res) \ + struct arm_smccc_res *___res = res; \ + register u32 r0 asm("r0") = a0; \ + register typeof(a1) r1 asm("r1") = a1; \ + register typeof(a2) r2 asm("r2") = a2; \ + register unsigned long r3 asm("r3") + +#define __declare_arg_3(a0, a1, a2, a3, res) \ + struct arm_smccc_res *___res = res; \ + register u32 r0 asm("r0") = a0; \ + register typeof(a1) r1 asm("r1") = a1; \ + register typeof(a2) r2 asm("r2") = a2; \ + register typeof(a3) r3 asm("r3") = a3 + +#define __declare_arg_4(a0, a1, a2, a3, a4, res) \ + __declare_arg_3(a0, a1, a2, a3, res); \ + register typeof(a4) r4 asm("r4") = a4 + +#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ + __declare_arg_4(a0, a1, a2, a3, a4, res); \ + register typeof(a5) r5 asm("r5") = a5 + +#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ + __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ + register typeof(a6) r6 asm("r6") = a6 + +#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ + __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ + register typeof(a7) r7 asm("r7") = a7 + +#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) +#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) + +#define ___constraints(count) \ + : __constraint_write_ ## count \ + : __constraint_read_ ## count \ + : "memory" +#define __constraints(count) ___constraints(count) + +/* + * We have an output list that is not necessarily used, and GCC feels + * entitled to optimise the whole sequence away. "volatile" is what + * makes it stick. + */ +#define __arm_smccc_1_1(inst, ...) \ + do { \ + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ + asm volatile(inst "\n" \ + __constraints(__count_args(__VA_ARGS__))); \ + if (___res) \ + *___res = (typeof(*___res)){r0, r1, r2, r3}; \ + } while (0) + +/* + * arm_smccc_1_1_smc() - make an SMCCC v1.1 compliant SMC call + * + * This is a variadic macro taking one to eight source arguments, and + * an optional return structure. + * + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This macro is used to make SMC calls following SMC Calling Convention v1.1. + * The content of the supplied param are copied to registers 0 to 7 prior + * to the SMC instruction. The return values are updated with the content + * from register 0 to 3 on return from the SMC instruction if not NULL. + */ +#define arm_smccc_1_1_smc(...) __arm_smccc_1_1(SMCCC_SMC_INST, __VA_ARGS__) + +/* + * arm_smccc_1_1_hvc() - make an SMCCC v1.1 compliant HVC call + * + * This is a variadic macro taking one to eight source arguments, and + * an optional return structure. + * + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This macro is used to make HVC calls following SMC Calling Convention v1.1. + * The content of the supplied param are copied to registers 0 to 7 prior + * to the HVC instruction. The return values are updated with the content + * from register 0 to 3 on return from the HVC instruction if not NULL. + */ +#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ From dbca45b996550f1ab646011f48bede5b9c2e2ea9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:20 +0000 Subject: [PATCH 339/770] arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support Commit b092201e0020 upstream. Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1. It is lovely. Really. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/bpi.S | 20 ++++++++++ arch/arm64/kernel/cpu_errata.c | 68 +++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index 76225c2611ea..fdeed629f2c6 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -17,6 +17,7 @@ */ #include +#include .macro ventry target .rept 31 @@ -85,3 +86,22 @@ ENTRY(__qcom_hyp_sanitize_link_stack_start) .endr ldp x29, x30, [sp], #16 ENTRY(__qcom_hyp_sanitize_link_stack_end) + +.macro smccc_workaround_1 inst + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + \inst #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +.endm + +ENTRY(__smccc_workaround_1_smc_start) + smccc_workaround_1 smc +ENTRY(__smccc_workaround_1_smc_end) + +ENTRY(__smccc_workaround_1_hvc_start) + smccc_workaround_1 hvc +ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ed6881882231..9e77809a3b23 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -70,6 +70,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; extern char __qcom_hyp_sanitize_link_stack_start[]; extern char __qcom_hyp_sanitize_link_stack_end[]; +extern char __smccc_workaround_1_smc_start[]; +extern char __smccc_workaround_1_smc_end[]; +extern char __smccc_workaround_1_hvc_start[]; +extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -116,6 +120,10 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #define __psci_hyp_bp_inval_end NULL #define __qcom_hyp_sanitize_link_stack_start NULL #define __qcom_hyp_sanitize_link_stack_end NULL +#define __smccc_workaround_1_smc_start NULL +#define __smccc_workaround_1_smc_end NULL +#define __smccc_workaround_1_hvc_start NULL +#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -142,17 +150,75 @@ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); } +#include +#include #include +static void call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static bool check_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) +{ + bp_hardening_cb_t cb; + void *smccc_start, *smccc_end; + struct arm_smccc_res res; + + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) + return false; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return false; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) + return false; + cb = call_hvc_arch_workaround_1; + smccc_start = __smccc_workaround_1_hvc_start; + smccc_end = __smccc_workaround_1_hvc_end; + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) + return false; + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + + default: + return false; + } + + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); + + return true; +} + static int enable_psci_bp_hardening(void *data) { const struct arm64_cpu_capabilities *entry = data; - if (psci_ops.get_version) + if (psci_ops.get_version) { + if (check_smccc_arch_workaround_1(entry)) + return 0; + install_bp_hardening_cb(entry, (bp_hardening_cb_t)psci_ops.get_version, __psci_hyp_bp_inval_start, __psci_hyp_bp_inval_end); + } return 0; } From c584c903bae9a1ec6e881847713df9c1b8b87df0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:21 +0000 Subject: [PATCH 340/770] arm64: Kill PSCI_GET_VERSION as a variant-2 workaround Commit 3a0a397ff5ff upstream. Now that we've standardised on SMCCC v1.1 to perform the branch prediction invalidation, let's drop the previous band-aid. If vendors haven't updated their firmware to do SMCCC 1.1, they haven't updated PSCI either, so we don't loose anything. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/bpi.S | 24 ------------------ arch/arm64/kernel/cpu_errata.c | 45 ++++++++++------------------------ arch/arm64/kvm/hyp/switch.c | 14 ----------- 3 files changed, 13 insertions(+), 70 deletions(-) diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index fdeed629f2c6..e5de33513b5d 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -54,30 +54,6 @@ ENTRY(__bp_harden_hyp_vecs_start) vectors __kvm_hyp_vector .endr ENTRY(__bp_harden_hyp_vecs_end) -ENTRY(__psci_hyp_bp_inval_start) - sub sp, sp, #(8 * 18) - stp x16, x17, [sp, #(16 * 0)] - stp x14, x15, [sp, #(16 * 1)] - stp x12, x13, [sp, #(16 * 2)] - stp x10, x11, [sp, #(16 * 3)] - stp x8, x9, [sp, #(16 * 4)] - stp x6, x7, [sp, #(16 * 5)] - stp x4, x5, [sp, #(16 * 6)] - stp x2, x3, [sp, #(16 * 7)] - stp x0, x1, [sp, #(16 * 8)] - mov x0, #0x84000000 - smc #0 - ldp x16, x17, [sp, #(16 * 0)] - ldp x14, x15, [sp, #(16 * 1)] - ldp x12, x13, [sp, #(16 * 2)] - ldp x10, x11, [sp, #(16 * 3)] - ldp x8, x9, [sp, #(16 * 4)] - ldp x6, x7, [sp, #(16 * 5)] - ldp x4, x5, [sp, #(16 * 6)] - ldp x2, x3, [sp, #(16 * 7)] - ldp x0, x1, [sp, #(16 * 8)] - add sp, sp, #(8 * 18) -ENTRY(__psci_hyp_bp_inval_end) ENTRY(__qcom_hyp_sanitize_link_stack_start) stp x29, x30, [sp, #-16]! diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e77809a3b23..07823595b7f0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -67,7 +67,6 @@ static int cpu_enable_trap_ctr_access(void *__unused) DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM -extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; extern char __qcom_hyp_sanitize_link_stack_start[]; extern char __qcom_hyp_sanitize_link_stack_end[]; extern char __smccc_workaround_1_smc_start[]; @@ -116,8 +115,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __psci_hyp_bp_inval_start NULL -#define __psci_hyp_bp_inval_end NULL #define __qcom_hyp_sanitize_link_stack_start NULL #define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL @@ -164,24 +161,25 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static bool check_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) +static int enable_smccc_arch_workaround_1(void *data) { + const struct arm64_cpu_capabilities *entry = data; bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return false; + return 0; if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return false; + return 0; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if (res.a0) - return false; + return 0; cb = call_hvc_arch_workaround_1; smccc_start = __smccc_workaround_1_hvc_start; smccc_end = __smccc_workaround_1_hvc_end; @@ -191,35 +189,18 @@ static bool check_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *e arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if (res.a0) - return false; + return 0; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return false; + return 0; } install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); - return true; -} - -static int enable_psci_bp_hardening(void *data) -{ - const struct arm64_cpu_capabilities *entry = data; - - if (psci_ops.get_version) { - if (check_smccc_arch_workaround_1(entry)) - return 0; - - install_bp_hardening_cb(entry, - (bp_hardening_cb_t)psci_ops.get_version, - __psci_hyp_bp_inval_start, - __psci_hyp_bp_inval_end); - } - return 0; } @@ -399,22 +380,22 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, @@ -428,12 +409,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, #endif { diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 6568656147c0..79364d3455c0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -325,20 +325,6 @@ again: if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) goto again; - if (exit_code == ARM_EXCEPTION_TRAP && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32)) { - u32 val = vcpu_get_reg(vcpu, 0); - - if (val == PSCI_0_2_FN_PSCI_VERSION) { - val = kvm_psci_version(vcpu, kern_hyp_va(vcpu->kvm)); - if (unlikely(val == KVM_ARM_PSCI_0_1)) - val = PSCI_RET_NOT_SUPPORTED; - vcpu_set_reg(vcpu, 0, val); - goto again; - } - } - if (static_branch_unlikely(&vgic_v2_cpuif_trap) && exit_code == ARM_EXCEPTION_TRAP) { bool valid; From 0763f0418bb53f4724a6d711bf3b735a71ce3d24 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:54:10 +0200 Subject: [PATCH 341/770] mtd: cfi: convert inline functions to macros commit 9e343e87d2c4c707ef8fae2844864d4dde3a2d13 upstream. The map_word_() functions, dating back to linux-2.6.8, try to perform bitwise operations on a 'map_word' structure. This may have worked with compilers that were current then (gcc-3.4 or earlier), but end up being rather inefficient on any version I could try now (gcc-4.4 or higher). Specifically we hit a problem analyzed in gcc PR81715 where we fail to reuse the stack space for local variables. This can be seen immediately in the stack consumption for cfi_staa_erase_varsize() and other functions that (with CONFIG_KASAN) can be up to 2200 bytes. Changing the inline functions into macros brings this down to 1280 bytes. Without KASAN, the same problem exists, but the stack consumption is lower to start with, my patch shrinks it from 920 to 496 bytes on with arm-linux-gnueabi-gcc-5.4, and saves around 1KB in .text size for cfi_cmdset_0020.c, as it avoids copying map_word structures for each call to one of these helpers. With the latest gcc-8 snapshot, the problem is fixed in upstream gcc, but nobody uses that yet, so we should still work around it in mainline kernels and probably backport the workaround to stable kernels as well. We had a couple of other functions that suffered from the same gcc bug, and all of those had a simpler workaround involving dummy variables in the inline function. Unfortunately that did not work here, the macro hack was the best I could come up with. It would also be helpful to have someone to a little performance testing on the patch, to see how much it helps in terms of CPU utilitzation. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Acked-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/map.h | 120 +++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 64 deletions(-) diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3aa56e3104bb..b5b43f94f311 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -270,75 +270,67 @@ void map_destroy(struct mtd_info *mtd); #define INVALIDATE_CACHED_RANGE(map, from, size) \ do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) +#define map_word_equal(map, val1, val2) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) \ + if ((val1).x[i] != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + ret; \ +}) -static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) -{ - int i; +#define map_word_and(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & (val2).x[i]; \ + r; \ +}) - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] != val2.x[i]) - return 0; - } +#define map_word_clr(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & ~(val2).x[i]; \ + r; \ +}) - return 1; -} +#define map_word_or(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] | (val2).x[i]; \ + r; \ +}) -static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; +#define map_word_andequal(map, val1, val2, val3) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) { \ + if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + } \ + ret; \ +}) - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & val2.x[i]; - - return r; -} - -static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & ~val2.x[i]; - - return r; -} - -static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] | val2.x[i]; - - return r; -} - -static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if ((val1.x[i] & val2.x[i]) != val3.x[i]) - return 0; - } - - return 1; -} - -static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] & val2.x[i]) - return 1; - } - - return 0; -} +#define map_word_bitsset(map, val1, val2) \ +({ \ + int i, ret = 0; \ + for (i = 0; i < map_words(map); i++) { \ + if ((val1).x[i] & (val2).x[i]) { \ + ret = 1; \ + break; \ + } \ + } \ + ret; \ +}) static inline map_word map_word_load(struct map_info *map, const void *ptr) { From 4527b0887bc5ae69513215f91233c27808cd1f04 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 8 Jan 2018 15:36:48 -0500 Subject: [PATCH 342/770] mtd: nand: brcmnand: Disable prefetch by default commit f953f0f89663c39f08f4baaa8a4a881401b65654 upstream. Brcm nand controller prefetch feature needs to be disabled by default. Enabling affects performance on random reads as well as dma reads. Signed-off-by: Kamal Dasu Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Acked-by: Florian Fainelli Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/brcmnand/brcmnand.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e0eb51d8c012..edf24c148fa6 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2193,16 +2193,9 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) if (ctrl->nand_version >= 0x0702) tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; - if (ctrl->features & BRCMNAND_HAS_PREFETCH) { - /* - * FIXME: Flash DMA + prefetch may see spurious erased-page ECC - * errors - */ - if (has_flash_dma(ctrl)) - tmp &= ~ACC_CONTROL_PREFETCH; - else - tmp |= ACC_CONTROL_PREFETCH; - } + if (ctrl->features & BRCMNAND_HAS_PREFETCH) + tmp &= ~ACC_CONTROL_PREFETCH; + nand_writereg(ctrl, offs, tmp); return 0; From 16d11602c83b45f448355e02dd97a0d855016ef3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 12 Jan 2018 10:13:36 +0100 Subject: [PATCH 343/770] mtd: nand: Fix nand_do_read_oob() return value commit 87e89ce8d0d14f573c068c61bec2117751fb5103 upstream. Starting from commit 041e4575f034 ("mtd: nand: handle ECC errors in OOB"), nand_do_read_oob() (from the NAND core) did return 0 or a negative error, and the MTD layer expected it. However, the trend for the NAND layer is now to return an error or a positive number of bitflips. Deciding which status to return to the user belongs to the MTD layer. Commit e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") brought this logic to the mtd_read_oob() function while the return value coming from nand_do_read_oob() (called by the ->_read_oob() hook) was left unchanged. Fixes: e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/nand_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3f1d806e590a..a0b0302aea14 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2201,6 +2201,7 @@ EXPORT_SYMBOL(nand_write_oob_syndrome); static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + unsigned int max_bitflips = 0; int page, realpage, chipnr; struct nand_chip *chip = mtd_to_nand(mtd); struct mtd_ecc_stats stats; @@ -2258,6 +2259,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, nand_wait_ready(mtd); } + max_bitflips = max_t(unsigned int, max_bitflips, ret); + readlen -= len; if (!readlen) break; @@ -2283,7 +2286,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (mtd->ecc_stats.failed - stats.failed) return -EBADMSG; - return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; + return max_bitflips; } /** From 7eb52a6889e90bcb1280cf2d93f8a80602191d5c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 24 Jan 2018 23:49:31 +0100 Subject: [PATCH 344/770] mtd: nand: sunxi: Fix ECC strength choice commit f4c6cd1a7f2275d5bc0e494b21fff26f8dde80f0 upstream. When the requested ECC strength does not exactly match the strengths supported by the ECC engine, the driver is selecting the closest strength meeting the 'selected_strength > requested_strength' constraint. Fix the fact that, in this particular case, ecc->strength value was not updated to match the 'selected_strength'. For instance, one can encounter this issue when no ECC requirement is filled in the device tree while the NAND chip minimum requirement is not a strength/step_size combo natively supported by the ECC engine. Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/sunxi_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 82244be3e766..958974821582 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1853,8 +1853,14 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { - if (ecc->strength <= strengths[i]) + if (ecc->strength <= strengths[i]) { + /* + * Update ecc->strength value with the actual strength + * that will be used by the ECC engine. + */ + ecc->strength = strengths[i]; break; + } } if (i >= ARRAY_SIZE(strengths)) { From c94c39ac85f11698f3f0babb515f79656df3917e Mon Sep 17 00:00:00 2001 From: Clay McClure Date: Thu, 21 Sep 2017 19:01:34 -0700 Subject: [PATCH 345/770] ubi: Fix race condition between ubi volume creation and udev commit a51a0c8d213594bc094cb8e54aad0cb6d7f7b9a6 upstream. Similar to commit 714fb87e8bc0 ("ubi: Fix race condition between ubi device creation and udev"), we should make the volume active before registering it. Signed-off-by: Clay McClure Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/vmt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 85237cf661f9..3fd8d7ff7a02 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -270,6 +270,12 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vol->last_eb_bytes = vol->usable_leb_size; } + /* Make volume "available" before it becomes accessible via sysfs */ + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; + ubi->vol_count += 1; + spin_unlock(&ubi->volumes_lock); + /* Register character device for the volume */ cdev_init(&vol->cdev, &ubi_vol_cdev_operations); vol->cdev.owner = THIS_MODULE; @@ -298,11 +304,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) if (err) goto out_sysfs; - spin_lock(&ubi->volumes_lock); - ubi->volumes[vol_id] = vol; - ubi->vol_count += 1; - spin_unlock(&ubi->volumes_lock); - ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); self_check_volumes(ubi); return err; @@ -315,6 +316,10 @@ out_sysfs: */ cdev_device_del(&vol->cdev, &vol->dev); out_mapping: + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = NULL; + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); ubi_eba_destroy_table(eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); From 5793f39de7f64938c23cc062b1e170505789b0b0 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 16:01:20 +0100 Subject: [PATCH 346/770] ubi: fastmap: Erase outdated anchor PEBs during attach commit f78e5623f45bab2b726eec29dc5cefbbab2d0b1c upstream. The fastmap update code might erase the current fastmap anchor PEB in case it doesn't find any new free PEB. When a power cut happens in this situation we must not have any outdated fastmap anchor PEB on the device, because that would be used to attach during next boot. The easiest way to make that sure is to erase all outdated fastmap anchor PEBs synchronously during attach. Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b5b8cd6f481c..668b46202507 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1528,6 +1528,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase a PEB given in UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise schedule for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information. * @ubi: UBI device description object @@ -1566,18 +1606,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1635,6 +1667,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1644,18 +1678,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. + */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; From e9a35f8b73f10c605fba4548957c8317228848c7 Mon Sep 17 00:00:00 2001 From: Bradley Bolen Date: Thu, 18 Jan 2018 08:55:20 -0500 Subject: [PATCH 347/770] ubi: block: Fix locking for idr_alloc/idr_remove commit 7f29ae9f977bcdc3654e68bc36d170223c52fd48 upstream. This fixes a race with idr_alloc where gd->first_minor can be set to the same value for two simultaneous calls to ubiblock_create. Each instance calls device_add_disk with the same first_minor. device_add_disk calls bdi_register_owner which generates several warnings. WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/devices/virtual/bdi/252:2' WARNING: CPU: 1 PID: 179 at kernel-source/lib/kobject.c:240 kobject_add_internal+0x1ec/0x2f8 kobject_add_internal failed for 252:2 with -EEXIST, don't try to register things with the same name in the same directory WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/dev/block/252:2' However, device_add_disk does not error out when bdi_register_owner returns an error. Control continues until reaching blk_register_queue. It then BUGs. kernel BUG at kernel-source/fs/sysfs/group.c:113! [] (internal_create_group) from [] (sysfs_create_group+0x20/0x24) [] (sysfs_create_group) from [] (blk_trace_init_sysfs+0x18/0x20) [] (blk_trace_init_sysfs) from [] (blk_register_queue+0xd8/0x154) [] (blk_register_queue) from [] (device_add_disk+0x194/0x44c) [] (device_add_disk) from [] (ubiblock_create+0x284/0x2e0) [] (ubiblock_create) from [] (vol_cdev_ioctl+0x450/0x554) [] (vol_cdev_ioctl) from [] (vfs_ioctl+0x30/0x44) [] (vfs_ioctl) from [] (do_vfs_ioctl+0xa0/0x790) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x44/0x68) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) Locking idr_alloc/idr_remove removes the race and keeps gd->first_minor unique. Fixes: 2bf50d42f3a4 ("UBI: block: Dynamically allocate minor numbers") Signed-off-by: Bradley Bolen Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/block.c | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index b210fdb31c98..b1fc28f63882 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -99,6 +99,8 @@ struct ubiblock { /* Linked list of all ubiblock instances */ static LIST_HEAD(ubiblock_devices); +static DEFINE_IDR(ubiblock_minor_idr); +/* Protects ubiblock_devices and ubiblock_minor_idr */ static DEFINE_MUTEX(devices_mutex); static int ubiblock_major; @@ -351,8 +353,6 @@ static const struct blk_mq_ops ubiblock_mq_ops = { .init_request = ubiblock_init_request, }; -static DEFINE_IDR(ubiblock_minor_idr); - int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; @@ -365,14 +365,15 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { - mutex_unlock(&devices_mutex); - return -EEXIST; + ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&devices_mutex); dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); - if (!dev) - return -ENOMEM; + if (!dev) { + ret = -ENOMEM; + goto out_unlock; + } mutex_init(&dev->dev_mutex); @@ -437,14 +438,13 @@ int ubiblock_create(struct ubi_volume_info *vi) goto out_free_queue; } - mutex_lock(&devices_mutex); list_add_tail(&dev->list, &ubiblock_devices); - mutex_unlock(&devices_mutex); /* Must be the last step: anyone can call file ops from now on */ add_disk(dev->gd); dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); + mutex_unlock(&devices_mutex); return 0; out_free_queue: @@ -457,6 +457,8 @@ out_put_disk: put_disk(dev->gd); out_free_dev: kfree(dev); +out_unlock: + mutex_unlock(&devices_mutex); return ret; } @@ -478,30 +480,36 @@ static void ubiblock_cleanup(struct ubiblock *dev) int ubiblock_remove(struct ubi_volume_info *vi) { struct ubiblock *dev; + int ret; mutex_lock(&devices_mutex); dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { - mutex_unlock(&devices_mutex); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } /* Found a device, let's lock it so we can check if it's busy */ mutex_lock(&dev->dev_mutex); if (dev->refcnt > 0) { - mutex_unlock(&dev->dev_mutex); - mutex_unlock(&devices_mutex); - return -EBUSY; + ret = -EBUSY; + goto out_unlock_dev; } /* Remove from device list */ list_del(&dev->list); - mutex_unlock(&devices_mutex); - ubiblock_cleanup(dev); mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + kfree(dev); return 0; + +out_unlock_dev: + mutex_unlock(&dev->dev_mutex); +out_unlock: + mutex_unlock(&devices_mutex); + return ret; } static int ubiblock_resize(struct ubi_volume_info *vi) @@ -630,6 +638,7 @@ static void ubiblock_remove_all(void) struct ubiblock *next; struct ubiblock *dev; + mutex_lock(&devices_mutex); list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { /* The module is being forcefully removed */ WARN_ON(dev->desc); @@ -638,6 +647,7 @@ static void ubiblock_remove_all(void) ubiblock_cleanup(dev); kfree(dev); } + mutex_unlock(&devices_mutex); } int __init ubiblock_init(void) From 068e53f9b1921bd6a3ed2340dc5d85792bdde089 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:27:00 -0500 Subject: [PATCH 348/770] ubifs: free the encrypted symlink target commit 6b46d444146eb8d0b99562795cea8086639d7282 upstream. ubifs_symlink() forgot to free the kmalloc()'ed buffer holding the encrypted symlink target, creating a memory leak. Fix it. (UBIFS could actually encrypt directly into ui->data, removing the temporary buffer, but that is left for the patch that switches to use the symlink helper functions.) Fixes: ca7f85be8d6c ("ubifs: Add support for encrypted symlinks") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 417fe0b29f23..ef820f803176 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1216,10 +1216,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, ostr.len = disk_link.len; err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) { - kfree(sd); + if (err) goto out_inode; - } sd->len = cpu_to_le16(ostr.len); disk_link.name = (char *)sd; @@ -1251,11 +1249,10 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); - ubifs_release_budget(c, &req); insert_inode_hash(inode); d_instantiate(dentry, inode); - fscrypt_free_filename(&nm); - return 0; + err = 0; + goto out_fname; out_cancel: dir->i_size -= sz_change; @@ -1268,6 +1265,7 @@ out_fname: fscrypt_free_filename(&nm); out_budg: ubifs_release_budget(c, &req); + kfree(sd); return err; } From ace34428faa6be2098c9ac295394c11e65db2263 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 15 Dec 2017 16:12:32 -0500 Subject: [PATCH 349/770] nfs/pnfs: fix nfs_direct_req ref leak when i/o falls back to the mds commit ba4a76f703ab7eb72941fdaac848502073d6e9ee upstream. Currently when falling back to doing I/O through the MDS (via pnfs_{read|write}_through_mds), the client frees the nfs_pgio_header without releasing the reference taken on the dreq via pnfs_generic_pg_{read|write}pages -> nfs_pgheader_init -> nfs_direct_pgio_init. It then takes another reference on the dreq via nfs_generic_pg_pgios -> nfs_pgheader_init -> nfs_direct_pgio_init and as a result the requester will become stuck in inode_dio_wait. Once that happens, other processes accessing the inode will become stuck as well. Ensure that pnfs_read_through_mds() and pnfs_write_through_mds() clean up correctly by calling hdr->completion_ops->completion() instead of calling hdr->release() directly. This can be reproduced (sometimes) by performing "storage failover takeover" commands on NetApp filer while doing direct I/O from a client. This can also be reproduced using SystemTap to simulate a failure while doing direct I/O from a client (from Dave Wysochanski ): stap -v -g -e 'probe module("nfs_layout_nfsv41_files").function("nfs4_fl_prepare_ds").return { $return=NULL; exit(); }' Suggested-by: Trond Myklebust Signed-off-by: Scott Mayhew Fixes: 1ca018d28d ("pNFS: Fix a memory leak when attempted pnfs fails") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3bcd669a3152..5f2f852ef506 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2237,7 +2237,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } static enum pnfs_try_status @@ -2360,7 +2360,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); mirror->pg_recoalesce = 1; } - hdr->release(hdr); + hdr->completion_ops->completion(hdr); } /* From 3ac2d17a6ed38d3e662e56ad79e0ba1a98247830 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 16 Jan 2018 22:38:50 +0100 Subject: [PATCH 350/770] nfs41: do not return ENOMEM on LAYOUTUNAVAILABLE commit 7ff4cff637aa0bd2abbd81f53b2a6206c50afd95 upstream. A pNFS server may return LAYOUTUNAVAILABLE error on LAYOUTGET for files which don't have any layout. In this situation pnfs_update_layout currently returns NULL. As this NULL is converted into ENOMEM, IO requests fails instead of falling back to MDS. Do not return ENOMEM on LAYOUTUNAVAILABLE and let client retry through MDS. Fixes 8d40b0f14846f. I will suggest to backport this fix to affected stable branches. Signed-off-by: Tigran Mkrtchyan [trondmy: Use IS_ERR_OR_NULL()] Fixes: 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/filelayout/filelayout.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 508126eb49f9..3db2b7464748 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -895,9 +895,7 @@ fl_pnfs_update_layout(struct inode *ino, lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, gfp_flags); - if (!lseg) - lseg = ERR_PTR(-ENOMEM); - if (IS_ERR(lseg)) + if (IS_ERR_OR_NULL(lseg)) goto out; lo = NFS_I(ino)->layout; From 3d03af006438b91dab7c1feb474a5c899554acac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Dec 2017 14:39:13 -0500 Subject: [PATCH 351/770] NFS: Add a cond_resched() to nfs_commit_release_pages() commit 7f1bda447c9bd48b415acedba6b830f61591601f upstream. The commit list can get very large, and so we need a cond_resched() in nfs_commit_release_pages() in order to ensure we don't hog the CPU for excessive periods of time. Reported-by: Mike Galbraith Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index de325804941d..76da415be39a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1836,6 +1836,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); + /* Latency breaker */ + cond_resched(); } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) From 6421f29eb8af8992fd8a85823b435d44bb055b1c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Jan 2018 09:53:49 -0500 Subject: [PATCH 352/770] NFS: Fix nfsstat breakage due to LOOKUPP commit 8634ef5e05311f32d7f2aee06f6b27a8834a3bd6 upstream. The LOOKUPP operation was inserted into the nfs4_procedures array rather than being appended, which put /proc/net/rpc/nfs out of whack, and broke the nfsstat utility. Fix by moving the LOOKUPP operation to the end of the array, and by ensuring that it keeps the same length whether or not NFSV4.1 and NFSv4.2 are compiled in. Fixes: 5b5faaf6df734 ("nfs4: add NFSv4 LOOKUPP handlers") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4xdr.c | 64 ++++++++++++++++++++++++++------------------ include/linux/nfs4.h | 12 ++++++--- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 14ed9791ec9c..549c916d2859 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7668,6 +7668,22 @@ nfs4_stat_to_errno(int stat) .p_name = #proc, \ } +#if defined(CONFIG_NFS_V4_1) +#define PROC41(proc, argtype, restype) \ + PROC(proc, argtype, restype) +#else +#define PROC41(proc, argtype, restype) \ + STUB(proc) +#endif + +#if defined(CONFIG_NFS_V4_2) +#define PROC42(proc, argtype, restype) \ + PROC(proc, argtype, restype) +#else +#define PROC42(proc, argtype, restype) \ + STUB(proc) +#endif + const struct rpc_procinfo nfs4_procedures[] = { PROC(READ, enc_read, dec_read), PROC(WRITE, enc_write, dec_write), @@ -7688,7 +7704,6 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC(ACCESS, enc_access, dec_access), PROC(GETATTR, enc_getattr, dec_getattr), PROC(LOOKUP, enc_lookup, dec_lookup), - PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root), PROC(REMOVE, enc_remove, dec_remove), PROC(RENAME, enc_rename, dec_rename), @@ -7707,33 +7722,30 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present), -#if defined(CONFIG_NFS_V4_1) - PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), - PROC(CREATE_SESSION, enc_create_session, dec_create_session), - PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), - PROC(SEQUENCE, enc_sequence, dec_sequence), - PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), - PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), - PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), - PROC(LAYOUTGET, enc_layoutget, dec_layoutget), - PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), - PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), - PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), - PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), - PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), + PROC41(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), + PROC41(CREATE_SESSION, enc_create_session, dec_create_session), + PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), + PROC41(SEQUENCE, enc_sequence, dec_sequence), + PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete), + PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), + PROC41(LAYOUTGET, enc_layoutget, dec_layoutget), + PROC41(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), + PROC41(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), + PROC41(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), + PROC41(TEST_STATEID, enc_test_stateid, dec_test_stateid), + PROC41(FREE_STATEID, enc_free_stateid, dec_free_stateid), STUB(GETDEVICELIST), - PROC(BIND_CONN_TO_SESSION, + PROC41(BIND_CONN_TO_SESSION, enc_bind_conn_to_session, dec_bind_conn_to_session), - PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), -#endif /* CONFIG_NFS_V4_1 */ -#ifdef CONFIG_NFS_V4_2 - PROC(SEEK, enc_seek, dec_seek), - PROC(ALLOCATE, enc_allocate, dec_allocate), - PROC(DEALLOCATE, enc_deallocate, dec_deallocate), - PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), - PROC(CLONE, enc_clone, dec_clone), - PROC(COPY, enc_copy, dec_copy), -#endif /* CONFIG_NFS_V4_2 */ + PROC41(DESTROY_CLIENTID,enc_destroy_clientid, dec_destroy_clientid), + PROC42(SEEK, enc_seek, dec_seek), + PROC42(ALLOCATE, enc_allocate, dec_allocate), + PROC42(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC42(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC42(CLONE, enc_clone, dec_clone), + PROC42(COPY, enc_copy, dec_copy), + PROC(LOOKUPP, enc_lookupp, dec_lookupp), }; static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)]; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 47adac640191..57ffaa20d564 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -457,7 +457,12 @@ enum lock_type4 { #define NFS4_DEBUG 1 -/* Index of predefined Linux client operations */ +/* + * Index of predefined Linux client operations + * + * To ensure that /proc/net/rpc/nfs remains correctly ordered, please + * append only to this enum when adding new client operations. + */ enum { NFSPROC4_CLNT_NULL = 0, /* Unused */ @@ -480,7 +485,6 @@ enum { NFSPROC4_CLNT_ACCESS, NFSPROC4_CLNT_GETATTR, NFSPROC4_CLNT_LOOKUP, - NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LOOKUP_ROOT, NFSPROC4_CLNT_REMOVE, NFSPROC4_CLNT_RENAME, @@ -500,7 +504,6 @@ enum { NFSPROC4_CLNT_SECINFO, NFSPROC4_CLNT_FSID_PRESENT, - /* nfs41 */ NFSPROC4_CLNT_EXCHANGE_ID, NFSPROC4_CLNT_CREATE_SESSION, NFSPROC4_CLNT_DESTROY_SESSION, @@ -518,13 +521,14 @@ enum { NFSPROC4_CLNT_BIND_CONN_TO_SESSION, NFSPROC4_CLNT_DESTROY_CLIENTID, - /* nfs42 */ NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, + + NFSPROC4_CLNT_LOOKUPP, }; /* nfs41 types */ From 60017643c21060b600faeae9a58e450804ea288d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 Jan 2018 10:08:00 -0500 Subject: [PATCH 353/770] NFS: commit direct writes even if they fail partially commit 1b8d97b0a837beaf48a8449955b52c650a7114b4 upstream. If some of the WRITE calls making up an O_DIRECT write syscall fail, we neglect to commit, even if some of the WRITEs succeed. We also depend on the commit code to free the reference count on the nfs_page taken in the "if (request_commit)" case at the end of nfs_direct_write_completion(). The problem was originally noticed because ENOSPC's encountered partway through a write would result in a closed file being sillyrenamed when it should have been unlinked. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d2972d537469..8c10b0562e75 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -775,10 +775,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - dreq->flags = 0; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - } if (dreq->error == 0) { nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { From 1c38ad2f26d8511cde75ce5767313e8c3e789d69 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Jan 2018 15:15:34 -0800 Subject: [PATCH 354/770] NFS: reject request for id_legacy key without auxdata commit 49686cbbb3ebafe42e63868222f269d8053ead00 upstream. nfs_idmap_legacy_upcall() is supposed to be called with 'aux' pointing to a 'struct idmap', via the call to request_key_with_auxdata() in nfs_idmap_request_key(). However it can also be reached via the request_key() system call in which case 'aux' will be NULL, causing a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall(), assuming that the key description is valid enough to get that far. Fix this by making nfs_idmap_legacy_upcall() negate the key if no auxdata is provided. As usual, this bug was found by syzkaller. A simple reproducer using the command-line keyctl program is: keyctl request2 id_legacy uid:0 '' @s Fixes: 57e62324e469 ("NFS: Store the legacy idmapper result in the keyring") Reported-by: syzbot+5dfdbcf7b3eb5912abbb@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4idmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 30426c1a1bbd..22dc30a679a0 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -568,9 +568,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; struct key *key = cons->key; - int ret = -ENOMEM; + int ret = -ENOKEY; + + if (!aux) + goto out1; /* msg and im are freed in idmap_pipe_destroy_msg */ + ret = -ENOMEM; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out1; From f1e31607e73d5f4546ff8d1e40e1153230392188 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Jan 2018 09:29:41 -0500 Subject: [PATCH 355/770] NFS: Fix a race between mmap() and O_DIRECT commit e231c6879cfd44e4fffd384bb6dd7d313249a523 upstream. When locking the file in order to do O_DIRECT on it, we must unmap any mmapped ranges on the pagecache so that we can flush out the dirty data. Fixes: a5864c999de67 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/io.c b/fs/nfs/io.c index 20fef85d2bb1..9034b4926909 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -99,7 +99,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) { if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { set_bit(NFS_INO_ODIRECT, &nfsi->flags); - nfs_wb_all(inode); + nfs_sync_mapping(inode->i_mapping); } } From 650d3d8512c6c542d7c11ece049b46d5f2337ce8 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 09:18:54 +0100 Subject: [PATCH 356/770] kernfs: fix regression in kernfs_fop_write caused by wrong type commit ba87977a49913129962af8ac35b0e13e0fa4382d upstream. Commit b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") changes type of local variable 'len' from ssize_t to size_t. This change caused that the *ppos value is updated also when the previous write callback failed. Mentioned snippet: ... len = ops->write(...); <- return value can be negative ... if (len > 0) <- true here in this case *ppos += len; ... Fixes: b7ce40cff0b9 ("kernfs: cache atomic_write_len in kernfs_open_file") Acked-by: Tejun Heo Signed-off-by: Ivan Vecera Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 9698e51656b1..d8f49c412f50 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { From e985f7c8ac3801ce6b684d1b248f63701190154b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:08 +0100 Subject: [PATCH 357/770] ahci: Annotate PCI ids for mobile Intel chipsets as such commit ca1b4974bd237f2373b0e980b11957aac3499b56 upstream. Intel uses different SATA PCI ids for the Desktop and Mobile SKUs of their chipsets. For older models the comment describing which chipset the PCI id is for, aksi indicates when we're dealing with a mobile SKU. Extend the comments for recent chipsets to also indicate mobile SKUs. The information this commit adds comes from Intel's chipset datasheets. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 9f78bb03bb76..3095bd291cfa 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -267,9 +267,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH M AHCI */ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ - { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH M RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ @@ -292,9 +292,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ - { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ + { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT M AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ - { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */ + { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT M RAID */ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ @@ -303,20 +303,20 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */ { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */ - { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */ + { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point M AHCI */ { PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */ - { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */ + { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point M RAID */ { PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */ { PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */ - { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point AHCI */ + { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point M AHCI */ { PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */ - { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point M RAID */ { PCI_VDEVICE(INTEL, 0x9c02), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */ @@ -357,21 +357,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */ - { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series M AHCI */ { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ - { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series M RAID */ { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */ - { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H M AHCI */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */ - { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H M RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ From 53c5963da84337b71abff8b85291cd25a1002c17 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 16:41:09 +0100 Subject: [PATCH 358/770] ahci: Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI commit 998008b779e424bd7513c434d0ab9c1268459009 upstream. Add PCI ids for Intel Bay Trail, Cherry Trail and Apollo Lake AHCI SATA controllers. This commit is a preparation patch for allowing a different default sata link powermanagement policy for mobile chipsets. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 3095bd291cfa..5336e569386e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -385,6 +385,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ + { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci }, /* Apollo Lake AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From 449704d3ba6f45585da8cc033af2e3178d7344a7 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 11 Jan 2018 15:55:50 +0300 Subject: [PATCH 359/770] ahci: Add Intel Cannon Lake PCH-H PCI ID commit f919dde0772a894c693a1eeabc77df69d6a9b937 upstream. Add Intel Cannon Lake PCH-H PCI ID to the list of supported controllers. Signed-off-by: Mika Westerberg Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 5336e569386e..bc013f757d5d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -385,6 +385,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */ { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */ From b8b32e2e68f0a3dd67edb34710964dc3e67bdf3b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:22 -0800 Subject: [PATCH 360/770] crypto: hash - introduce crypto_hash_alg_has_setkey() commit cd6ed77ad5d223dc6299fb58f62e0f5267f7e2ba upstream. Templates that use an shash spawn can use crypto_shash_alg_has_setkey() to determine whether the underlying algorithm requires a key or not. But there was no corresponding function for ahash spawns. Add it. Note that the new function actually has to support both shash and ahash algorithms, since the ahash API can be used with either. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 11 +++++++++++ include/crypto/internal/hash.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index 5e8666e6ccae..63f3fd3a788e 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -655,5 +655,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(ahash_attr_alg); +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg) +{ + struct crypto_alg *alg = &halg->base; + + if (alg->cra_type != &crypto_ahash_type) + return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg)); + + return __crypto_ahash_alg(alg)->setkey != NULL; +} +EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index c2bae8da642c..27040a46d50a 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -90,6 +90,8 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) return alg->setkey != shash_no_setkey; } +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); From ed7b0af0ca02877cb4e23546da0d44dd49156bd7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:23 -0800 Subject: [PATCH 361/770] crypto: cryptd - pass through absence of ->setkey() commit 841a3ff329713f796a63356fef6e2f72e4a3f6a3 upstream. When the cryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the cryptd instance. This change is necessary for cryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/cryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 0508c48a45c4..d5182e7b2a9c 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -913,7 +913,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = cryptd_hash_finup_enqueue; inst->alg.export = cryptd_hash_export; inst->alg.import = cryptd_hash_import; - inst->alg.setkey = cryptd_hash_setkey; + if (crypto_shash_alg_has_setkey(salg)) + inst->alg.setkey = cryptd_hash_setkey; inst->alg.digest = cryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); From 16210524c424a5df0cdd9b2b6aaee037f995f6ad Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:24 -0800 Subject: [PATCH 362/770] crypto: mcryptd - pass through absence of ->setkey() commit fa59b92d299f2787e6bae1ff078ee0982e80211f upstream. When the mcryptd template is used to wrap an unkeyed hash algorithm, don't install a ->setkey() method to the mcryptd instance. This change is necessary for mcryptd to keep working with unkeyed hash algorithms once we start enforcing that ->setkey() is called when present. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/mcryptd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index eca04d3729b3..6b4dd4b76942 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -535,7 +535,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.finup = mcryptd_hash_finup_enqueue; inst->alg.export = mcryptd_hash_export; inst->alg.import = mcryptd_hash_import; - inst->alg.setkey = mcryptd_hash_setkey; + if (crypto_hash_alg_has_setkey(halg)) + inst->alg.setkey = mcryptd_hash_setkey; inst->alg.digest = mcryptd_hash_digest_enqueue; err = ahash_register_instance(tmpl, inst); From b806c0cc4c73511be382da411effdbb96f15919a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:25 -0800 Subject: [PATCH 363/770] crypto: poly1305 - remove ->setkey() method commit a16e772e664b9a261424107784804cffc8894977 upstream. Since Poly1305 requires a nonce per invocation, the Linux kernel implementations of Poly1305 don't use the crypto API's keying mechanism and instead expect the key and nonce as the first 32 bytes of the data. But ->setkey() is still defined as a stub returning an error code. This prevents Poly1305 from being used through AF_ALG and will also break it completely once we start enforcing that all crypto API users (not just AF_ALG) call ->setkey() if present. Fix it by removing crypto_poly1305_setkey(), leaving ->setkey as NULL. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/poly1305_glue.c | 1 - crypto/poly1305_generic.c | 17 +++++------------ include/crypto/poly1305.h | 2 -- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e32142bc071d..28c372003e44 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -164,7 +164,6 @@ static struct shash_alg alg = { .init = poly1305_simd_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index b1c2d57dc734..ba39eb308c79 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -47,17 +47,6 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - /* Poly1305 requires a unique key for each tag, which implies that - * we can't set it on the tfm that gets accessed by multiple users - * simultaneously. Instead we expect the key as the first 32 bytes in - * the update() call. */ - return -ENOTSUPP; -} -EXPORT_SYMBOL_GPL(crypto_poly1305_setkey); - static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ @@ -76,6 +65,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) dctx->s[3] = get_unaligned_le32(key + 12); } +/* + * Poly1305 requires a unique key for each tag, which implies that we can't set + * it on the tfm that gets accessed by multiple users simultaneously. Instead we + * expect the key as the first 32 bytes in the update() call. + */ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { @@ -281,7 +275,6 @@ static struct shash_alg poly1305_alg = { .init = crypto_poly1305_init, .update = crypto_poly1305_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_desc_ctx), .base = { .cra_name = "poly1305", diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index c65567d01e8e..f718a19da82f 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -31,8 +31,6 @@ struct poly1305_desc_ctx { }; int crypto_poly1305_init(struct shash_desc *desc); -int crypto_poly1305_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); int crypto_poly1305_update(struct shash_desc *desc, From 8d906d183bce4b0eb812e5766ee5016f16dc201c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:26 -0800 Subject: [PATCH 364/770] crypto: hash - annotate algorithms taking optional key commit a208fa8f33031b9e0aba44c7d1b7e68eb0cbd29e upstream. We need to consistently enforce that keyed hashes cannot be used without setting the key. To do this we need a reliable way to determine whether a given hash algorithm is keyed or not. AF_ALG currently does this by checking for the presence of a ->setkey() method. However, this is actually slightly broken because the CRC-32 algorithms implement ->setkey() but can also be used without a key. (The CRC-32 "key" is not actually a cryptographic key but rather represents the initial state. If not overridden, then a default initial state is used.) Prepare to fix this by introducing a flag CRYPTO_ALG_OPTIONAL_KEY which indicates that the algorithm has a ->setkey() method, but it is not required to be called. Then set it on all the CRC-32 algorithms. The same also applies to the Adler-32 implementation in Lustre. Also, the cryptd and mcryptd templates have to pass through the flag from their underlying algorithm. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/crc32-ce-glue.c | 2 ++ arch/arm64/crypto/crc32-ce-glue.c | 2 ++ arch/powerpc/crypto/crc32c-vpmsum_glue.c | 1 + arch/s390/crypto/crc32-vx.c | 3 +++ arch/sparc/crypto/crc32c_glue.c | 1 + arch/x86/crypto/crc32-pclmul_glue.c | 1 + arch/x86/crypto/crc32c-intel_glue.c | 1 + crypto/crc32_generic.c | 1 + crypto/crc32c_generic.c | 1 + crypto/cryptd.c | 7 +++---- crypto/mcryptd.c | 7 +++---- drivers/crypto/bfin_crc.c | 3 ++- drivers/crypto/stm32/stm32_crc32.c | 2 ++ .../staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c | 1 + include/linux/crypto.h | 6 ++++++ 15 files changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 1b0e0e86ee9c..96e62ec105d0 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -188,6 +188,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32", .base.cra_driver_name = "crc32-arm-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, }, { @@ -203,6 +204,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-arm-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c index 624f4137918c..34b4e3d46aab 100644 --- a/arch/arm64/crypto/crc32-ce-glue.c +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -185,6 +185,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32", .base.cra_driver_name = "crc32-arm64-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, }, { @@ -200,6 +201,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-arm64-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index f058e0c3e4d4..fd1d6c83f0c0 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -141,6 +141,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vpmsum", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b..6f4985f357c6 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -238,6 +238,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32", .cra_driver_name = "crc32-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -258,6 +259,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32be", .cra_driver_name = "crc32be-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -278,6 +280,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index d1064e46efe8..8aa664638c3c 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_alignmask = 7, diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 27226df3f7d8..c8d9cdacbf10 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -162,6 +162,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-pclmul", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index c194d5717ae5..5773e1161072 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -226,6 +226,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-intel", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index aa2a25fc7482..718cbce8d169 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 4c0a0e271876..372320399622 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -146,6 +146,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct chksum_ctx), diff --git a/crypto/cryptd.c b/crypto/cryptd.c index d5182e7b2a9c..248f6ba41688 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -895,10 +895,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = salg->digestsize; inst->alg.halg.statesize = salg->statesize; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 6b4dd4b76942..e0732d979e3b 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -517,10 +517,9 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = halg->digestsize; inst->alg.halg.statesize = halg->statesize; diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index a118b9bed669..bfbf8bf77f03 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -494,7 +494,8 @@ static struct ahash_alg algs = { .cra_driver_name = DRIVER_NAME, .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bfin_crypto_crc_ctx), .cra_alignmask = 3, diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c index 090582baecfe..8f09b8430893 100644 --- a/drivers/crypto/stm32/stm32_crc32.c +++ b/drivers/crypto/stm32/stm32_crc32.c @@ -208,6 +208,7 @@ static struct shash_alg algs[] = { .cra_name = "crc32", .cra_driver_name = DRIVER_NAME, .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct stm32_crc_ctx), @@ -229,6 +230,7 @@ static struct shash_alg algs[] = { .cra_name = "crc32c", .cra_driver_name = DRIVER_NAME, .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct stm32_crc_ctx), diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c index db0572733712..ab30a0f5129c 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c @@ -119,6 +119,7 @@ static struct shash_alg alg = { .cra_name = "adler32", .cra_driver_name = "adler32-zlib", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 84da9978e951..958023baa19f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -105,6 +105,12 @@ */ #define CRYPTO_ALG_INTERNAL 0x00002000 +/* + * Set if the algorithm has a ->setkey() method but can be used without + * calling it first, i.e. there is a default key. + */ +#define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 + /* * Transform masks and values (for crt_flags). */ From 2f00eb27908895b46e807b632b53824d0844012f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:27 -0800 Subject: [PATCH 365/770] crypto: hash - prevent using keyed hashes without setting key commit 9fa68f620041be04720d0cbfb1bd3ddfc6310b24 upstream. Currently, almost none of the keyed hash algorithms check whether a key has been set before proceeding. Some algorithms are okay with this and will effectively just use a key of all 0's or some other bogus default. However, others will severely break, as demonstrated using "hmac(sha3-512-generic)", the unkeyed use of which causes a kernel crash via a (potentially exploitable) stack buffer overflow. A while ago, this problem was solved for AF_ALG by pairing each hash transform with a 'has_key' bool. However, there are still other places in the kernel where userspace can specify an arbitrary hash algorithm by name, and the kernel uses it as unkeyed hash without checking whether it is really unkeyed. Examples of this include: - KEYCTL_DH_COMPUTE, via the KDF extension - dm-verity - dm-crypt, via the ESSIV support - dm-integrity, via the "internal hash" mode with no key given - drbd (Distributed Replicated Block Device) This bug is especially bad for KEYCTL_DH_COMPUTE as that requires no privileges to call. Fix the bug for all users by adding a flag CRYPTO_TFM_NEED_KEY to the ->crt_flags of each hash transform that indicates whether the transform still needs to be keyed or not. Then, make the hash init, import, and digest functions return -ENOKEY if the key is still needed. The new flag also replaces the 'has_key' bool which algif_hash was previously using, thereby simplifying the algif_hash implementation. Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 22 ++++++++++++++---- crypto/algif_hash.c | 52 +++++++++--------------------------------- crypto/shash.c | 25 ++++++++++++++++---- include/crypto/hash.h | 34 +++++++++++++++++++-------- include/linux/crypto.h | 2 ++ 5 files changed, 75 insertions(+), 60 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 63f3fd3a788e..f75b5c1f7152 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -193,11 +193,18 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { unsigned long alignmask = crypto_ahash_alignmask(tfm); + int err; if ((unsigned long)key & alignmask) - return ahash_setkey_unaligned(tfm, key, keylen); + err = ahash_setkey_unaligned(tfm, key, keylen); + else + err = tfm->setkey(tfm, key, keylen); - return tfm->setkey(tfm, key, keylen); + if (err) + return err; + + crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); + return 0; } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); @@ -370,7 +377,12 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { - return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->digest); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_ahash_op(req, tfm->digest); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); @@ -456,7 +468,6 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) struct ahash_alg *alg = crypto_ahash_alg(hash); hash->setkey = ahash_nosetkey; - hash->has_setkey = false; hash->export = ahash_no_export; hash->import = ahash_no_import; @@ -471,7 +482,8 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) if (alg->setkey) { hash->setkey = alg->setkey; - hash->has_setkey = true; + if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY); } if (alg->export) hash->export = alg->export; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 5e92bd275ef3..39cebd3256bf 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,11 +34,6 @@ struct hash_ctx { struct ahash_request req; }; -struct algif_hash_tfm { - struct crypto_ahash *hash; - bool has_key; -}; - static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx) { unsigned ds; @@ -309,7 +304,7 @@ static int hash_check_key(struct socket *sock) int err = 0; struct sock *psk; struct alg_sock *pask; - struct algif_hash_tfm *tfm; + struct crypto_ahash *tfm; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -323,7 +318,7 @@ static int hash_check_key(struct socket *sock) err = -ENOKEY; lock_sock_nested(psk, SINGLE_DEPTH_NESTING); - if (!tfm->has_key) + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) goto unlock; if (!pask->refcnt++) @@ -414,41 +409,17 @@ static struct proto_ops algif_hash_ops_nokey = { static void *hash_bind(const char *name, u32 type, u32 mask) { - struct algif_hash_tfm *tfm; - struct crypto_ahash *hash; - - tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); - if (!tfm) - return ERR_PTR(-ENOMEM); - - hash = crypto_alloc_ahash(name, type, mask); - if (IS_ERR(hash)) { - kfree(tfm); - return ERR_CAST(hash); - } - - tfm->hash = hash; - - return tfm; + return crypto_alloc_ahash(name, type, mask); } static void hash_release(void *private) { - struct algif_hash_tfm *tfm = private; - - crypto_free_ahash(tfm->hash); - kfree(tfm); + crypto_free_ahash(private); } static int hash_setkey(void *private, const u8 *key, unsigned int keylen) { - struct algif_hash_tfm *tfm = private; - int err; - - err = crypto_ahash_setkey(tfm->hash, key, keylen); - tfm->has_key = !err; - - return err; + return crypto_ahash_setkey(private, key, keylen); } static void hash_sock_destruct(struct sock *sk) @@ -463,11 +434,10 @@ static void hash_sock_destruct(struct sock *sk) static int hash_accept_parent_nokey(void *private, struct sock *sk) { - struct hash_ctx *ctx; + struct crypto_ahash *tfm = private; struct alg_sock *ask = alg_sk(sk); - struct algif_hash_tfm *tfm = private; - struct crypto_ahash *hash = tfm->hash; - unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash); + struct hash_ctx *ctx; + unsigned int len = sizeof(*ctx) + crypto_ahash_reqsize(tfm); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -480,7 +450,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk) ask->private = ctx; - ahash_request_set_tfm(&ctx->req, hash); + ahash_request_set_tfm(&ctx->req, tfm); ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -491,9 +461,9 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk) static int hash_accept_parent(void *private, struct sock *sk) { - struct algif_hash_tfm *tfm = private; + struct crypto_ahash *tfm = private; - if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash)) + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return hash_accept_parent_nokey(private, sk); diff --git a/crypto/shash.c b/crypto/shash.c index e849d3ee2e27..5d732c6bb4b2 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -58,11 +58,18 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, { struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); + int err; if ((unsigned long)key & alignmask) - return shash_setkey_unaligned(tfm, key, keylen); + err = shash_setkey_unaligned(tfm, key, keylen); + else + err = shash->setkey(tfm, key, keylen); - return shash->setkey(tfm, key, keylen); + if (err) + return err; + + crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); + return 0; } EXPORT_SYMBOL_GPL(crypto_shash_setkey); @@ -181,6 +188,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, struct shash_alg *shash = crypto_shash_alg(tfm); unsigned long alignmask = crypto_shash_alignmask(tfm); + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + if (((unsigned long)data | (unsigned long)out) & alignmask) return shash_digest_unaligned(desc, data, len, out); @@ -360,7 +370,8 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->digest = shash_async_digest; crt->setkey = shash_async_setkey; - crt->has_setkey = alg->setkey != shash_no_setkey; + crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) & + CRYPTO_TFM_NEED_KEY); if (alg->export) crt->export = shash_async_export; @@ -375,8 +386,14 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); + struct shash_alg *alg = crypto_shash_alg(hash); + + hash->descsize = alg->descsize; + + if (crypto_shash_alg_has_setkey(alg) && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) + crypto_shash_set_flags(hash, CRYPTO_TFM_NEED_KEY); - hash->descsize = crypto_shash_alg(hash)->descsize; return 0; } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index b5727bcd2336..74827781593c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -205,7 +205,6 @@ struct crypto_ahash { unsigned int keylen); unsigned int reqsize; - bool has_setkey; struct crypto_tfm base; }; @@ -405,11 +404,6 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); -static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm) -{ - return tfm->has_setkey; -} - /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information @@ -481,7 +475,12 @@ static inline int crypto_ahash_export(struct ahash_request *req, void *out) */ static inline int crypto_ahash_import(struct ahash_request *req, const void *in) { - return crypto_ahash_reqtfm(req)->import(req, in); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return tfm->import(req, in); } /** @@ -498,7 +497,12 @@ static inline int crypto_ahash_import(struct ahash_request *req, const void *in) */ static inline int crypto_ahash_init(struct ahash_request *req) { - return crypto_ahash_reqtfm(req)->init(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + + if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return tfm->init(req); } /** @@ -851,7 +855,12 @@ static inline int crypto_shash_export(struct shash_desc *desc, void *out) */ static inline int crypto_shash_import(struct shash_desc *desc, const void *in) { - return crypto_shash_alg(desc->tfm)->import(desc, in); + struct crypto_shash *tfm = desc->tfm; + + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_shash_alg(tfm)->import(desc, in); } /** @@ -867,7 +876,12 @@ static inline int crypto_shash_import(struct shash_desc *desc, const void *in) */ static inline int crypto_shash_init(struct shash_desc *desc) { - return crypto_shash_alg(desc->tfm)->init(desc); + struct crypto_shash *tfm = desc->tfm; + + if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) + return -ENOKEY; + + return crypto_shash_alg(tfm)->init(desc); } /** diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 958023baa19f..cc36484d29e1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -114,6 +114,8 @@ /* * Transform masks and values (for crt_flags). */ +#define CRYPTO_TFM_NEED_KEY 0x00000001 + #define CRYPTO_TFM_REQ_MASK 0x000fff00 #define CRYPTO_TFM_RES_MASK 0xfff00000 From ea96d8f676dd3caeb77e3dc509c880cc81d303a6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:22 +0100 Subject: [PATCH 366/770] media: v4l2-ioctl.c: use check_fmt for enum/g/s/try_fmt commit b2469c814fbc8f1f19676dd4912717b798df511e upstream. Don't duplicate the buffer type checks in enum/g/s/try_fmt. The check_fmt function does that already. It is hard to keep the checks in sync for all these functions and in fact the check for VBI was wrong in the _fmt functions as it allowed SDR types as well. This caused a v4l2-compliance failure for /dev/swradio0 using vivid. This simplifies the code and keeps the check in one place and fixes the SDR/VBI bug. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 140 +++++++++++---------------- 1 file changed, 54 insertions(+), 86 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index b60a6b0841d1..42e376f46729 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1308,52 +1308,50 @@ static int v4l_enum_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_fmtdesc *p = arg; - struct video_device *vfd = video_devdata(file); - bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER; - bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR; - bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH; - bool is_rx = vfd->vfl_dir != VFL_DIR_TX; - bool is_tx = vfd->vfl_dir != VFL_DIR_RX; - int ret = -EINVAL; + int ret = check_fmt(file, p->type); + + if (ret) + return ret; + ret = -EINVAL; switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: - if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_enum_fmt_vid_cap)) + if (unlikely(!ops->vidioc_enum_fmt_vid_cap)) break; ret = ops->vidioc_enum_fmt_vid_cap(file, fh, arg); break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_enum_fmt_vid_cap_mplane)) + if (unlikely(!ops->vidioc_enum_fmt_vid_cap_mplane)) break; ret = ops->vidioc_enum_fmt_vid_cap_mplane(file, fh, arg); break; case V4L2_BUF_TYPE_VIDEO_OVERLAY: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_enum_fmt_vid_overlay)) + if (unlikely(!ops->vidioc_enum_fmt_vid_overlay)) break; ret = ops->vidioc_enum_fmt_vid_overlay(file, fh, arg); break; case V4L2_BUF_TYPE_VIDEO_OUTPUT: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_enum_fmt_vid_out)) + if (unlikely(!ops->vidioc_enum_fmt_vid_out)) break; ret = ops->vidioc_enum_fmt_vid_out(file, fh, arg); break; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_enum_fmt_vid_out_mplane)) + if (unlikely(!ops->vidioc_enum_fmt_vid_out_mplane)) break; ret = ops->vidioc_enum_fmt_vid_out_mplane(file, fh, arg); break; case V4L2_BUF_TYPE_SDR_CAPTURE: - if (unlikely(!is_rx || !is_sdr || !ops->vidioc_enum_fmt_sdr_cap)) + if (unlikely(!ops->vidioc_enum_fmt_sdr_cap)) break; ret = ops->vidioc_enum_fmt_sdr_cap(file, fh, arg); break; case V4L2_BUF_TYPE_SDR_OUTPUT: - if (unlikely(!is_tx || !is_sdr || !ops->vidioc_enum_fmt_sdr_out)) + if (unlikely(!ops->vidioc_enum_fmt_sdr_out)) break; ret = ops->vidioc_enum_fmt_sdr_out(file, fh, arg); break; case V4L2_BUF_TYPE_META_CAPTURE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_enum_fmt_meta_cap)) + if (unlikely(!ops->vidioc_enum_fmt_meta_cap)) break; ret = ops->vidioc_enum_fmt_meta_cap(file, fh, arg); break; @@ -1367,13 +1365,10 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_format *p = arg; - struct video_device *vfd = video_devdata(file); - bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER; - bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR; - bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH; - bool is_rx = vfd->vfl_dir != VFL_DIR_TX; - bool is_tx = vfd->vfl_dir != VFL_DIR_RX; - int ret; + int ret = check_fmt(file, p->type); + + if (ret) + return ret; /* * fmt can't be cleared for these overlay types due to the 'clips' @@ -1401,7 +1396,7 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: - if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_g_fmt_vid_cap)) + if (unlikely(!ops->vidioc_g_fmt_vid_cap)) break; p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg); @@ -1409,23 +1404,15 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap_mplane)) - break; return ops->vidioc_g_fmt_vid_cap_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OVERLAY: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_overlay)) - break; return ops->vidioc_g_fmt_vid_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_g_fmt_vbi_cap)) - break; return ops->vidioc_g_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_g_fmt_sliced_vbi_cap)) - break; return ops->vidioc_g_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out)) + if (unlikely(!ops->vidioc_g_fmt_vid_out)) break; p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; ret = ops->vidioc_g_fmt_vid_out(file, fh, arg); @@ -1433,32 +1420,18 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; return ret; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out_mplane)) - break; return ops->vidioc_g_fmt_vid_out_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out_overlay)) - break; return ops->vidioc_g_fmt_vid_out_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_g_fmt_vbi_out)) - break; return ops->vidioc_g_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_g_fmt_sliced_vbi_out)) - break; return ops->vidioc_g_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: - if (unlikely(!is_rx || !is_sdr || !ops->vidioc_g_fmt_sdr_cap)) - break; return ops->vidioc_g_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: - if (unlikely(!is_tx || !is_sdr || !ops->vidioc_g_fmt_sdr_out)) - break; return ops->vidioc_g_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_meta_cap)) - break; return ops->vidioc_g_fmt_meta_cap(file, fh, arg); } return -EINVAL; @@ -1484,12 +1457,10 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, { struct v4l2_format *p = arg; struct video_device *vfd = video_devdata(file); - bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER; - bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR; - bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH; - bool is_rx = vfd->vfl_dir != VFL_DIR_TX; - bool is_tx = vfd->vfl_dir != VFL_DIR_RX; - int ret; + int ret = check_fmt(file, p->type); + + if (ret) + return ret; ret = v4l_enable_media_source(vfd); if (ret) @@ -1498,37 +1469,37 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: - if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_s_fmt_vid_cap)) + if (unlikely(!ops->vidioc_s_fmt_vid_cap)) break; CLEAR_AFTER_FIELD(p, fmt.pix); ret = ops->vidioc_s_fmt_vid_cap(file, fh, arg); /* just in case the driver zeroed it again */ p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; - if (is_tch) + if (vfd->vfl_type == VFL_TYPE_TOUCH) v4l_pix_format_touch(&p->fmt.pix); return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_cap_mplane)) + if (unlikely(!ops->vidioc_s_fmt_vid_cap_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); return ops->vidioc_s_fmt_vid_cap_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OVERLAY: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_overlay)) + if (unlikely(!ops->vidioc_s_fmt_vid_overlay)) break; CLEAR_AFTER_FIELD(p, fmt.win); return ops->vidioc_s_fmt_vid_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_s_fmt_vbi_cap)) + if (unlikely(!ops->vidioc_s_fmt_vbi_cap)) break; CLEAR_AFTER_FIELD(p, fmt.vbi); return ops->vidioc_s_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_s_fmt_sliced_vbi_cap)) + if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_cap)) break; CLEAR_AFTER_FIELD(p, fmt.sliced); return ops->vidioc_s_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_s_fmt_vid_out)) + if (unlikely(!ops->vidioc_s_fmt_vid_out)) break; CLEAR_AFTER_FIELD(p, fmt.pix); ret = ops->vidioc_s_fmt_vid_out(file, fh, arg); @@ -1536,37 +1507,37 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; return ret; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_s_fmt_vid_out_mplane)) + if (unlikely(!ops->vidioc_s_fmt_vid_out_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); return ops->vidioc_s_fmt_vid_out_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_s_fmt_vid_out_overlay)) + if (unlikely(!ops->vidioc_s_fmt_vid_out_overlay)) break; CLEAR_AFTER_FIELD(p, fmt.win); return ops->vidioc_s_fmt_vid_out_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_s_fmt_vbi_out)) + if (unlikely(!ops->vidioc_s_fmt_vbi_out)) break; CLEAR_AFTER_FIELD(p, fmt.vbi); return ops->vidioc_s_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_s_fmt_sliced_vbi_out)) + if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_out)) break; CLEAR_AFTER_FIELD(p, fmt.sliced); return ops->vidioc_s_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: - if (unlikely(!is_rx || !is_sdr || !ops->vidioc_s_fmt_sdr_cap)) + if (unlikely(!ops->vidioc_s_fmt_sdr_cap)) break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_s_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: - if (unlikely(!is_tx || !is_sdr || !ops->vidioc_s_fmt_sdr_out)) + if (unlikely(!ops->vidioc_s_fmt_sdr_out)) break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_s_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_meta_cap)) + if (unlikely(!ops->vidioc_s_fmt_meta_cap)) break; CLEAR_AFTER_FIELD(p, fmt.meta); return ops->vidioc_s_fmt_meta_cap(file, fh, arg); @@ -1578,19 +1549,16 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_format *p = arg; - struct video_device *vfd = video_devdata(file); - bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER; - bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR; - bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH; - bool is_rx = vfd->vfl_dir != VFL_DIR_TX; - bool is_tx = vfd->vfl_dir != VFL_DIR_RX; - int ret; + int ret = check_fmt(file, p->type); + + if (ret) + return ret; v4l_sanitize_format(p); switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: - if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_try_fmt_vid_cap)) + if (unlikely(!ops->vidioc_try_fmt_vid_cap)) break; CLEAR_AFTER_FIELD(p, fmt.pix); ret = ops->vidioc_try_fmt_vid_cap(file, fh, arg); @@ -1598,27 +1566,27 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_vid_cap_mplane)) + if (unlikely(!ops->vidioc_try_fmt_vid_cap_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); return ops->vidioc_try_fmt_vid_cap_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OVERLAY: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_vid_overlay)) + if (unlikely(!ops->vidioc_try_fmt_vid_overlay)) break; CLEAR_AFTER_FIELD(p, fmt.win); return ops->vidioc_try_fmt_vid_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_try_fmt_vbi_cap)) + if (unlikely(!ops->vidioc_try_fmt_vbi_cap)) break; CLEAR_AFTER_FIELD(p, fmt.vbi); return ops->vidioc_try_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: - if (unlikely(!is_rx || is_vid || !ops->vidioc_try_fmt_sliced_vbi_cap)) + if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_cap)) break; CLEAR_AFTER_FIELD(p, fmt.sliced); return ops->vidioc_try_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_try_fmt_vid_out)) + if (unlikely(!ops->vidioc_try_fmt_vid_out)) break; CLEAR_AFTER_FIELD(p, fmt.pix); ret = ops->vidioc_try_fmt_vid_out(file, fh, arg); @@ -1626,37 +1594,37 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; return ret; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_try_fmt_vid_out_mplane)) + if (unlikely(!ops->vidioc_try_fmt_vid_out_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); return ops->vidioc_try_fmt_vid_out_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - if (unlikely(!is_tx || !is_vid || !ops->vidioc_try_fmt_vid_out_overlay)) + if (unlikely(!ops->vidioc_try_fmt_vid_out_overlay)) break; CLEAR_AFTER_FIELD(p, fmt.win); return ops->vidioc_try_fmt_vid_out_overlay(file, fh, arg); case V4L2_BUF_TYPE_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_try_fmt_vbi_out)) + if (unlikely(!ops->vidioc_try_fmt_vbi_out)) break; CLEAR_AFTER_FIELD(p, fmt.vbi); return ops->vidioc_try_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - if (unlikely(!is_tx || is_vid || !ops->vidioc_try_fmt_sliced_vbi_out)) + if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_out)) break; CLEAR_AFTER_FIELD(p, fmt.sliced); return ops->vidioc_try_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: - if (unlikely(!is_rx || !is_sdr || !ops->vidioc_try_fmt_sdr_cap)) + if (unlikely(!ops->vidioc_try_fmt_sdr_cap)) break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_try_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: - if (unlikely(!is_tx || !is_sdr || !ops->vidioc_try_fmt_sdr_out)) + if (unlikely(!ops->vidioc_try_fmt_sdr_out)) break; CLEAR_AFTER_FIELD(p, fmt.sdr); return ops->vidioc_try_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: - if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_meta_cap)) + if (unlikely(!ops->vidioc_try_fmt_meta_cap)) break; CLEAR_AFTER_FIELD(p, fmt.meta); return ops->vidioc_try_fmt_meta_cap(file, fh, arg); From 2d280dab382eef93311f089da0bb428336431208 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:23 +0100 Subject: [PATCH 367/770] media: v4l2-ioctl.c: don't copy back the result for -ENOTTY commit 181a4a2d5a0a7b43cab08a70710d727e7764ccdd upstream. If the ioctl returned -ENOTTY, then don't bother copying back the result as there is no point. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 42e376f46729..d06941cc6a55 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2892,8 +2892,11 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, /* Handles IOCTL */ err = func(file, cmd, parg); - if (err == -ENOIOCTLCMD) + if (err == -ENOTTY || err == -ENOIOCTLCMD) { err = -ENOTTY; + goto out; + } + if (err == 0) { if (cmd == VIDIOC_DQBUF) trace_v4l2_dqbuf(video_devdata(file)->minor, parg); From 2b991eeb5cfb34838c4e984334e5975d3351c026 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:24 +0100 Subject: [PATCH 368/770] media: v4l2-compat-ioctl32.c: add missing VIDIOC_PREPARE_BUF commit 3ee6d040719ae09110e5cdf24d5386abe5d1b776 upstream. The result of the VIDIOC_PREPARE_BUF ioctl was never copied back to userspace since it was missing in the switch. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 821f2aa299ae..f6a7f8793720 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1052,6 +1052,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar err = put_v4l2_create32(&karg.v2crt, up); break; + case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: From 0b7d6ac53606ab9991b76e95ab3e54348b3b9b89 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:25 +0100 Subject: [PATCH 369/770] media: v4l2-compat-ioctl32.c: fix the indentation commit b7b957d429f601d6d1942122b339474f31191d75 upstream. The indentation of this source is all over the place. Fix this. This patch only changes whitespace. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 212 +++++++++--------- 1 file changed, 107 insertions(+), 105 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index f6a7f8793720..44644a2ea3e9 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -49,12 +49,12 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount) || - get_user(kp->global_alpha, &up->global_alpha)) - return -EFAULT; + copy_from_user(&kp->w, &up->w, sizeof(up->w)) || + get_user(kp->field, &up->field) || + get_user(kp->chromakey, &up->chromakey) || + get_user(kp->clipcount, &up->clipcount) || + get_user(kp->global_alpha, &up->global_alpha)) + return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; if (kp->clipcount) { @@ -84,11 +84,11 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount) || - put_user(kp->global_alpha, &up->global_alpha)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->chromakey, &up->chromakey) || + put_user(kp->clipcount, &up->clipcount) || + put_user(kp->global_alpha, &up->global_alpha)) + return -EFAULT; return 0; } @@ -100,7 +100,7 @@ static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -115,7 +115,7 @@ static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pi } static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) + struct v4l2_pix_format_mplane __user *up) { if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) return -EFAULT; @@ -238,7 +238,7 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return get_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -287,7 +287,7 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return put_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); + kp->type); return -EINVAL; } } @@ -321,7 +321,7 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || - get_user(kp->index, &up->index)) + get_user(kp->index, &up->index)) return -EFAULT; return 0; } @@ -329,13 +329,14 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || - copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->id, &up->id) || + copy_to_user(up->name, kp->name, 24) || + copy_to_user(&up->frameperiod, &kp->frameperiod, + sizeof(kp->frameperiod)) || + put_user(kp->framelines, &up->framelines) || + copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -375,14 +376,14 @@ struct v4l2_buffer32 { }; static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { void __user *up_pln; compat_long_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || - copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + copy_in_user(&up->data_offset, &up32->data_offset, + sizeof(__u32))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -396,7 +397,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; } @@ -404,23 +405,23 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ } static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, - enum v4l2_memory memory) + enum v4l2_memory memory) { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || - copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + copy_in_user(&up32->data_offset, &up->data_offset, + sizeof(__u32))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(__u32))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(int))) return -EFAULT; return 0; @@ -434,19 +435,19 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) - return -EFAULT; + get_user(kp->index, &up->index) || + get_user(kp->type, &up->type) || + get_user(kp->flags, &up->flags) || + get_user(kp->memory, &up->memory) || + get_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_OUTPUT(kp->type)) if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->field, &up->field) || + get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + get_user(kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -466,7 +467,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - kp->length * sizeof(struct v4l2_plane32))) + kp->length * sizeof(struct v4l2_plane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself @@ -490,12 +491,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user break; case V4L2_MEMORY_USERPTR: { - compat_long_t tmp; + compat_long_t tmp; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; + if (get_user(tmp, &up->m.userptr)) + return -EFAULT; - kp->m.userptr = (unsigned long)compat_ptr(tmp); + kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; case V4L2_MEMORY_OVERLAY: @@ -521,22 +522,23 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) - return -EFAULT; + put_user(kp->index, &up->index) || + put_user(kp->type, &up->type) || + put_user(kp->flags, &up->flags) || + put_user(kp->memory, &up->memory)) + return -EFAULT; if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) - return -EFAULT; + put_user(kp->field, &up->field) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || + copy_to_user(&up->timecode, &kp->timecode, + sizeof(struct v4l2_timecode)) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->reserved2, &up->reserved2) || + put_user(kp->reserved, &up->reserved) || + put_user(kp->length, &up->length)) + return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { num_planes = kp->length; @@ -600,11 +602,11 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || - get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) - return -EFAULT; + get_user(tmp, &up->base) || + get_user(kp->capability, &up->capability) || + get_user(kp->flags, &up->flags) || + copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + return -EFAULT; kp->base = (__force void *)compat_ptr(tmp); return 0; } @@ -614,11 +616,11 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame u32 tmp = (u32)((unsigned long)kp->base); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) - return -EFAULT; + put_user(tmp, &up->base) || + put_user(kp->capability, &up->capability) || + put_user(kp->flags, &up->flags) || + copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + return -EFAULT; return 0; } @@ -694,12 +696,12 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->which, &up->which) || + get_user(kp->count, &up->count) || + get_user(kp->error_idx, &up->error_idx) || + copy_from_user(kp->reserved, up->reserved, + sizeof(kp->reserved))) + return -EFAULT; if (kp->count == 0) { kp->controls = NULL; return 0; @@ -710,7 +712,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_READ, ucontrols, - kp->count * sizeof(struct v4l2_ext_control32))) + kp->count * sizeof(struct v4l2_ext_control32))) return -EFAULT; kcontrols = compat_alloc_user_space(kp->count * sizeof(struct v4l2_ext_control)); @@ -746,11 +748,11 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->which, &up->which) || + put_user(kp->count, &up->count) || + put_user(kp->error_idx, &up->error_idx) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; if (!kp->count) return 0; @@ -758,7 +760,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; ucontrols = compat_ptr(p); if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + n * sizeof(struct v4l2_ext_control32))) return -EFAULT; while (--n >= 0) { @@ -796,15 +798,15 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) - return -EFAULT; + put_user(kp->type, &up->type) || + copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || + put_user(kp->pending, &up->pending) || + put_user(kp->sequence, &up->sequence) || + put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || + put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || + put_user(kp->id, &up->id) || + copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + return -EFAULT; return 0; } @@ -821,12 +823,12 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp; if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || - get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) - return -EFAULT; + get_user(kp->pad, &up->pad) || + get_user(kp->start_block, &up->start_block) || + get_user(kp->blocks, &up->blocks) || + get_user(tmp, &up->edid) || + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + return -EFAULT; kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } @@ -836,12 +838,12 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) u32 tmp = (u32)((unsigned long)kp->edid); if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) - return -EFAULT; + put_user(kp->pad, &up->pad) || + put_user(kp->start_block, &up->start_block) || + put_user(kp->blocks, &up->blocks) || + put_user(tmp, &up->edid) || + copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + return -EFAULT; return 0; } From a38becb087b20a34bdbfa63567f69671849025ad Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:26 +0100 Subject: [PATCH 370/770] media: v4l2-compat-ioctl32.c: move 'helper' functions to __get/put_v4l2_format32 commit 486c521510c44a04cd756a9267e7d1e271c8a4ba upstream. These helper functions do not really help. Move the code to the __get/put_v4l2_format32 functions. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 124 ++++-------------- 1 file changed, 24 insertions(+), 100 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 44644a2ea3e9..297c924aefce 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -92,92 +92,6 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp, - struct v4l2_pix_format_mplane __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_sdr_format))) - return -EFAULT; - return 0; -} - -static inline int get_v4l2_meta_format(struct v4l2_meta_format *kp, struct v4l2_meta_format __user *up) -{ - if (copy_from_user(kp, up, sizeof(struct v4l2_meta_format))) - return -EFAULT; - return 0; -} - -static inline int put_v4l2_meta_format(struct v4l2_meta_format *kp, struct v4l2_meta_format __user *up) -{ - if (copy_to_user(up, kp, sizeof(struct v4l2_meta_format))) - return -EFAULT; - return 0; -} - struct v4l2_format32 { __u32 type; /* enum v4l2_buf_type */ union { @@ -217,25 +131,30 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_from_user(&kp->fmt.pix, &up->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return get_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; case V4L2_BUF_TYPE_META_CAPTURE: - return get_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); + return copy_from_user(&kp->fmt.meta, &up->fmt.meta, + sizeof(kp->fmt.meta)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); @@ -266,25 +185,30 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix); + return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return put_v4l2_pix_format_mplane(&kp->fmt.pix_mp, - &up->fmt.pix_mp); + return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi); + return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced); + return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr); + return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; case V4L2_BUF_TYPE_META_CAPTURE: - return put_v4l2_meta_format(&kp->fmt.meta, &up->fmt.meta); + return copy_to_user(&up->fmt.meta, &kp->fmt.meta, + sizeof(kp->fmt.meta)) ? -EFAULT : 0; default: pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", kp->type); From 16c25072560fb30df50c766260b69b14e3795194 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:27 +0100 Subject: [PATCH 371/770] media: v4l2-compat-ioctl32.c: avoid sizeof(type) commit 333b1e9f96ce05f7498b581509bb30cde03018bf upstream. Instead of doing sizeof(struct foo) use sizeof(*up). There even were cases where 4 * sizeof(__u32) was used instead of sizeof(kp->reserved), which is very dangerous when the size of the reserved array changes. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 79 +++++++++---------- 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 297c924aefce..d156b8975f1e 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -48,7 +48,7 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || get_user(kp->chromakey, &up->chromakey) || @@ -66,7 +66,7 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip)); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); kp->clips = kclips; while (--n >= 0) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) @@ -164,14 +164,14 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; return __get_v4l2_format32(kp, up); } static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; return __get_v4l2_format32(&kp->format, &up->format); @@ -218,14 +218,14 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; @@ -244,7 +244,7 @@ struct v4l2_standard32 { static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index)) return -EFAULT; return 0; @@ -252,14 +252,14 @@ static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, 24) || + copy_to_user(up->name, kp->name, sizeof(up->name)) || copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) || put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -307,7 +307,7 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; if (memory == V4L2_MEMORY_USERPTR) { @@ -317,11 +317,11 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; } else if (memory == V4L2_MEMORY_DMABUF) { - if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(__u32))) + sizeof(up32->m.mem_offset))) return -EFAULT; } @@ -333,19 +333,19 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ { if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, - sizeof(__u32))) + sizeof(up->data_offset))) return -EFAULT; /* For MMAP, driver might've set up the offset, so copy it back. * USERPTR stays the same (was userspace-provided), so no copying. */ if (memory == V4L2_MEMORY_MMAP) if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, - sizeof(__u32))) + sizeof(up->m.mem_offset))) return -EFAULT; /* For DMABUF, driver might've set up the fd, so copy it back. */ if (memory == V4L2_MEMORY_DMABUF) if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(int))) + sizeof(up->m.fd))) return -EFAULT; return 0; @@ -358,7 +358,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user compat_caddr_t p; int ret; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->index, &up->index) || get_user(kp->type, &up->type) || get_user(kp->flags, &up->flags) || @@ -370,8 +370,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (get_user(kp->bytesused, &up->bytesused) || get_user(kp->field, &up->field) || get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, - &up->timestamp.tv_usec)) + get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) return -EFAULT; if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { @@ -391,13 +390,12 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - kp->length * sizeof(struct v4l2_plane32))) + kp->length * sizeof(*uplane32))) return -EFAULT; /* We don't really care if userspace decides to kill itself * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(kp->length * - sizeof(struct v4l2_plane)); + uplane = compat_alloc_user_space(kp->length * sizeof(*uplane)); kp->m.planes = (__force struct v4l2_plane *)uplane; for (num_planes = 0; num_planes < kp->length; num_planes++) { @@ -445,7 +443,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user int num_planes; int ret; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->index, &up->index) || put_user(kp->type, &up->type) || put_user(kp->flags, &up->flags) || @@ -456,8 +454,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user put_user(kp->field, &up->field) || put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, - sizeof(struct v4l2_timecode)) || + copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || put_user(kp->sequence, &up->sequence) || put_user(kp->reserved2, &up->reserved2) || put_user(kp->reserved, &up->reserved) || @@ -525,7 +522,7 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || get_user(kp->capability, &up->capability) || get_user(kp->flags, &up->flags) || @@ -539,7 +536,7 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame { u32 tmp = (u32)((unsigned long)kp->base); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(tmp, &up->base) || put_user(kp->capability, &up->capability) || put_user(kp->flags, &up->flags) || @@ -564,14 +561,14 @@ struct v4l2_input32 { Otherwise it is identical to the 32-bit version. */ static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(struct v4l2_input32))) + if (copy_from_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(struct v4l2_input32))) + if (copy_to_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -619,12 +616,11 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext unsigned int n; compat_caddr_t p; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->which, &up->which) || get_user(kp->count, &up->count) || get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, - sizeof(kp->reserved))) + copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; if (kp->count == 0) { kp->controls = NULL; @@ -635,11 +631,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, - kp->count * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_READ, ucontrols, kp->count * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(kp->count * - sizeof(struct v4l2_ext_control)); + kcontrols = compat_alloc_user_space(kp->count * sizeof(*kcontrols)); kp->controls = (__force struct v4l2_ext_control *)kcontrols; for (n = 0; n < kp->count; n++) { u32 id; @@ -671,7 +665,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext int n = kp->count; compat_caddr_t p; - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->which, &up->which) || put_user(kp->count, &up->count) || put_user(kp->error_idx, &up->error_idx) || @@ -683,8 +677,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, - n * sizeof(struct v4l2_ext_control32))) + if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) return -EFAULT; while (--n >= 0) { @@ -721,7 +714,7 @@ struct v4l2_event32 { static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) { - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->type, &up->type) || copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || put_user(kp->pending, &up->pending) || @@ -729,7 +722,7 @@ static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *u put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32))) + copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return 0; } @@ -746,7 +739,7 @@ static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp; - if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(kp->pad, &up->pad) || get_user(kp->start_block, &up->start_block) || get_user(kp->blocks, &up->blocks) || @@ -761,7 +754,7 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) { u32 tmp = (u32)((unsigned long)kp->edid); - if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) || + if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || put_user(kp->pad, &up->pad) || put_user(kp->start_block, &up->start_block) || put_user(kp->blocks, &up->blocks) || From 3df1197724661c15d23b8359ec42af04a648b848 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:28 +0100 Subject: [PATCH 372/770] media: v4l2-compat-ioctl32.c: copy m.userptr in put_v4l2_plane32 commit 8ed5a59dcb47a6f76034ee760b36e089f3e82529 upstream. The struct v4l2_plane32 should set m.userptr as well. The same happens in v4l2_buffer32 and v4l2-compliance tests for this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index d156b8975f1e..62d44fab5671 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -310,19 +310,24 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ sizeof(up->data_offset))) return -EFAULT; - if (memory == V4L2_MEMORY_USERPTR) { + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: + if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, + sizeof(up32->m.mem_offset))) + return -EFAULT; + break; + case V4L2_MEMORY_USERPTR: if (get_user(p, &up32->m.userptr)) return -EFAULT; up_pln = compat_ptr(p); if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; - } else if (memory == V4L2_MEMORY_DMABUF) { + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd))) return -EFAULT; - } else { - if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, - sizeof(up32->m.mem_offset))) - return -EFAULT; + break; } return 0; @@ -331,22 +336,32 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { + unsigned long p; + if (copy_in_user(up32, up, 2 * sizeof(__u32)) || copy_in_user(&up32->data_offset, &up->data_offset, sizeof(up->data_offset))) return -EFAULT; - /* For MMAP, driver might've set up the offset, so copy it back. - * USERPTR stays the same (was userspace-provided), so no copying. */ - if (memory == V4L2_MEMORY_MMAP) + switch (memory) { + case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, sizeof(up->m.mem_offset))) return -EFAULT; - /* For DMABUF, driver might've set up the fd, so copy it back. */ - if (memory == V4L2_MEMORY_DMABUF) + break; + case V4L2_MEMORY_USERPTR: + if (get_user(p, &up->m.userptr) || + put_user((compat_ulong_t)ptr_to_compat((__force void *)p), + &up32->m.userptr)) + return -EFAULT; + break; + case V4L2_MEMORY_DMABUF: if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; + break; + } return 0; } @@ -408,6 +423,7 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -421,10 +437,6 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user kp->m.userptr = (unsigned long)compat_ptr(tmp); } break; - case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (get_user(kp->m.fd, &up->m.fd)) return -EFAULT; @@ -481,6 +493,7 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user } else { switch (kp->memory) { case V4L2_MEMORY_MMAP: + case V4L2_MEMORY_OVERLAY: if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; @@ -488,10 +501,6 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (put_user(kp->m.userptr, &up->m.userptr)) return -EFAULT; break; - case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) - return -EFAULT; - break; case V4L2_MEMORY_DMABUF: if (put_user(kp->m.fd, &up->m.fd)) return -EFAULT; From f1b572d34648313a07856e5b315d8329eb649c93 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:29 +0100 Subject: [PATCH 373/770] media: v4l2-compat-ioctl32.c: fix ctrl_is_pointer commit b8c601e8af2d08f733d74defa8465303391bb930 upstream. ctrl_is_pointer just hardcoded two known string controls, but that caused problems when using e.g. custom controls that use a pointer for the payload. Reimplement this function: it now finds the v4l2_ctrl (if the driver uses the control framework) or it calls vidioc_query_ext_ctrl (if the driver implements that directly). In both cases it can now check if the control is a pointer control or not. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 62d44fab5671..d11334712c65 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -601,24 +603,39 @@ struct v4l2_ext_control32 { }; } __attribute__ ((packed)); -/* The following function really belong in v4l2-common, but that causes - a circular dependency between modules. We need to think about this, but - for now this will do. */ - -/* Return non-zero if this control is a pointer type. Currently only - type STRING is a pointer type. */ -static inline int ctrl_is_pointer(u32 id) +/* Return true if this control is a pointer type. */ +static inline bool ctrl_is_pointer(struct file *file, u32 id) { - switch (id) { - case V4L2_CID_RDS_TX_PS_NAME: - case V4L2_CID_RDS_TX_RADIO_TEXT: - return 1; - default: - return 0; + struct video_device *vdev = video_devdata(file); + struct v4l2_fh *fh = NULL; + struct v4l2_ctrl_handler *hdl = NULL; + struct v4l2_query_ext_ctrl qec = { id }; + const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; + + if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) + fh = file->private_data; + + if (fh && fh->ctrl_handler) + hdl = fh->ctrl_handler; + else if (vdev->ctrl_handler) + hdl = vdev->ctrl_handler; + + if (hdl) { + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id); + + return ctrl && ctrl->is_ptr; } + + if (!ops->vidioc_query_ext_ctrl) + return false; + + return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && + (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } -static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int get_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; @@ -651,7 +668,7 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return -EFAULT; if (get_user(id, &kcontrols->id)) return -EFAULT; - if (ctrl_is_pointer(id)) { + if (ctrl_is_pointer(file, id)) { void __user *s; if (get_user(p, &ucontrols->string)) @@ -666,7 +683,9 @@ static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext return 0; } -static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up) +static int put_v4l2_ext_controls32(struct file *file, + struct v4l2_ext_controls *kp, + struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols = @@ -698,7 +717,7 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext /* Do not modify the pointer when copying a pointer control. The contents of the pointer was changed, not the pointer itself. */ - if (ctrl_is_pointer(id)) + if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; @@ -912,7 +931,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(&karg.v2ecs, up); + err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); compatible_arg = 0; break; case VIDIOC_DQEVENT: @@ -939,7 +958,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(&karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) err = -EFAULT; break; case VIDIOC_S_EDID: From a20ab4df701b76b10eedcdcbe0a108462d76d6b6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:30 +0100 Subject: [PATCH 374/770] media: v4l2-compat-ioctl32.c: copy clip list in put_v4l2_window32 commit a751be5b142ef6bcbbb96d9899516f4d9c8d0ef4 upstream. put_v4l2_window32() didn't copy back the clip list to userspace. Drivers can update the clip rectangles, so this should be done. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index d11334712c65..de3e99dc3caa 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -50,6 +50,11 @@ struct v4l2_window32 { static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip32 __user *uclips; + struct v4l2_clip __user *kclips; + compat_caddr_t p; + u32 n; + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || copy_from_user(&kp->w, &up->w, sizeof(up->w)) || get_user(kp->field, &up->field) || @@ -59,38 +64,54 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return -EFAULT; if (kp->clipcount > 2048) return -EINVAL; - if (kp->clipcount) { - struct v4l2_clip32 __user *uclips; - struct v4l2_clip __user *kclips; - int n = kp->clipcount; - compat_caddr_t p; - - if (get_user(p, &up->clips)) - return -EFAULT; - uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (--n >= 0) { - if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) - return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) - return -EFAULT; - uclips += 1; - kclips += 1; - } - } else + if (!kp->clipcount) { kp->clips = NULL; + return 0; + } + + n = kp->clipcount; + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + kclips = compat_alloc_user_space(n * sizeof(*kclips)); + kp->clips = kclips; + while (n--) { + if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) + return -EFAULT; + if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + return -EFAULT; + uclips++; + kclips++; + } return 0; } static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) { + struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip32 __user *uclips; + u32 n = kp->clipcount; + compat_caddr_t p; + if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || put_user(kp->field, &up->field) || put_user(kp->chromakey, &up->chromakey) || put_user(kp->clipcount, &up->clipcount) || put_user(kp->global_alpha, &up->global_alpha)) return -EFAULT; + + if (!kp->clipcount) + return 0; + + if (get_user(p, &up->clips)) + return -EFAULT; + uclips = compat_ptr(p); + while (n--) { + if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) + return -EFAULT; + uclips++; + kclips++; + } return 0; } From efabe94f8a0d1aedb1df67c2e1a56d310baf7ce2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:31 +0100 Subject: [PATCH 375/770] media: v4l2-compat-ioctl32.c: drop pr_info for unknown buffer type commit 169f24ca68bf0f247d111aef07af00dd3a02ae88 upstream. There is nothing wrong with using an unknown buffer type. So stop spamming the kernel log whenever this happens. The kernel will just return -EINVAL to signal this. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index de3e99dc3caa..bef9c990c9bd 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -179,8 +179,6 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_from_user(&kp->fmt.meta, &up->fmt.meta, sizeof(kp->fmt.meta)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } @@ -233,8 +231,6 @@ static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us return copy_to_user(&up->fmt.meta, &kp->fmt.meta, sizeof(kp->fmt.meta)) ? -EFAULT : 0; default: - pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n", - kp->type); return -EINVAL; } } From b3f69836c92b336cd1ca08bcedf3250265e66e46 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:32 +0100 Subject: [PATCH 376/770] media: v4l2-compat-ioctl32.c: don't copy back the result for certain errors commit d83a8243aaefe62ace433e4384a4f077bed86acb upstream. Some ioctls need to copy back the result even if the ioctl returned an error. However, don't do this for the error code -ENOTTY. It makes no sense in that cases. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index bef9c990c9bd..5603c7c1edd5 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -968,6 +968,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar set_fs(old_fs); } + if (err == -ENOTTY) + return err; + /* Special case: even after an error we need to put the results back for these ioctls since the error_idx will contain information on which control failed. */ From ad01b40bb9be0987e883655205606c710a8785a5 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Wed, 14 Feb 2018 12:44:33 +0100 Subject: [PATCH 377/770] media: v4l2-compat-ioctl32.c: refactor compat ioctl32 logic commit a1dfb4c48cc1e64eeb7800a27c66a6f7e88d075a upstream. The 32-bit compat v4l2 ioctl handling is implemented based on its 64-bit equivalent. It converts 32-bit data structures into its 64-bit equivalents and needs to provide the data to the 64-bit ioctl in user space memory which is commonly allocated using compat_alloc_user_space(). However, due to how that function is implemented, it can only be called a single time for every syscall invocation. Supposedly to avoid this limitation, the existing code uses a mix of memory from the kernel stack and memory allocated through compat_alloc_user_space(). Under normal circumstances, this would not work, because the 64-bit ioctl expects all pointers to point to user space memory. As a workaround, set_fs(KERNEL_DS) is called to temporarily disable this extra safety check and allow kernel pointers. However, this might introduce a security vulnerability: The result of the 32-bit to 64-bit conversion is writeable by user space because the output buffer has been allocated via compat_alloc_user_space(). A malicious user space process could then manipulate pointers inside this output buffer, and due to the previous set_fs(KERNEL_DS) call, functions like get_user() or put_user() no longer prevent kernel memory access. The new approach is to pre-calculate the total amount of user space memory that is needed, allocate it using compat_alloc_user_space() and then divide up the allocated memory to accommodate all data structures that need to be converted. An alternative approach would have been to retain the union type karg that they allocated on the kernel stack in do_video_ioctl(), copy all data from user space into karg and then back to user space. However, we decided against this approach because it does not align with other compat syscall implementations. Instead, we tried to replicate the get_user/put_user pairs as found in other places in the kernel: if (get_user(clipcount, &up->clipcount) || put_user(clipcount, &kp->clipcount)) return -EFAULT; Notes from hans.verkuil@cisco.com: This patch was taken from: https://github.com/LineageOS/android_kernel_samsung_apq8084/commit/97b733953c06e4f0398ade18850f0817778255f7 Clearly nobody could be bothered to upstream this patch or at minimum tell us :-( We only heard about this a week ago. This patch was rebased and cleaned up. Compared to the original I also swapped the order of the convert_in_user arguments so that they matched copy_in_user. It was hard to review otherwise. I also replaced the ALLOC_USER_SPACE/ALLOC_AND_GET by a normal function. Fixes: 6b5a9492ca ("v4l: introduce string control support.") Signed-off-by: Daniel Mentz Co-developed-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 774 +++++++++++------- 1 file changed, 494 insertions(+), 280 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 5603c7c1edd5..ec4a69728ed4 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -22,6 +22,14 @@ #include #include +/* Use the same argument order as copy_in_user */ +#define assign_in_user(to, from) \ +({ \ + typeof(*from) __assign_tmp; \ + \ + get_user(__assign_tmp, from) || put_user(__assign_tmp, to); \ +}) + static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret = -ENOIOCTLCMD; @@ -48,37 +56,41 @@ struct v4l2_window32 { __u8 global_alpha; }; -static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int get_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_clip32 __user *uclips; struct v4l2_clip __user *kclips; compat_caddr_t p; - u32 n; + u32 clipcount; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(&kp->w, &up->w, sizeof(up->w)) || - get_user(kp->field, &up->field) || - get_user(kp->chromakey, &up->chromakey) || - get_user(kp->clipcount, &up->clipcount) || - get_user(kp->global_alpha, &up->global_alpha)) + copy_in_user(&kp->w, &up->w, sizeof(up->w)) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->chromakey, &up->chromakey) || + assign_in_user(&kp->global_alpha, &up->global_alpha) || + get_user(clipcount, &up->clipcount) || + put_user(clipcount, &kp->clipcount)) return -EFAULT; - if (kp->clipcount > 2048) + if (clipcount > 2048) return -EINVAL; - if (!kp->clipcount) { - kp->clips = NULL; - return 0; - } + if (!clipcount) + return put_user(NULL, &kp->clips); - n = kp->clipcount; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - kclips = compat_alloc_user_space(n * sizeof(*kclips)); - kp->clips = kclips; - while (n--) { + if (aux_space < clipcount * sizeof(*kclips)) + return -EFAULT; + kclips = aux_buf; + if (put_user(kclips, &kp->clips)) + return -EFAULT; + + while (clipcount--) { if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c))) return -EFAULT; - if (put_user(n ? kclips + 1 : NULL, &kclips->next)) + if (put_user(clipcount ? kclips + 1 : NULL, &kclips->next)) return -EFAULT; uclips++; kclips++; @@ -86,27 +98,28 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user return 0; } -static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up) +static int put_v4l2_window32(struct v4l2_window __user *kp, + struct v4l2_window32 __user *up) { struct v4l2_clip __user *kclips = kp->clips; struct v4l2_clip32 __user *uclips; - u32 n = kp->clipcount; compat_caddr_t p; + u32 clipcount; - if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) || - put_user(kp->field, &up->field) || - put_user(kp->chromakey, &up->chromakey) || - put_user(kp->clipcount, &up->clipcount) || - put_user(kp->global_alpha, &up->global_alpha)) + if (copy_in_user(&up->w, &kp->w, sizeof(kp->w)) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->chromakey, &kp->chromakey) || + assign_in_user(&up->global_alpha, &kp->global_alpha) || + get_user(clipcount, &kp->clipcount) || + put_user(clipcount, &up->clipcount)) return -EFAULT; - - if (!kp->clipcount) + if (!clipcount) return 0; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); - while (n--) { + while (clipcount--) { if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c))) return -EFAULT; uclips++; @@ -146,107 +159,164 @@ struct v4l2_create_buffers32 { __u32 reserved[8]; }; -static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int __bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { - if (get_user(kp->type, &up->type)) + u32 type; + + if (get_user(type, &up->type)) return -EFAULT; - switch (kp->type) { - case V4L2_BUF_TYPE_VIDEO_CAPTURE: - case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_from_user(&kp->fmt.pix, &up->fmt.pix, - sizeof(kp->fmt.pix)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: - case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_from_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, - sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; + switch (type) { case V4L2_BUF_TYPE_VIDEO_OVERLAY: - case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return get_v4l2_window32(&kp->fmt.win, &up->fmt.win); - case V4L2_BUF_TYPE_VBI_CAPTURE: - case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_from_user(&kp->fmt.vbi, &up->fmt.vbi, - sizeof(kp->fmt.vbi)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: - case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_from_user(&kp->fmt.sliced, &up->fmt.sliced, - sizeof(kp->fmt.sliced)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_SDR_CAPTURE: - case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_from_user(&kp->fmt.sdr, &up->fmt.sdr, - sizeof(kp->fmt.sdr)) ? -EFAULT : 0; - case V4L2_BUF_TYPE_META_CAPTURE: - return copy_from_user(&kp->fmt.meta, &up->fmt.meta, - sizeof(kp->fmt.meta)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: { + u32 clipcount; + + if (get_user(clipcount, &up->fmt.win.clipcount)) + return -EFAULT; + if (clipcount > 2048) + return -EINVAL; + *size = clipcount * sizeof(struct v4l2_clip); + return 0; + } default: - return -EINVAL; + *size = 0; + return 0; } } -static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size) { if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; - return __get_v4l2_format32(kp, up); + return __bufsize_v4l2_format(up, size); } -static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int __get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) { - if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) - return -EFAULT; - return __get_v4l2_format32(&kp->format, &up->format); -} + u32 type; -static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) -{ - if (put_user(kp->type, &up->type)) + if (get_user(type, &up->type) || put_user(type, &kp->type)) return -EFAULT; - switch (kp->type) { + switch (type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: - return copy_to_user(&up->fmt.pix, &kp->fmt.pix, + return copy_in_user(&kp->fmt.pix, &up->fmt.pix, sizeof(kp->fmt.pix)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: - return copy_to_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + return copy_in_user(&kp->fmt.pix_mp, &up->fmt.pix_mp, sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; case V4L2_BUF_TYPE_VIDEO_OVERLAY: case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: - return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); + return get_v4l2_window32(&kp->fmt.win, &up->fmt.win, + aux_buf, aux_space); case V4L2_BUF_TYPE_VBI_CAPTURE: case V4L2_BUF_TYPE_VBI_OUTPUT: - return copy_to_user(&up->fmt.vbi, &kp->fmt.vbi, + return copy_in_user(&kp->fmt.vbi, &up->fmt.vbi, sizeof(kp->fmt.vbi)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: - return copy_to_user(&up->fmt.sliced, &kp->fmt.sliced, + return copy_in_user(&kp->fmt.sliced, &up->fmt.sliced, sizeof(kp->fmt.sliced)) ? -EFAULT : 0; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - return copy_to_user(&up->fmt.sdr, &kp->fmt.sdr, + return copy_in_user(&kp->fmt.sdr, &up->fmt.sdr, sizeof(kp->fmt.sdr)) ? -EFAULT : 0; case V4L2_BUF_TYPE_META_CAPTURE: - return copy_to_user(&up->fmt.meta, &kp->fmt.meta, + return copy_in_user(&kp->fmt.meta, &up->fmt.meta, sizeof(kp->fmt.meta)) ? -EFAULT : 0; default: return -EINVAL; } } -static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) +static int get_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __get_v4l2_format32(kp, up, aux_buf, aux_space); +} + +static int bufsize_v4l2_create(struct v4l2_create_buffers32 __user *up, + u32 *size) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up))) + return -EFAULT; + return __bufsize_v4l2_format(&up->format, size); +} + +static int get_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + copy_in_user(kp, up, + offsetof(struct v4l2_create_buffers32, format))) + return -EFAULT; + return __get_v4l2_format32(&kp->format, &up->format, + aux_buf, aux_space); +} + +static int __put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) +{ + u32 type; + + if (get_user(type, &kp->type)) + return -EFAULT; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + return copy_in_user(&up->fmt.pix, &kp->fmt.pix, + sizeof(kp->fmt.pix)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: + return copy_in_user(&up->fmt.pix_mp, &kp->fmt.pix_mp, + sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_VIDEO_OVERLAY: + case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: + return put_v4l2_window32(&kp->fmt.win, &up->fmt.win); + case V4L2_BUF_TYPE_VBI_CAPTURE: + case V4L2_BUF_TYPE_VBI_OUTPUT: + return copy_in_user(&up->fmt.vbi, &kp->fmt.vbi, + sizeof(kp->fmt.vbi)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: + case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: + return copy_in_user(&up->fmt.sliced, &kp->fmt.sliced, + sizeof(kp->fmt.sliced)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_SDR_CAPTURE: + case V4L2_BUF_TYPE_SDR_OUTPUT: + return copy_in_user(&up->fmt.sdr, &kp->fmt.sdr, + sizeof(kp->fmt.sdr)) ? -EFAULT : 0; + case V4L2_BUF_TYPE_META_CAPTURE: + return copy_in_user(&up->fmt.meta, &kp->fmt.meta, + sizeof(kp->fmt.meta)) ? -EFAULT : 0; + default: + return -EINVAL; + } +} + +static int put_v4l2_format32(struct v4l2_format __user *kp, + struct v4l2_format32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; return __put_v4l2_format32(kp, up); } -static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) +static int put_v4l2_create32(struct v4l2_create_buffers __user *kp, + struct v4l2_create_buffers32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + copy_in_user(up, kp, + offsetof(struct v4l2_create_buffers32, format)) || + copy_in_user(up->reserved, kp->reserved, sizeof(kp->reserved))) return -EFAULT; return __put_v4l2_format32(&kp->format, &up->format); } @@ -260,25 +330,27 @@ struct v4l2_standard32 { __u32 reserved[4]; }; -static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int get_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { /* other fields are not set by the user, nor used by the driver */ if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index)) + assign_in_user(&kp->index, &up->index)) return -EFAULT; return 0; } -static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up) +static int put_v4l2_standard32(struct v4l2_standard __user *kp, + struct v4l2_standard32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->id, &up->id) || - copy_to_user(up->name, kp->name, sizeof(up->name)) || - copy_to_user(&up->frameperiod, &kp->frameperiod, - sizeof(kp->frameperiod)) || - put_user(kp->framelines, &up->framelines) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->index, &kp->index) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->name, kp->name, sizeof(up->name)) || + copy_in_user(&up->frameperiod, &kp->frameperiod, + sizeof(up->frameperiod)) || + assign_in_user(&up->framelines, &kp->framelines) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -318,11 +390,11 @@ struct v4l2_buffer32 { __u32 reserved; }; -static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int get_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { - void __user *up_pln; - compat_long_t p; + compat_ulong_t p; if (copy_in_user(up, up32, 2 * sizeof(__u32)) || copy_in_user(&up->data_offset, &up32->data_offset, @@ -337,10 +409,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (get_user(p, &up32->m.userptr)) - return -EFAULT; - up_pln = compat_ptr(p); - if (put_user((unsigned long)up_pln, &up->m.userptr)) + if (get_user(p, &up32->m.userptr) || + put_user((unsigned long)compat_ptr(p), &up->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: @@ -352,7 +422,8 @@ static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32, +static int put_v4l2_plane32(struct v4l2_plane __user *up, + struct v4l2_plane32 __user *up32, enum v4l2_memory memory) { unsigned long p; @@ -376,8 +447,7 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (copy_in_user(&up32->m.fd, &up->m.fd, - sizeof(up->m.fd))) + if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd))) return -EFAULT; break; } @@ -385,79 +455,121 @@ static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __ return 0; } -static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int bufsize_v4l2_buffer(struct v4l2_buffer32 __user *up, u32 *size) { + u32 type; + u32 length; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(type, &up->type) || + get_user(length, &up->length)) + return -EFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + if (length > VIDEO_MAX_PLANES) + return -EINVAL; + + /* + * We don't really care if userspace decides to kill itself + * by passing a very big length value + */ + *size = length * sizeof(struct v4l2_plane); + } else { + *size = 0; + } + return 0; +} + +static int get_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up, + void __user *aux_buf, u32 aux_space) +{ + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; int ret; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->index, &up->index) || - get_user(kp->type, &up->type) || - get_user(kp->flags, &up->flags) || - get_user(kp->memory, &up->memory) || - get_user(kp->length, &up->length)) + assign_in_user(&kp->index, &up->index) || + get_user(type, &up->type) || + put_user(type, &kp->type) || + assign_in_user(&kp->flags, &up->flags) || + get_user(memory, &up->memory) || + put_user(memory, &kp->memory) || + get_user(length, &up->length) || + put_user(length, &kp->length)) return -EFAULT; - if (V4L2_TYPE_IS_OUTPUT(kp->type)) - if (get_user(kp->bytesused, &up->bytesused) || - get_user(kp->field, &up->field) || - get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - get_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec)) + if (V4L2_TYPE_IS_OUTPUT(type)) + if (assign_in_user(&kp->bytesused, &up->bytesused) || + assign_in_user(&kp->field, &up->field) || + assign_in_user(&kp->timestamp.tv_sec, + &up->timestamp.tv_sec) || + assign_in_user(&kp->timestamp.tv_usec, + &up->timestamp.tv_usec)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - unsigned int num_planes; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; - if (kp->length == 0) { - kp->m.planes = NULL; - /* num_planes == 0 is legal, e.g. when userspace doesn't - * need planes array on DQBUF*/ - return 0; - } else if (kp->length > VIDEO_MAX_PLANES) { - return -EINVAL; + if (num_planes == 0) { + /* + * num_planes == 0 is legal, e.g. when userspace doesn't + * need planes array on DQBUF + */ + return put_user(NULL, &kp->m.planes); } + if (num_planes > VIDEO_MAX_PLANES) + return -EINVAL; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); if (!access_ok(VERIFY_READ, uplane32, - kp->length * sizeof(*uplane32))) + num_planes * sizeof(*uplane32))) return -EFAULT; - /* We don't really care if userspace decides to kill itself - * by passing a very big num_planes value */ - uplane = compat_alloc_user_space(kp->length * sizeof(*uplane)); - kp->m.planes = (__force struct v4l2_plane *)uplane; + /* + * We don't really care if userspace decides to kill itself + * by passing a very big num_planes value + */ + if (aux_space < num_planes * sizeof(*uplane)) + return -EFAULT; - for (num_planes = 0; num_planes < kp->length; num_planes++) { - ret = get_v4l2_plane32(uplane, uplane32, kp->memory); + uplane = aux_buf; + if (put_user((__force struct v4l2_plane *)uplane, + &kp->m.planes)) + return -EFAULT; + + while (num_planes--) { + ret = get_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; - ++uplane; - ++uplane32; + uplane++; + uplane32++; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (get_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&kp->m.offset, &up->m.offset)) return -EFAULT; break; - case V4L2_MEMORY_USERPTR: - { - compat_long_t tmp; + case V4L2_MEMORY_USERPTR: { + compat_ulong_t userptr; - if (get_user(tmp, &up->m.userptr)) - return -EFAULT; - - kp->m.userptr = (unsigned long)compat_ptr(tmp); - } + if (get_user(userptr, &up->m.userptr) || + put_user((unsigned long)compat_ptr(userptr), + &kp->m.userptr)) + return -EFAULT; break; + } case V4L2_MEMORY_DMABUF: - if (get_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&kp->m.fd, &up->m.fd)) return -EFAULT; break; } @@ -466,62 +578,70 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user return 0; } -static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up) +static int put_v4l2_buffer32(struct v4l2_buffer __user *kp, + struct v4l2_buffer32 __user *up) { + u32 type; + u32 length; + enum v4l2_memory memory; struct v4l2_plane32 __user *uplane32; struct v4l2_plane __user *uplane; compat_caddr_t p; - int num_planes; int ret; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->index, &up->index) || - put_user(kp->type, &up->type) || - put_user(kp->flags, &up->flags) || - put_user(kp->memory, &up->memory)) + assign_in_user(&up->index, &kp->index) || + get_user(type, &kp->type) || + put_user(type, &up->type) || + assign_in_user(&up->flags, &kp->flags) || + get_user(memory, &kp->memory) || + put_user(memory, &up->memory)) return -EFAULT; - if (put_user(kp->bytesused, &up->bytesused) || - put_user(kp->field, &up->field) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) || - copy_to_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->reserved2, &up->reserved2) || - put_user(kp->reserved, &up->reserved) || - put_user(kp->length, &up->length)) + if (assign_in_user(&up->bytesused, &kp->bytesused) || + assign_in_user(&up->field, &kp->field) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_usec, &kp->timestamp.tv_usec) || + copy_in_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->reserved2, &kp->reserved2) || + assign_in_user(&up->reserved, &kp->reserved) || + get_user(length, &kp->length) || + put_user(length, &up->length)) return -EFAULT; - if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) { - num_planes = kp->length; + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + u32 num_planes = length; + if (num_planes == 0) return 0; - uplane = (__force struct v4l2_plane __user *)kp->m.planes; + if (get_user(uplane, ((__force struct v4l2_plane __user **)&kp->m.planes))) + return -EFAULT; if (get_user(p, &up->m.planes)) return -EFAULT; uplane32 = compat_ptr(p); - while (--num_planes >= 0) { - ret = put_v4l2_plane32(uplane, uplane32, kp->memory); + while (num_planes--) { + ret = put_v4l2_plane32(uplane, uplane32, memory); if (ret) return ret; ++uplane; ++uplane32; } } else { - switch (kp->memory) { + switch (memory) { case V4L2_MEMORY_MMAP: case V4L2_MEMORY_OVERLAY: - if (put_user(kp->m.offset, &up->m.offset)) + if (assign_in_user(&up->m.offset, &kp->m.offset)) return -EFAULT; break; case V4L2_MEMORY_USERPTR: - if (put_user(kp->m.userptr, &up->m.userptr)) + if (assign_in_user(&up->m.userptr, &kp->m.userptr)) return -EFAULT; break; case V4L2_MEMORY_DMABUF: - if (put_user(kp->m.fd, &up->m.fd)) + if (assign_in_user(&up->m.fd, &kp->m.fd)) return -EFAULT; break; } @@ -546,29 +666,32 @@ struct v4l2_framebuffer32 { } fmt; }; -static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp; + compat_caddr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || get_user(tmp, &up->base) || - get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags) || - copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) + put_user((__force void *)compat_ptr(tmp), &kp->base) || + assign_in_user(&kp->capability, &up->capability) || + assign_in_user(&kp->flags, &up->flags) || + copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt))) return -EFAULT; - kp->base = (__force void *)compat_ptr(tmp); return 0; } -static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) +static int put_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp, + struct v4l2_framebuffer32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->base); + void *base; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(tmp, &up->base) || - put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags) || - copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) + get_user(base, &kp->base) || + put_user(ptr_to_compat(base), &up->base) || + assign_in_user(&up->capability, &kp->capability) || + assign_in_user(&up->flags, &kp->flags) || + copy_in_user(&up->fmt, &kp->fmt, sizeof(kp->fmt))) return -EFAULT; return 0; } @@ -585,18 +708,22 @@ struct v4l2_input32 { __u32 reserved[3]; }; -/* The 64-bit v4l2_input struct has extra padding at the end of the struct. - Otherwise it is identical to the 32-bit version. */ -static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +/* + * The 64-bit v4l2_input struct has extra padding at the end of the struct. + * Otherwise it is identical to the 32-bit version. + */ +static inline int get_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_from_user(kp, up, sizeof(*up))) + if (copy_in_user(kp, up, sizeof(*up))) return -EFAULT; return 0; } -static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up) +static inline int put_v4l2_input32(struct v4l2_input __user *kp, + struct v4l2_input32 __user *up) { - if (copy_to_user(up, kp, sizeof(*up))) + if (copy_in_user(up, kp, sizeof(*up))) return -EFAULT; return 0; } @@ -650,41 +777,64 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD); } +static int bufsize_v4l2_ext_controls(struct v4l2_ext_controls32 __user *up, + u32 *size) +{ + u32 count; + + if (!access_ok(VERIFY_READ, up, sizeof(*up)) || + get_user(count, &up->count)) + return -EFAULT; + if (count > V4L2_CID_MAX_CTRLS) + return -EINVAL; + *size = count * sizeof(struct v4l2_ext_control); + return 0; +} + static int get_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, - struct v4l2_ext_controls32 __user *up) + struct v4l2_ext_controls __user *kp, + struct v4l2_ext_controls32 __user *up, + void __user *aux_buf, u32 aux_space) { struct v4l2_ext_control32 __user *ucontrols; struct v4l2_ext_control __user *kcontrols; - unsigned int n; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->which, &up->which) || - get_user(kp->count, &up->count) || - get_user(kp->error_idx, &up->error_idx) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + assign_in_user(&kp->which, &up->which) || + get_user(count, &up->count) || + put_user(count, &kp->count) || + assign_in_user(&kp->error_idx, &up->error_idx) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - if (kp->count == 0) { - kp->controls = NULL; - return 0; - } else if (kp->count > V4L2_CID_MAX_CTRLS) { + + if (count == 0) + return put_user(NULL, &kp->controls); + if (count > V4L2_CID_MAX_CTRLS) return -EINVAL; - } if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_READ, ucontrols, kp->count * sizeof(*ucontrols))) + if (!access_ok(VERIFY_READ, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - kcontrols = compat_alloc_user_space(kp->count * sizeof(*kcontrols)); - kp->controls = (__force struct v4l2_ext_control *)kcontrols; - for (n = 0; n < kp->count; n++) { + if (aux_space < count * sizeof(*kcontrols)) + return -EFAULT; + kcontrols = aux_buf; + if (put_user((__force struct v4l2_ext_control *)kcontrols, + &kp->controls)) + return -EFAULT; + + for (n = 0; n < count; n++) { u32 id; if (copy_in_user(kcontrols, ucontrols, sizeof(*ucontrols))) return -EFAULT; + if (get_user(id, &kcontrols->id)) return -EFAULT; + if (ctrl_is_pointer(file, id)) { void __user *s; @@ -701,43 +851,54 @@ static int get_v4l2_ext_controls32(struct file *file, } static int put_v4l2_ext_controls32(struct file *file, - struct v4l2_ext_controls *kp, + struct v4l2_ext_controls __user *kp, struct v4l2_ext_controls32 __user *up) { struct v4l2_ext_control32 __user *ucontrols; - struct v4l2_ext_control __user *kcontrols = - (__force struct v4l2_ext_control __user *)kp->controls; - int n = kp->count; + struct v4l2_ext_control __user *kcontrols; + u32 count; + u32 n; compat_caddr_t p; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->which, &up->which) || - put_user(kp->count, &up->count) || - put_user(kp->error_idx, &up->error_idx) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->which, &kp->which) || + get_user(count, &kp->count) || + put_user(count, &up->count) || + assign_in_user(&up->error_idx, &kp->error_idx) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)) || + get_user(kcontrols, &kp->controls)) return -EFAULT; - if (!kp->count) - return 0; + if (!count) + return 0; if (get_user(p, &up->controls)) return -EFAULT; ucontrols = compat_ptr(p); - if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(*ucontrols))) + if (!access_ok(VERIFY_WRITE, ucontrols, count * sizeof(*ucontrols))) return -EFAULT; - while (--n >= 0) { - unsigned size = sizeof(*ucontrols); + for (n = 0; n < count; n++) { + unsigned int size = sizeof(*ucontrols); u32 id; - if (get_user(id, &kcontrols->id)) + if (get_user(id, &kcontrols->id) || + put_user(id, &ucontrols->id) || + assign_in_user(&ucontrols->size, &kcontrols->size) || + copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2, + sizeof(ucontrols->reserved2))) return -EFAULT; - /* Do not modify the pointer when copying a pointer control. - The contents of the pointer was changed, not the pointer - itself. */ + + /* + * Do not modify the pointer when copying a pointer control. + * The contents of the pointer was changed, not the pointer + * itself. + */ if (ctrl_is_pointer(file, id)) size -= sizeof(ucontrols->value64); + if (copy_in_user(ucontrols, kcontrols, size)) return -EFAULT; + ucontrols++; kcontrols++; } @@ -757,17 +918,18 @@ struct v4l2_event32 { __u32 reserved[8]; }; -static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up) +static int put_v4l2_event32(struct v4l2_event __user *kp, + struct v4l2_event32 __user *up) { if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->type, &up->type) || - copy_to_user(&up->u, &kp->u, sizeof(kp->u)) || - put_user(kp->pending, &up->pending) || - put_user(kp->sequence, &up->sequence) || - put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) || - put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) || - put_user(kp->id, &up->id) || - copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved))) + assign_in_user(&up->type, &kp->type) || + copy_in_user(&up->u, &kp->u, sizeof(kp->u)) || + assign_in_user(&up->pending, &kp->pending) || + assign_in_user(&up->sequence, &kp->sequence) || + assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) || + assign_in_user(&up->timestamp.tv_nsec, &kp->timestamp.tv_nsec) || + assign_in_user(&up->id, &kp->id) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -780,31 +942,34 @@ struct v4l2_edid32 { compat_caddr_t edid; }; -static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int get_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp; + compat_uptr_t tmp; if (!access_ok(VERIFY_READ, up, sizeof(*up)) || - get_user(kp->pad, &up->pad) || - get_user(kp->start_block, &up->start_block) || - get_user(kp->blocks, &up->blocks) || + assign_in_user(&kp->pad, &up->pad) || + assign_in_user(&kp->start_block, &up->start_block) || + assign_in_user(&kp->blocks, &up->blocks) || get_user(tmp, &up->edid) || - copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved))) + put_user(compat_ptr(tmp), &kp->edid) || + copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved))) return -EFAULT; - kp->edid = (__force u8 *)compat_ptr(tmp); return 0; } -static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) +static int put_v4l2_edid32(struct v4l2_edid __user *kp, + struct v4l2_edid32 __user *up) { - u32 tmp = (u32)((unsigned long)kp->edid); + void *edid; if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) || - put_user(kp->pad, &up->pad) || - put_user(kp->start_block, &up->start_block) || - put_user(kp->blocks, &up->blocks) || - put_user(tmp, &up->edid) || - copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved))) + assign_in_user(&up->pad, &kp->pad) || + assign_in_user(&up->start_block, &kp->start_block) || + assign_in_user(&up->blocks, &kp->blocks) || + get_user(edid, &kp->edid) || + put_user(ptr_to_compat(edid), &up->edid) || + copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved))) return -EFAULT; return 0; } @@ -837,22 +1002,23 @@ static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up) #define VIDIOC_G_OUTPUT32 _IOR ('V', 46, s32) #define VIDIOC_S_OUTPUT32 _IOWR('V', 47, s32) +static int alloc_userspace(unsigned int size, u32 aux_space, + void __user **up_native) +{ + *up_native = compat_alloc_user_space(size + aux_space); + if (!*up_native) + return -ENOMEM; + if (clear_user(*up_native, size)) + return -EFAULT; + return 0; +} + static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - union { - struct v4l2_format v2f; - struct v4l2_buffer v2b; - struct v4l2_framebuffer v2fb; - struct v4l2_input v2i; - struct v4l2_standard v2s; - struct v4l2_ext_controls v2ecs; - struct v4l2_event v2ev; - struct v4l2_create_buffers v2crt; - struct v4l2_edid v2edid; - unsigned long vx; - int vi; - } karg; void __user *up = compat_ptr(arg); + void __user *up_native = NULL; + void __user *aux_buf; + u32 aux_space; int compatible_arg = 1; long err = 0; @@ -891,30 +1057,52 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_STREAMOFF: case VIDIOC_S_INPUT: case VIDIOC_S_OUTPUT: - err = get_user(karg.vi, (s32 __user *)up); + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); + if (!err && assign_in_user((unsigned int __user *)up_native, + (compat_uint_t __user *)up)) + err = -EFAULT; compatible_arg = 0; break; case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: + err = alloc_userspace(sizeof(unsigned int), 0, &up_native); compatible_arg = 0; break; case VIDIOC_G_EDID: case VIDIOC_S_EDID: - err = get_v4l2_edid32(&karg.v2edid, up); + err = alloc_userspace(sizeof(struct v4l2_edid), 0, &up_native); + if (!err) + err = get_v4l2_edid32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = get_v4l2_format32(&karg.v2f, up); + err = bufsize_v4l2_format(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_format), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_format); + err = get_v4l2_format32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_CREATE_BUFS: - err = get_v4l2_create32(&karg.v2crt, up); + err = bufsize_v4l2_create(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_create_buffers), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_create_buffers); + err = get_v4l2_create32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; @@ -922,36 +1110,63 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = get_v4l2_buffer32(&karg.v2b, up); + err = bufsize_v4l2_buffer(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_buffer), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_buffer); + err = get_v4l2_buffer32(up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_S_FBUF: - err = get_v4l2_framebuffer32(&karg.v2fb, up); + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); + if (!err) + err = get_v4l2_framebuffer32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_FBUF: + err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, + &up_native); compatible_arg = 0; break; case VIDIOC_ENUMSTD: - err = get_v4l2_standard32(&karg.v2s, up); + err = alloc_userspace(sizeof(struct v4l2_standard), 0, + &up_native); + if (!err) + err = get_v4l2_standard32(up_native, up); compatible_arg = 0; break; case VIDIOC_ENUMINPUT: - err = get_v4l2_input32(&karg.v2i, up); + err = alloc_userspace(sizeof(struct v4l2_input), 0, &up_native); + if (!err) + err = get_v4l2_input32(up_native, up); compatible_arg = 0; break; case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - err = get_v4l2_ext_controls32(file, &karg.v2ecs, up); + err = bufsize_v4l2_ext_controls(up, &aux_space); + if (!err) + err = alloc_userspace(sizeof(struct v4l2_ext_controls), + aux_space, &up_native); + if (!err) { + aux_buf = up_native + sizeof(struct v4l2_ext_controls); + err = get_v4l2_ext_controls32(file, up_native, up, + aux_buf, aux_space); + } compatible_arg = 0; break; case VIDIOC_DQEVENT: + err = alloc_userspace(sizeof(struct v4l2_event), 0, &up_native); compatible_arg = 0; break; } @@ -960,29 +1175,26 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar if (compatible_arg) err = native_ioctl(file, cmd, (unsigned long)up); - else { - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - err = native_ioctl(file, cmd, (unsigned long)&karg); - set_fs(old_fs); - } + else + err = native_ioctl(file, cmd, (unsigned long)up_native); if (err == -ENOTTY) return err; - /* Special case: even after an error we need to put the - results back for these ioctls since the error_idx will - contain information on which control failed. */ + /* + * Special case: even after an error we need to put the + * results back for these ioctls since the error_idx will + * contain information on which control failed. + */ switch (cmd) { case VIDIOC_G_EXT_CTRLS: case VIDIOC_S_EXT_CTRLS: case VIDIOC_TRY_EXT_CTRLS: - if (put_v4l2_ext_controls32(file, &karg.v2ecs, up)) + if (put_v4l2_ext_controls32(file, up_native, up)) err = -EFAULT; break; case VIDIOC_S_EDID: - if (put_v4l2_edid32(&karg.v2edid, up)) + if (put_v4l2_edid32(up_native, up)) err = -EFAULT; break; } @@ -994,44 +1206,46 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar case VIDIOC_S_OUTPUT: case VIDIOC_G_INPUT: case VIDIOC_G_OUTPUT: - err = put_user(((s32)karg.vi), (s32 __user *)up); + if (assign_in_user((compat_uint_t __user *)up, + ((unsigned int __user *)up_native))) + err = -EFAULT; break; case VIDIOC_G_FBUF: - err = put_v4l2_framebuffer32(&karg.v2fb, up); + err = put_v4l2_framebuffer32(up_native, up); break; case VIDIOC_DQEVENT: - err = put_v4l2_event32(&karg.v2ev, up); + err = put_v4l2_event32(up_native, up); break; case VIDIOC_G_EDID: - err = put_v4l2_edid32(&karg.v2edid, up); + err = put_v4l2_edid32(up_native, up); break; case VIDIOC_G_FMT: case VIDIOC_S_FMT: case VIDIOC_TRY_FMT: - err = put_v4l2_format32(&karg.v2f, up); + err = put_v4l2_format32(up_native, up); break; case VIDIOC_CREATE_BUFS: - err = put_v4l2_create32(&karg.v2crt, up); + err = put_v4l2_create32(up_native, up); break; case VIDIOC_PREPARE_BUF: case VIDIOC_QUERYBUF: case VIDIOC_QBUF: case VIDIOC_DQBUF: - err = put_v4l2_buffer32(&karg.v2b, up); + err = put_v4l2_buffer32(up_native, up); break; case VIDIOC_ENUMSTD: - err = put_v4l2_standard32(&karg.v2s, up); + err = put_v4l2_standard32(up_native, up); break; case VIDIOC_ENUMINPUT: - err = put_v4l2_input32(&karg.v2i, up); + err = put_v4l2_input32(up_native, up); break; } return err; From 1a4834092539674b68f03f9b4f13f48c6b424a27 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 14 Feb 2018 12:44:34 +0100 Subject: [PATCH 378/770] media: v4l2-compat-ioctl32.c: make ctrl_is_pointer work for subdevs commit 273caa260035c03d89ad63d72d8cd3d9e5c5e3f1 upstream. If the device is of type VFL_TYPE_SUBDEV then vdev->ioctl_ops is NULL so the 'if (!ops->vidioc_query_ext_ctrl)' check would crash. Add a test for !ops to the condition. All sub-devices that have controls will use the control framework, so they do not have an equivalent to ops->vidioc_query_ext_ctrl. Returning false if ops is NULL is the correct thing to do here. Fixes: b8c601e8af ("v4l2-compat-ioctl32.c: fix ctrl_is_pointer") Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Reported-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index ec4a69728ed4..cbeea8343a5c 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -770,7 +770,7 @@ static inline bool ctrl_is_pointer(struct file *file, u32 id) return ctrl && ctrl->is_ptr; } - if (!ops->vidioc_query_ext_ctrl) + if (!ops || !ops->vidioc_query_ext_ctrl) return false; return !ops->vidioc_query_ext_ctrl(file, fh, &qec) && From f428567e005fa2a7f630266cb48f2036d7f01698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 5 Feb 2018 11:15:52 +0200 Subject: [PATCH 379/770] crypto: caam - fix endless loop when DECO acquire fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 225ece3e7dad4cfc44cca38ce7a3a80f255ea8f1 upstream. In case DECO0 cannot be acquired - i.e. run_descriptor_deco0() fails with -ENODEV, caam_probe() enters an endless loop: run_descriptor_deco0 ret -ENODEV -> instantiate_rng -ENODEV, overwritten by -EAGAIN ret -EAGAIN -> caam_probe -EAGAIN results in endless loop It turns out the error path in instantiate_rng() is incorrect, the checks are done in the wrong order. Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles") Reported-by: Bryan O'Donoghue Suggested-by: Auer Lukas Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/ctrl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 027e121c6f70..e1d4ae1153c4 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -228,12 +228,16 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, * without any error (HW optimizations for later * CAAM eras), then try again. */ - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; - if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) - ret = -EAGAIN; if (ret) break; + + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || + !(rdsta_val & (1 << sh_idx))) { + ret = -EAGAIN; + break; + } + dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx); /* Clear the contents before recreating the descriptor */ memset(desc, 0x00, CAAM_CMD_SZ * 7); From f37a798e7724b2b8c008a45e7604bc2cc75870d2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Jan 2018 00:31:27 -0800 Subject: [PATCH 380/770] crypto: sha512-mb - initialize pending lengths correctly commit eff84b379089cd8b4e83599639c1f5f6e34ef7bf upstream. The SHA-512 multibuffer code keeps track of the number of blocks pending in each lane. The minimum of these values is used to identify the next lane that will be completed. Unused lanes are set to a large number (0xFFFFFFFF) so that they don't affect this calculation. However, it was forgotten to set the lengths to this value in the initial state, where all lanes are unused. As a result it was possible for sha512_mb_mgr_get_comp_job_avx2() to select an unused lane, causing a NULL pointer dereference. Specifically this could happen in the case where ->update() was passed fewer than SHA512_BLOCK_SIZE bytes of data, so it then called sha_complete_job() without having actually submitted any blocks to the multi-buffer code. This hit a NULL pointer dereference if another task happened to have submitted blocks concurrently to the same CPU and the flush timer had not yet expired. Fix this by initializing sha512_mb_mgr->lens correctly. As usual, this bug was found by syzkaller. Fixes: 45691e2d9b18 ("crypto: sha512-mb - submit/flush routines for AVX2") Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; From 7dffdb31ad13e37c3037185d3ac04a5e44ebacbd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Feb 2018 17:56:06 +0000 Subject: [PATCH 381/770] arm: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls commit 20e8175d246e9f9deb377f2784b3e7dfb2ad3e86 upstream. KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/handle_exit.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 76be5701f9d6..910bd8dabb3c 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -38,7 +38,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } From 6bad51166f875751657d11b9ff9cd79540302c90 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 9 Nov 2017 20:27:20 +0200 Subject: [PATCH 382/770] KVM: nVMX: Fix races when sending nested PI while dest enters/leaves L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b6977117f50d60455ace86b2d256f6fb4f3de05 upstream. Consider the following scenario: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. CPU B is currently executing L2 guest. 3. vmx_deliver_nested_posted_interrupt() calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode == IN_GUEST_MODE. 4. Assume that before CPU A sends the physical POSTED_INTR_NESTED_VECTOR IPI, CPU B exits from L2 to L0 during event-delivery (valid IDT-vectoring-info). 5. CPU A now sends the physical IPI. The IPI is received in host and it's handler (smp_kvm_posted_intr_nested_ipi()) does nothing. 6. Assume that before CPU A sets pi_pending=true and KVM_REQ_EVENT, CPU B continues to run in L0 and reach vcpu_enter_guest(). As KVM_REQ_EVENT is not set yet, vcpu_enter_guest() will continue and resume L2 guest. 7. At this point, CPU A sets pi_pending=true and KVM_REQ_EVENT but it's too late! CPU B already entered L2 and KVM_REQ_EVENT will only be consumed at next L2 entry! Another scenario to consider: 1. CPU A calls vmx_deliver_nested_posted_interrupt() to send an IPI to CPU B via virtual posted-interrupt mechanism. 2. Assume that before CPU A calls kvm_vcpu_trigger_posted_interrupt(), CPU B is at L0 and is about to resume into L2. Further assume that it is in vcpu_enter_guest() after check for KVM_REQ_EVENT. 3. At this point, CPU A calls kvm_vcpu_trigger_posted_interrupt() which will note that vcpu->mode != IN_GUEST_MODE. Therefore, do nothing and return false. Then, will set pi_pending=true and KVM_REQ_EVENT. 4. Now CPU B continue and resumes into L2 guest without processing the posted-interrupt until next L2 entry! To fix both issues, we just need to change vmx_deliver_nested_posted_interrupt() to set pi_pending=true and KVM_REQ_EVENT before calling kvm_vcpu_trigger_posted_interrupt(). It will fix the first scenario by chaging step (6) to note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. It will fix the second scenario by two possible ways: 1. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B has changed vcpu->mode to IN_GUEST_MODE, physical IPI will be sent and will be received when CPU resumes into L2. 2. If kvm_vcpu_trigger_posted_interrupt() is called while CPU B hasn't yet changed vcpu->mode to IN_GUEST_MODE, then after CPU B will change vcpu->mode it will call kvm_request_pending() which will return true and therefore force another round of vcpu_enter_guest() which will note that KVM_REQ_EVENT and pi_pending=true and therefore process nested posted-interrupt. Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Krish Sadhukhan [Add kvm_vcpu_kick to also handle the case where L1 doesn't intercept L2 HLT and L2 executes HLT instruction. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0ae4b1a86168..75a273bc1c52 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5322,14 +5322,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { - /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu, true); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. */ vmx->nested.pi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) + kvm_vcpu_kick(vcpu); return 0; } return -1; From fe90a3a6f88179dfa427a758dbef47cb371b6be7 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sun, 19 Nov 2017 18:25:43 +0200 Subject: [PATCH 383/770] KVM: nVMX: Fix bug of injecting L2 exception into L1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c7d4f9ad39d980728b39752304ce10bb2960cbf upstream. kvm_clear_exception_queue() should clear pending exception. This also includes exceptions which were only marked pending but not yet injected. This is because exception.pending is used for both L1 and L2 to determine if an exception should be raised to guest. Note that an exception which is pending but not yet injected will be raised again once the guest will be resumed. Consider the following scenario: 1) L0 KVM with ignore_msrs=false. 2) L1 prepare vmcs12 with the following: a) No intercepts on MSR (MSR_BITMAP exist and is filled with 0). b) No intercept for #GP. c) vmx-preemption-timer is configured. 3) L1 enters into L2. 4) L2 reads an unhandled MSR that exists in MSR_BITMAP (such as 0x1fff). L2 RDMSR could be handled as described below: 1) L2 exits to L0 on RDMSR and calls handle_rdmsr(). 2) handle_rdmsr() calls kvm_inject_gp() which sets KVM_REQ_EVENT, exception.pending=true and exception.injected=false. 3) vcpu_enter_guest() consumes KVM_REQ_EVENT and calls inject_pending_event() which calls vmx_check_nested_events() which sees that exception.pending=true but nested_vmx_check_exception() returns 0 and therefore does nothing at this point. However let's assume it later sees vmx-preemption-timer expired and therefore exits from L2 to L1 by calling nested_vmx_vmexit(). 4) nested_vmx_vmexit() calls prepare_vmcs12() which calls vmcs12_save_pending_event() but it does nothing as exception.injected is false. Also prepare_vmcs12() calls kvm_clear_exception_queue() which does nothing as exception.injected is already false. 5) We now return from vmx_check_nested_events() with 0 while still having exception.pending=true! 6) Therefore inject_pending_event() continues and we inject L2 exception to L1!... This commit will fix above issue by changing step (4) to clear exception.pending in kvm_clear_exception_queue(). Fixes: 664f8e26b00c ("KVM: X86: Fix loss of exception which has not yet been injected") Signed-off-by: Liran Alon Reviewed-by: Nikita Leshenko Reviewed-by: Krish Sadhukhan Signed-off-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 1 - arch/x86/kvm/x86.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 75a273bc1c52..0ea909ca45c2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11246,7 +11246,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); - vcpu->arch.exception.pending = false; return 0; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d0b95b7a90b4..6d112d8f799c 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -12,6 +12,7 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { + vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = false; } From 88b64450cc0a9340145331b2ffefdff15a0f96d3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Jan 2018 14:31:43 +1100 Subject: [PATCH 384/770] KVM: PPC: Book3S HV: Make sure we don't re-enter guest without XIVE loaded commit 43ff3f65234061e08d234bdef5a9aadc19832b74 upstream. This fixes a bug where it is possible to enter a guest on a POWER9 system without having the XIVE (interrupt controller) context loaded. This can happen because we unload the XIVE context from the CPU before doing the real-mode handling for machine checks. After the real-mode handler runs, it is possible that we re-enter the guest via a fast path which does not load the XIVE context. To fix this, we move the unloading of the XIVE context to come after the real-mode machine check handler is called. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c85ac5c83bd4..2b3194b9608f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1387,6 +1387,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) blt deliver_guest_interrupt guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ + /* Save more register state */ + mfdar r6 + mfdsisr r7 + std r6, VCPU_DAR(r9) + stw r7, VCPU_DSISR(r9) + /* don't overwrite fault_dar/fault_dsisr if HDSI */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE + beq mc_cont + std r6, VCPU_FAULT_DAR(r9) + stw r7, VCPU_FAULT_DSISR(r9) + + /* See if it is a machine check */ + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + beq machine_check_realmode +mc_cont: +#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING + addi r3, r9, VCPU_TB_RMEXIT + mr r4, r9 + bl kvmhv_accumulate_time +#endif #ifdef CONFIG_KVM_XICS /* We are exiting, pull the VP from the XIVE */ lwz r0, VCPU_XIVE_PUSHED(r9) @@ -1424,26 +1444,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ eieio 1: #endif /* CONFIG_KVM_XICS */ - /* Save more register state */ - mfdar r6 - mfdsisr r7 - std r6, VCPU_DAR(r9) - stw r7, VCPU_DSISR(r9) - /* don't overwrite fault_dar/fault_dsisr if HDSI */ - cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE - beq mc_cont - std r6, VCPU_FAULT_DAR(r9) - stw r7, VCPU_FAULT_DSISR(r9) - - /* See if it is a machine check */ - cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_realmode -mc_cont: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING - addi r3, r9, VCPU_TB_RMEXIT - mr r4, r9 - bl kvmhv_accumulate_time -#endif mr r3, r12 /* Increment exit count, poke other threads to exit */ From be54d79b43fdb796787d004afbacef57bbab3074 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jan 2018 10:51:32 +1100 Subject: [PATCH 385/770] KVM: PPC: Book3S HV: Drop locks before reading guest memory commit 36ee41d161c67a6fcf696d4817a0da31f778938c upstream. Running with CONFIG_DEBUG_ATOMIC_SLEEP reveals that HV KVM tries to read guest memory, in order to emulate guest instructions, while preempt is disabled and a vcore lock is held. This occurs in kvmppc_handle_exit_hv(), called from post_guest_process(), when emulating guest doorbell instructions on POWER9 systems, and also when checking whether we have hit a hypervisor breakpoint. Reading guest memory can cause a page fault and thus cause the task to sleep, so we need to avoid reading guest memory while holding a spinlock or when preempt is disabled. To fix this, we move the preempt_enable() in kvmppc_run_core() to before the loop that calls post_guest_process() for each vcore that has just run, and we drop and re-take the vcore lock around the calls to kvmppc_emulate_debug_inst() and kvmppc_emulate_doorbell_instr(). Dropping the lock is safe with respect to the iteration over the runnable vcpus in post_guest_process(); for_each_runnable_thread is actually safe to use locklessly. It is possible for a vcpu to become runnable and add itself to the runnable_threads array (code near the beginning of kvmppc_run_vcpu()) and then get included in the iteration in post_guest_process despite the fact that it has not just run. This is benign because vcpu->arch.trap and vcpu->arch.ceded will be zero. Fixes: 579006944e0d ("KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9") Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8d43cf205d34..f48e3379a18a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -999,8 +999,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tvcpu; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return EMULATE_FAIL; if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE) return RESUME_GUEST; if (get_op(inst) != 31) @@ -1050,6 +1048,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* Called with vcpu->arch.vcore->lock held */ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1169,7 +1168,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, swab32(vcpu->arch.emul_inst) : vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + /* Need vcore unlocked to call kvmppc_get_last_inst */ + spin_unlock(&vcpu->arch.vcore->lock); r = kvmppc_emulate_debug_inst(run, vcpu); + spin_lock(&vcpu->arch.vcore->lock); } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -1184,8 +1186,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, */ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: r = EMULATE_FAIL; - if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) + if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && + cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Need vcore unlocked to call kvmppc_get_last_inst */ + spin_unlock(&vcpu->arch.vcore->lock); r = kvmppc_emulate_doorbell_instr(vcpu); + spin_lock(&vcpu->arch.vcore->lock); + } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -2889,13 +2896,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); + preempt_enable(); + for (sub = 0; sub < core_info.n_subcores; ++sub) { pvc = core_info.vc[sub]; post_guest_process(pvc, pvc == vc); } spin_lock(&vc->lock); - preempt_enable(); out: vc->vcore_state = VCORE_INACTIVE; From 985bf39913428d1797427e779d747ae76b144901 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 22 Jan 2018 18:19:06 +0000 Subject: [PATCH 386/770] KVM: arm/arm64: Handle CPU_PM_ENTER_FAILED commit 58d6b15e9da5042a99c9c30ad725792e4569150e upstream. cpu_pm_enter() calls the pm notifier chain with CPU_PM_ENTER, then if there is a failure: CPU_PM_ENTER_FAILED. When KVM receives CPU_PM_ENTER it calls cpu_hyp_reset() which will return us to the hyp-stub. If we subsequently get a CPU_PM_ENTER_FAILED, KVM does nothing, leaving the CPU running with the hyp-stub, at odds with kvm_arm_hardware_enabled. Add CPU_PM_ENTER_FAILED as a fallthrough for CPU_PM_EXIT, this reloads KVM based on kvm_arm_hardware_enabled. This is safe even if CPU_PM_ENTER never gets as far as KVM, as cpu_hyp_reinit() calls cpu_hyp_reset() to make sure the hyp-stub is loaded before reloading KVM. Fixes: 67f691976662 ("arm64: kvm: allows kvm cpu hotplug") CC: Lorenzo Pieralisi Reviewed-by: Christoffer Dall Signed-off-by: James Morse Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 385ba604a9b2..8b6c42dc1aa9 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1220,6 +1220,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. */ From e747a02d9fcf76c4146a5f151e2136591df5b0a3 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:14 +0100 Subject: [PATCH 387/770] KVM: PPC: Book3S PR: Fix broken select due to misspelling commit 57ea5f161a7de5b1913c212d04f57a175b159fdf upstream. Commit 76d837a4c0f9 ("KVM: PPC: Book3S PR: Don't include SPAPR TCE code on non-pseries platforms") added a reference to the globally undefined symbol PPC_SERIES. Looking at the rest of the commit, PPC_PSERIES was probably intended. Change PPC_SERIES to PPC_PSERIES. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Fixes: 76d837a4c0f9 ("KVM: PPC: Book3S PR: Don't include SPAPR TCE code on non-pseries platforms") Signed-off-by: Ulf Magnusson Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index b12b8eb39c29..648160334abf 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -68,7 +68,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE - select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV) + select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. From 4562bfdeac8708371dec016f868a2a18ec6d1285 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 8 Jan 2018 16:01:04 +0000 Subject: [PATCH 388/770] ASoC: rockchip: i2s: fix playback after runtime resume commit c66234cfedfc3e6e3b62563a5f2c1562be09a35d upstream. When restoring registers during runtime resume, we must not write to I2S_TXDR which is the transmit FIFO as this queues up a sample to be output and pushes all of the output channels down by one. This can be demonstrated with the speaker-test utility: for i in a b c; do speaker-test -c 2 -s 1; done which should play a test through the left speaker three times but if the I2S hardware starts runtime suspended the first sample will be played through the right speaker. Fix this by marking I2S_TXDR as volatile (which also requires marking it as readble, even though it technically isn't). This seems to be the most robust fix, the alternative of giving I2S_TXDR a default value is more fragile since it does not prevent regcache writing to the register in all circumstances. While here, also fix the configuration of I2S_RXDR and I2S_FIFOLR; these are not writable so they do not suffer from the same problem as I2S_TXDR but reading from I2S_RXDR does suffer from a similar problem. Fixes: f0447f6cbb20 ("ASoC: rockchip: i2s: restore register during runtime_suspend/resume cycle", 2016-09-07) Signed-off-by: John Keeping Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/rockchip/rockchip_i2s.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index b6590467fd14..66fc13a2396a 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -504,6 +504,7 @@ static bool rockchip_i2s_rd_reg(struct device *dev, unsigned int reg) case I2S_INTCR: case I2S_XFER: case I2S_CLR: + case I2S_TXDR: case I2S_RXDR: case I2S_FIFOLR: case I2S_INTSR: @@ -518,6 +519,9 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case I2S_INTSR: case I2S_CLR: + case I2S_FIFOLR: + case I2S_TXDR: + case I2S_RXDR: return true; default: return false; @@ -527,6 +531,8 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { + case I2S_RXDR: + return true; default: return false; } From b897f1dc780e1ac019977297f2f32369776a30b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Jan 2018 16:38:46 +0100 Subject: [PATCH 389/770] ASoC: skl: Fix kernel warning due to zero NHTL entry commit 20a1ea2222e7cbf96e9bf8579362e971491e6aea upstream. I got the following kernel warning when loading snd-soc-skl module on Dell Latitude 7270 laptop: memremap attempted on mixed range 0x0000000000000000 size: 0x0 WARNING: CPU: 0 PID: 484 at kernel/memremap.c:98 memremap+0x8a/0x180 Call Trace: skl_nhlt_init+0x82/0xf0 [snd_soc_skl] skl_probe+0x2ee/0x7c0 [snd_soc_skl] .... It seems that the machine doesn't support the SKL DSP gives the empty NHLT entry, and it triggers the warning. For avoiding it, let do the zero check before calling memremap(). Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index e7d766d56c8e..55859c5b456f 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -41,7 +41,8 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer; - nhlt_table = (struct nhlt_acpi_table *) + if (nhlt_ptr->length) + nhlt_table = (struct nhlt_acpi_table *) memremap(nhlt_ptr->min_addr, nhlt_ptr->length, MEMREMAP_WB); ACPI_FREE(obj); From e06f7b686daa871c4a4cf745be30fd1864b6f9c8 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 1 Jan 2018 18:26:47 +0100 Subject: [PATCH 390/770] watchdog: imx2_wdt: restore previous timeout after suspend+resume commit 0be267255cef64e1c58475baa7b25568355a3816 upstream. When the watchdog device is suspended, its timeout is set to the maximum value. During resume, the previously set timeout should be restored. This does not work at the moment. The suspend function calls imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); and resume reverts this by calling imx2_wdt_set_timeout(wdog, wdog->timeout); However, imx2_wdt_set_timeout() updates wdog->timeout. Therefore, wdog->timeout is set to IMX2_WDT_MAX_TIME when we enter the resume function. Fix this by adding a new function __imx2_wdt_set_timeout() which only updates the hardware settings. imx2_wdt_set_timeout() now calls __imx2_wdt_set_timeout() and then saves the new timeout to wdog->timeout. During suspend, we call __imx2_wdt_set_timeout() directly so that wdog->timeout won't be updated and we can restore the previous value during resume. This approach makes wdog->timeout different from the actual setting in the hardware which is usually not a good thing. However, the two differ only while we're suspended and no kernel code is running, so it should be ok in this case. Signed-off-by: Martin Kaiser Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/imx2_wdt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 4874b0f18650..518dfa1047cb 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -169,15 +169,21 @@ static int imx2_wdt_ping(struct watchdog_device *wdog) return 0; } -static int imx2_wdt_set_timeout(struct watchdog_device *wdog, - unsigned int new_timeout) +static void __imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - wdog->timeout = new_timeout; - regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT, WDOG_SEC_TO_COUNT(new_timeout)); +} + +static int imx2_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int new_timeout) +{ + __imx2_wdt_set_timeout(wdog, new_timeout); + + wdog->timeout = new_timeout; return 0; } @@ -371,7 +377,11 @@ static int imx2_wdt_suspend(struct device *dev) /* The watchdog IP block is running */ if (imx2_wdt_is_running(wdev)) { - imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); + /* + * Don't update wdog->timeout, we'll restore the current value + * during resume. + */ + __imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); } From d16919a3fe2d83ae159b82808933fefb54fc2f6e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 12 Jan 2018 18:07:01 -0700 Subject: [PATCH 391/770] Btrfs: raid56: iterate raid56 internal bio with bio_for_each_segment_all commit 0198e5b707cfeb5defbd1b71b1ec6e71580d7db9 upstream. Bio iterated by set_bio_pages_uptodate() is raid56 internal one, so it will never be a BIO_CLONED bio, and since this is called by end_io functions, bio->bi_iter.bi_size is zero, we mustn't use bio_for_each_segment() as that is a no-op if bi_size is zero. Fixes: 6592e58c6b68e61f003a01ba29a3716e7e2e9484 ("Btrfs: fix write corruption due to bio cloning on raid5/6") Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/raid56.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 24a62224b24b..6154825c30e1 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1432,14 +1432,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, */ static void set_bio_pages_uptodate(struct bio *bio) { - struct bio_vec bvec; - struct bvec_iter iter; + struct bio_vec *bvec; + int i; - if (bio_flagged(bio, BIO_CLONED)) - bio->bi_iter = btrfs_io_bio(bio)->iter; + ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment(bvec, bio, iter) - SetPageUptodate(bvec.bv_page); + bio_for_each_segment_all(bvec, bio, i) + SetPageUptodate(bvec->bv_page); } /* From e186d8bfdabddcff429b3231948d4af860ae7fc7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 6 Feb 2018 15:36:00 -0800 Subject: [PATCH 392/770] kasan: don't emit builtin calls when sanitization is off commit 0e410e158e5baa1300bdf678cea4f4e0cf9d8b94 upstream. With KASAN enabled the kernel has two different memset() functions, one with KASAN checks (memset) and one without (__memset). KASAN uses some macro tricks to use the proper version where required. For example memset() calls in mm/slub.c are without KASAN checks, since they operate on poisoned slab object metadata. The issue is that clang emits memset() calls even when there is no memset() in the source code. They get linked with improper memset() implementation and the kernel fails to boot due to a huge amount of KASAN reports during early boot stages. The solution is to add -fno-builtin flag for files with KASAN_SANITIZE := n marker. Link: http://lkml.kernel.org/r/8ffecfffe04088c52c42b92739c2bd8a0bcb3f5e.1516384594.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Nick Desaulniers Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 ++- scripts/Makefile.kasan | 3 +++ scripts/Makefile.lib | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 76a0b13623f4..a7160d6f0f2d 100644 --- a/Makefile +++ b/Makefile @@ -416,7 +416,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 1ce7115aa499..94c9e65cc83a 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -31,4 +31,7 @@ else endif CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) + +CFLAGS_KASAN_NOSANITIZE := -fno-builtin + endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 04b5633df1cf..0b46136a91a8 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -128,7 +128,7 @@ endif ifeq ($(CONFIG_KASAN),y) _c_flags += $(if $(patsubst n%,, \ $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ - $(CFLAGS_KASAN)) + $(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE)) endif ifeq ($(CONFIG_UBSAN),y) From 062cd3463c176c71d23e6ca92cda76500ad158d3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Feb 2018 15:41:41 -0800 Subject: [PATCH 393/770] kasan: rework Kconfig settings commit e7c52b84fb18f08ce49b6067ae6285aca79084a8 upstream. We get a lot of very large stack frames using gcc-7.0.1 with the default -fsanitize-address-use-after-scope --param asan-stack=1 options, which can easily cause an overflow of the kernel stack, e.g. drivers/gpu/drm/i915/gvt/handlers.c:2434:1: warning: the frame size of 46176 bytes is larger than 3072 bytes drivers/net/wireless/ralink/rt2x00/rt2800lib.c:5650:1: warning: the frame size of 23632 bytes is larger than 3072 bytes lib/atomic64_test.c:250:1: warning: the frame size of 11200 bytes is larger than 3072 bytes drivers/gpu/drm/i915/gvt/handlers.c:2621:1: warning: the frame size of 9208 bytes is larger than 3072 bytes drivers/media/dvb-frontends/stv090x.c:3431:1: warning: the frame size of 6816 bytes is larger than 3072 bytes fs/fscache/stats.c:287:1: warning: the frame size of 6536 bytes is larger than 3072 bytes To reduce this risk, -fsanitize-address-use-after-scope is now split out into a separate CONFIG_KASAN_EXTRA Kconfig option, leading to stack frames that are smaller than 2 kilobytes most of the time on x86_64. An earlier version of this patch also prevented combining KASAN_EXTRA with KASAN_INLINE, but that is no longer necessary with gcc-7.0.1. All patches to get the frame size below 2048 bytes with CONFIG_KASAN=y and CONFIG_KASAN_EXTRA=n have been merged by maintainers now, so we can bring back that default now. KASAN_EXTRA=y still causes lots of warnings but now defaults to !COMPILE_TEST to disable it in allmodconfig, and it remains disabled in all other defconfigs since it is a new option. I arbitrarily raise the warning limit for KASAN_EXTRA to 3072 to reduce the noise, but an allmodconfig kernel still has around 50 warnings on gcc-7. I experimented a bit more with smaller stack frames and have another follow-up series that reduces the warning limit for 64-bit architectures to 1280 bytes (without CONFIG_KASAN). With earlier versions of this patch series, I also had patches to address the warnings we get with KASAN and/or KASAN_EXTRA, using a "noinline_if_stackbloat" annotation. That annotation now got replaced with a gcc-8 bugfix (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715) and a workaround for older compilers, which means that KASAN_EXTRA is now just as bad as before and will lead to an instant stack overflow in a few extreme cases. This reverts parts of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). Two patches in linux-next should be merged first to avoid introducing warnings in an allmodconfig build: 3cd890dbe2a4 ("media: dvb-frontends: fix i2c access helpers for KASAN") 16c3ada89cff ("media: r820t: fix r820t_write_reg for KASAN") Do we really need to backport this? I think we do: without this patch, enabling KASAN will lead to unavoidable kernel stack overflow in certain device drivers when built with gcc-7 or higher on linux-4.10+ or any version that contains a backport of commit c5caf21ab0cf8. Most people are probably still on older compilers, but it will get worse over time as they upgrade their distros. The warnings we get on kernels older than this should all be for code that uses dangerously large stack frames, though most of them do not cause an actual stack overflow by themselves.The asan-stack option was added in linux-4.0, and commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y") effectively turned off the warning for allmodconfig kernels, so I would like to see this fix backported to any kernels later than 4.0. I have done dozens of fixes for individual functions with stack frames larger than 2048 bytes with asan-stack, and I plan to make sure that all those fixes make it into the stable kernels as well (most are already there). Part of the complication here is that asan-stack (from 4.0) was originally assumed to always require much larger stacks, but that turned out to be a combination of multiple gcc bugs that we have now worked around and fixed, but sanitize-address-use-after-scope (from v4.10) has a much higher inherent stack usage and also suffers from at least three other problems that we have analyzed but not yet fixed upstream, each of them makes the stack usage more severe than it should be. Link: http://lkml.kernel.org/r/20171221134744.2295529-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Andrey Ryabinin Cc: Mauro Carvalho Chehab Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 2 +- lib/Kconfig.kasan | 11 +++++++++++ scripts/Makefile.kasan | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ff21b4dbb392..00cb02daeddd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 0 if KASAN + default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index bd38aab05929..3d35d062970d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -20,6 +20,17 @@ config KASAN Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_EXTRA + bool "KAsan: extra checks" + depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST + help + This enables further checks in the kernel address sanitizer, for now + it only includes the address-use-after-scope check that can lead + to excessive kernel stack usage, frame size warnings and longer + compile time. + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more + + choice prompt "Instrumentation type" depends on KASAN diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 94c9e65cc83a..97a56c0b565a 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -30,7 +30,9 @@ else endif endif +ifdef CONFIG_KASAN_EXTRA CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) +endif CFLAGS_KASAN_NOSANITIZE := -fno-builtin From ad91b2e392be4339e09eefd8479675b4232ddfa1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Nov 2017 11:55:46 -0500 Subject: [PATCH 394/770] media: dvb-frontends: fix i2c access helpers for KASAN commit 3cd890dbe2a4f14cc44c85bb6cf37e5e22d4dd0e upstream. A typical code fragment was copied across many dvb-frontend drivers and causes large stack frames when built with with CONFIG_KASAN on gcc-5/6/7: drivers/media/dvb-frontends/cxd2841er.c:3225:1: error: the frame size of 3992 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/cxd2841er.c:3404:1: error: the frame size of 3136 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv0367.c:3143:1: error: the frame size of 4016 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:3430:1: error: the frame size of 5312 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] drivers/media/dvb-frontends/stv090x.c:4248:1: error: the frame size of 4872 bytes is larger than 3072 bytes [-Werror=frame-larger-than=] gcc-8 now solves this by consolidating the stack slots for the argument variables, but on older compilers we can get the same behavior by taking the pointer of a local variable rather than the inline function argument. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ascot2e.c | 4 +++- drivers/media/dvb-frontends/cxd2841er.c | 4 +++- drivers/media/dvb-frontends/helene.c | 4 +++- drivers/media/dvb-frontends/horus3a.c | 4 +++- drivers/media/dvb-frontends/itd1000.c | 5 +++-- drivers/media/dvb-frontends/mt312.c | 5 ++++- drivers/media/dvb-frontends/stb0899_drv.c | 3 ++- drivers/media/dvb-frontends/stb6100.c | 6 ++++-- drivers/media/dvb-frontends/stv0367.c | 4 +++- drivers/media/dvb-frontends/stv090x.c | 4 +++- drivers/media/dvb-frontends/stv6110x.c | 4 +++- drivers/media/dvb-frontends/zl10039.c | 4 +++- 12 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c index 0ee0df53b91b..79d5d89bc95e 100644 --- a/drivers/media/dvb-frontends/ascot2e.c +++ b/drivers/media/dvb-frontends/ascot2e.c @@ -155,7 +155,9 @@ static int ascot2e_write_regs(struct ascot2e_priv *priv, static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val) { - return ascot2e_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return ascot2e_write_regs(priv, reg, &tmp, 1); } static int ascot2e_read_regs(struct ascot2e_priv *priv, diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 48ee9bc00c06..ccbd84fd6428 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -257,7 +257,9 @@ static int cxd2841er_write_regs(struct cxd2841er_priv *priv, static int cxd2841er_write_reg(struct cxd2841er_priv *priv, u8 addr, u8 reg, u8 val) { - return cxd2841er_write_regs(priv, addr, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return cxd2841er_write_regs(priv, addr, reg, &tmp, 1); } static int cxd2841er_read_regs(struct cxd2841er_priv *priv, diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c index 4bf5a551ba40..2ab8d83e5576 100644 --- a/drivers/media/dvb-frontends/helene.c +++ b/drivers/media/dvb-frontends/helene.c @@ -331,7 +331,9 @@ static int helene_write_regs(struct helene_priv *priv, static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val) { - return helene_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return helene_write_regs(priv, reg, &tmp, 1); } static int helene_read_regs(struct helene_priv *priv, diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c index 68d759c4c52e..5c8b405f2ddc 100644 --- a/drivers/media/dvb-frontends/horus3a.c +++ b/drivers/media/dvb-frontends/horus3a.c @@ -89,7 +89,9 @@ static int horus3a_write_regs(struct horus3a_priv *priv, static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val) { - return horus3a_write_regs(priv, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return horus3a_write_regs(priv, reg, &tmp, 1); } static int horus3a_enter_power_save(struct horus3a_priv *priv) diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c index 5bb1e73a10b4..ce7c443d3eac 100644 --- a/drivers/media/dvb-frontends/itd1000.c +++ b/drivers/media/dvb-frontends/itd1000.c @@ -95,8 +95,9 @@ static int itd1000_read_reg(struct itd1000_state *state, u8 reg) static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v) { - int ret = itd1000_write_regs(state, r, &v, 1); - state->shadow[r] = v; + u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + int ret = itd1000_write_regs(state, r, &tmp, 1); + state->shadow[r] = tmp; return ret; } diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index 961b9a2508e0..0b23cbc021b8 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -142,7 +142,10 @@ static inline int mt312_readreg(struct mt312_state *state, static inline int mt312_writereg(struct mt312_state *state, const enum mt312_reg_addr reg, const u8 val) { - return mt312_write(state, reg, &val, 1); + u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + + return mt312_write(state, reg, &tmp, 1); } static inline u32 mt312_div(u32 a, u32 b) diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index 02347598277a..db5dde3215f0 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -539,7 +539,8 @@ int stb0899_write_regs(struct stb0899_state *state, unsigned int reg, u8 *data, int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data) { - return stb0899_write_regs(state, reg, &data, 1); + u8 tmp = data; + return stb0899_write_regs(state, reg, &tmp, 1); } /* diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index 17a955d0031b..75509bec66e4 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -226,12 +226,14 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data) { + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + if (unlikely(reg >= STB6100_NUMREGS)) { dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg); return -EREMOTEIO; } - data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set; - return stb6100_write_reg_range(state, &data, reg, 1); + tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set; + return stb6100_write_reg_range(state, &tmp, reg, 1); } diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index f3529df8211d..1a726196c126 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -166,7 +166,9 @@ int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len) static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data) { - return stv0367_writeregs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv0367_writeregs(state, reg, &tmp, 1); } static u8 stv0367_readreg(struct stv0367_state *state, u16 reg) diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 7ef469c0c866..2695e1eb6d9c 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -755,7 +755,9 @@ static int stv090x_write_regs(struct stv090x_state *state, unsigned int reg, u8 static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data) { - return stv090x_write_regs(state, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv090x_write_regs(state, reg, &tmp, 1); } static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable) diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c index 66eba38f1014..7e8e01389c55 100644 --- a/drivers/media/dvb-frontends/stv6110x.c +++ b/drivers/media/dvb-frontends/stv6110x.c @@ -97,7 +97,9 @@ static int stv6110x_write_regs(struct stv6110x_state *stv6110x, int start, u8 da static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data) { - return stv6110x_write_regs(stv6110x, reg, &data, 1); + u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return stv6110x_write_regs(stv6110x, reg, &tmp, 1); } static int stv6110x_init(struct dvb_frontend *fe) diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c index 623355fc2666..3208b866d1cb 100644 --- a/drivers/media/dvb-frontends/zl10039.c +++ b/drivers/media/dvb-frontends/zl10039.c @@ -134,7 +134,9 @@ static inline int zl10039_writereg(struct zl10039_state *state, const enum zl10039_reg_addr reg, const u8 val) { - return zl10039_write(state, reg, &val, 1); + const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */ + + return zl10039_write(state, reg, &tmp, 1); } static int zl10039_init(struct dvb_frontend *fe) From 7a401e25f0a5909abf3f3e4fc7518b173cc5328f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 10 Jan 2018 07:20:39 -0500 Subject: [PATCH 395/770] media: ts2020: avoid integer overflows on 32 bit machines commit 81742be14b6a90c9fd0ff6eb4218bdf696ad8e46 upstream. Before this patch, when compiled for arm32, the signal strength were reported as: Lock (0x1f) Signal= 4294908.66dBm C/N= 12.79dB Because of a 32 bit integer overflow. After it, it is properly reported as: Lock (0x1f) Signal= -58.64dBm C/N= 12.79dB Fixes: 0f91c9d6bab9 ("[media] TS2020: Calculate tuner gain correctly") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/ts2020.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index 931e5c98da8a..b879e1571469 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -368,7 +368,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain2 = clamp_t(long, gain2, 0, 13); v_agc = clamp_t(long, v_agc, 400, 1100); - *_gain = -(gain1 * 2330 + + *_gain = -((__s64)gain1 * 2330 + gain2 * 3500 + v_agc * 24 / 10 * 10 + 10000); @@ -386,7 +386,7 @@ static int ts2020_read_tuner_gain(struct dvb_frontend *fe, unsigned v_agc, gain3 = clamp_t(long, gain3, 0, 6); v_agc = clamp_t(long, v_agc, 600, 1600); - *_gain = -(gain1 * 2650 + + *_gain = -((__s64)gain1 * 2650 + gain2 * 3380 + gain3 * 2850 + v_agc * 176 / 100 * 10 - From ebe2ba53851e785243d4ee3d7f4fcf64653a987b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 24 Jan 2018 06:01:57 -0500 Subject: [PATCH 396/770] media: cxusb, dib0700: ignore XC2028_I2C_FLUSH commit 9893b905e743ded332575ca04486bd586c0772f7 upstream. The XC2028_I2C_FLUSH only needs to be implemented on a few devices. Others can safely ignore it. That prevents filling the dmesg with lots of messages like: dib0700: stk7700ph_xc3028_callback: unknown command 2, arg 0 Fixes: 4d37ece757a8 ("[media] tuner/xc2028: Add I2C flush callback") Reported-by: Enrico Mioso Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/cxusb.c | 2 ++ drivers/media/usb/dvb-usb/dib0700_devices.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 37dea0adc695..cfe86b4864b3 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -677,6 +677,8 @@ static int dvico_bluebird_xc2028_callback(void *ptr, int component, case XC2028_RESET_CLK: deb_info("%s: XC2028_RESET_CLK %d\n", __func__, arg); break; + case XC2028_I2C_FLUSH: + break; default: deb_info("%s: unknown command %d, arg %d\n", __func__, command, arg); diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index 92098c1b78e5..9be1e658ef47 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -430,6 +430,7 @@ static int stk7700ph_xc3028_callback(void *ptr, int component, state->dib7000p_ops.set_gpio(adap->fe_adap[0].fe, 8, 0, 1); break; case XC2028_RESET_CLK: + case XC2028_I2C_FLUSH: break; default: err("%s: unknown command %d, arg %d\n", __func__, From 7e54b58285e1e5b3f2430342d40f98a53401bb91 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2018 15:37:13 -0800 Subject: [PATCH 397/770] fs/proc/kcore.c: use probe_kernel_read() instead of memcpy() commit d0290bc20d4739b7a900ae37eb5d4cc3be2b393f upstream. Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") added a bounce buffer to avoid hardened usercopy checks. Copying to the bounce buffer was implemented with a simple memcpy() assuming that it is always valid to read from kernel memory iff the kern_addr_valid() check passed. A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null" now can easily crash the kernel, since the former execption handling on invalid kernel addresses now doesn't work anymore. Also adding a kern_addr_valid() implementation wouldn't help here. Most architectures simply return 1 here, while a couple implemented a page table walk to figure out if something is mapped at the address in question. With DEBUG_PAGEALLOC active mappings are established and removed all the time, so that relying on the result of kern_addr_valid() before executing the memcpy() also doesn't work. Therefore simply use probe_kernel_read() to copy to the bounce buffer. This also allows to simplify read_kcore(). At least on s390 this fixes the observed crashes and doesn't introduce warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data"), even though the generic probe_kernel_read() implementation uses uaccess functions. While looking into this I'm also wondering if kern_addr_valid() could be completely removed...(?) Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Heiko Carstens Acked-by: Kees Cook Cc: Jiri Olsa Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/kcore.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4bc85cb8be6a..e8a93bc8285d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { From 838f9cc948b00a31a56c0aa115a1ceca507b084b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 6 Feb 2018 15:37:55 -0800 Subject: [PATCH 398/770] kernel/async.c: revert "async: simplify lowest_in_progress()" commit 4f7e988e63e336827f4150de48163bed05d653bd upstream. This reverts commit 92266d6ef60c ("async: simplify lowest_in_progress()") which was simply wrong: In the case where domain is NULL, we now use the wrong offsetof() in the list_first_entry macro, so we don't actually fetch the ->cookie value, but rather the eight bytes located sizeof(struct list_head) further into the struct async_entry. On 64 bit, that's the data member, while on 32 bit, that's a u64 built from func and data in some order. I think the bug happens to be harmless in practice: It obviously only affects callers which pass a NULL domain, and AFAICT the only such caller is async_synchronize_full() -> async_synchronize_full_domain(NULL) -> async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, NULL) and the ASYNC_COOKIE_MAX means that in practice we end up waiting for the async_global_pending list to be empty - but it would break if somebody happened to pass (void*)-1 as the data element to async_schedule, and of course also if somebody ever does a async_synchronize_cookie_domain(, NULL) with a "finite" cookie value. Maybe the "harmless in practice" means this isn't -stable material. But I'm not completely confident my quick git grep'ing is enough, and there might be affected code in one of the earlier kernels that has since been removed, so I'll leave the decision to the stable guys. Link: http://lkml.kernel.org/r/20171128104938.3921-1-linux@rasmusvillemoes.dk Fixes: 92266d6ef60c "async: simplify lowest_in_progress()" Signed-off-by: Rasmus Villemoes Acked-by: Tejun Heo Cc: Arjan van de Ven Cc: Adam Wallis Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 2cbd3dd5940d..a893d6170944 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; From 75526a9bd3f4fe83161ae2f723fbc036e07194ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:40:24 -0800 Subject: [PATCH 399/770] kernel/relay.c: revert "kernel/relay.c: fix potential memory leak" commit a1be1f3931bfe0a42b46fef77a04593c2b136e7f upstream. This reverts commit ba62bafe942b ("kernel/relay.c: fix potential memory leak"). This commit introduced a double free bug, because 'chan' is already freed by the line: kref_put(&chan->kref, relay_destroy_channel); This bug was found by syzkaller, using the BLKTRACESETUP ioctl. Link: http://lkml.kernel.org/r/20180127004759.101823-1-ebiggers3@gmail.com Fixes: ba62bafe942b ("kernel/relay.c: fix potential memory leak") Signed-off-by: Eric Biggers Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Zhouyi Zhou Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/relay.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 39a9dfc69486..55da824f4adc 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -611,7 +611,6 @@ free_bufs: kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); - kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); From a125e9a42302c5dc3c85b88eebc32c43bb65dd07 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:53 -0800 Subject: [PATCH 400/770] pipe: actually allow root to exceed the pipe buffer limits commit 85c2dd5473b2718b4b63e74bfeb1ca876868e11f upstream. pipe-user-pages-hard and pipe-user-pages-soft are only supposed to apply to unprivileged users, as documented in both Documentation/sysctl/fs.txt and the pipe(7) man page. However, the capabilities are actually only checked when increasing a pipe's size using F_SETPIPE_SZ, not when creating a new pipe. Therefore, if pipe-user-pages-hard has been set, the root user can run into it and be unable to create pipes. Similarly, if pipe-user-pages-soft has been set, the root user can run into it and have their pipes limited to 1 page each. Fix this by allowing the privileged override in both cases. Link: http://lkml.kernel.org/r/20180111052902.14409-4-ebiggers3@gmail.com Fixes: 759c01142a5d ("pipe: limit the per-user amount of pages allocated in pipes") Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index f0f4ab36c444..984067df26bb 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -618,6 +618,11 @@ static bool too_many_pipe_buffers_hard(unsigned long user_bufs) return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; } +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); +} + struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; @@ -634,12 +639,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1069,7 +1074,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } From 703b37d845987cedd76c534df7719bb8024ad66e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Feb 2018 15:41:56 -0800 Subject: [PATCH 401/770] pipe: fix off-by-one error when checking buffer limits commit 9903a91c763ecdae333a04a9d89d79d2b8966503 upstream. With pipe-user-pages-hard set to 'N', users were actually only allowed up to 'N - 1' buffers; and likewise for pipe-user-pages-soft. Fix this to allow up to 'N' buffers, as would be expected. Link: http://lkml.kernel.org/r/20180111052902.14409-5-ebiggers3@gmail.com Fixes: b0b91d18e2e9 ("pipe: fix limit checking in pipe_set_size()") Signed-off-by: Eric Biggers Acked-by: Willy Tarreau Acked-by: Kees Cook Acked-by: Joe Lawrence Cc: Alexander Viro Cc: "Luis R . Rodriguez" Cc: Michael Kerrisk Cc: Mikulas Patocka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 984067df26bb..8ef7d7bef775 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -610,12 +610,12 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard; } static bool is_unprivileged_user(void) From 0e4ac4aed9f92796c4177f065f4dbd1bc9c3a432 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 Jan 2018 21:05:55 +0100 Subject: [PATCH 402/770] HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working commit edfc3722cfef4217c7fe92b272cbe0288ba1ff57 upstream. The Toshiba Click Mini uses an i2c attached keyboard/touchpad combo (single i2c_hid device for both) which has a vid:pid of 04F3:0401, which is also used by a bunch of Elan touchpads which are handled by the drivers/input/mouse/elan_i2c driver, but that driver deals with pure touchpads and does not work for a combo device such as the one on the Toshiba Click Mini. The combo on the Mini has an ACPI id of ELAN0800, which is not claimed by the elan_i2c driver, so check for that and if it is found do not ignore the device. This fixes the keyboard/touchpad combo on the Mini not working (although with the touchpad in mouse emulation mode). Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 330ca983828b..5744eb729d24 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2638,7 +2638,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0401) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -2908,6 +2907,17 @@ bool hid_ignore(struct hid_device *hdev) strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0) return true; break; + case USB_VENDOR_ID_ELAN: + /* + * Many Elan devices have a product id of 0x0401 and are handled + * by the elan_i2c input driver. But the ACPI HID ELAN0800 dev + * is not (and cannot be) handled by that driver -> + * Ignore all 0x0401 devs except for the ELAN0800 dev. + */ + if (hdev->product == 0x0401 && + strncmp(hdev->name, "ELAN0800", 8) != 0) + return true; + break; } if (hdev->type == HID_TYPE_USBMOUSE && From ed72fcf6436271f63a4afc122770ae6537c8d3ce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 Nov 2017 20:29:07 +0100 Subject: [PATCH 403/770] Bluetooth: btsdio: Do not bind to non-removable BCM43341 commit b4cdaba274247c9c841c6a682c08fa91fb3aa549 upstream. BCM43341 devices soldered onto the PCB (non-removable) always (AFAICT) use an UART connection for bluetooth. But they also advertise btsdio support on their 3th sdio function, this causes 2 problems: 1) A non functioning BT HCI getting registered 2) Since the btsdio driver does not have suspend/resume callbacks, mmc_sdio_pre_suspend will return -ENOSYS, causing mmc_pm_notify() to react as if the SDIO-card is removed and since the slot is marked as non-removable it will never get detected as inserted again. Which results in wifi no longer working after a suspend/resume. This commit fixes both by making btsdio ignore BCM43341 devices when connected to a slot which is marked non-removable. Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btsdio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index c8e945d19ffe..20142bc77554 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -292,6 +293,14 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } + /* BCM43341 devices soldered onto the PCB (non-removable) use an + * uart connection for bluetooth, ignore the BT SDIO interface. + */ + if (func->vendor == SDIO_VENDOR_ID_BROADCOM && + func->device == SDIO_DEVICE_ID_BROADCOM_43341 && + !mmc_card_is_removable(func->card->host)) + return -ENODEV; + data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; From 2a2ee0c1fee7268f0cdf29415ba2708d3c2bfa33 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Dec 2017 19:00:07 +0800 Subject: [PATCH 404/770] Revert "Bluetooth: btusb: fix QCA Rome suspend/resume" commit 7d06d5895c159f64c46560dc258e553ad8670fe0 upstream. This reverts commit fd865802c66bc451dc515ed89360f84376ce1a56. This commit causes a regression on some QCA ROME chips. The USB device reset happens in btusb_open(), hence firmware loading gets interrupted. Furthermore, this commit stops working after commit ("a0085f2510e8976614ad8f766b209448b385492f Bluetooth: btusb: driver to enable the usb-wakeup feature"). Reset-resume quirk only gets enabled in btusb_suspend() when it's not a wakeup source. If we really want to reset the USB device, we need to do it before btusb_open(). Let's handle it in drivers/usb/core/quirks.c. Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Signed-off-by: Kai-Heng Feng Reviewed-by: Brian Norris Tested-by: Brian Norris Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 513a7a59d421..0a97142ccc42 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3099,12 +3099,6 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - - /* QCA Rome devices lose their updated firmware over suspend, - * but the USB hub doesn't notice any status change. - * Explicitly request a device reset on resume. - */ - set_bit(BTUSB_RESET_RESUME, &data->flags); } #ifdef CONFIG_BT_HCIBTUSB_RTL From fed016a79b42943f7dea7409d107b641fe3d1080 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Jan 2018 10:44:16 +0100 Subject: [PATCH 405/770] Bluetooth: btusb: Restore QCA Rome suspend/resume fix with a "rewritten" version commit 61f5acea8737d9b717fcc22bb6679924f3c82b98 upstream. Commit 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") removed the setting of the BTUSB_RESET_RESUME quirk for QCA Rome devices, instead favoring adding USB_QUIRK_RESET_RESUME quirks in usb/core/quirks.c. This was done because the DIY BTUSB_RESET_RESUME reset-resume handling has several issues (see the original commit message). An added advantage of moving over to the USB-core reset-resume handling is that it also disables autosuspend for these devices, which is similarly broken on these. But there are 2 issues with this approach: 1) It leaves the broken DIY BTUSB_RESET_RESUME code in place for Realtek devices. 2) Sofar only 2 of the 10 QCA devices known to the btusb code have been added to usb/core/quirks.c and if we fix the Realtek case the same way we need to add an additional 14 entries. So in essence we need to duplicate a large part of the usb_device_id table in btusb.c in usb/core/quirks.c and manually keep them in sync. This commit instead restores setting a reset-resume quirk for QCA devices in the btusb.c code, avoiding the duplicate usb_device_id table problem. This commit avoids the problems with the original DIY BTUSB_RESET_RESUME code by simply setting the USB_QUIRK_RESET_RESUME quirk directly on the usb_device. This commit also moves the BTUSB_REALTEK case over to directly setting the USB_QUIRK_RESET_RESUME on the usb_device and removes the now unused BTUSB_RESET_RESUME code. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 7d06d5895c15 ("Revert "Bluetooth: btusb: fix QCA...suspend/resume"") Cc: Leif Liddy Cc: Matthias Kaehlcke Cc: Brian Norris Cc: Daniel Drake Cc: Kai-Heng Feng Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 0a97142ccc42..d54c3f6f728c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -392,9 +393,8 @@ static const struct usb_device_id blacklist_table[] = { #define BTUSB_FIRMWARE_LOADED 7 #define BTUSB_FIRMWARE_FAILED 8 #define BTUSB_BOOTING 9 -#define BTUSB_RESET_RESUME 10 -#define BTUSB_DIAG_RUNNING 11 -#define BTUSB_OOB_WAKE_ENABLED 12 +#define BTUSB_DIAG_RUNNING 10 +#define BTUSB_OOB_WAKE_ENABLED 11 struct btusb_data { struct hci_dev *hdev; @@ -3099,6 +3099,12 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; + + /* QCA Rome devices lose their updated firmware over suspend, + * but the USB hub doesn't notice any status change. + * explicitly request a device reset on resume. + */ + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #ifdef CONFIG_BT_HCIBTUSB_RTL @@ -3109,7 +3115,7 @@ static int btusb_probe(struct usb_interface *intf, * but the USB hub doesn't notice any status change. * Explicitly request a device reset on resume. */ - set_bit(BTUSB_RESET_RESUME, &data->flags); + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #endif @@ -3274,14 +3280,6 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) enable_irq(data->oob_wake_irq); } - /* Optionally request a device reset on resume, but only when - * wakeups are disabled. If wakeups are enabled we assume the - * device will stay powered up throughout suspend. - */ - if (test_bit(BTUSB_RESET_RESUME, &data->flags) && - !device_may_wakeup(&data->udev->dev)) - data->udev->reset_resume = 1; - return 0; } From 765ae618ad9f106f6231570f29d0d5be3b56d269 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 18 Jan 2018 00:36:57 +0800 Subject: [PATCH 406/770] ipmi: use dynamic memory for DMI driver override commit 5516e21a1e95e9b9f39985598431a25477d91643 upstream. Currently a crash can be seen if we reach the "err" label in dmi_add_platform_ipmi(), calling platform_device_put(), like here: [ 7.270584] (null): ipmi:dmi: Unable to add resources: -16 [ 7.330229] ------------[ cut here ]------------ [ 7.334889] kernel BUG at mm/slub.c:3894! [ 7.338936] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 7.344475] Modules linked in: [ 7.347556] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2-00004-gbe9cb7b-dirty #114 [ 7.355907] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT17 Nemo 2.0 RC0 11/29/2017 [ 7.365137] task: 00000000c211f6d3 task.stack: 00000000f276e9af [ 7.371116] pstate: 60000005 (nZCv daif -PAN -UAO) [ 7.375957] pc : kfree+0x194/0x1b4 [ 7.379389] lr : platform_device_release+0xcc/0xd8 [ 7.384225] sp : ffff0000092dba90 [ 7.387567] x29: ffff0000092dba90 x28: ffff000008a83000 [ 7.392933] x27: ffff0000092dbc10 x26: 00000000000000e6 [ 7.398297] x25: 0000000000000003 x24: ffff0000085b51e8 [ 7.403662] x23: 0000000000000100 x22: ffff7e0000234cc0 [ 7.409027] x21: ffff000008af3660 x20: ffff8017d21acc10 [ 7.414392] x19: ffff8017d21acc00 x18: 0000000000000002 [ 7.419757] x17: 0000000000000001 x16: 0000000000000008 [ 7.425121] x15: 0000000000000001 x14: 6666666678303d65 [ 7.430486] x13: 6469727265766f5f x12: 7265766972642e76 [ 7.435850] x11: 6564703e2d617020 x10: 6530326435373638 [ 7.441215] x9 : 3030303030303030 x8 : 3d76656420657361 [ 7.446580] x7 : ffff000008f59df8 x6 : ffff8017fbe0ea50 [ 7.451945] x5 : 0000000000000000 x4 : 0000000000000000 [ 7.457309] x3 : ffffffffffffffff x2 : 0000000000000000 [ 7.462674] x1 : 0fffc00000000800 x0 : ffff7e0000234ce0 [ 7.468039] Process swapper/0 (pid: 1, stack limit = 0x00000000f276e9af) [ 7.474809] Call trace: [ 7.477272] kfree+0x194/0x1b4 [ 7.480351] platform_device_release+0xcc/0xd8 [ 7.484837] device_release+0x34/0x90 [ 7.488531] kobject_put+0x70/0xcc [ 7.491961] put_device+0x14/0x1c [ 7.495304] platform_device_put+0x14/0x1c [ 7.499439] dmi_add_platform_ipmi+0x348/0x3ac [ 7.503923] scan_for_dmi_ipmi+0xfc/0x10c [ 7.507970] do_one_initcall+0x38/0x124 [ 7.511840] kernel_init_freeable+0x188/0x228 [ 7.516238] kernel_init+0x10/0x100 [ 7.519756] ret_from_fork+0x10/0x18 [ 7.523362] Code: f94002c0 37780080 f94012c0 37000040 (d4210000) [ 7.529552] ---[ end trace 11750e4787deef9e ]--- [ 7.534228] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b [ 7.534228] This is because when the device is released in platform_device_release(), we try to free pdev.driver_override. This is a const string, hence the crash. Fix by using dynamic memory for pdev->driver_override. Signed-off-by: John Garry [Removed the free of driver_override from ipmi_si_remove_by_dev(). The free is done in platform_device_release(), and would result in a double free, and ipmi_si_remove_by_dev() is called by non-platform devices.] Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_dmi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index 2059f79d669a..c3a23ec3e76f 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -81,7 +81,10 @@ static void __init dmi_add_platform_ipmi(unsigned long base_addr, pr_err("ipmi:dmi: Error allocation IPMI platform device"); return; } - pdev->driver_override = override; + pdev->driver_override = kasprintf(GFP_KERNEL, "%s", + override); + if (!pdev->driver_override) + goto err; if (type == IPMI_DMI_TYPE_SSIF) goto add_properties; From 34ad59099ead42461b968e36f939020821e82cef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 04:16:47 -0500 Subject: [PATCH 407/770] signal/openrisc: Fix do_unaligned_access to send the proper signal commit 500d58300571b6602341b041f97c082a461ef994 upstream. While reviewing the signal sending on openrisc the do_unaligned_access function stood out because it is obviously wrong. A comment about an si_code set above when actually si_code is never set. Leading to a random si_code being sent to userspace in the event of an unaligned access. Looking further SIGBUS BUS_ADRALN is the proper pair of signal and si_code to send for an unaligned access. That is what other architectures do and what is required by posix. Given that do_unaligned_access is broken in a way that no one can be relying on it on openrisc fix the code to just do the right thing. Fixes: 769a8a96229e ("OpenRISC: Traps") Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Arnd Bergmann Cc: openrisc@lists.librecores.org Acked-by: Stafford Horne Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/kernel/traps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 803e9e756f77..8d8437169b5e 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -306,12 +306,12 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) siginfo_t info; if (user_mode(regs)) { - /* Send a SIGSEGV */ - info.si_signo = SIGSEGV; + /* Send a SIGBUS */ + info.si_signo = SIGBUS; info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, current); + info.si_code = BUS_ADRALN; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); From 16256f2658806cbd84d649ac700e673b16a06168 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 24 Jul 2017 17:30:30 -0500 Subject: [PATCH 408/770] signal/sh: Ensure si_signo is initialized in do_divide_error commit 0e88bb002a9b2ee8cc3cc9478ce2dc126f849696 upstream. Set si_signo. Cc: Yoshinori Sato Cc: Rich Felker Cc: Paul Mundt Cc: linux-sh@vger.kernel.org Fixes: 0983b31849bb ("sh: Wire up division and address error exceptions on SH-2A.") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 57cff00cad17..b3770bb26211 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -609,7 +609,8 @@ asmlinkage void do_divide_error(unsigned long r4) break; } - force_sig_info(SIGFPE, &info, current); + info.si_signo = SIGFPE; + force_sig_info(info.si_signo, &info, current); } #endif From 632a537b3c6b031834a7480f95fbf98132262376 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:01:34 -0500 Subject: [PATCH 409/770] alpha: fix crash if pthread_create races with signal delivery commit 21ffceda1c8b3807615c40d440d7815e0c85d366 upstream. On alpha, a process will crash if it attempts to start a thread and a signal is delivered at the same time. The crash can be reproduced with this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html The reason for the crash is this: * we call the clone syscall * we go to the function copy_process * copy process calls copy_thread_tls, it is a wrapper around copy_thread * copy_thread sets the tls pointer: childti->pcb.unique = regs->r20 * copy_thread sets regs->r20 to zero * we go back to copy_process * copy process checks "if (signal_pending(current))" and returns -ERESTARTNOINTR * the clone syscall is restarted, but this time, regs->r20 is zero, so the new thread is created with zero tls pointer * the new thread crashes in start_thread when attempting to access tls The comment in the code says that setting the register r20 is some compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug by zeroing regs->r20 only if CLONE_SETTLS is not set. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 74bfb1f2d68e..3a885253f486 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -269,12 +269,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp, application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork; From 90d17505219a7f382e6a295580e4382091bef7f6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Nov 2017 16:02:13 +0100 Subject: [PATCH 410/770] alpha: osf_sys.c: fix put_tv32 regression commit 47669fb6b5951d0e09fc99719653e0ac92b50b99 upstream. There was a typo in the new version of put_tv32() that caused an unguarded access of a user space pointer, and failed to return the correct result in gettimeofday(), wait4(), usleep_thread() and old_adjtimex(). This fixes it to give the correct behavior again. Fixes: 1cc6c4635e9f ("osf_sys.c: switch handling of timeval32/itimerval32 to copy_{to,from}_user()") Signed-off-by: Arnd Bergmann Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/osf_sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index ce3a675c0c4b..75a5c35a2067 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -964,8 +964,8 @@ static inline long put_tv32(struct timeval32 __user *o, struct timeval *i) { return copy_to_user(o, &(struct timeval32){ - .tv_sec = o->tv_sec, - .tv_usec = o->tv_usec}, + .tv_sec = i->tv_sec, + .tv_usec = i->tv_usec}, sizeof(struct timeval32)); } From 371c5ada2af7fc456d2c4ea5aa1e0ab6cb7a5efe Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Fri, 24 Nov 2017 21:25:01 +1300 Subject: [PATCH 411/770] alpha: Fix mixed up args in EXC macro in futex operations commit 84e455361ec97ea6037d31d42a2955628ea2094b upstream. Fix the typo (mixed up arguments) in the EXC macro in the futex definitions introduced by commit ca282f697381 (alpha: add a helper for emitting exception table entries). Signed-off-by: Michael Cree Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/futex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h index d2e4da93e68c..ca3322536f72 100644 --- a/arch/alpha/include/asm/futex.h +++ b/arch/alpha/include/asm/futex.h @@ -20,8 +20,8 @@ "3: .subsection 2\n" \ "4: br 1b\n" \ " .previous\n" \ - EXC(1b,3b,%1,$31) \ - EXC(2b,3b,%1,$31) \ + EXC(1b,3b,$31,%1) \ + EXC(2b,3b,$31,%1) \ : "=&r" (oldval), "=&r"(ret) \ : "r" (uaddr), "r"(oparg) \ : "memory") @@ -82,8 +82,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3: .subsection 2\n" "4: br 1b\n" " .previous\n" - EXC(1b,3b,%0,$31) - EXC(2b,3b,%0,$31) + EXC(1b,3b,$31,%0) + EXC(2b,3b,$31,%0) : "+r"(ret), "=&r"(prev), "=&r"(cmp) : "r"(uaddr), "r"((long)(int)oldval), "r"(newval) : "memory"); From bef0563f39cd457d090c0d0713f4699b670fd918 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 13:59:54 -0500 Subject: [PATCH 412/770] alpha: fix reboot on Avanti platform commit 55fc633c41a08ce9244ff5f528f420b16b1e04d6 upstream. We need to define NEED_SRM_SAVE_RESTORE on the Avanti, otherwise we get machine check exception when attempting to reboot the machine. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci_impl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 26231601630e..f332d88ffaff 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -144,7 +144,8 @@ struct pci_iommu_arena }; #if defined(CONFIG_ALPHA_SRM) && \ - (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) # define NEED_SRM_SAVE_RESTORE #else # undef NEED_SRM_SAVE_RESTORE From 12ab9e1e8d61b2384f97e2e6fb2a17aec677b01a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Jan 2018 14:00:32 -0500 Subject: [PATCH 413/770] alpha: fix formating of stack content commit 4b01abdb32fc36abe877503bfbd33019159fad71 upstream. Since version 4.9, the kernel automatically breaks printk calls into multiple newlines unless pr_cont is used. Fix the alpha stacktrace code, so that it prints stack trace in four columns, as it was initially intended. Signed-off-by: Mikulas Patocka Signed-off-by: Matt Turner Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/traps.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 4bd99a7b1c41..f43bd05dede2 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -160,11 +160,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk(" "); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); } - printk("\n"); + pr_cont("\n"); dik_show_trace(sp); } From e68d638e4931236aebcf3bf66f18136ba59f852e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 5 Jan 2018 14:27:58 -0800 Subject: [PATCH 414/770] xtensa: fix futex_atomic_cmpxchg_inatomic commit ca47480921587ae30417dd234a9f79af188e3666 upstream. Return 0 if the operation was successful, not the userspace memory value. Check that userspace value equals passed oldval, not itself. Don't update *uval if the value wasn't read from userspace memory. This fixes process hang due to infinite loop in futex_lock_pi. It also fixes a bunch of glibc tests nptl/tst-mutexpi*. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/futex.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index eaaf1ebcc7a4..5bfbc1c401d4 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -92,7 +92,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; - u32 prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; @@ -103,26 +102,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - "1: l32i %1, %3, 0\n" - " mov %0, %5\n" - " wsr %1, scompare1\n" - "2: s32c1i %0, %3, 0\n" - "3:\n" + " wsr %5, scompare1\n" + "1: s32c1i %1, %4, 0\n" + " s32i %1, %6, 0\n" + "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" - "4: .long 3b\n" - "5: l32r %1, 4b\n" - " movi %0, %6\n" + "3: .long 2b\n" + "4: l32r %1, 3b\n" + " movi %0, %7\n" " jx %1\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,5b,2b,5b\n" + " .long 1b,4b\n" " .previous\n" - : "+r" (ret), "=&r" (prev), "+m" (*uaddr) - : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT) + : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) + : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) : "memory"); - *uval = prev; return ret; } From bed938ba673c9c5045fda278305ec541e5d13fc8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 13 Nov 2017 16:12:06 +0000 Subject: [PATCH 415/770] EDAC, octeon: Fix an uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544e92581a2ac44607d7cc602c6b54d18656f56d upstream. Fix an uninitialized variable warning in the Octeon EDAC driver, as seen in MIPS cavium_octeon_defconfig builds since v4.14 with Codescape GNU Tools 2016.05-03: drivers/edac/octeon_edac-lmc.c In function ‘octeon_lmc_edac_poll_o2’: drivers/edac/octeon_edac-lmc.c:87:24: warning: ‘((long unsigned int*)&int_reg)[1]’ may \ be used uninitialized in this function [-Wmaybe-uninitialized] if (int_reg.s.sec_err || int_reg.s.ded_err) { ^ Iinitialise the whole int_reg variable to zero before the conditional assignments in the error injection case. Signed-off-by: James Hogan Acked-by: David Daney Cc: linux-edac Cc: linux-mips@linux-mips.org Fixes: 1bc021e81565 ("EDAC: Octeon: Add error injection support") Link: http://lkml.kernel.org/r/20171113161206.20990-1-james.hogan@mips.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/octeon_edac-lmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index 9c1ffe3e912b..aeb222ca3ed1 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -78,6 +78,7 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) if (!pvt->inject) int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); else { + int_reg.u64 = 0; if (pvt->error_type == 1) int_reg.s.sec_err = 1; if (pvt->error_type == 2) From 05c9297f34053ca2f6aea1fd9a07ddea1d4793a4 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 29 Nov 2017 16:25:44 +0300 Subject: [PATCH 416/770] pinctrl: intel: Initialize GPIO properly when used through irqchip commit f5a26acf0162477af6ee4c11b4fb9cffe5d3e257 upstream. When a GPIO is requested using gpiod_get_* APIs the intel pinctrl driver switches the pin to GPIO mode and makes sure interrupts are routed to the GPIO hardware instead of IOAPIC. However, if the GPIO is used directly through irqchip, as is the case with many I2C-HID devices where I2C core automatically configures interrupt for the device, the pin is not initialized as GPIO. Instead we rely that the BIOS configures the pin accordingly which seems not to be the case at least in Asus X540NA SKU3 with Focaltech touchpad. When the pin is not properly configured it might result weird behaviour like interrupts suddenly stop firing completely and the touchpad stops responding to user input. Fix this by properly initializing the pin to GPIO mode also when it is used directly through irqchip. Fixes: 7981c0015af2 ("pinctrl: intel: Add Intel Sunrisepoint pin controller and GPIO support") Reported-by: Daniel Drake Reported-and-tested-by: Chris Chiu Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-intel.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 71df0f70b61f..72b4527d690f 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -427,6 +427,18 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +{ + u32 value; + + /* Put the pad into GPIO mode */ + value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; + /* Disable SCI/SMI/NMI generation */ + value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); + value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); + writel(value, padcfg0); +} + static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) @@ -434,7 +446,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); void __iomem *padcfg0; unsigned long flags; - u32 value; raw_spin_lock_irqsave(&pctrl->lock, flags); @@ -444,13 +455,7 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, } padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0); - /* Put the pad into GPIO mode */ - value = readl(padcfg0) & ~PADCFG0_PMODE_MASK; - /* Disable SCI/SMI/NMI generation */ - value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI); - value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI); - writel(value, padcfg0); - + intel_gpio_set_gpio_mode(padcfg0); /* Disable TX buffer and enable RX (this will be input) */ __intel_gpio_set_direction(padcfg0, true); @@ -935,6 +940,8 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned type) raw_spin_lock_irqsave(&pctrl->lock, flags); + intel_gpio_set_gpio_mode(reg); + value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); From 688d1b8c4721e247649383bbfcafedca3cdff89a Mon Sep 17 00:00:00 2001 From: Dmitry Mastykin Date: Thu, 28 Dec 2017 18:19:24 +0300 Subject: [PATCH 417/770] pinctrl: mcp23s08: fix irq setup order commit 02e389e63e3523828fc3832f27e0341885f60f6f upstream. When using mcp23s08 module with gpio-keys, often (50% of boots) it fails to get irq numbers with message: "gpio-keys keys: Unable to get irq number for GPIO 0, error -6". Seems that irqs must be setup before devm_gpiochip_add_data(). Signed-off-by: Dmitry Mastykin Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-mcp23s08.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 9c950bbf07ba..447763aad815 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -891,16 +891,16 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } - ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); - if (ret < 0) - goto fail; - if (mcp->irq && mcp->irq_controller) { ret = mcp23s08_irq_setup(mcp); if (ret) goto fail; } + ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp); + if (ret < 0) + goto fail; + mcp->pinctrl_desc.name = "mcp23xxx-pinctrl"; mcp->pinctrl_desc.pctlops = &mcp_pinctrl_ops; mcp->pinctrl_desc.confops = &mcp_pinconf_ops; From 89cad3fa54919ba00004e0e202f549391a074a7c Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 17 Jan 2018 14:34:21 +0100 Subject: [PATCH 418/770] pinctrl: sx150x: Unregister the pinctrl on release commit 0657cb50b5a75abd92956028727dc255d690a4a6 upstream. There is no matching call to pinctrl_unregister, so switch to the managed devm_pinctrl_register to clean up properly when done. Fixes: 9e80f9064e73 ("pinctrl: Add SX150X GPIO Extender Pinctrl Driver") Signed-off-by: Peter Rosin Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-sx150x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index 7450f5118445..7b0d5972bb21 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1225,7 +1225,7 @@ static int sx150x_probe(struct i2c_client *client, pctl->pinctrl_desc.npins = pctl->data->npins; pctl->pinctrl_desc.owner = THIS_MODULE; - pctl->pctldev = pinctrl_register(&pctl->pinctrl_desc, dev, pctl); + pctl->pctldev = devm_pinctrl_register(dev, &pctl->pinctrl_desc, pctl); if (IS_ERR(pctl->pctldev)) { dev_err(dev, "Failed to register pinctrl device\n"); return PTR_ERR(pctl->pctldev); From 5219eedf2d5e19fdcd03aa685426eb891be85af6 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 17 Jan 2018 14:34:22 +0100 Subject: [PATCH 419/770] pinctrl: sx150x: Register pinctrl before adding the gpiochip commit 1a1d39e1b8dd1d0f92a79da4fcc1ab0be3ae9bfc upstream. Various gpiolib activity depend on the pinctrl to be up and kicking. Therefore, register the pinctrl before adding a gpiochip. Suggested-by: Linus Walleij Signed-off-by: Peter Rosin Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-sx150x.c | 35 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index 7b0d5972bb21..7fa396f45c8c 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1144,6 +1144,27 @@ static int sx150x_probe(struct i2c_client *client, if (ret) return ret; + /* Pinctrl_desc */ + pctl->pinctrl_desc.name = "sx150x-pinctrl"; + pctl->pinctrl_desc.pctlops = &sx150x_pinctrl_ops; + pctl->pinctrl_desc.confops = &sx150x_pinconf_ops; + pctl->pinctrl_desc.pins = pctl->data->pins; + pctl->pinctrl_desc.npins = pctl->data->npins; + pctl->pinctrl_desc.owner = THIS_MODULE; + + ret = devm_pinctrl_register_and_init(dev, &pctl->pinctrl_desc, + pctl, &pctl->pctldev); + if (ret) { + dev_err(dev, "Failed to register pinctrl device\n"); + return ret; + } + + ret = pinctrl_enable(pctl->pctldev); + if (ret) { + dev_err(dev, "Failed to enable pinctrl device\n"); + return ret; + } + /* Register GPIO controller */ pctl->gpio.label = devm_kstrdup(dev, client->name, GFP_KERNEL); pctl->gpio.base = -1; @@ -1217,20 +1238,6 @@ static int sx150x_probe(struct i2c_client *client, client->irq); } - /* Pinctrl_desc */ - pctl->pinctrl_desc.name = "sx150x-pinctrl"; - pctl->pinctrl_desc.pctlops = &sx150x_pinctrl_ops; - pctl->pinctrl_desc.confops = &sx150x_pinconf_ops; - pctl->pinctrl_desc.pins = pctl->data->pins; - pctl->pinctrl_desc.npins = pctl->data->npins; - pctl->pinctrl_desc.owner = THIS_MODULE; - - pctl->pctldev = devm_pinctrl_register(dev, &pctl->pinctrl_desc, pctl); - if (IS_ERR(pctl->pctldev)) { - dev_err(dev, "Failed to register pinctrl device\n"); - return PTR_ERR(pctl->pctldev); - } - return 0; } From f04fe1192e5607f203e7c0096340fc867da8e84c Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 17 Jan 2018 14:34:23 +0100 Subject: [PATCH 420/770] pinctrl: sx150x: Add a static gpio/pinctrl pin range mapping commit b930151e5b55a0e62a3aad06876de891ac980471 upstream. Without such a range, gpiolib fails with -EPROBE_DEFER, pending the addition of the range. So, without a range, gpiolib will keep deferring indefinitely. Fixes: 9e80f9064e73 ("pinctrl: Add SX150X GPIO Extender Pinctrl Driver") Fixes: e10f72bf4b3e ("gpio: gpiolib: Generalise state persistence beyond sleep") Suggested-by: Linus Walleij Signed-off-by: Peter Rosin Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-sx150x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index 7fa396f45c8c..70a0228f4e7f 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1193,6 +1193,11 @@ static int sx150x_probe(struct i2c_client *client, if (ret) return ret; + ret = gpiochip_add_pin_range(&pctl->gpio, dev_name(dev), + 0, 0, pctl->data->npins); + if (ret) + return ret; + /* Add Interrupt support if an irq is specified */ if (client->irq > 0) { pctl->irq_chip.name = devm_kstrdup(dev, client->name, From 030dcf7d1af39b0dc341702cb7d2482df93d3c71 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:47 -0800 Subject: [PATCH 421/770] pktcdvd: Fix pkt_setup_dev() error path commit 5a0ec388ef0f6e33841aeb810d7fa23f049ec4cd upstream. Commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") modified add_disk() and disk_release() but did not update any of the error paths that trigger a put_disk() call after disk->queue has been assigned. That introduced the following behavior in the pktcdvd driver if pkt_new_dev() fails: Kernel BUG at 00000000e98fd882 [verbose debug info unavailable] Since disk_release() calls blk_put_queue() anyway if disk->queue != NULL, fix this by removing the blk_cleanup_queue() call from the pkt_setup_dev() error path. Fixes: commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: Maciej S. Szmigiero Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 67974796c350..2659b2534073 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2745,7 +2745,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) - goto out_new_dev; + goto out_mem2; /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; @@ -2763,8 +2763,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) mutex_unlock(&ctl_mutex); return 0; -out_new_dev: - blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: From c846868070d0426667902c55a89008333584af50 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:48 -0800 Subject: [PATCH 422/770] pktcdvd: Fix a recently introduced NULL pointer dereference commit 882d4171a8950646413b1a3cbe0e4a6a612fe82e upstream. Call bdev_get_queue(bdev) after bdev->bd_disk has been initialized instead of just before that pointer has been initialized. This patch avoids that the following command pktsetup 1 /dev/sr0 triggers the following kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000548 IP: pkt_setup_dev+0x2db/0x670 [pktcdvd] CPU: 2 PID: 724 Comm: pktsetup Not tainted 4.15.0-rc4-dbg+ #1 Call Trace: pkt_ctl_ioctl+0xce/0x1c0 [pktcdvd] do_vfs_ioctl+0x8e/0x670 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x23/0x9a Reported-by: Maciej S. Szmigiero Fixes: commit ca18d6f769d2 ("block: Make most scsi_req_init() calls implicit") Signed-off-by: Bart Van Assche Tested-by: Maciej S. Szmigiero Cc: Maciej S. Szmigiero Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2659b2534073..531a0915066b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2579,14 +2579,14 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; - if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { - WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); - bdput(bdev); - return -EINVAL; - } ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); if (ret) return ret; + if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { + WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + return -EINVAL; + } /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); From 392640fd18f8f7433820d618c02e7d3d696f3d64 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 6 Jan 2018 16:27:37 +0800 Subject: [PATCH 423/770] blk-mq: quiesce queue before freeing queue commit c2856ae2f315d754a0b6a268e4c6745b332b42e7 upstream. After queue is frozen, dispatch still may happen, for example: 1) requests are submitted from several contexts 2) requests from all these contexts are inserted to queue, but may dispatch to LLD in one of these paths, but other paths sill need to move on even all these requests are completed(that means blk_mq_freeze_queue_wait() returns at that time) 3) dispatch after queue freezing still moves on and causes use-after-free, because request queue is freed This patch quiesces queue after it is frozen, and makes sure all in-progress dispatch are completed. This patch fixes the following kernel crash when running heavy IOs vs. deleting device: [ 36.719251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 36.720318] IP: kyber_has_work+0x14/0x40 [ 36.720847] PGD 254bf5067 P4D 254bf5067 PUD 255e6a067 PMD 0 [ 36.721584] Oops: 0000 [#1] PREEMPT SMP [ 36.722105] Dumping ftrace buffer: [ 36.722570] (ftrace buffer empty) [ 36.723057] Modules linked in: scsi_debug ebtable_filter ebtables ip6table_filter ip6_tables tcm_loop iscsi_target_mod target_core_file target_core_iblock target_core_pscsi target_core_mod xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c bridge stp llc fuse iptable_filter ip_tables sd_mod sg btrfs xor zstd_decompress zstd_compress xxhash raid6_pq mptsas mptscsih bcache crc32c_intel ahci mptbase libahci serio_raw scsi_transport_sas nvme libata shpchp lpc_ich virtio_scsi nvme_core binfmt_misc dm_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi null_blk configs [ 36.733438] CPU: 2 PID: 2374 Comm: fio Not tainted 4.15.0-rc2.blk_mq_quiesce+ #714 [ 36.735143] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.9.3-1.fc25 04/01/2014 [ 36.736688] RIP: 0010:kyber_has_work+0x14/0x40 [ 36.737515] RSP: 0018:ffffc9000209bca0 EFLAGS: 00010202 [ 36.738431] RAX: 0000000000000008 RBX: ffff88025578bfc8 RCX: ffff880257bf4ed0 [ 36.739581] RDX: 0000000000000038 RSI: ffffffff81a98c6d RDI: ffff88025578bfc8 [ 36.740730] RBP: ffff880253cebfc8 R08: ffffc9000209bda0 R09: ffff8802554f3480 [ 36.741885] R10: ffffc9000209be60 R11: ffff880263f72538 R12: ffff88025573e9e8 [ 36.743036] R13: ffff88025578bfd0 R14: 0000000000000001 R15: 0000000000000000 [ 36.744189] FS: 00007f9b9bee67c0(0000) GS:ffff88027fc80000(0000) knlGS:0000000000000000 [ 36.746617] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.748483] CR2: 0000000000000008 CR3: 0000000254bf4001 CR4: 00000000003606e0 [ 36.750164] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 36.751455] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 36.752796] Call Trace: [ 36.753992] blk_mq_do_dispatch_sched+0x7f/0xe0 [ 36.755110] blk_mq_sched_dispatch_requests+0x119/0x190 [ 36.756179] __blk_mq_run_hw_queue+0x83/0x90 [ 36.757144] __blk_mq_delay_run_hw_queue+0xaf/0x110 [ 36.758046] blk_mq_run_hw_queue+0x24/0x70 [ 36.758845] blk_mq_flush_plug_list+0x1e7/0x270 [ 36.759676] blk_flush_plug_list+0xd6/0x240 [ 36.760463] blk_finish_plug+0x27/0x40 [ 36.761195] do_io_submit+0x19b/0x780 [ 36.761921] ? entry_SYSCALL_64_fastpath+0x1a/0x7d [ 36.762788] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 36.763639] RIP: 0033:0x7f9b9699f697 [ 36.764352] RSP: 002b:00007ffc10f991b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1 [ 36.765773] RAX: ffffffffffffffda RBX: 00000000008f6f00 RCX: 00007f9b9699f697 [ 36.766965] RDX: 0000000000a5e6c0 RSI: 0000000000000001 RDI: 00007f9b8462a000 [ 36.768377] RBP: 0000000000000000 R08: 0000000000000001 R09: 00000000008f6420 [ 36.769649] R10: 00007f9b846e5000 R11: 0000000000000206 R12: 00007f9b795d6a70 [ 36.770807] R13: 00007f9b795e4140 R14: 00007f9b795e3fe0 R15: 0000000100000000 [ 36.771955] Code: 83 c7 10 e9 3f 68 d1 ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 97 b0 00 00 00 48 8d 42 08 48 83 c2 38 <48> 3b 00 74 06 b8 01 00 00 00 c3 48 3b 40 08 75 f4 48 83 c0 10 [ 36.775004] RIP: kyber_has_work+0x14/0x40 RSP: ffffc9000209bca0 [ 36.776012] CR2: 0000000000000008 [ 36.776690] ---[ end trace 4045cbce364ff2a4 ]--- [ 36.777527] Kernel panic - not syncing: Fatal exception [ 36.778526] Dumping ftrace buffer: [ 36.779313] (ftrace buffer empty) [ 36.780081] Kernel Offset: disabled [ 36.780877] ---[ end Kernel panic - not syncing: Fatal exception Reviewed-by: Christoph Hellwig Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 7b30bf10b1d4..f3750389e351 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -660,6 +660,15 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); + /* + * make sure all in-progress dispatch are completed because + * blk_freeze_queue() can only complete all requests, and + * dispatch may still be in-progress since we dispatch requests + * from more than one contexts + */ + if (q->mq_ops) + blk_mq_quiesce_queue(q); + /* for synchronous bio-based driver finish in-flight integrity i/o */ blk_flush_integrity(); From c87806a8e565e1253cad5072577b759f76f79fdb Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:50 +0100 Subject: [PATCH 424/770] clocksource/drivers/stm32: Fix kernel panic with multiple timers commit e0aeca3d8cbaea514eb98df1149faa918f9ec42d upstream. The current code hides a couple of bugs: - The global variable 'clock_event_ddata' is overwritten each time the init function is invoked. This is fixed with a kmemdup() instead of assigning the global variable. That prevents a memory corruption when several timers are defined in the DT. - The clockevent's event_handler is NULL if the time framework does not select the clockevent when registering it, this is fine but the init code generates in any case an interrupt leading to dereference this NULL pointer. The stm32 timer works with shadow registers, a mechanism to cache the registers. When a change is done in one buffered register, we need to artificially generate an event to force the timer to copy the content of the register to the shadowed register. The auto-reload register (ARR) is one of the shadowed register as well as the prescaler register (PSC), so in order to force the copy, we issue an event which in turn leads to an interrupt and the NULL dereference. This is fixed by inverting two lines where we clear the status register before enabling the update event interrupt. As this kernel crash is resulting from the combination of these two bugs, the fixes are grouped into a single patch. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-stm32.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 8f2423789ba9..4bfeb9929ab2 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -106,6 +106,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +160,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +188,7 @@ err_iomap: err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } From 5a5df7771064defa931d44ed28fec673fe66752f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 6 Feb 2018 15:40:38 -0800 Subject: [PATCH 425/770] lib/ubsan.c: s/missaligned/misaligned/ commit b8fe1120b4ba342b4f156d24e952d6e686b20298 upstream. A vist from the spelling fairy. Cc: David Laight Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..1e2328fa002d 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -281,7 +281,7 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data *data, unsigned long ptr) { unsigned long flags; @@ -322,7 +322,7 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } From 2617e62c2f120fe02a14e01232db4127f8e414a2 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 6 Feb 2018 15:40:42 -0800 Subject: [PATCH 426/770] lib/ubsan: add type mismatch handler for new GCC/Clang commit 42440c1f9911b4b7b8ba3dc4e90c1197bc561211 upstream. UBSAN=y fails to build with new GCC/clang: arch/x86/kernel/head64.o: In function `sanitize_boot_params': arch/x86/include/asm/bootparam_utils.h:37: undefined reference to `__ubsan_handle_type_mismatch_v1' because Clang and GCC 8 slightly changed ABI for 'type mismatch' errors. Compiler now uses new __ubsan_handle_type_mismatch_v1() function with slightly modified 'struct type_mismatch_data'. Let's add new 'struct type_mismatch_data_common' which is independent from compiler's layout of 'struct type_mismatch_data'. And make __ubsan_handle_type_mismatch[_v1]() functions transform compiler-dependent type mismatch data to our internal representation. This way, we can support both old and new compilers with minimal amount of change. Link: http://lkml.kernel.org/r/20180119152853.16806-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Sodagudi Prasad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- lib/ubsan.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 1e2328fa002d..50d1d5c25deb 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -265,14 +265,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +281,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_misaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +299,15 @@ static void handle_misaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,7 +315,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { @@ -326,8 +326,36 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, else handle_object_size_mismatch(data, ptr); } + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} EXPORT_SYMBOL(__ubsan_handle_type_mismatch); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ + + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); + void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) { unsigned long flags; diff --git a/lib/ubsan.h b/lib/ubsan.h index 88f23557edbe..7e30b26497e0 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -37,6 +37,20 @@ struct type_mismatch_data { unsigned char type_check_kind; }; +struct type_mismatch_data_v1 { + struct source_location location; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + struct nonnull_arg_data { struct source_location location; struct source_location attr_location; From 4063cd5683ce4d4a18e89a61b9dbf8492c9faf31 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 5 Dec 2017 09:29:19 +0200 Subject: [PATCH 427/770] btrfs: Handle btrfs_set_extent_delalloc failure in fixup worker commit f3038ee3a3f1017a1cbe9907e31fa12d366c5dcb upstream. This function was introduced by 247e743cbe6e ("Btrfs: Use async helpers to deal with pages that have been improperly dirtied") and it didn't do any error handling then. This function might very well fail in ENOMEM situation, yet it's not handled, this could lead to inconsistent state. So let's handle the failure by setting the mapping error bit. Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c71afd424900..5eaedff28a32 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2101,8 +2101,15 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); out: From 2a3d3015a73cff4a701db85b935f464492e49901 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Feb 2018 14:02:32 +0100 Subject: [PATCH 428/770] objtool: Fix switch-table detection commit 99ce7962d52d1948ad6f2785e308d48e76e0a6ef upstream. Linus reported that GCC-7.3 generated a switch-table construct that confused objtool. It turns out that, in particular due to KASAN, it is possible to have unrelated .rodata usage in between the .rodata setup for the switch-table and the following indirect jump. The simple linear reverse search from the indirect jump would hit upon the KASAN .rodata usage first and fail to find a switch_table, resulting in a spurious 'sibling call with modified stack frame' warning. Fix this by creating a 'jump-stack' which we can 'unwind' during reversal, thereby skipping over much of the in-between code. This is not fool proof by any means, but is sufficient to make the known cases work. Future work would be to construct more comprehensive flow analysis code. Reported-and-tested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180208130232.GF25235@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 41 +++++++++++++++++++++++++++++++++++++++-- tools/objtool/check.h | 1 + 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9cd028aa1509..2e458eb45586 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -851,8 +851,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, * This is a fairly uncommon pattern which is new for GCC 6. As of this * writing, there are 11 occurrences of it in the allmodconfig kernel. * + * As of GCC 7 there are quite a few more of these and the 'in between' code + * is significant. Esp. with KASAN enabled some of the code between the mov + * and jmpq uses .rodata itself, which can confuse things. + * * TODO: Once we have DWARF CFI and smarter instruction decoding logic, * ensure the same register is used in the mov and jump instructions. + * + * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ static struct rela *find_switch_table(struct objtool_file *file, struct symbol *func, @@ -874,12 +880,25 @@ static struct rela *find_switch_table(struct objtool_file *file, text_rela->addend + 4); if (!rodata_rela) return NULL; + file->ignore_unreachables = true; return rodata_rela; } /* case 3 */ - func_for_each_insn_continue_reverse(file, func, insn) { + /* + * Backward search using the @first_jump_src links, these help avoid + * much of the 'in between' code. Which avoids us getting confused by + * it. + */ + for (insn = list_prev_entry(insn, list); + + &insn->list != &file->insn_list && + insn->sec == func->sec && + insn->offset >= func->offset; + + insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { + if (insn->type == INSN_JUMP_DYNAMIC) break; @@ -909,14 +928,32 @@ static struct rela *find_switch_table(struct objtool_file *file, return NULL; } + static int add_func_switch_tables(struct objtool_file *file, struct symbol *func) { - struct instruction *insn, *prev_jump = NULL; + struct instruction *insn, *last = NULL, *prev_jump = NULL; struct rela *rela, *prev_rela = NULL; int ret; func_for_each_insn(file, func, insn) { + if (!last) + last = insn; + + /* + * Store back-pointers for unconditional forward jumps such + * that find_switch_table() can back-track using those and + * avoid some potentially confusing code. + */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && + insn->offset > last->offset && + insn->jump_dest->offset > insn->offset && + !insn->jump_dest->first_jump_src) { + + insn->jump_dest->first_jump_src = insn; + last = insn->jump_dest; + } + if (insn->type != INSN_JUMP_DYNAMIC) continue; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index dbadb304a410..23a1d065cae1 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -47,6 +47,7 @@ struct instruction { bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; + struct instruction *first_jump_src; struct list_head alts; struct symbol *func; struct stack_op stack_op; From 812245b4dbf663aa8c4d5483e2e7c5d2c0fb01fb Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Wed, 3 Jan 2018 16:18:52 +0100 Subject: [PATCH 429/770] arm64: dts: marvell: add Ethernet aliases commit 474c5885582c4a79c21bcf01ed98f98c935f1f4a upstream. This patch adds Ethernet aliases in the Marvell Armada 7040 DB, 8040 DB and 8040 mcbin device trees so that the bootloader setup the MAC addresses correctly. Signed-off-by: Yan Markman [Antoine: commit message, small fixes] Signed-off-by: Antoine Tenart Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/marvell/armada-7040-db.dts | 6 ++++++ arch/arm64/boot/dts/marvell/armada-8040-db.dts | 7 +++++++ arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-7040-db.dts b/arch/arm64/boot/dts/marvell/armada-7040-db.dts index 9c3bdf87e543..51327645b3fb 100644 --- a/arch/arm64/boot/dts/marvell/armada-7040-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-7040-db.dts @@ -61,6 +61,12 @@ reg = <0x0 0x0 0x0 0x80000000>; }; + aliases { + ethernet0 = &cpm_eth0; + ethernet1 = &cpm_eth1; + ethernet2 = &cpm_eth2; + }; + cpm_reg_usb3_0_vbus: cpm-usb3-0-vbus { compatible = "regulator-fixed"; regulator-name = "usb3h0-vbus"; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-db.dts b/arch/arm64/boot/dts/marvell/armada-8040-db.dts index 0d7b2ae46610..a4f82f1efbbc 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-db.dts @@ -61,6 +61,13 @@ reg = <0x0 0x0 0x0 0x80000000>; }; + aliases { + ethernet0 = &cpm_eth0; + ethernet1 = &cpm_eth2; + ethernet2 = &cps_eth0; + ethernet3 = &cps_eth1; + }; + cpm_reg_usb3_0_vbus: cpm-usb3-0-vbus { compatible = "regulator-fixed"; regulator-name = "cpm-usb3h0-vbus"; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts index acf5c7d16d79..2b6b792dab93 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts @@ -62,6 +62,12 @@ reg = <0x0 0x0 0x0 0x80000000>; }; + aliases { + ethernet0 = &cpm_eth0; + ethernet1 = &cps_eth0; + ethernet2 = &cps_eth1; + }; + /* Regulator labels correspond with schematics */ v_3_3: regulator-3-3v { compatible = "regulator-fixed"; From a51421b4cb099ce36cbc85689cabc0b3525d0e29 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 29 Nov 2017 19:51:37 +0200 Subject: [PATCH 430/770] drm/i915: Avoid PPS HW/SW state mismatch due to rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5643205c6340b565a3be0fe0e7305dc4aa551c74 upstream. We store a SW state of the t11_t12 timing in 100usec units but have to program it in 100msec as required by HW. The rounding used during programming means there will be a mismatch between the SW and HW states of this value triggering a "PPS state mismatch" error. Avoid this by storing the already rounded-up value in the SW state. Note that we still calculate panel_power_cycle_delay with the finer 100usec granularity to avoid any needless waits using that version of the delay. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103903 Cc: joks Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171129175137.2889-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 09f274419eea..76cf68745870 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5340,6 +5340,12 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. + */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void From f232bfdcdd768bb6ba5945f1f8f28faa94fc10b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Jan 2018 10:06:03 +0100 Subject: [PATCH 431/770] ACPI: sbshc: remove raw pointer from printk() message commit 43cdd1b716b26f6af16da4e145b6578f98798bf6 upstream. There's no need to be printing a raw kernel pointer to the kernel log at every boot. So just remove it, and change the whole message to use the correct dev_info() call at the same time. Reported-by: Wang Qize Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sbshc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 2fa8304171e0..7a3431018e0a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -275,8 +275,8 @@ static int acpi_smbus_hc_add(struct acpi_device *device) device->driver_data = hc; acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc); - printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n", - hc->ec, hc->offset, hc->query_bit); + dev_info(&device->dev, "SBS HC: offset = 0x%0x, query_bit = 0x%0x\n", + hc->offset, hc->query_bit); return 0; } From 9fc03876355ac0bbbb774d15a362e35805c9db85 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 2 Feb 2018 14:00:36 -0700 Subject: [PATCH 432/770] acpi, nfit: fix register dimm error handling commit 23fbd7c70aec7600e3227eb24259fc55bf6e4881 upstream. A NULL pointer reference kernel bug was observed when acpi_nfit_add_dimm() called in acpi_nfit_register_dimms() failed. This error path does not set nfit_mem->nvdimm, but the 2nd list_for_each_entry() loop in the function assumes it's always set. Add a check to nfit_mem->nvdimm. Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Signed-off-by: Toshi Kani Cc: "Rafael J. Wysocki" Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index dea0fb3d6f64..f14b4326e855 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1618,6 +1618,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct kernfs_node *nfit_kernfs; nvdimm = nfit_mem->nvdimm; + if (!nvdimm) + continue; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, From e822be7502707b994a4847f618b49f17d1c02dae Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 8 Nov 2017 09:39:46 +0200 Subject: [PATCH 433/770] ovl: fix failure to fsync lower dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d796e77f1dd541fe34481af2eee6454688d13982 upstream. As a writable mount, it is not expected for overlayfs to return EINVAL/EROFS for fsync, even if dir/file is not changed. This commit fixes the case of fsync of directory, which is easier to address, because overlayfs already implements fsync file operation for directories. The problem reported by Raphael is that new PostgreSQL 10.0 with a database in overlayfs where lower layer in squashfs fails to start. The failure is due to fsync error, when PostgreSQL does fsync on all existing db directories on startup and a specific directory exists lower layer with no changes. Reported-by: Raphael Hertzog Signed-off-by: Amir Goldstein Tested-by: Raphaël Hertzog Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index d94a51dc4e32..f1e9b24c9945 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -751,10 +751,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = READ_ONCE(od->upperfile); From c7aee3941e1e10a662e5b22f345933280f0caac8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 3 Jan 2018 18:54:42 +0200 Subject: [PATCH 434/770] ovl: take mnt_want_write() for removing impure xattr commit a5a927a7c82e28ea76599dee4019c41e372c911f upstream. The optimization in ovl_cache_get_impure() that tries to remove an unneeded "impure" xattr needs to take mnt_want_write() on upper fs. Fixes: 4edb83bb1041 ("ovl: constant d_ino for non-merge dirs") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f1e9b24c9945..7fa7d68baa6d 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -575,8 +575,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) return ERR_PTR(res); } if (list_empty(&cache->entries)) { - /* Good oportunity to get rid of an unnecessary "impure" flag */ - ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); + /* + * A good opportunity to get rid of an unneeded "impure" flag. + * Removing the "impure" xattr is best effort. + */ + if (!ovl_want_write(dentry)) { + ovl_do_removexattr(ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); + ovl_drop_write(dentry); + } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); kfree(cache); return NULL; From dff5406432ffd805e6917d7b7005e1098354596b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Aug 2017 05:02:38 -0500 Subject: [PATCH 435/770] mn10300/misalignment: Use SIGSEGV SEGV_MAPERR to report a failed user copy commit 6ac1dc736b323011a55ecd1fc5897c24c4f77cbd upstream. Setting si_code to 0 is the same a setting si_code to SI_USER which is definitely not correct. With si_code set to SI_USER si_pid and si_uid will be copied to userspace instead of si_addr. Which is very wrong. So fix this by using a sensible si_code (SEGV_MAPERR) for this failure. Fixes: b920de1b77b7 ("mn10300: add the MN10300/AM33 architecture to the kernel") Cc: David Howells Cc: Masakazu Urade Cc: Koichi Yasutake Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/mn10300/mm/misalignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index b39a388825ae..8ace89617c1c 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -437,7 +437,7 @@ transfer_failed: info.si_signo = SIGSEGV; info.si_errno = 0; - info.si_code = 0; + info.si_code = SEGV_MAPERR; info.si_addr = (void *) regs->pc; force_sig_info(SIGSEGV, &info, current); return; From d9ef4003744321041b8cd2495a576c0be83c4b91 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 31 Jan 2018 00:49:18 -0800 Subject: [PATCH 436/770] devpts: fix error handling in devpts_mntget() commit c9cc8d01fb04117928830449388512a5047569c9 upstream. If devpts_ptmx_path() returns an error code, then devpts_mntget() dereferences an ERR_PTR(): BUG: unable to handle kernel paging request at fffffffffffffff5 IP: devpts_mntget+0x13f/0x280 fs/devpts/inode.c:173 Fix it by returning early in the error paths. Reproducer: #define _GNU_SOURCE #include #include #include #define TIOCGPTPEER _IO('T', 0x41) int main() { for (;;) { int fd = open("/dev/ptmx", 0); unshare(CLONE_NEWNS); ioctl(fd, TIOCGPTPEER, 0); } } Fixes: 311fc65c9fb9 ("pty: Repair TIOCGPTPEER") Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/devpts/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 7eae33ffa3fc..e31d6ed3ec32 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -168,11 +168,11 @@ struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) dput(path.dentry); if (err) { mntput(path.mnt); - path.mnt = ERR_PTR(err); + return ERR_PTR(err); } if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { mntput(path.mnt); - path.mnt = ERR_PTR(-ENODEV); + return ERR_PTR(-ENODEV); } return path.mnt; } From b470fb7b791039d8afea0e4ebcbd6f4a41723c47 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Feb 2018 22:05:31 -0500 Subject: [PATCH 437/770] ftrace: Remove incorrect setting of glob search field commit 7b6586562708d2b3a04fe49f217ddbadbbbb0546 upstream. __unregister_ftrace_function_probe() will incorrectly parse the glob filter because it resets the search variable that was setup by filter_parse_regex(). Al Viro reported this: After that call of filter_parse_regex() we could have func_g.search not equal to glob only if glob started with '!' or '*'. In the former case we would've buggered off with -EINVAL (not = 1). In the latter we would've set func_g.search equal to glob + 1, calculated the length of that thing in func_g.len and proceeded to reset func_g.search back to glob. Suppose the glob is e.g. *foo*. We end up with func_g.type = MATCH_MIDDLE_ONLY; func_g.len = 3; func_g.search = "*foo"; Feeding that to ftrace_match_record() will not do anything sane - we will be looking for names containing "*foo" (->len is ignored for that one). Link: http://lkml.kernel.org/r/20180127031706.GE13338@ZenIV.linux.org.uk Fixes: 3ba009297149f ("ftrace: Introduce ftrace_glob structure") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Reviewed-by: Masami Hiramatsu Reported-by: Al Viro Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8319e09e15b9..7379bcf3baa0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4488,7 +4488,6 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, func_g.type = filter_parse_regex(glob, strlen(glob), &func_g.search, ¬); func_g.len = strlen(func_g.search); - func_g.search = glob; /* we do not support '!' for function probes */ if (WARN_ON(not)) From c561093ed6843684690436dea034af53b462cfe5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 4 Dec 2017 10:06:23 -0800 Subject: [PATCH 438/770] scsi: core: Ensure that the SCSI error handler gets woken up commit 3bd6f43f5cb3714f70c591514f344389df593501 upstream. If scsi_eh_scmd_add() is called concurrently with scsi_host_queue_ready() while shost->host_blocked > 0 then it can happen that neither function wakes up the SCSI error handler. Fix this by making every function that decreases the host_busy counter wake up the error handler if necessary and by protecting the host_failed checks with the SCSI host lock. Reported-by: Pavel Tikhomirov References: https://marc.info/?l=linux-kernel&m=150461610630736 Fixes: commit 746650160866 ("scsi: convert host_busy to atomic_t") Signed-off-by: Bart Van Assche Reviewed-by: Pavel Tikhomirov Tested-by: Stuart Hayes Cc: Konstantin Khorenko Cc: Stuart Hayes Cc: Pavel Tikhomirov Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hosts.c | 6 ++++++ drivers/scsi/scsi_error.c | 18 ++++++++++++++++-- drivers/scsi/scsi_lib.c | 39 ++++++++++++++++++++++++++++----------- include/scsi/scsi_host.h | 2 ++ 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index fe3a0da3ec97..57bf43e34863 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -318,6 +318,9 @@ static void scsi_host_dev_release(struct device *dev) scsi_proc_hostdir_rm(shost->hostt); + /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */ + rcu_barrier(); + if (shost->tmf_work_q) destroy_workqueue(shost->tmf_work_q); if (shost->ehandler) @@ -325,6 +328,8 @@ static void scsi_host_dev_release(struct device *dev) if (shost->work_q) destroy_workqueue(shost->work_q); + destroy_rcu_head(&shost->rcu); + if (shost->shost_state == SHOST_CREATED) { /* * Free the shost_dev device name here if scsi_host_alloc() @@ -399,6 +404,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); mutex_init(&shost->scan_mutex); + init_rcu_head(&shost->rcu); index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL); if (index < 0) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index dab876c65473..fa504ba83ade 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -220,6 +220,17 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd) } } +static void scsi_eh_inc_host_failed(struct rcu_head *head) +{ + struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + shost->host_failed++; + scsi_eh_wakeup(shost); + spin_unlock_irqrestore(shost->host_lock, flags); +} + /** * scsi_eh_scmd_add - add scsi cmd to error handling. * @scmd: scmd to run eh on. @@ -242,9 +253,12 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) scsi_eh_reset(scmd); list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); - shost->host_failed++; - scsi_eh_wakeup(shost); spin_unlock_irqrestore(shost->host_lock, flags); + /* + * Ensure that all tasks observe the host state change before the + * host_failed change. + */ + call_rcu(&shost->rcu, scsi_eh_inc_host_failed); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 635cfa1f2ace..0d3696e9dddd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -318,23 +318,40 @@ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd) cmd->cmd_len = scsi_command_size(cmd->cmnd); } +/* + * Decrement the host_busy counter and wake up the error handler if necessary. + * Avoid as follows that the error handler is not woken up if shost->host_busy + * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination + * with an RCU read lock in this function to ensure that this function in its + * entirety either finishes before scsi_eh_scmd_add() increases the + * host_failed counter or that it notices the shost state change made by + * scsi_eh_scmd_add(). + */ +static void scsi_dec_host_busy(struct Scsi_Host *shost) +{ + unsigned long flags; + + rcu_read_lock(); + atomic_dec(&shost->host_busy); + if (unlikely(scsi_host_in_recovery(shost))) { + spin_lock_irqsave(shost->host_lock, flags); + if (shost->host_failed || shost->host_eh_scheduled) + scsi_eh_wakeup(shost); + spin_unlock_irqrestore(shost->host_lock, flags); + } + rcu_read_unlock(); +} + void scsi_device_unbusy(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; struct scsi_target *starget = scsi_target(sdev); - unsigned long flags; - atomic_dec(&shost->host_busy); + scsi_dec_host_busy(shost); + if (starget->can_queue > 0) atomic_dec(&starget->target_busy); - if (unlikely(scsi_host_in_recovery(shost) && - (shost->host_failed || shost->host_eh_scheduled))) { - spin_lock_irqsave(shost->host_lock, flags); - scsi_eh_wakeup(shost); - spin_unlock_irqrestore(shost->host_lock, flags); - } - atomic_dec(&sdev->device_busy); } @@ -1532,7 +1549,7 @@ starved: list_add_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: - atomic_dec(&shost->host_busy); + scsi_dec_host_busy(shost); return 0; } @@ -1993,7 +2010,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; out_dec_host_busy: - atomic_dec(&shost->host_busy); + scsi_dec_host_busy(shost); out_dec_target_busy: if (scsi_target(sdev)->can_queue > 0) atomic_dec(&scsi_target(sdev)->target_busy); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a8b7bf879ced..1a1df0d21ee3 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -571,6 +571,8 @@ struct Scsi_Host { struct blk_mq_tag_set tag_set; }; + struct rcu_head rcu; + atomic_t host_busy; /* commands actually active on low-level */ atomic_t host_blocked; From ce6faf10fd6544febcf5c3efe453193def873c60 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 7 Dec 2017 09:40:38 -0800 Subject: [PATCH 439/770] rcu: Export init_rcu_head() and destroy_rcu_head() to GPL modules commit 156baec39732f025dc778e00da95fc10d6e45885 upstream. Use of init_rcu_head() and destroy_rcu_head() from modules results in the following build-time error with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y: ERROR: "init_rcu_head" [drivers/scsi/scsi_mod.ko] undefined! ERROR: "destroy_rcu_head" [drivers/scsi/scsi_mod.ko] undefined! This commit therefore adds EXPORT_SYMBOL_GPL() for each to allow them to be used by GPL-licensed kernel modules. Reported-by: Bart Van Assche Reported-by: Stephen Rothwell Signed-off-by: Paul E. McKenney Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/update.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5033b66d2753..7a577bd989a4 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -421,11 +421,13 @@ void init_rcu_head(struct rcu_head *head) { debug_object_init(head, &rcuhead_debug_descr); } +EXPORT_SYMBOL_GPL(init_rcu_head); void destroy_rcu_head(struct rcu_head *head) { debug_object_free(head, &rcuhead_debug_descr); } +EXPORT_SYMBOL_GPL(destroy_rcu_head); static bool rcuhead_is_static_object(void *addr) { From 5fc77964eaa97884e630487254e4c265644fb116 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 20 Nov 2017 16:00:31 -0800 Subject: [PATCH 440/770] scsi: lpfc: Fix crash after bad bar setup on driver attachment commit e4b9794efdce13242f4af6682f3ed48ce3864a87 upstream. In test cases where an instance of the driver is detached and reattached, the driver will crash on reattachment. There is a compound if statement that will skip over the bar setup if the pci_resource_start call is not successful. The driver erroneously returns success to its bar setup in this scenario even though the bars aren't properly configured. Rework the offending code segment for proper initialization steps. If the pci_resource_start call fails, -ENOMEM is now returned. Sample stack: rport-5:0-10: blocked FC remote port time out: removing rport BUG: unable to handle kernel NULL pointer dereference at (null) ... lpfc_sli4_wait_bmbx_ready+0x32/0x70 [lpfc] ... ... RIP: 0010:... ... lpfc_sli4_wait_bmbx_ready+0x32/0x70 [lpfc] Call Trace: ... lpfc_sli4_post_sync_mbox+0x106/0x4d0 [lpfc] ... ? __alloc_pages_nodemask+0x176/0x420 ... ? __kmalloc+0x2e/0x230 ... lpfc_sli_issue_mbox_s4+0x533/0x720 [lpfc] ... ? mempool_alloc+0x69/0x170 ... ? dma_generic_alloc_coherent+0x8f/0x140 ... lpfc_sli_issue_mbox+0xf/0x20 [lpfc] ... lpfc_sli4_driver_resource_setup+0xa6f/0x1130 [lpfc] ... ? lpfc_pci_probe_one+0x23e/0x16f0 [lpfc] ... lpfc_pci_probe_one+0x445/0x16f0 [lpfc] ... local_pci_probe+0x45/0xa0 ... work_for_cpu_fn+0x14/0x20 ... process_one_work+0x17a/0x440 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_init.c | 84 +++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6acf1bb1d320..25612ccf6ff2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -9413,44 +9413,62 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) lpfc_sli4_bar0_register_memmap(phba, if_type); } - if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) && - (pci_resource_start(pdev, PCI_64BIT_BAR2))) { - /* - * Map SLI4 if type 0 HBA Control Register base to a kernel - * virtual address and setup the registers. - */ - phba->pci_bar1_map = pci_resource_start(pdev, PCI_64BIT_BAR2); - bar1map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); - phba->sli4_hba.ctrl_regs_memmap_p = - ioremap(phba->pci_bar1_map, bar1map_len); - if (!phba->sli4_hba.ctrl_regs_memmap_p) { - dev_printk(KERN_ERR, &pdev->dev, - "ioremap failed for SLI4 HBA control registers.\n"); + if (if_type == LPFC_SLI_INTF_IF_TYPE_0) { + if (pci_resource_start(pdev, PCI_64BIT_BAR2)) { + /* + * Map SLI4 if type 0 HBA Control Register base to a + * kernel virtual address and setup the registers. + */ + phba->pci_bar1_map = pci_resource_start(pdev, + PCI_64BIT_BAR2); + bar1map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->sli4_hba.ctrl_regs_memmap_p = + ioremap(phba->pci_bar1_map, + bar1map_len); + if (!phba->sli4_hba.ctrl_regs_memmap_p) { + dev_err(&pdev->dev, + "ioremap failed for SLI4 HBA " + "control registers.\n"); + error = -ENOMEM; + goto out_iounmap_conf; + } + phba->pci_bar2_memmap_p = + phba->sli4_hba.ctrl_regs_memmap_p; + lpfc_sli4_bar1_register_memmap(phba); + } else { + error = -ENOMEM; goto out_iounmap_conf; } - phba->pci_bar2_memmap_p = phba->sli4_hba.ctrl_regs_memmap_p; - lpfc_sli4_bar1_register_memmap(phba); } - if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) && - (pci_resource_start(pdev, PCI_64BIT_BAR4))) { - /* - * Map SLI4 if type 0 HBA Doorbell Register base to a kernel - * virtual address and setup the registers. - */ - phba->pci_bar2_map = pci_resource_start(pdev, PCI_64BIT_BAR4); - bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); - phba->sli4_hba.drbl_regs_memmap_p = - ioremap(phba->pci_bar2_map, bar2map_len); - if (!phba->sli4_hba.drbl_regs_memmap_p) { - dev_printk(KERN_ERR, &pdev->dev, - "ioremap failed for SLI4 HBA doorbell registers.\n"); - goto out_iounmap_ctrl; - } - phba->pci_bar4_memmap_p = phba->sli4_hba.drbl_regs_memmap_p; - error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0); - if (error) + if (if_type == LPFC_SLI_INTF_IF_TYPE_0) { + if (pci_resource_start(pdev, PCI_64BIT_BAR4)) { + /* + * Map SLI4 if type 0 HBA Doorbell Register base to + * a kernel virtual address and setup the registers. + */ + phba->pci_bar2_map = pci_resource_start(pdev, + PCI_64BIT_BAR4); + bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->sli4_hba.drbl_regs_memmap_p = + ioremap(phba->pci_bar2_map, + bar2map_len); + if (!phba->sli4_hba.drbl_regs_memmap_p) { + dev_err(&pdev->dev, + "ioremap failed for SLI4 HBA" + " doorbell registers.\n"); + error = -ENOMEM; + goto out_iounmap_ctrl; + } + phba->pci_bar4_memmap_p = + phba->sli4_hba.drbl_regs_memmap_p; + error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0); + if (error) + goto out_iounmap_all; + } else { + error = -ENOMEM; goto out_iounmap_all; + } } return 0; From 1dbdcf117b227eb966691818e850a2fde589f2a2 Mon Sep 17 00:00:00 2001 From: Uma Krishnan Date: Wed, 3 Jan 2018 16:54:02 -0600 Subject: [PATCH 441/770] scsi: cxlflash: Reset command ioasc commit 96cf727fe8f102bf92150b741db71ee39fb8c521 upstream. In the event of a command failure, cxlflash returns the failure to the upper layers to process. After processing the error, when the command is queued again, the private command structure will not be zeroed and the ioasc could be stale. Per the SISLite specification, the AFU only sets the ioasc in the presence of a failure. Thus, even though the original command succeeds the second time, the command is considered a failure due to stale ioasc. This cycle repeats indefinitely and can cause a hang or IO failure. To fix the issue, clear the ioasc before queuing any command. [mkp: added Cc: stable per request] Fixes: 479ad8e9d48c ("scsi: cxlflash: Remove zeroing of private command data") Signed-off-by: Uma Krishnan Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 76b8b7eed0c0..0b6467206f8e 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -620,6 +620,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) cmd->parent = afu; cmd->hwq_index = hwq_index; + cmd->sa.ioasc = 0; cmd->rcb.ctx_id = hwq->ctx_hndl; cmd->rcb.msi = SISL_MSI_RRQ_UPDATED; cmd->rcb.port_sel = CHAN2PORTMASK(scp->device->channel); From 7e83b2ff485cacbf73d27f821e07a8c78ad8cc68 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 16 Feb 2018 20:23:12 +0100 Subject: [PATCH 442/770] Linux 4.14.20 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a7160d6f0f2d..33176140f133 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 19 +SUBLEVEL = 20 EXTRAVERSION = NAME = Petit Gorille From b6f2efb86440af16ecdf08e23924a4dc52c1bd4a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Jan 2018 11:07:27 -0800 Subject: [PATCH 443/770] tracing: Prevent PROFILE_ALL_BRANCHES when FORTIFY_SOURCE=y commit 68e76e034b6b1c1ce2eece1ab8ae4008e14be470 upstream. I regularly get 50 MB - 60 MB files during kernel randconfig builds. These large files mostly contain (many repeats of; e.g., 124,594): In file included from ../include/linux/string.h:6:0, from ../include/linux/uuid.h:20, from ../include/linux/mod_devicetable.h:13, from ../scripts/mod/devicetable-offsets.c:3: ../include/linux/compiler.h:64:4: warning: '______f' is static but declared in inline function 'strcpy' which is not static [enabled by default] ______f = { \ ^ ../include/linux/compiler.h:56:23: note: in expansion of macro '__trace_if' ^ ../include/linux/string.h:425:2: note: in expansion of macro 'if' if (p_size == (size_t)-1 && q_size == (size_t)-1) ^ This only happens when CONFIG_FORTIFY_SOURCE=y and CONFIG_PROFILE_ALL_BRANCHES=y, so prevent PROFILE_ALL_BRANCHES if FORTIFY_SOURCE=y. Link: http://lkml.kernel.org/r/9199446b-a141-c0c3-9678-a3f9107f2750@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..4ad6f6ca18c1 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -343,7 +343,7 @@ config PROFILE_ANNOTATED_BRANCHES on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES - bool "Profile all if conditionals" + bool "Profile all if conditionals" if !FORTIFY_SOURCE select TRACE_BRANCH_PROFILING help This tracer profiles all branch conditions. Every if () From e99306bb4f6741d5482f924585975db5a6934953 Mon Sep 17 00:00:00 2001 From: Steffen Weber Date: Tue, 2 Jan 2018 19:24:09 +0100 Subject: [PATCH 444/770] scsi: smartpqi: allow static build ("built-in") commit dc2db1dc5fb9ab3a43b305c2720fee5278dbee2a upstream. If CONFIG_SCSI_SMARTPQI=y then don't build this driver as a module. Signed-off-by: Steffen Weber Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/smartpqi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/Makefile b/drivers/scsi/smartpqi/Makefile index 0f42a225a664..e6b779930230 100644 --- a/drivers/scsi/smartpqi/Makefile +++ b/drivers/scsi/smartpqi/Makefile @@ -1,3 +1,3 @@ ccflags-y += -I. -obj-m += smartpqi.o +obj-$(CONFIG_SCSI_SMARTPQI) += smartpqi.o smartpqi-objs := smartpqi_init.o smartpqi_sis.o smartpqi_sas_transport.o From 7a748f0bb2e4aca2c18223585523eb07199a4715 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 28 Jan 2018 11:25:29 +0200 Subject: [PATCH 445/770] IB/umad: Fix use of unprotected device pointer commit f23a5350e43c810ca36b26d4ed4ecd9a08686f47 upstream. The ib_write_umad() is protected by taking the umad file mutex. However, it accesses file->port->ib_dev -- which is protected only by the port's mutex (field file_mutex). The ib_umad_remove_one() calls ib_umad_kill_port() which sets port->ib_dev to NULL under the port mutex (NOT the file mutex). It then sets the mad agent to "dead" under the umad file mutex. This is a race condition -- because there is a window where port->ib_dev is NULL, while the agent is not "dead". As a result, we saw stack traces like: [16490.678059] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0 [16490.678246] IP: ib_umad_write+0x29c/0xa3a [ib_umad] [16490.678333] PGD 0 P4D 0 [16490.678404] Oops: 0000 [#1] SMP PTI [16490.678466] Modules linked in: rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_uverbs(OE) ib_umad(OE) mlx4_en(OE) ptp pps_core mlx4_ib(OE-) ib_core(OE) mlx4_core(OE) mlx_compat (OE) memtrack(OE) devlink mst_pciconf(OE) mst_pci(OE) netconsole nfsv3 nfs_acl nfs lockd grace fscache cfg80211 rfkill esp6_offload esp6 esp4_offload esp4 sunrpc kvm_intel kvm ppdev parport_pc irqbypass parport joydev i2c_piix4 virtio_balloon cirrus drm_kms_helper ttm drm e1000 serio_raw virtio_pci virtio_ring virtio ata_generic pata_acpi qemu_fw_cfg [last unloaded: mlxfw] [16490.679202] CPU: 4 PID: 3115 Comm: sminfo Tainted: G OE 4.14.13-300.fc27.x86_64 #1 [16490.679339] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [16490.679477] task: ffff9cf753890000 task.stack: ffffaf70c26b0000 [16490.679571] RIP: 0010:ib_umad_write+0x29c/0xa3a [ib_umad] [16490.679664] RSP: 0018:ffffaf70c26b3d90 EFLAGS: 00010202 [16490.679747] RAX: 0000000000000010 RBX: ffff9cf75610fd80 RCX: 0000000000000000 [16490.679856] RDX: 0000000000000001 RSI: 00007ffdf2bfd714 RDI: ffff9cf6bb2a9c00 In the above trace, ib_umad_write is trying to dereference the NULL file->port->ib_dev pointer. Fix this by using the agent's device pointer (the device field in struct ib_mad_agent) -- which IS protected by the umad file mutex. Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/user_mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 603acaf91828..6511cb21f6e2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -500,7 +500,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.type = rdma_ah_find_type(file->port->ib_dev, + ah_attr.type = rdma_ah_find_type(agent->device, file->port->port_num); rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid)); rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl); From 5a4255467cd8ce9e2c5f9a25b50f2fe692cf1bb8 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 14 Nov 2017 04:34:52 -0800 Subject: [PATCH 446/770] IB/qib: Fix comparison error with qperf compare/swap test commit 87b3524cb5058fdc7c2afdb92bdb2e079661ddc4 upstream. This failure exists with qib: ver_rc_compare_swap: mismatch, sequence 2, expected 123456789abcdef, got 0 The request builder was using the incorrect inlines to build the request header resulting in incorrect data in the atomic header. Fix by using the appropriate inlines to create the request. Fixes: 261a4351844b ("IB/qib,IB/hfi: Use core common header file") Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_rc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index e9a91736b12d..d80b61a71eb8 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -434,13 +434,13 @@ no_flow_control: qp->s_state = OP(COMPARE_SWAP); put_ib_ateth_swap(wqe->atomic_wr.swap, &ohdr->u.atomic_eth); - put_ib_ateth_swap(wqe->atomic_wr.compare_add, - &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); put_ib_ateth_swap(wqe->atomic_wr.compare_add, &ohdr->u.atomic_eth); - put_ib_ateth_swap(0, &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, &ohdr->u.atomic_eth); From ade57e9031fac0c972c488acfce58e9a50e2c8dc Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 12 Jan 2018 07:58:40 +0200 Subject: [PATCH 447/770] IB/mlx4: Fix incorrectly releasing steerable UD QPs when have only ETH ports commit 852f6927594d0d3e8632c889b2ab38cbc46476ad upstream. Allocating steerable UD QPs depends on having at least one IB port, while releasing those QPs does not. As a result, when there are only ETH ports, the IB (RoCE) driver requests releasing a qp range whose base qp is zero, with qp count zero. When SR-IOV is enabled, and the VF driver is running on a VM over a hypervisor which treats such qp release calls as errors (rather than NOPs), we see lines in the VM message log like: mlx4_core 0002:00:02.0: Failed to release qp range base:0 cnt:0 Fix this by adding a check for a zero count in mlx4_release_qp_range() (which thus treats releasing 0 qps as a nop), and eliminating the check for device managed flow steering when releasing steerable UD QPs. (Freeing ib_uc_qpns_bitmap unconditionally is also OK, since it remains NULL when steerable UD QPs are not allocated). Fixes: 4196670be786 ("IB/mlx4: Don't allocate range of steerable UD QPs for Ethernet-only device") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 13 +++++-------- drivers/net/ethernet/mellanox/mlx4/qp.c | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c636842c5be0..8c681a36e6c7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2972,9 +2972,8 @@ err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); err_counter: for (i = 0; i < ibdev->num_ports; ++i) mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); @@ -3079,11 +3078,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->iboe.nb.notifier_call = NULL; } - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { - mlx4_qp_release_range(dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_count); - kfree(ibdev->ib_uc_qpns_bitmap); - } + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); + kfree(ibdev->ib_uc_qpns_bitmap); iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 728a2fb1f5c0..22a3bfe1ed8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -287,6 +287,9 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) u64 in_param = 0; int err; + if (!cnt) + return; + if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, base_qpn); set_param_h(&in_param, cnt); From 18c0ee900c8d93ae48a99cc1f46d372897b8e9a6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Jan 2018 13:28:18 -0800 Subject: [PATCH 448/770] IB/core: Fix two kernel warnings triggered by rxe registration commit 02ee9da347873699603d9ce0112a80b5dd69dea1 upstream. Eliminate the WARN_ONs that create following two warnings when registering an rxe device: WARNING: CPU: 2 PID: 1005 at drivers/infiniband/core/device.c:449 ib_register_device+0x591/0x640 [ib_core] CPU: 2 PID: 1005 Comm: run_tests Not tainted 4.15.0-rc4-dbg+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:ib_register_device+0x591/0x640 [ib_core] Call Trace: rxe_register_device+0x3c6/0x470 [rdma_rxe] rxe_add+0x543/0x5e0 [rdma_rxe] rxe_net_add+0x37/0xb0 [rdma_rxe] rxe_param_set_add+0x5a/0x120 [rdma_rxe] param_attr_store+0x5e/0xc0 module_attr_store+0x19/0x30 sysfs_kf_write+0x3d/0x50 kernfs_fop_write+0x116/0x1a0 __vfs_write+0x23/0x120 vfs_write+0xbe/0x1b0 SyS_write+0x44/0xa0 entry_SYSCALL_64_fastpath+0x23/0x9a WARNING: CPU: 2 PID: 1005 at drivers/infiniband/core/sysfs.c:1279 ib_device_register_sysfs+0x11d/0x160 [ib_core] CPU: 2 PID: 1005 Comm: run_tests Tainted: G W 4.15.0-rc4-dbg+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:ib_device_register_sysfs+0x11d/0x160 [ib_core] Call Trace: ib_register_device+0x3f7/0x640 [ib_core] rxe_register_device+0x3c6/0x470 [rdma_rxe] rxe_add+0x543/0x5e0 [rdma_rxe] rxe_net_add+0x37/0xb0 [rdma_rxe] rxe_param_set_add+0x5a/0x120 [rdma_rxe] param_attr_store+0x5e/0xc0 module_attr_store+0x19/0x30 sysfs_kf_write+0x3d/0x50 kernfs_fop_write+0x116/0x1a0 __vfs_write+0x23/0x120 vfs_write+0xbe/0x1b0 SyS_write+0x44/0xa0 entry_SYSCALL_64_fastpath+0x23/0x9a The code should accept either a parent pointer or a fully specified DMA specification without producing warnings. Fixes: 99db9494035f ("IB/core: Remove ib_device.dma_device") Signed-off-by: Bart Van Assche Cc: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/device.c | 20 ++++++++++++++------ drivers/infiniband/core/sysfs.c | 1 - 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 84fc32a2c8b3..ebfdb5503701 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -446,7 +446,6 @@ int ib_register_device(struct ib_device *device, struct ib_udata uhw = {.outlen = 0, .inlen = 0}; struct device *parent = device->dev.parent; - WARN_ON_ONCE(!parent); WARN_ON_ONCE(device->dma_device); if (device->dev.dma_ops) { /* @@ -455,16 +454,25 @@ int ib_register_device(struct ib_device *device, * into device->dev. */ device->dma_device = &device->dev; - if (!device->dev.dma_mask) - device->dev.dma_mask = parent->dma_mask; - if (!device->dev.coherent_dma_mask) - device->dev.coherent_dma_mask = - parent->coherent_dma_mask; + if (!device->dev.dma_mask) { + if (parent) + device->dev.dma_mask = parent->dma_mask; + else + WARN_ON_ONCE(true); + } + if (!device->dev.coherent_dma_mask) { + if (parent) + device->dev.coherent_dma_mask = + parent->coherent_dma_mask; + else + WARN_ON_ONCE(true); + } } else { /* * The caller did not provide custom DMA operations. Use the * DMA mapping operations of the parent device. */ + WARN_ON_ONCE(!parent); device->dma_device = parent; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index abc5ab581f82..0a1e96c25ca3 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1262,7 +1262,6 @@ int ib_device_register_sysfs(struct ib_device *device, int ret; int i; - WARN_ON_ONCE(!device->dev.parent); ret = dev_set_name(class_dev, "%s", device->name); if (ret) return ret; From d40ad865704b0a3547ca199acba102020ebb947e Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 12 Jan 2018 07:58:41 +0200 Subject: [PATCH 449/770] IB/core: Fix ib_wc structure size to remain in 64 bytes boundary commit cd2a6e7d384b043d5d029e39663061cebc949385 upstream. The change of slid from u16 to u32 results in sizeof(struct ib_wc) cross 64B boundary, which causes more cache misses. This patch rearranges the fields and remain the size to 64B. Pahole output before this change: struct ib_wc { union { u64 wr_id; /* 8 */ struct ib_cqe * wr_cqe; /* 8 */ }; /* 0 8 */ enum ib_wc_status status; /* 8 4 */ enum ib_wc_opcode opcode; /* 12 4 */ u32 vendor_err; /* 16 4 */ u32 byte_len; /* 20 4 */ struct ib_qp * qp; /* 24 8 */ union { __be32 imm_data; /* 4 */ u32 invalidate_rkey; /* 4 */ } ex; /* 32 4 */ u32 src_qp; /* 36 4 */ int wc_flags; /* 40 4 */ u16 pkey_index; /* 44 2 */ /* XXX 2 bytes hole, try to pack */ u32 slid; /* 48 4 */ u8 sl; /* 52 1 */ u8 dlid_path_bits; /* 53 1 */ u8 port_num; /* 54 1 */ u8 smac[6]; /* 55 6 */ /* XXX 1 byte hole, try to pack */ u16 vlan_id; /* 62 2 */ /* --- cacheline 1 boundary (64 bytes) --- */ u8 network_hdr_type; /* 64 1 */ /* size: 72, cachelines: 2, members: 17 */ /* sum members: 62, holes: 2, sum holes: 3 */ /* padding: 7 */ /* last cacheline: 8 bytes */ }; Pahole output after this change: struct ib_wc { union { u64 wr_id; /* 8 */ struct ib_cqe * wr_cqe; /* 8 */ }; /* 0 8 */ enum ib_wc_status status; /* 8 4 */ enum ib_wc_opcode opcode; /* 12 4 */ u32 vendor_err; /* 16 4 */ u32 byte_len; /* 20 4 */ struct ib_qp * qp; /* 24 8 */ union { __be32 imm_data; /* 4 */ u32 invalidate_rkey; /* 4 */ } ex; /* 32 4 */ u32 src_qp; /* 36 4 */ u32 slid; /* 40 4 */ int wc_flags; /* 44 4 */ u16 pkey_index; /* 48 2 */ u8 sl; /* 50 1 */ u8 dlid_path_bits; /* 51 1 */ u8 port_num; /* 52 1 */ u8 smac[6]; /* 53 6 */ /* XXX 1 byte hole, try to pack */ u16 vlan_id; /* 60 2 */ u8 network_hdr_type; /* 62 1 */ /* size: 64, cachelines: 1, members: 17 */ /* sum members: 62, holes: 1, sum holes: 1 */ /* padding: 1 */ }; Fixes: 7db20ecd1d97 ("IB/core: Change wc.slid from 16 to 32 bits") Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e8608b2dc844..6533aa64f009 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -971,9 +971,9 @@ struct ib_wc { u32 invalidate_rkey; } ex; u32 src_qp; + u32 slid; int wc_flags; u16 pkey_index; - u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ From 1d6eb826e65d8b92d1f8cb081b33bb0786a5a4ab Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 1 Feb 2018 12:31:06 -0800 Subject: [PATCH 450/770] IB/core: Avoid a potential OOPs for an unused optional parameter commit 2ff124d597c2df8696169ce0006fc974c49a4569 upstream. The ev_file is an optional parameter for CQ creation. If the parameter is not passed, the ev_file pointer will be NULL. Using that pointer to set the cq_context will result in an OOPs. Verify that ev_file is not NULL before using. Cc: # 4.14.x Fixes: 9ee79fce3642 ("IB/core: Add completion queue (cq) object actions") Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_std_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0a98579700ec..5f9321eda1b7 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -315,7 +315,7 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = &ev_file->ev_queue; + cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; obj->uobject.object = cq; obj->uobject.user_handle = user_handle; atomic_set(&cq->usecnt, 0); From 9c2e7a048d19dce8b82e0dc169feb1721d856d37 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 5 Jan 2018 17:31:18 +0100 Subject: [PATCH 451/770] selftests: seccomp: fix compile error seccomp_bpf commit 912ec316686df352028afb6efec59e47a958a24d upstream. aarch64-linux-gnu-gcc -Wl,-no-as-needed -Wall -lpthread seccomp_bpf.c -o seccomp_bpf seccomp_bpf.c: In function 'tracer_ptrace': seccomp_bpf.c:1720:12: error: '__NR_open' undeclared (first use in this function) if (nr == __NR_open) ^~~~~~~~~ seccomp_bpf.c:1720:12: note: each undeclared identifier is reported only once for each function it appears in In file included from seccomp_bpf.c:48:0: seccomp_bpf.c: In function 'TRACE_syscall_ptrace_syscall_dropped': seccomp_bpf.c:1795:39: error: '__NR_open' undeclared (first use in this function) EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); ^ open(2) is a legacy syscall, replaced with openat(2) since 2.6.16. Thus new architectures in the kernel, such as arm64, don't implement these legacy syscalls. Fixes: a33b2d0359a0 ("selftests/seccomp: Add tests for basic ptrace actions") Signed-off-by: Anders Roxell Tested-by: Naresh Kamboju Cc: stable@vger.kernel.org Acked-by: Kees Cook Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf634e2dd..0b457e8e0f0c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1717,7 +1717,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); - if (nr == __NR_open) + if (nr == __NR_openat) change_syscall(_metadata, tracee, -1); } @@ -1792,7 +1792,7 @@ TEST_F(TRACE_syscall, ptrace_syscall_dropped) true); /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_allowed) From 7dd2dbdd4606fc44da2cbb59fcd1c560c3f3fb94 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Jan 2018 17:26:24 +0100 Subject: [PATCH 452/770] kselftest: fix OOM in memory compaction test commit 4c1baad223906943b595a887305f2e8124821dad upstream. Running the compaction_test sometimes results in out-of-memory failures. When I debugged this, it turned out that the code to reset the number of hugepages to the initial value is simply broken since we write into an open sysctl file descriptor multiple times without seeking back to the start. Adding the lseek here fixes the problem. Cc: stable@vger.kernel.org Reported-by: Naresh Kamboju Link: https://bugs.linaro.org/show_bug.cgi?id=3145 Signed-off-by: Arnd Bergmann Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/compaction_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index a65b016d4c13..1097f04e4d80 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -137,6 +137,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + lseek(fd, 0, SEEK_SET); + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); From 7b4e8a46d4cf2592910275dee6036ceb1d351975 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 9 Jan 2018 11:23:40 -0800 Subject: [PATCH 453/770] RDMA/rxe: Fix a race condition related to the QP error state commit 6f301e06de4cf9ab7303f5acd43e64fcd4aa04be upstream. The following sequence: * Change queue pair state into IB_QPS_ERR. * Post a work request on the queue pair. Triggers the following race condition in the rdma_rxe driver: * rxe_qp_error() triggers an asynchronous call of rxe_completer(), the function that examines the QP send queue. * rxe_post_send() posts a work request on the QP send queue. If rxe_completer() runs prior to rxe_post_send(), it will drain the send queue and the driver will assume no further action is necessary. However, once we post the send to the send queue, because the queue is in error, no send completion will ever happen and the send will get stuck. In order to process the send, we need to make sure that rxe_completer() gets run after a send is posted to a queue pair in an error state. This patch ensures that happens. Signed-off-by: Bart Van Assche Cc: Moni Shoua Cc: # v4.8 Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0b362f49a10a..afbf701dc9a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -813,6 +813,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, (queue_count(qp->sq.queue) > 1); rxe_run_task(&qp->req.task, must_sched); + if (unlikely(qp->req.state == QP_STATE_ERROR)) + rxe_run_task(&qp->comp.task, 1); return err; } From 571cb36fac2f62bfe5fe7fa488f0ea7271db9c2f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 12 Jan 2018 15:11:58 -0800 Subject: [PATCH 454/770] RDMA/rxe: Fix a race condition in rxe_requester() commit 65567e41219888feec72fee1de98ccf1efbbc16d upstream. The rxe driver works as follows: * The send queue, receive queue and completion queues are implemented as circular buffers. * ib_post_send() and ib_post_recv() calls are serialized through a spinlock. * Removing elements from various queues happens from tasklet context. Tasklets are guaranteed to run on at most one CPU. This serializes access to these queues. See also rxe_completer(), rxe_requester() and rxe_responder(). * rxe_completer() processes the skbs queued onto qp->resp_pkts. * rxe_requester() handles the send queue (qp->sq.queue). * rxe_responder() processes the skbs queued onto qp->req_pkts. Since rxe_drain_req_pkts() processes qp->req_pkts, calling rxe_drain_req_pkts() from rxe_requester() is racy. Hence this patch. Reported-by: Moni Shoua Signed-off-by: Bart Van Assche Cc: stable@vger.kernel.org Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_loc.h | 1 - drivers/infiniband/sw/rxe/rxe_req.c | 9 +-------- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 77b3ed0df936..7f945f65d8cd 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -237,7 +237,6 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, void rxe_release(struct kref *kref); -void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify); int rxe_completer(void *arg); int rxe_requester(void *arg); int rxe_responder(void *arg); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d84222f9d5d2..44b838ec9420 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -594,15 +594,8 @@ int rxe_requester(void *arg) rxe_add_ref(qp); next_wqe: - if (unlikely(!qp->valid)) { - rxe_drain_req_pkts(qp, true); + if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) goto exit; - } - - if (unlikely(qp->req.state == QP_STATE_ERROR)) { - rxe_drain_req_pkts(qp, true); - goto exit; - } if (unlikely(qp->req.state == QP_STATE_RESET)) { qp->req.wqe_index = consumer_index(qp->sq.queue); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 4240866a5331..01f926fd9029 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1210,7 +1210,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } } -void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) +static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) { struct sk_buff *skb; From 75a3f11c7bad580dc5fbf4175f5dc39192f088d5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 12 Jan 2018 15:11:59 -0800 Subject: [PATCH 455/770] RDMA/rxe: Fix rxe_qp_cleanup() commit bb3ffb7ad48a21e98a5c64eb21103a74fd9f03f6 upstream. rxe_qp_cleanup() can sleep so it must be run in thread context and not in atomic context. This patch avoids that the following bug is triggered: Kernel BUG at 00000000560033f3 [verbose debug info unavailable] BUG: sleeping function called from invalid context at net/core/sock.c:2761 in_atomic(): 1, irqs_disabled(): 0, pid: 7, name: ksoftirqd/0 INFO: lockdep is turned off. Preemption disabled at: [<00000000b6e69628>] __do_softirq+0x4e/0x540 CPU: 0 PID: 7 Comm: ksoftirqd/0 Not tainted 4.15.0-rc7-dbg+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x85/0xbf ___might_sleep+0x177/0x260 lock_sock_nested+0x1d/0x90 inet_shutdown+0x2e/0xd0 rxe_qp_cleanup+0x107/0x140 [rdma_rxe] rxe_elem_release+0x18/0x80 [rdma_rxe] rxe_requester+0x1cf/0x11b0 [rdma_rxe] rxe_do_task+0x78/0xf0 [rdma_rxe] tasklet_action+0x99/0x270 __do_softirq+0xc0/0x540 run_ksoftirqd+0x1c/0x70 smpboot_thread_fn+0x1be/0x270 kthread+0x117/0x130 ret_from_fork+0x24/0x30 Signed-off-by: Bart Van Assche Cc: Moni Shoua Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_qp.c | 12 ++++++++++-- drivers/infiniband/sw/rxe/rxe_verbs.h | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 00bda9380a2e..aeea994b04c4 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -824,9 +824,9 @@ void rxe_qp_destroy(struct rxe_qp *qp) } /* called when the last reference to the qp is dropped */ -void rxe_qp_cleanup(struct rxe_pool_entry *arg) +static void rxe_qp_do_cleanup(struct work_struct *work) { - struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); + struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); rxe_drop_all_mcast_groups(qp); @@ -859,3 +859,11 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg) kernel_sock_shutdown(qp->sk, SHUT_RDWR); sock_release(qp->sk); } + +/* called when the last reference to the qp is dropped */ +void rxe_qp_cleanup(struct rxe_pool_entry *arg) +{ + struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); + + execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work); +} diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 0c2dbe45c729..1019f5e7dbdd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -35,6 +35,7 @@ #define RXE_VERBS_H #include +#include #include #include "rxe_pool.h" #include "rxe_task.h" @@ -281,6 +282,8 @@ struct rxe_qp { struct timer_list rnr_nak_timer; spinlock_t state_lock; /* guard requester and completer */ + + struct execute_work cleanup_work; }; enum rxe_mem_state { From 8e56a935a4b751bc8cb5288480b23827f8d2564d Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Fri, 12 Jan 2018 12:43:53 +0530 Subject: [PATCH 456/770] cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin commit 3fa4680b860bf48b437d6a2c039789c4abe202ae upstream. Some OpenPOWER boxes can have same pstate values for nominal and pmin pstates. In these boxes the current code will not initialize 'powernv_pstate_info.min' variable and result in erroneous CPU frequency reporting. This patch fixes this problem. Fixes: 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index) Reported-by: Alvin Wang Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/powernv-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 3ff5160451b4..7e1e5bbcf430 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -287,9 +287,9 @@ next: if (id == pstate_max) powernv_pstate_info.max = i; - else if (id == pstate_nominal) + if (id == pstate_nominal) powernv_pstate_info.nominal = i; - else if (id == pstate_min) + if (id == pstate_min) powernv_pstate_info.min = i; if (powernv_pstate_info.wof_enabled && id == pstate_turbo) { From 37efa60e167036b3eaed20e70095bbdcb72a55ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 4 Jan 2018 14:24:19 +0100 Subject: [PATCH 457/770] swiotlb: suppress warning when __GFP_NOWARN is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d0bc0c2a31c95002d37c3cc511ffdcab851b3256 upstream. TTM tries to allocate coherent memory in chunks of 2MB first to improve TLB efficiency and falls back to allocating 4K pages if that fails. Suppress the warning when the 2MB allocations fails since there is a valid fall back path. Signed-off-by: Christian König Reported-by: Mike Galbraith Acked-by: Konrad Rzeszutek Wilk Bug: https://bugs.freedesktop.org/show_bug.cgi?id=104082 Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- lib/swiotlb.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 8c6c83ef57a4..20df2fd9b150 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -585,7 +585,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - if (printk_ratelimit()) + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: @@ -712,6 +712,7 @@ void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { + bool warn = !(flags & __GFP_NOWARN); dma_addr_t dev_addr; void *ret; int order = get_order(size); @@ -737,8 +738,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ - phys_addr_t paddr = map_single(hwdev, 0, size, - DMA_FROM_DEVICE, 0); + phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE, + warn ? 0 : DMA_ATTR_NO_WARN); if (paddr == SWIOTLB_MAP_ERROR) goto err_warn; @@ -768,9 +769,11 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", - dev_name(hwdev), size); - dump_stack(); + if (warn && printk_ratelimit()) { + pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + dev_name(hwdev), size); + dump_stack(); + } return NULL; } From 62def1d604a5473055b1cce23b55b7639302710a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 5 Nov 2017 21:27:41 -0800 Subject: [PATCH 458/770] PM / devfreq: Propagate error from devfreq_add_device() commit d1bf2d30728f310f72296b54f0651ecdb09cbb12 upstream. Propagate the error of devfreq_add_device() in devm_devfreq_add_device() rather than statically returning ENOMEM. This makes it slightly faster to pinpoint the cause of a returned error. Fixes: 8cd84092d35e ("PM / devfreq: Add resource-managed function for devfreq device") Cc: stable@vger.kernel.org Acked-by: Chanwoo Choi Signed-off-by: Bjorn Andersson Signed-off-by: MyungJoo Ham Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index a1c4ee818614..202476fbbc4c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -676,7 +676,7 @@ struct devfreq *devm_devfreq_add_device(struct device *dev, devfreq = devfreq_add_device(dev, profile, governor_name, data); if (IS_ERR(devfreq)) { devres_free(ptr); - return ERR_PTR(-ENOMEM); + return devfreq; } *ptr = devfreq; From 1ec4c78e3444c317c724820f0f40324e65470654 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 12 Jan 2018 13:08:37 -0800 Subject: [PATCH 459/770] mwifiex: resolve reset vs. remove()/shutdown() deadlocks commit a64e7a79dd6030479caad603c8d78e6c9c14904f upstream. Commit b014e96d1abb ("PCI: Protect pci_error_handlers->reset_notify() usage with device_lock()") resolves races between driver reset and removal, but it introduces some new deadlock problems. If we see a timeout while we've already started suspending, removing, or shutting down the driver, we might see: (a) a worker thread, running mwifiex_pcie_work() -> mwifiex_pcie_card_reset_work() -> pci_reset_function() (b) a removal thread, running mwifiex_pcie_remove() -> mwifiex_free_adapter() -> mwifiex_unregister() -> mwifiex_cleanup_pcie() -> cancel_work_sync(&card->work) Unfortunately, mwifiex_pcie_remove() already holds the device lock that pci_reset_function() is now requesting, and so we see a deadlock. It's necessary to cancel and synchronize our outstanding work before tearing down the driver, so we can't have this work wait indefinitely for the lock. It's reasonable to only "try" to reset here, since this will mostly happen for cases where it's already difficult to reset the firmware anyway (e.g., while we're suspending or powering off the system). And if reset *really* needs to happen, we can always try again later. Fixes: b014e96d1abb ("PCI: Protect pci_error_handlers->reset_notify() usage with device_lock()") Cc: Cc: Xinming Hu Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/pcie.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index cd314946452c..9511f5fe62f4 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2781,7 +2781,10 @@ static void mwifiex_pcie_card_reset_work(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; - pci_reset_function(card->dev); + /* We can't afford to wait here; remove() might be waiting on us. If we + * can't grab the device lock, maybe we'll get another chance later. + */ + pci_try_reset_function(card->dev); } static void mwifiex_pcie_work(struct work_struct *work) From 2e7e8bd8f19550c56ebc56158f60626546b7c564 Mon Sep 17 00:00:00 2001 From: Gang He Date: Wed, 31 Jan 2018 16:14:48 -0800 Subject: [PATCH 460/770] ocfs2: try a blocking lock before return AOP_TRUNCATED_PAGE commit ff26cc10aec128c3f86b5611fd5f59c71d49c0e3 upstream. If we can't get inode lock immediately in the function ocfs2_inode_lock_with_page() when reading a page, we should not return directly here, since this will lead to a softlockup problem when the kernel is configured with CONFIG_PREEMPT is not set. The method is to get a blocking lock and immediately unlock before returning, this can avoid CPU resource waste due to lots of retries, and benefits fairness in getting lock among multiple nodes, increase efficiency in case modifying the same file frequently from multiple nodes. The softlockup crash (when set /proc/sys/kernel/softlockup_panic to 1) looks like: Kernel panic - not syncing: softlockup: hung tasks CPU: 0 PID: 885 Comm: multi_mmap Tainted: G L 4.12.14-6.1-default #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x5c/0x82 panic+0xd5/0x21e watchdog_timer_fn+0x208/0x210 __hrtimer_run_queues+0xcc/0x200 hrtimer_interrupt+0xa6/0x1f0 smp_apic_timer_interrupt+0x34/0x50 apic_timer_interrupt+0x96/0xa0 RIP: 0010:unlock_page+0x17/0x30 RSP: 0000:ffffaf154080bc88 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 RAX: dead000000000100 RBX: fffff21e009f5300 RCX: 0000000000000004 RDX: dead0000000000ff RSI: 0000000000000202 RDI: fffff21e009f5300 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffaf154080bb00 R10: ffffaf154080bc30 R11: 0000000000000040 R12: ffff993749a39518 R13: 0000000000000000 R14: fffff21e009f5300 R15: fffff21e009f5300 ocfs2_inode_lock_with_page+0x25/0x30 [ocfs2] ocfs2_readpage+0x41/0x2d0 [ocfs2] filemap_fault+0x12b/0x5c0 ocfs2_fault+0x29/0xb0 [ocfs2] __do_fault+0x1a/0xa0 __handle_mm_fault+0xbe8/0x1090 handle_mm_fault+0xaa/0x1f0 __do_page_fault+0x235/0x4b0 trace_do_page_fault+0x3c/0x110 async_page_fault+0x28/0x30 RIP: 0033:0x7fa75ded638e RSP: 002b:00007ffd6657db18 EFLAGS: 00010287 RAX: 000055c7662fb700 RBX: 0000000000000001 RCX: 000055c7662fb700 RDX: 0000000000001770 RSI: 00007fa75e909000 RDI: 000055c7662fb700 RBP: 0000000000000003 R08: 000000000000000e R09: 0000000000000000 R10: 0000000000000483 R11: 00007fa75ded61b0 R12: 00007fa75e90a770 R13: 000000000000000e R14: 0000000000001770 R15: 0000000000000000 About performance improvement, we can see the testing time is reduced, and CPU utilization decreases, the detailed data is as follows. I ran multi_mmap test case in ocfs2-test package in a three nodes cluster. Before applying this patch: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 2754 ocfs2te+ 20 0 170248 6980 4856 D 80.73 0.341 0:18.71 multi_mmap 1505 root rt 0 222236 123060 97224 S 2.658 6.015 0:01.44 corosync 5 root 20 0 0 0 0 S 1.329 0.000 0:00.19 kworker/u8:0 95 root 20 0 0 0 0 S 1.329 0.000 0:00.25 kworker/u8:1 2728 root 20 0 0 0 0 S 0.997 0.000 0:00.24 jbd2/sda1-33 2721 root 20 0 0 0 0 S 0.664 0.000 0:00.07 ocfs2dc-3C8CFD4 2750 ocfs2te+ 20 0 142976 4652 3532 S 0.664 0.227 0:00.28 mpirun ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared Tests with "-b 4096 -C 32768" Thu Dec 28 14:44:52 CST 2017 multi_mmap..................................................Passed. Runtime 783 seconds. After apply this patch: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 2508 ocfs2te+ 20 0 170248 6804 4680 R 54.00 0.333 0:55.37 multi_mmap 155 root 20 0 0 0 0 S 2.667 0.000 0:01.20 kworker/u8:3 95 root 20 0 0 0 0 S 2.000 0.000 0:01.58 kworker/u8:1 2504 ocfs2te+ 20 0 142976 4604 3480 R 1.667 0.225 0:01.65 mpirun 5 root 20 0 0 0 0 S 1.000 0.000 0:01.36 kworker/u8:0 2482 root 20 0 0 0 0 S 1.000 0.000 0:00.86 jbd2/sda1-33 299 root 0 -20 0 0 0 S 0.333 0.000 0:00.13 kworker/2:1H 335 root 0 -20 0 0 0 S 0.333 0.000 0:00.17 kworker/1:1H 535 root 20 0 12140 7268 1456 S 0.333 0.355 0:00.34 haveged 1282 root rt 0 222284 123108 97224 S 0.333 6.017 0:01.33 corosync ocfs2test@tb-node2:~>multiple_run.sh -i ens3 -k ~/linux-4.4.21-69.tar.gz -o ~/ocfs2mullog -C hacluster -s pcmk -n tb-node2,tb-node1,tb-node3 -d /dev/sda1 -b 4096 -c 32768 -t multi_mmap /mnt/shared Tests with "-b 4096 -C 32768" Thu Dec 28 15:04:12 CST 2017 multi_mmap..................................................Passed. Runtime 487 seconds. Link: http://lkml.kernel.org/r/1514447305-30814-1-git-send-email-ghe@suse.com Fixes: 1cce4df04f37 ("ocfs2: do not lock/unlock() inode DLM lock") Signed-off-by: Gang He Reviewed-by: Eric Ren Acked-by: alex chen Acked-by: piaojun Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4689940a953c..5193218f5889 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2486,6 +2486,15 @@ int ocfs2_inode_lock_with_page(struct inode *inode, ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); if (ret == -EAGAIN) { unlock_page(page); + /* + * If we can't get inode lock immediately, we should not return + * directly here, since this will lead to a softlockup problem. + * The method is to get a blocking lock and immediately unlock + * before returning, this can avoid CPU resource waste due to + * lots of retries, and benefits fairness in getting lock. + */ + if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) + ocfs2_inode_unlock(inode, ex); ret = AOP_TRUNCATED_PAGE; } From 5b98d31481f9e216e6df6b31e5a959db259c055e Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 30 Nov 2017 14:35:54 +0530 Subject: [PATCH 461/770] powerpc/radix: Remove trace_tlbie call from radix__flush_tlb_all commit 8d81296cfcce89013a714feb8d25004a156f8181 upstream. radix__flush_tlb_all() is called only in kexec path in real mode and any tracepoints at this stage will make kexec to fail if enabled. To verify enable tlbie trace before kexec. $ echo 1 > /sys/kernel/debug/tracing/events/powerpc/tlbie/enable == kexec into new kernel and kexec fails. Fix this by not calling trace_tlbie from radix__flush_tlb_all(). Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Mahesh Salgaonkar Acked-by: Balbir Singh Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/tlb-radix.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index d304028641a2..4b295cfd5f7e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -453,14 +453,12 @@ void radix__flush_tlb_all(void) */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); /* * now flush host entires by passing PRS = 0 and LPID == 0 */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(0, 0, rb, 0, ric, prs, r); } void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, From 4386f223b41868197080bd74882c2d31aca92a1a Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 26 Jan 2018 13:41:59 -0600 Subject: [PATCH 462/770] powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove commit 1d9a090783bef19fe8cdec878620d22f05191316 upstream. When DLPAR removing a CPU, the unmapping of the cpu from a node in unmap_cpu_from_node() should also invalidate the CPUs entry in the numa_cpu_lookup_table. There is not a guarantee that on a subsequent DLPAR add of the CPU the associativity will be the same and thus could be in a different node. Invalidating the entry in the numa_cpu_lookup_table causes the associativity to be read from the device tree at the time of the add. The current behavior of not invalidating the CPUs entry in the numa_cpu_lookup_table can result in scenarios where the the topology layout of CPUs in the partition does not match the device tree or the topology reported by the HMC. This bug looks like it was introduced in 2004 in the commit titled "ppc64: cpu hotplug notifier for numa", which is 6b15e4e87e32 in the linux-fullhist tree. Hence tag it for all stable releases. Cc: stable@vger.kernel.org Signed-off-by: Nathan Fontenot Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/topology.h | 5 +++++ arch/powerpc/mm/numa.c | 5 ----- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 023ff9f17501..d5f2ee882f74 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); extern int numa_update_cpu_topology(bool cpus_locked); +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; +} + static inline int early_cpu_to_node(int cpu) { int nid; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a51df9ef529d..a81279249bfb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -142,11 +142,6 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void update_numa_cpu_lookup_table(unsigned int cpu, int node) -{ - numa_cpu_lookup_table[cpu] = node; -} - static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index fadb95efbb9e..b1ac8ac38434 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "pseries.h" #include "offline_states.h" @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np) BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); set_hard_smp_processor_id(cpu, -1); + update_numa_cpu_lookup_table(cpu, -1); break; } if (cpu >= nr_cpu_ids) From 892674b5058a2b15d35d1185f5d09d8f4c257a7e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 1 Feb 2018 16:09:44 +1100 Subject: [PATCH 463/770] powerpc/mm: Flush radix process translations when setting MMU type commit 62e984ddfd6b056d399e24113f5e6a7145e579d8 upstream. Radix guests do normally invalidate process-scoped translations when a new pid is allocated but migrated guests do not invalidate these so migrated guests crash sometime, especially easy to reproduce with migration happening within first 10 seconds after the guest boot start on the same machine. This adds the "Invalidate process-scoped translations" flush to fix radix guests migration. Fixes: 2ee13be34b13 ("KVM: PPC: Book3S HV: Update kvmppc_set_arch_compat() for ISA v3.00") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Alexey Kardashevskiy Tested-by: Laurent Vivier Tested-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/pgtable_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index ac0717a90ca6..12f95b1f7d07 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -483,6 +483,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : From 3b09911d3beb326a117bfb05c717c3021e5458ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 13 Feb 2018 09:47:12 +0100 Subject: [PATCH 464/770] powerpc/xive: Use hw CPU ids when configuring the CPU queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e036c8d30a2cd9d8fc7442fbf6824e0a3e986e7 upstream. The CPU event notification queues on sPAPR should be configured using a hardware CPU identifier. The problem did not show up on the Power Hypervisor because pHyp supports 8 threads per core which keeps CPU number contiguous. This is not the case on all sPAPR virtual machines, some use SMT=1. Also improve error logging by adding the CPU number. Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xive/spapr.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d9c4c9366049..091f1d0d0af1 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + pr_err("Error %lld getting queue info CPU %d prio %d\n", rc, + target, prio); rc = -EIO; goto fail; } @@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, /* Configure and enable the queue in HW */ rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order); if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + pr_err("Error %lld setting queue for CPU %d prio %d\n", rc, + target, prio); rc = -EIO; } else { q->qpage = qpage; @@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc, if (IS_ERR(qpage)) return PTR_ERR(qpage); - return xive_spapr_configure_queue(cpu, q, prio, qpage, - xive_queue_shift); + return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu), + q, prio, qpage, xive_queue_shift); } static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, @@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, struct xive_q *q = &xc->queue[prio]; unsigned int alloc_order; long rc; + int hw_cpu = get_hard_smp_processor_id(cpu); - rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0); + rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0); if (rc) - pr_err("Error %ld setting queue for prio %d\n", rc, prio); + pr_err("Error %ld setting queue for CPU %d prio %d\n", rc, + hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); free_pages((unsigned long)q->qpage, alloc_order); From 40be210c830e80642ce5f4e3d726864ec22d5f50 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 10 Nov 2017 12:15:00 +1100 Subject: [PATCH 465/770] powerpc: Fix DABR match on hash based systems commit f23ab3efb1b30cc5c5ef5ae4ef294ed467f30675 upstream. Commit 398a719d34a1 ("powerpc/mm: Update bits used to skip hash_page") mistakenly dropped the DSISR_DABRMATCH bit from the mask of bit tested to skip trying to hash a page. As a result, the DABR matches would no longer be detected. This adds it back. We open code it in the 2 places where it matters rather than fold it into DSISR_BAD_FAULT_32S/64S because this isn't technically a bad fault and while we would never hit it with the current code, I prefer if page_fault_is_bad() didn't trigger on these. Fixes: 398a719d34a1 ("powerpc/mm: Update bits used to skip hash_page") Cc: stable@vger.kernel.org # v4.14 Tested-by: Pedro Miraglia Franco de Carvalho Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Naveen N. Rao Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/head_32.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e9f72abc52b7..e91b40aa5417 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1617,7 +1617,7 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 - lis r0,DSISR_BAD_FAULT_64S@h + lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 8c54166491e7..29b2fed93289 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -388,7 +388,7 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) - andis. r0,r10,DSISR_BAD_FAULT_32S@h + andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ From f25dfc9359cc5e173a5b94e407c6417f5889c9c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 22 Jan 2018 21:00:03 +0100 Subject: [PATCH 466/770] dma-buf: fix reservation_object_wait_timeout_rcu once more v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5bffee867df7494ecd32c1e6ec4e8fc934c521b7 upstream. We need to set shared_count even if we already have a fence to wait for. v2: init i to -1 as well Signed-off-by: Christian König Cc: stable@vger.kernel.org Tested-by: Lyude Paul Reviewed-by: Lyude Paul Reviewed-by: Chris Wilson Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20180122200003.6665-1-christian.koenig@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/reservation.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index b44d9d7db347..012fa3d1f407 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -455,13 +455,15 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, unsigned long timeout) { struct dma_fence *fence; - unsigned seq, shared_count, i = 0; + unsigned seq, shared_count; long ret = timeout ? timeout : 1; + int i; retry: shared_count = 0; seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); + i = -1; fence = rcu_dereference(obj->fence_excl); if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { @@ -477,14 +479,14 @@ retry: fence = NULL; } - if (!fence && wait_all) { + if (wait_all) { struct reservation_object_list *fobj = rcu_dereference(obj->fence); if (fobj) shared_count = fobj->shared_count; - for (i = 0; i < shared_count; ++i) { + for (i = 0; !fence && i < shared_count; ++i) { struct dma_fence *lfence = rcu_dereference(fobj->shared[i]); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, From 27f97375fcb86750cdf82a65dab8174bb991c238 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Mon, 15 Jan 2018 20:38:17 +0100 Subject: [PATCH 467/770] s390: fix handling of -1 in set{,fs}[gu]id16 syscalls commit 6dd0d2d22aa363fec075cb2577ba273ac8462e94 upstream. For some reason, the implementation of some 16-bit ID system calls (namely, setuid16/setgid16 and setfsuid16/setfsgid16) used type cast instead of low2highgid/low2highuid macros for converting [GU]IDs, which led to incorrect handling of value of -1 (which ought to be considered invalid). Discovered by strace test suite. Cc: stable@vger.kernel.org Signed-off-by: Eugene Syromiatnikov Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_linux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 59eea9c65d3e..79b7a3438d54 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -110,7 +110,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { - return sys_setgid((gid_t)gid); + return sys_setgid(low2highgid(gid)); } COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) @@ -120,7 +120,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { - return sys_setuid((uid_t)uid); + return sys_setuid(low2highuid(uid)); } COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) @@ -173,12 +173,12 @@ COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { - return sys_setfsuid((uid_t)uid); + return sys_setfsuid(low2highuid(uid)); } COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { - return sys_setfsgid((gid_t)gid); + return sys_setfsgid(low2highgid(gid)); } static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) From bd420093636759d081bb31b343807efc5bbc58d3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 30 Nov 2017 21:27:32 -0800 Subject: [PATCH 468/770] arm64: dts: msm8916: Correct ipc references for smsm commit 566bd8902e7fa20bd412ed753e09e89c1c96939c upstream. SMSM is not symmetrical, the incoming bits from WCNSS are available at index 6, but the outgoing host id for WCNSS is 3. Further more, upstream references the base of APCS (in contrast to downstream), so the register offset of 8 must be included. Fixes: 1fb47e0a9ba4 ("arm64: dts: qcom: msm8916: Add smsm and smp2p nodes") Cc: stable@vger.kernel.org Reported-by: Ramon Fried Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index dc3817593e14..8e5421a0b43d 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1430,8 +1430,8 @@ #address-cells = <1>; #size-cells = <0>; - qcom,ipc-1 = <&apcs 0 13>; - qcom,ipc-6 = <&apcs 0 19>; + qcom,ipc-1 = <&apcs 8 13>; + qcom,ipc-3 = <&apcs 8 19>; apps_smsm: apps@0 { reg = <0>; From 34d58f7ccb65c9a2d2c9221b282e556e08e5a801 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:22:00 +0100 Subject: [PATCH 469/770] ARM: lpc3250: fix uda1380 gpio numbers commit ca32e0c4bf9ca4b87089c5a5ce945e5f2ec890da upstream. dtc warns about obviously incorrect GPIO numbers for the audio codec on both lpc32xx boards: arch/arm/boot/dts/lpc3250-phy3250.dtb: Warning (gpios_property): reset-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-phy3250.dtb: Warning (gpios_property): power-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-ea3250.dtb: Warning (gpios_property): reset-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 arch/arm/boot/dts/lpc3250-ea3250.dtb: Warning (gpios_property): power-gpio property size (12) too small for cell size 3 in /ahb/apb/i2c@400A0000/uda1380@18 It looks like the nodes are written for a different binding that combines the GPIO number into a single number rather than a bank/number pair. I found the right numbers on stackexchange.com, so this patch fixes the warning and has a reasonable chance of getting things to actually work. Cc: stable@vger.kernel.org Link: https://unix.stackexchange.com/questions/59497/alsa-asoc-how-to-correctly-load-devices-drivers/62217#62217 Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/lpc3250-ea3250.dts | 4 ++-- arch/arm/boot/dts/lpc3250-phy3250.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/lpc3250-ea3250.dts b/arch/arm/boot/dts/lpc3250-ea3250.dts index 52b3ed10283a..e2bc731079be 100644 --- a/arch/arm/boot/dts/lpc3250-ea3250.dts +++ b/arch/arm/boot/dts/lpc3250-ea3250.dts @@ -156,8 +156,8 @@ uda1380: uda1380@18 { compatible = "nxp,uda1380"; reg = <0x18>; - power-gpio = <&gpio 0x59 0>; - reset-gpio = <&gpio 0x51 0>; + power-gpio = <&gpio 3 10 0>; + reset-gpio = <&gpio 3 2 0>; dac-clk = "wspll"; }; diff --git a/arch/arm/boot/dts/lpc3250-phy3250.dts b/arch/arm/boot/dts/lpc3250-phy3250.dts index fd95e2b10357..b7bd3a110a8d 100644 --- a/arch/arm/boot/dts/lpc3250-phy3250.dts +++ b/arch/arm/boot/dts/lpc3250-phy3250.dts @@ -81,8 +81,8 @@ uda1380: uda1380@18 { compatible = "nxp,uda1380"; reg = <0x18>; - power-gpio = <&gpio 0x59 0>; - reset-gpio = <&gpio 0x51 0>; + power-gpio = <&gpio 3 10 0>; + reset-gpio = <&gpio 3 2 0>; dac-clk = "wspll"; }; From 9be4b2f6d024c89affe63360475fba9b167d7230 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 10 Jan 2018 09:21:02 +0100 Subject: [PATCH 470/770] ARM: dts: STi: Add gpio polarity for "hdmi,hpd-gpio" property commit 7ac1f59c09a61e6af6622df6809e003b0af07f3d upstream. The GPIO polarity is missing in the hdmi,hpd-gpio property, this fixes the following DT warnings: arch/arm/boot/dts/stih410-b2120.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 arch/arm/boot/dts/stih407-b2120.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 arch/arm/boot/dts/stih410-b2260.dtb: Warning (gpios_property): hdmi,hpd-gpio property size (8) too small for cell size 2 in /soc/sti-display-subsystem/sti-hdmi@8d04000 [arnd: marked Cc:stable since this warning shows up with the latest dtc by default, and is more likely to actually cause problems than the other patches from this series] Cc: stable@vger.kernel.org Signed-off-by: Patrice Chotard Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/stih407.dtsi | 3 ++- arch/arm/boot/dts/stih410.dtsi | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stih407.dtsi b/arch/arm/boot/dts/stih407.dtsi index fa149837df14..11fdecd9312e 100644 --- a/arch/arm/boot/dts/stih407.dtsi +++ b/arch/arm/boot/dts/stih407.dtsi @@ -8,6 +8,7 @@ */ #include "stih407-clock.dtsi" #include "stih407-family.dtsi" +#include / { soc { sti-display-subsystem { @@ -122,7 +123,7 @@ <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>; - hdmi,hpd-gpio = <&pio5 3>; + hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>; reset-names = "hdmi"; resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>; ddc = <&hdmiddc>; diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 21fe72b183d8..96eed0dc08b8 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -9,6 +9,7 @@ #include "stih410-clock.dtsi" #include "stih407-family.dtsi" #include "stih410-pinctrl.dtsi" +#include / { aliases { bdisp0 = &bdisp0; @@ -213,7 +214,7 @@ <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>; - hdmi,hpd-gpio = <&pio5 3>; + hdmi,hpd-gpio = <&pio5 3 GPIO_ACTIVE_LOW>; reset-names = "hdmi"; resets = <&softreset STIH407_HDMI_TX_PHY_SOFTRESET>; ddc = <&hdmiddc>; From be1965486122417edcd95893e8414902696e8e11 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:37:08 +0100 Subject: [PATCH 471/770] ARM: dts: nomadik: add interrupt-parent for clcd commit e8bfa0422469cdfc86be3f525f621b1d44d2481b upstream. The clcd device is lacking an interrupt-parent property, which makes the interrupt unusable and shows up as a warning with the latest dtc version: arch/arm/boot/dts/ste-nomadik-s8815.dtb: Warning (interrupts_property): Missing interrupt-parent for /amba/clcd@10120000 arch/arm/boot/dts/ste-nomadik-nhk15.dtb: Warning (interrupts_property): Missing interrupt-parent for /amba/clcd@10120000 I looked up the old board files and found that this interrupt has the same irqchip as all the other on-chip device, it just needs one extra line. Fixes: 17470b7da11c ("ARM: dts: add the CLCD LCD display to the NHK15") Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index 68aab50a73ab..733678b75b88 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -750,6 +750,7 @@ reg = <0x10120000 0x1000>; interrupt-names = "combined"; interrupts = <14>; + interrupt-parent = <&vica>; clocks = <&clcdclk>, <&hclkclcd>; clock-names = "clcdclk", "apb_pclk"; status = "disabled"; From cbcc2ff13af5664804f0c2622aff9159f79f249f Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 7 Dec 2017 14:43:24 +0800 Subject: [PATCH 472/770] arm: dts: mt7623: fix card detection issue on bananapi-r2 commit b96a696fb2a8dd1ee4e6cfee48dcac270fc53287 upstream. Fix that bananapi-r2 booting from SD-card would fail since incorrect polarity is applied to the previous setup with GPIO_ACTIVE_HIGH. Cc: stable@vger.kernel.org Fixes: 0eed8d097612 ("arm: dts: mt7623: Add SD-card and EMMC to bananapi-r2") Signed-off-by: Sean Wang Tested-by: Matthias Brugger Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts index 688a86378cee..7bf5aa2237c9 100644 --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts @@ -204,7 +204,7 @@ bus-width = <4>; max-frequency = <50000000>; cap-sd-highspeed; - cd-gpios = <&pio 261 0>; + cd-gpios = <&pio 261 GPIO_ACTIVE_LOW>; vmmc-supply = <&mt6323_vmch_reg>; vqmmc-supply = <&mt6323_vio18_reg>; }; From b1637c1236d0e0240277b616f94882022cd2a55e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:51 +0530 Subject: [PATCH 473/770] arm: spear600: Add missing interrupt-parent of rtc commit 6ffb5b4f248fe53e0361b8cbc2a523b432566442 upstream. The interrupt-parent of rtc was missing, add it. Fixes: 8113ba917dfa ("ARM: SPEAr: DT: Update device nodes") Cc: stable@vger.kernel.org # v3.8+ Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/spear600.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi index 6b32d20acc9f..00166eb9be86 100644 --- a/arch/arm/boot/dts/spear600.dtsi +++ b/arch/arm/boot/dts/spear600.dtsi @@ -194,6 +194,7 @@ rtc: rtc@fc900000 { compatible = "st,spear600-rtc"; reg = <0xfc900000 0x1000>; + interrupt-parent = <&vic0>; interrupts = <10>; status = "disabled"; }; From 349d03cb5378e97e6bfd4fed5c78d3bab115b36e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:52 +0530 Subject: [PATCH 474/770] arm: spear13xx: Fix dmas cells commit cdd10409914184c7eee5ae3e11beb890c9c16c61 upstream. The "dmas" cells for the designware DMA controller need to have only 3 properties apart from the phandle: request line, src master and destination master. But the commit 6e8887f60f60 updated it incorrectly while moving from platform code to DT. Fix it. Cc: stable@vger.kernel.org # v3.10+ Fixes: 6e8887f60f60 ("ARM: SPEAr13xx: Pass generic DW DMAC platform data from DT") Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/dma/snps-dma.txt | 2 +- arch/arm/boot/dts/spear1340.dtsi | 4 ++-- arch/arm/boot/dts/spear13xx.dtsi | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index a122723907ac..99acc712f83a 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -64,6 +64,6 @@ Example: reg = <0xe0000000 0x1000>; interrupts = <0 35 0x4>; dmas = <&dmahost 12 0 1>, - <&dmahost 13 0 1 0>; + <&dmahost 13 1 0>; dma-names = "rx", "rx"; }; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 5f347054527d..d4dbc4098653 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -142,8 +142,8 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 0x600 0 0 1>, /* 0xC << 11 */ - <&dwdma0 0x680 0 1 0>; /* 0xD << 7 */ + dmas = <&dwdma0 12 0 1>, + <&dwdma0 13 1 0>; dma-names = "tx", "rx"; }; diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 17ea0abcdbd7..086b4b333249 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -100,7 +100,7 @@ reg = <0xb2800000 0x1000>; interrupts = <0 29 0x4>; status = "disabled"; - dmas = <&dwdma0 0 0 0 0>; + dmas = <&dwdma0 0 0 0>; dma-names = "data"; }; @@ -290,8 +290,8 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 0x2000 0 0 0>, /* 0x4 << 11 */ - <&dwdma0 0x0280 0 0 0>; /* 0x5 << 7 */ + dmas = <&dwdma0 4 0 0>, + <&dwdma0 5 0 0>; dma-names = "tx", "rx"; }; From ce6ec5b8dc5a15b830022dd6ee5a040ead0debb9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 11 Jan 2018 11:28:53 +0530 Subject: [PATCH 475/770] arm: spear13xx: Fix spics gpio controller's warning commit f8975cb1b8a36d0839b6365235778dd9df1d04ca upstream. This fixes the following warning by also sending the flags argument for gpio controllers: Property 'cs-gpios', cell 6 is not a phandle reference in /ahb/apb/spi@e0100000 Fixes: 8113ba917dfa ("ARM: SPEAr: DT: Update device nodes") Cc: stable@vger.kernel.org # v3.8+ Reported-by: Arnd Bergmann Signed-off-by: Viresh Kumar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/spear1310-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index 84101e4eebbf..0f5f379323a8 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -349,7 +349,7 @@ spi0: spi@e0100000 { status = "okay"; num-cs = <3>; - cs-gpios = <&gpio1 7 0>, <&spics 0>, <&spics 1>; + cs-gpios = <&gpio1 7 0>, <&spics 0 0>, <&spics 1 0>; stmpe610@0 { compatible = "st,stmpe610"; From cc2759b7bb7c55a4bed674a8223c89a8b88fb3d7 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 30 Aug 2017 17:12:05 +0100 Subject: [PATCH 476/770] drm/i915: add GT number to intel_device_info commit 0890540e21cf1156b4cf960a4c1c734db4e816f9 upstream. Up to Coffeelake we could deduce this GT number from the device ID. This doesn't seem to be the case anymore. This change reorders pciids per GT and adds a gt field to intel_device_info. We set this field on the following platforms : - SNB/IVB/HSW/BDW/SKL/KBL/CFL/CNL Before & After : $ modinfo drivers/gpu/drm/i915/i915.ko | grep ^alias | wc -l 209 v2: Add SNB & IVB (Chris) v3: Fix compilation error in early-quirks (Lionel) v4: Fix inconsistency between FEATURE/PLATFORM macros (Ville) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170830161208.29221-2-lionel.g.landwerlin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_pci.c | 193 +++++++++++++++++++++++++------- include/drm/i915_pciids.h | 156 ++++++++++++++++---------- 3 files changed, 248 insertions(+), 102 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 18d9da53282b..3f818412765c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -842,6 +842,7 @@ struct intel_device_info { u8 gen; u16 gen_mask; enum intel_platform platform; + u8 gt; /* GT number, 0 if undefined */ u8 ring_mask; /* Rings supported by the HW */ u8 num_rings; #define DEFINE_FLAG(name) u8 name:1 diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 09d97e0990b7..2985f1e418ad 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -224,15 +224,34 @@ static const struct intel_device_info intel_ironlake_m_info = { GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS -static const struct intel_device_info intel_sandybridge_d_info = { - GEN6_FEATURES, - .platform = INTEL_SANDYBRIDGE, +#define SNB_D_PLATFORM \ + GEN6_FEATURES, \ + .platform = INTEL_SANDYBRIDGE + +static const struct intel_device_info intel_sandybridge_d_gt1_info = { + SNB_D_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_sandybridge_m_info = { - GEN6_FEATURES, - .platform = INTEL_SANDYBRIDGE, - .is_mobile = 1, +static const struct intel_device_info intel_sandybridge_d_gt2_info = { + SNB_D_PLATFORM, + .gt = 2, +}; + +#define SNB_M_PLATFORM \ + GEN6_FEATURES, \ + .platform = INTEL_SANDYBRIDGE, \ + .is_mobile = 1 + + +static const struct intel_device_info intel_sandybridge_m_gt1_info = { + SNB_M_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_sandybridge_m_gt2_info = { + SNB_M_PLATFORM, + .gt = 2, }; #define GEN7_FEATURES \ @@ -249,22 +268,41 @@ static const struct intel_device_info intel_sandybridge_m_info = { GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS -static const struct intel_device_info intel_ivybridge_d_info = { - GEN7_FEATURES, - .platform = INTEL_IVYBRIDGE, - .has_l3_dpf = 1, +#define IVB_D_PLATFORM \ + GEN7_FEATURES, \ + .platform = INTEL_IVYBRIDGE, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_ivybridge_d_gt1_info = { + IVB_D_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_ivybridge_m_info = { - GEN7_FEATURES, - .platform = INTEL_IVYBRIDGE, - .is_mobile = 1, - .has_l3_dpf = 1, +static const struct intel_device_info intel_ivybridge_d_gt2_info = { + IVB_D_PLATFORM, + .gt = 2, +}; + +#define IVB_M_PLATFORM \ + GEN7_FEATURES, \ + .platform = INTEL_IVYBRIDGE, \ + .is_mobile = 1, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_ivybridge_m_gt1_info = { + IVB_M_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_ivybridge_m_gt2_info = { + IVB_M_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, .platform = INTEL_IVYBRIDGE, + .gt = 2, .num_pipes = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; @@ -299,10 +337,24 @@ static const struct intel_device_info intel_valleyview_info = { .has_rc6p = 0 /* RC6p removed-by HSW */, \ .has_runtime_pm = 1 -static const struct intel_device_info intel_haswell_info = { - HSW_FEATURES, - .platform = INTEL_HASWELL, - .has_l3_dpf = 1, +#define HSW_PLATFORM \ + HSW_FEATURES, \ + .platform = INTEL_HASWELL, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_haswell_gt1_info = { + HSW_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_haswell_gt2_info = { + HSW_PLATFORM, + .gt = 2, +}; + +static const struct intel_device_info intel_haswell_gt3_info = { + HSW_PLATFORM, + .gt = 3, }; #define BDW_FEATURES \ @@ -318,12 +370,27 @@ static const struct intel_device_info intel_haswell_info = { .gen = 8, \ .platform = INTEL_BROADWELL -static const struct intel_device_info intel_broadwell_info = { +static const struct intel_device_info intel_broadwell_gt1_info = { BDW_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_broadwell_gt2_info = { + BDW_PLATFORM, + .gt = 2, +}; + +static const struct intel_device_info intel_broadwell_rsvd_info = { + BDW_PLATFORM, + .gt = 3, + /* According to the device ID those devices are GT3, they were + * previously treated as not GT3, keep it like that. + */ }; static const struct intel_device_info intel_broadwell_gt3_info = { BDW_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -358,13 +425,29 @@ static const struct intel_device_info intel_cherryview_info = { .has_guc = 1, \ .ddb_size = 896 -static const struct intel_device_info intel_skylake_info = { +static const struct intel_device_info intel_skylake_gt1_info = { SKL_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_skylake_gt3_info = { +static const struct intel_device_info intel_skylake_gt2_info = { SKL_PLATFORM, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .gt = 2, +}; + +#define SKL_GT3_PLUS_PLATFORM \ + SKL_PLATFORM, \ + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING + + +static const struct intel_device_info intel_skylake_gt3_info = { + SKL_GT3_PLUS_PLATFORM, + .gt = 3, +}; + +static const struct intel_device_info intel_skylake_gt4_info = { + SKL_GT3_PLUS_PLATFORM, + .gt = 4, }; #define GEN9_LP_FEATURES \ @@ -416,12 +499,19 @@ static const struct intel_device_info intel_geminilake_info = { .has_guc = 1, \ .ddb_size = 896 -static const struct intel_device_info intel_kabylake_info = { +static const struct intel_device_info intel_kabylake_gt1_info = { KBL_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_kabylake_gt2_info = { + KBL_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_kabylake_gt3_info = { KBL_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -434,20 +524,28 @@ static const struct intel_device_info intel_kabylake_gt3_info = { .has_guc = 1, \ .ddb_size = 896 -static const struct intel_device_info intel_coffeelake_info = { +static const struct intel_device_info intel_coffeelake_gt1_info = { CFL_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_coffeelake_gt2_info = { + CFL_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_coffeelake_gt3_info = { CFL_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; -static const struct intel_device_info intel_cannonlake_info = { +static const struct intel_device_info intel_cannonlake_gt2_info = { BDW_FEATURES, .is_alpha_support = 1, .platform = INTEL_CANNONLAKE, .gen = 10, + .gt = 2, .ddb_size = 1024, .has_csr = 1, .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } @@ -476,31 +574,40 @@ static const struct pci_device_id pciidlist[] = { INTEL_PINEVIEW_IDS(&intel_pineview_info), INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info), INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info), - INTEL_SNB_D_IDS(&intel_sandybridge_d_info), - INTEL_SNB_M_IDS(&intel_sandybridge_m_info), + INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info), + INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info), + INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info), + INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info), INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ - INTEL_IVB_M_IDS(&intel_ivybridge_m_info), - INTEL_IVB_D_IDS(&intel_ivybridge_d_info), - INTEL_HSW_IDS(&intel_haswell_info), + INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info), + INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info), + INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info), + INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info), + INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info), + INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info), + INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info), INTEL_VLV_IDS(&intel_valleyview_info), - INTEL_BDW_GT12_IDS(&intel_broadwell_info), + INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info), + INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info), INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info), - INTEL_BDW_RSVD_IDS(&intel_broadwell_info), + INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info), INTEL_CHV_IDS(&intel_cherryview_info), - INTEL_SKL_GT1_IDS(&intel_skylake_info), - INTEL_SKL_GT2_IDS(&intel_skylake_info), + INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info), + INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info), INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info), - INTEL_SKL_GT4_IDS(&intel_skylake_gt3_info), + INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info), INTEL_BXT_IDS(&intel_broxton_info), INTEL_GLK_IDS(&intel_geminilake_info), - INTEL_KBL_GT1_IDS(&intel_kabylake_info), - INTEL_KBL_GT2_IDS(&intel_kabylake_info), + INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info), + INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), - INTEL_CFL_S_IDS(&intel_coffeelake_info), - INTEL_CFL_H_IDS(&intel_coffeelake_info), - INTEL_CFL_U_IDS(&intel_coffeelake_gt3_info), - INTEL_CNL_IDS(&intel_cannonlake_info), + INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), + INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), + INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info), + INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 34c8f5600ce0..1257e15c1a03 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -118,92 +118,125 @@ #define INTEL_IRONLAKE_M_IDS(info) \ INTEL_VGA_DEVICE(0x0046, info) -#define INTEL_SNB_D_IDS(info) \ +#define INTEL_SNB_D_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x0102, info), \ - INTEL_VGA_DEVICE(0x0112, info), \ - INTEL_VGA_DEVICE(0x0122, info), \ INTEL_VGA_DEVICE(0x010A, info) -#define INTEL_SNB_M_IDS(info) \ - INTEL_VGA_DEVICE(0x0106, info), \ +#define INTEL_SNB_D_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0112, info), \ + INTEL_VGA_DEVICE(0x0122, info) + +#define INTEL_SNB_D_IDS(info) \ + INTEL_SNB_D_GT1_IDS(info), \ + INTEL_SNB_D_GT2_IDS(info) + +#define INTEL_SNB_M_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x0106, info) + +#define INTEL_SNB_M_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x0116, info), \ INTEL_VGA_DEVICE(0x0126, info) +#define INTEL_SNB_M_IDS(info) \ + INTEL_SNB_M_GT1_IDS(info), \ + INTEL_SNB_M_GT2_IDS(info) + +#define INTEL_IVB_M_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x0156, info) /* GT1 mobile */ + +#define INTEL_IVB_M_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ + #define INTEL_IVB_M_IDS(info) \ - INTEL_VGA_DEVICE(0x0156, info), /* GT1 mobile */ \ - INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ + INTEL_IVB_M_GT1_IDS(info), \ + INTEL_IVB_M_GT2_IDS(info) + +#define INTEL_IVB_D_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \ + INTEL_VGA_DEVICE(0x015a, info) /* GT1 server */ + +#define INTEL_IVB_D_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */ #define INTEL_IVB_D_IDS(info) \ - INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \ - INTEL_VGA_DEVICE(0x015a, info), /* GT1 server */ \ - INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */ + INTEL_IVB_D_GT1_IDS(info), \ + INTEL_IVB_D_GT2_IDS(info) #define INTEL_IVB_Q_IDS(info) \ INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ -#define INTEL_HSW_IDS(info) \ +#define INTEL_HSW_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ INTEL_VGA_DEVICE(0x040a, info), /* GT1 server */ \ - INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \ - INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \ INTEL_VGA_DEVICE(0x040B, info), /* GT1 reserved */ \ - INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \ - INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \ INTEL_VGA_DEVICE(0x040E, info), /* GT1 reserved */ \ - INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \ - INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \ INTEL_VGA_DEVICE(0x0C02, info), /* SDV GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \ INTEL_VGA_DEVICE(0x0C0A, info), /* SDV GT1 server */ \ - INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \ - INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \ INTEL_VGA_DEVICE(0x0C0B, info), /* SDV GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \ INTEL_VGA_DEVICE(0x0C0E, info), /* SDV GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \ INTEL_VGA_DEVICE(0x0A02, info), /* ULT GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \ INTEL_VGA_DEVICE(0x0A0A, info), /* ULT GT1 server */ \ - INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \ - INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \ INTEL_VGA_DEVICE(0x0A0B, info), /* ULT GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \ INTEL_VGA_DEVICE(0x0D02, info), /* CRW GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \ INTEL_VGA_DEVICE(0x0D0A, info), /* CRW GT1 server */ \ - INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \ - INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \ INTEL_VGA_DEVICE(0x0D0B, info), /* CRW GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0A0E, info), /* ULX GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0D06, info) /* CRW GT1 mobile */ + +#define INTEL_HSW_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \ + INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \ + INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \ + INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \ + INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \ INTEL_VGA_DEVICE(0x0C16, info), /* SDV GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \ - INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \ INTEL_VGA_DEVICE(0x0A16, info), /* ULT GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \ - INTEL_VGA_DEVICE(0x0A0E, info), /* ULX GT1 mobile */ \ INTEL_VGA_DEVICE(0x0A1E, info), /* ULX GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0D16, info) /* CRW GT2 mobile */ + +#define INTEL_HSW_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ + INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \ + INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \ + INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \ + INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \ + INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \ + INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \ + INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \ INTEL_VGA_DEVICE(0x0A2E, info), /* ULT GT3 reserved */ \ - INTEL_VGA_DEVICE(0x0D06, info), /* CRW GT1 mobile */ \ - INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ +#define INTEL_HSW_IDS(info) \ + INTEL_HSW_GT1_IDS(info), \ + INTEL_HSW_GT2_IDS(info), \ + INTEL_HSW_GT3_IDS(info) + #define INTEL_VLV_IDS(info) \ INTEL_VGA_DEVICE(0x0f30, info), \ INTEL_VGA_DEVICE(0x0f31, info), \ @@ -212,17 +245,19 @@ INTEL_VGA_DEVICE(0x0157, info), \ INTEL_VGA_DEVICE(0x0155, info) -#define INTEL_BDW_GT12_IDS(info) \ +#define INTEL_BDW_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \ INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \ INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \ INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \ - INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ + INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ + INTEL_VGA_DEVICE(0x160D, info) /* GT1 Workstation */ + +#define INTEL_BDW_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \ INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \ - INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ - INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ - INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \ + INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \ INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */ @@ -243,7 +278,8 @@ INTEL_VGA_DEVICE(0x163D, info) /* Workstation */ #define INTEL_BDW_IDS(info) \ - INTEL_BDW_GT12_IDS(info), \ + INTEL_BDW_GT1_IDS(info), \ + INTEL_BDW_GT2_IDS(info), \ INTEL_BDW_GT3_IDS(info), \ INTEL_BDW_RSVD_IDS(info) @@ -335,20 +371,22 @@ INTEL_KBL_GT4_IDS(info) /* CFL S */ -#define INTEL_CFL_S_IDS(info) \ +#define INTEL_CFL_S_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \ - INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x3E93, info) /* SRV GT1 */ + +#define INTEL_CFL_S_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */ /* CFL H */ -#define INTEL_CFL_H_IDS(info) \ +#define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ /* CFL U */ -#define INTEL_CFL_U_IDS(info) \ +#define INTEL_CFL_U_GT3_IDS(info) \ INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \ From ba86431b7b588d887aca002596a42890edee9bd0 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Wed, 20 Sep 2017 13:31:26 -0700 Subject: [PATCH 477/770] drm/i915/kbl: Change a KBL pci id to GT2 from GT1.5 commit 41693fd5237397d3c61b311af0fda1f6f39297c2 upstream. See Mesa commit 9c588ff Cc: Matt Turner Cc: Rodrigo Vivi Signed-off-by: Anuj Phogat Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20170920203126.1323-1-anuj.phogat@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/drm/i915_pciids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 1257e15c1a03..972a25633525 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -339,7 +339,6 @@ #define INTEL_KBL_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \ INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \ - INTEL_VGA_DEVICE(0x5917, info), /* DT GT1.5 */ \ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ @@ -349,6 +348,7 @@ #define INTEL_KBL_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ + INTEL_VGA_DEVICE(0x5917, info), /* Mobile GT2 */ \ INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \ From 0fe1e5ec1c27115afdaf245cc1a8b42765448381 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 13 Dec 2017 12:04:25 -0800 Subject: [PATCH 478/770] x86/gpu: add CFL to early quirks commit 33aa69ed8aacd92dea12671e52eb3ca6ac2d7a49 upstream. CFL was missing from intel_early_ids[]. The PCI ID needs to be there to allow the memory region to be stolen, otherwise we could have RAM being arbitrarily overwritten if for example we keep using the UEFI framebuffer, depending on how BIOS has set up the e820 map. Fixes: b056f8f3d6b9 ("drm/i915/cfl: Add Coffee Lake PCI IDs for S Skus.") Signed-off-by: Lucas De Marchi Cc: Rodrigo Vivi Cc: Anusha Srivatsa Cc: Jani Nikula Cc: Joonas Lahtinen Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: x86@kernel.org Cc: # v4.13+ 0890540e21cf drm/i915: add GT number to intel_device_info Cc: # v4.13+ 41693fd52373 drm/i915/kbl: Change a KBL pci id to GT2 from GT1.5 Cc: # v4.13+ Reviewed-by: Rodrigo Vivi Acked-by: Jani Nikula Acked-by: Ingo Molnar Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171213200425.2954-1-lucas.demarchi@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/early-quirks.c | 1 + include/drm/i915_pciids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1e82f787c160..c87560e1e3ef 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -527,6 +527,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_CFL_IDS(&gen9_early_ops), INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), }; diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 972a25633525..c65e4489006d 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -392,6 +392,12 @@ INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA5, info) /* ULT GT3 */ +#define INTEL_CFL_IDS(info) \ + INTEL_CFL_S_GT1_IDS(info), \ + INTEL_CFL_S_GT2_IDS(info), \ + INTEL_CFL_H_GT2_IDS(info), \ + INTEL_CFL_U_GT3_IDS(info) + /* CNL U 2+2 */ #define INTEL_CNL_U_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x5A52, info), \ From 04f048fad32e191d48c1a260d6aa74e27cfe846e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 29 Jan 2018 14:08:45 +0300 Subject: [PATCH 479/770] x86/kexec: Make kexec (mostly) work in 5-level paging mode commit 5bf30316991d5bcda046343ee77d823cf16fdd03 upstream. Currently kexec() will crash when switching into a 5-level paging enabled kernel. I missed that we need to change relocate_kernel() to set CR4.LA57 flag if the kernel has 5-level paging enabled. I avoided using #ifdef CONFIG_X86_5LEVEL here and inferred if we need to enable 5-level paging from previous CR4 value. This way the code is ready for boot-time switching between paging modes. With this patch applied, in addition to kexec 4-to-4 which always worked, we can kexec 4-to-5 and 5-to-5 - while 5-to-4 will need more work. Reported-by: Baoquan He Signed-off-by: Kirill A. Shutemov Tested-by: Baoquan He Cc: # v4.14+ Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: 77ef56e4f0fb ("x86: Enable 5-level paging support via CONFIG_X86_5LEVEL=y") Link: http://lkml.kernel.org/r/20180129110845.26633-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/relocate_kernel_64.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 307d3bac5f04..11eda21eb697 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -68,6 +68,9 @@ relocate_kernel: movq %cr4, %rax movq %rax, CR4(%r11) + /* Save CR4. Required to enable the right paging mode later. */ + movq %rax, %r13 + /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -126,8 +129,13 @@ identity_mapped: /* * Set cr4 to a known state: * - physical address extension enabled + * - 5-level paging, if it was enabled before */ movl $X86_CR4_PAE, %eax + testq $X86_CR4_LA57, %r13 + jz 1f + orl $X86_CR4_LA57, %eax +1: movq %rax, %cr4 jmp 1f From 41fd295d90a7d8518f3d3d705884fb05df9596f0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 1 Feb 2018 13:40:19 +0100 Subject: [PATCH 480/770] x86/xen: init %gs very early to avoid page faults with stack protector commit 4f277295e54c5b7340e48efea3fc5cc21a2872b7 upstream. When running as Xen pv guest %gs is initialized some time after C code is started. Depending on stack protector usage this might be too late, resulting in page faults. So setup %gs and MSR_GS_BASE in assembly code already. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Tested-by: Chris Patterson Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-head.S | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 497cc55a0c16..96f26e026783 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -9,7 +9,9 @@ #include #include +#include #include +#include #include #include @@ -35,6 +37,20 @@ ENTRY(startup_xen) mov %_ASM_SI, xen_start_info mov $init_thread_union+THREAD_SIZE, %_ASM_SP +#ifdef CONFIG_X86_64 + /* Set up %gs. + * + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. + */ + movl $MSR_GS_BASE,%ecx + movq $INIT_PER_CPU_VAR(irq_stack_union),%rax + cdq + wrmsr +#endif + jmp xen_start_kernel END(startup_xen) __FINIT From 43d38b079ca7466a9eebf2936584e091f8c20349 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Feb 2018 18:55:12 +0100 Subject: [PATCH 481/770] x86: PM: Make APM idle driver initialize polling state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f859422075165e32c00c8d75d63f300015cc07ae upstream. Update the APM driver overlooked by commit 1b39e3f813b4 (cpuidle: Make drivers initialize polling state) to initialize the polling state like the other cpuidle drivers modified by that commit to prevent cpuidle from crashing. Fixes: 1b39e3f813b4 (cpuidle: Make drivers initialize polling state) Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apm_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index e4b0d92b3ae0..2a7fd56e67b3 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2389,6 +2389,7 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { + cpuidle_poll_state_init(&apm_idle_driver); if (!cpuidle_register_driver(&apm_idle_driver)) if (cpuidle_register_device(&apm_cpuidle_device)) cpuidle_unregister_driver(&apm_idle_driver); From 2bc19b518cb25150572ae4fe34ea6d623d733e2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Feb 2018 17:18:05 -0800 Subject: [PATCH 482/770] x86/entry/64: Clear extra registers beyond syscall arguments, to reduce speculation attack surface commit 8e1eb3fa009aa7c0b944b3c8b26b07de0efb3200 upstream. At entry userspace may have (maliciously) populated the extra registers outside the syscall calling convention with arbitrary values that could be useful in a speculative execution (Spectre style) attack. Clear these registers to minimize the kernel's attack surface. Note, this only clears the extra registers and not the unused registers for syscalls less than 6 arguments, since those registers are likely to be clobbered well before their values could be put to use under speculation. Note, Linus found that the XOR instructions can be executed with minimized cost if interleaved with the PUSH instructions, and Ingo's analysis found that R10 and R11 should be included in the register clearing beyond the typical 'extra' syscall calling convention registers. Suggested-by: Linus Torvalds Reported-by: Andi Kleen Signed-off-by: Dan Williams Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151787988577.7847.16733592218894189003.stgit@dwillia2-desk3.amr.corp.intel.com [ Made small improvements to the changelog and the code comments. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 16e2d72e79a0..5771ae24ba58 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -231,13 +231,26 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %r8 /* pt_regs->r8 */ pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ + /* + * Clear extra registers that a speculation attack might + * otherwise want to exploit. Interleave XOR with PUSH + * for better uop scheduling: + */ + xorq %r10, %r10 /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15 */ UNWIND_HINT_REGS TRACE_IRQS_OFF From ec35f83de1c80e4decdbda0d61cd1f8924ae606f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Feb 2018 17:18:17 -0800 Subject: [PATCH 483/770] x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface commit 6b8cf5cc9965673951f1ab3f0e3cf23d06e3e2ee upstream. At entry userspace may have populated registers with values that could otherwise be useful in a speculative execution attack. Clear them to minimize the kernel's attack surface. Originally-From: Andi Kleen Signed-off-by: Dan Williams Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151787989697.7847.4083702787288600552.stgit@dwillia2-desk3.amr.corp.intel.com [ Made small improvements to the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 98d5358e4041..fd65e016e413 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ cld /* @@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ + xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ + xorq %r12, %r12 /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ + xorq %r13, %r13 /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ + xorq %r14, %r14 /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ + xorq %r15, %r15 /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ + xorq %r8, %r8 /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ + xorq %r9, %r9 /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ + xorq %r10, %r10 /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ + xorq %r11, %r11 /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15 */ cld /* From 045e5161abfcfefce5b2777788e5913d85bb5c6f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Feb 2018 11:21:58 +0100 Subject: [PATCH 484/770] compiler-gcc.h: Introduce __optimize function attribute commit df5d45aa08f848b79caf395211b222790534ccc7 upstream. Create a new function attribute __optimize, which allows to specify an optimization level on a per-function basis. Signed-off-by: Geert Uytterhoeven Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 4 ++++ include/linux/compiler.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2272ded07496..7bba8e28c529 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -196,6 +196,10 @@ #endif /* __CHECKER__ */ #endif /* GCC_VERSION >= 40300 */ +#if GCC_VERSION >= 40400 +#define __optimize(level) __attribute__((__optimize__(level))) +#endif /* GCC_VERSION >= 40400 */ + #if GCC_VERSION >= 40500 #ifndef __CHECKER__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index fab5dc250c61..e8c9cd18bb05 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #endif /* __ASSEMBLY__ */ +#ifndef __optimize +# define __optimize(level) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 From fff8ad7c18a238a32a56056980fccd40ed8cce83 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Feb 2018 11:21:59 +0100 Subject: [PATCH 485/770] compiler-gcc.h: __nostackprotector needs gcc-4.4 and up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9afaaa4ff7af8b87d4a205e48cb8a6f666d7f01 upstream. Gcc versions before 4.4 do not recognize the __optimize__ compiler attribute: warning: ‘__optimize__’ attribute directive ignored Fixes: 7375ae3a0b79ea07 ("compiler-gcc.h: Introduce __nostackprotector function attribute") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7bba8e28c529..bf09213895f7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -167,8 +167,6 @@ #if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) - -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) #endif #if GCC_VERSION >= 40300 @@ -198,6 +196,7 @@ #if GCC_VERSION >= 40400 #define __optimize(level) __attribute__((__optimize__(level))) +#define __nostackprotector __optimize("no-stack-protector") #endif /* GCC_VERSION >= 40400 */ #if GCC_VERSION >= 40500 From b3d33c5f296b36a05e24828f96c8ea5e02a15b06 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 6 Feb 2018 22:20:21 +0100 Subject: [PATCH 486/770] crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate commit dd78c832ffaf86eb6434e56de4bc3bc31f03f771 upstream. According to crypto/rng.h generate function should return 0 on success and < 0 on error. Fixes: b8ae5c7387ad ("crypto: sun4i-ss - support the Security System PRNG") Signed-off-by: Artem Savkov Acked-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/sunxi-ss/sun4i-ss-prng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c index 0d01d1624252..5754e0b92fb0 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c @@ -52,5 +52,5 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, writel(0, ss->base + SS_CTL); spin_unlock(&ss->slock); - return dlen; + return 0; } From 14d87b710338145e46940d0f751c5b7dd5b4c38f Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 6 Feb 2018 22:20:22 +0100 Subject: [PATCH 487/770] crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate commit 2e7d1d61ea6c0f1c4da5eb82cafac750d55637a7 upstream. Lockdep detects a possible deadlock in sun4i_ss_prng_generate() and throws an "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage" warning. Disabling softirqs to fix this. Fixes: b8ae5c7387ad ("crypto: sun4i-ss - support the Security System PRNG") Signed-off-by: Artem Savkov Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/sunxi-ss/sun4i-ss-prng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c index 5754e0b92fb0..63d636424161 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c @@ -28,7 +28,7 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng); ss = algt->ss; - spin_lock(&ss->slock); + spin_lock_bh(&ss->slock); writel(mode, ss->base + SS_CTL); @@ -51,6 +51,6 @@ int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, } writel(0, ss->base + SS_CTL); - spin_unlock(&ss->slock); + spin_unlock_bh(&ss->slock); return 0; } From b973685e70c3b418ad46a03d6a9c81554c5c4ba2 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 7 Feb 2018 17:35:51 +1100 Subject: [PATCH 488/770] powerpc/mm/radix: Split linear mapping on hot-unplug commit 4dd5f8a99e791a8c6500e3592f3ce81ae7edcde1 upstream. This patch splits the linear mapping if the hot-unplug range is smaller than the mapping size. The code detects if the mapping needs to be split into a smaller size and if so, uses the stop machine infrastructure to clear the existing mapping and then remap the remaining range using a smaller page size. The code will skip any region of the mapping that overlaps with kernel text and warn about it once. We don't want to remove a mapping where the kernel text and the LMB we intend to remove overlap in the same TLB mapping as it may affect the currently executing code. I've tested these changes under a kvm guest with 2 vcpus, from a split mapping point of view, some of the caveats mentioned above applied to the testing I did. Fixes: 4b5d62ca17a1 ("powerpc/mm: add radix__remove_section_mapping()") Signed-off-by: Balbir Singh [mpe: Tweak change log to match updated behaviour] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/pgtable-radix.c | 95 +++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfbbee941a76..17ae5c15a9e0 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -671,6 +672,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } +struct change_mapping_params { + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long aligned_start; + unsigned long aligned_end; +}; + +static int stop_machine_change_mapping(void *data) +{ + struct change_mapping_params *params = + (struct change_mapping_params *)data; + + if (!data) + return -1; + + spin_unlock(&init_mm.page_table_lock); + pte_clear(&init_mm, params->aligned_start, params->pte); + create_physical_mapping(params->aligned_start, params->start); + create_physical_mapping(params->end, params->aligned_end); + spin_lock(&init_mm.page_table_lock); + return 0; +} + static void remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end) { @@ -699,6 +724,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } +/* + * clear the pte and potentially split the mapping helper + */ +static void split_kernel_mapping(unsigned long addr, unsigned long end, + unsigned long size, pte_t *pte) +{ + unsigned long mask = ~(size - 1); + unsigned long aligned_start = addr & mask; + unsigned long aligned_end = addr + size; + struct change_mapping_params params; + bool split_region = false; + + if ((end - addr) < size) { + /* + * We're going to clear the PTE, but not flushed + * the mapping, time to remap and flush. The + * effects if visible outside the processor or + * if we are running in code close to the + * mapping we cleared, we are in trouble. + */ + if (overlaps_kernel_text(aligned_start, addr) || + overlaps_kernel_text(end, aligned_end)) { + /* + * Hack, just return, don't pte_clear + */ + WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " + "text, not splitting\n", addr, end); + return; + } + split_region = true; + } + + if (split_region) { + params.pte = pte; + params.start = addr; + params.end = end; + params.aligned_start = addr & ~(size - 1); + params.aligned_end = min_t(unsigned long, aligned_end, + (unsigned long)__va(memblock_end_of_DRAM())); + stop_machine(stop_machine_change_mapping, ¶ms, NULL); + return; + } + + pte_clear(&init_mm, addr, pte); +} + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { @@ -714,13 +785,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_huge(*pmd)) { - if (!IS_ALIGNED(addr, PMD_SIZE) || - !IS_ALIGNED(next, PMD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pmd); + split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -745,13 +810,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_huge(*pud)) { - if (!IS_ALIGNED(addr, PUD_SIZE) || - !IS_ALIGNED(next, PUD_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pud); + split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -777,13 +836,7 @@ static void remove_pagetable(unsigned long start, unsigned long end) continue; if (pgd_huge(*pgd)) { - if (!IS_ALIGNED(addr, PGDIR_SIZE) || - !IS_ALIGNED(next, PGDIR_SIZE)) { - WARN_ONCE(1, "%s: unaligned range\n", __func__); - continue; - } - - pte_clear(&init_mm, addr, (pte_t *)pgd); + split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } From 372ed448263dcc733f4ef3d24d704d2f016d734c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 9 Feb 2018 09:06:38 -0800 Subject: [PATCH 489/770] x86/mm/pti: Fix PTI comment in entry_SYSCALL_64() commit 14b1fcc62043729d12e8ae00f8297ab2ffe9fa91 upstream. The comment is confusing since the path is taken when CONFIG_PAGE_TABLE_ISOLATION=y is disabled (while the comment says it is not taken). Signed-off-by: Nadav Amit Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: nadav.amit@gmail.com Link: http://lkml.kernel.org/r/20180209170638.15161-1-namit@vmware.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5771ae24ba58..62b381daadd2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -209,7 +209,7 @@ ENTRY(entry_SYSCALL_64) swapgs /* - * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it + * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it * is not required to switch CR3. */ movq %rsp, PER_CPU_VAR(rsp_scratch) From d5a1b559235a38d527e8e363a325705fa0325602 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 10 Feb 2018 23:39:22 +0000 Subject: [PATCH 490/770] x86/speculation: Update Speculation Control microcode blacklist commit 1751342095f0d2b36fa8114d8e12c5688c455ac4 upstream. Intel have retroactively blessed the 0xc2 microcode on Skylake mobile and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine too. We blacklisted the latter purely because it was present with all the other problematic ones in the 2018-01-08 release, but now it's explicitly listed as OK. We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as that appeared in one version of the blacklist and then reverted to 0x80 again. We can change it if 0x84 is actually announced to be safe. Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4cf4f8cbc69d..c05591f82f22 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -123,8 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, @@ -136,8 +134,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, - /* Updated in the 20180108 release; blacklist until we know otherwise */ - { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, /* Observed in the wild */ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, From b7451cb6159a573736cf674e586c75daabd62ae5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Feb 2018 15:27:34 +0000 Subject: [PATCH 491/770] x86/speculation: Correct Speculation Control microcode blacklist again commit d37fc6d360a404b208547ba112e7dabb6533c7fc upstream. Arjan points out that the Intel document only clears the 0xc2 microcode on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). For the Skylake H/S platform it's OK but for Skylake E3 which has the same CPUID it isn't (yet) cleared. So removing it from the blacklist was premature. Put it back for now. Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was featured in one of the early revisions of the Intel document was never released to the public, and won't be until/unless it is also validated as safe. So those can change to 0x80 which is what all *other* versions of the doc have identified. Once the retrospective testing of existing public microcodes is done, we should be back into a mode where new microcodes are only released in batches and we shouldn't even need to update the blacklist for those anyway, so this tweaking of the list isn't expected to be a thing which keeps happening. Requested-by: Arjan van de Ven Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c05591f82f22..f5347173157c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -116,13 +116,14 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, From bdc69a2ffa0105c1e736fbe4bfaa0b53bd3e915f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 10 Feb 2018 23:39:23 +0000 Subject: [PATCH 492/770] Revert "x86/speculation: Simplify indirect_branch_prediction_barrier()" commit f208820a321f9b23d77d7eed89945d862d62a3ed upstream. This reverts commit 64e16720ea0879f8ab4547e3b9758936d483909b. We cannot call C functions like that, without marking all the call-clobbered registers as, well, clobbered. We might have got away with it for now because the __ibp_barrier() function was *fairly* unlikely to actually use any other registers. But no. Just no. Signed-off-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 13 +++++++++---- arch/x86/include/asm/processor.h | 3 --- arch/x86/kernel/cpu/bugs.c | 6 ------ 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d57894635f2..300cc159b4a0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -164,10 +164,15 @@ static inline void vmexit_fill_RSB(void) static inline void indirect_branch_prediction_barrier(void) { - alternative_input("", - "call __ibp_barrier", - X86_FEATURE_USE_IBPB, - ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_USE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c57c6e77c29f..db3e68bcfd9b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -968,7 +968,4 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); - -void __ibp_barrier(void); - #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 71949bf2de5a..61152aa53377 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -337,9 +337,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev, spectre_v2_module_string()); } #endif - -void __ibp_barrier(void) -{ - __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); -} -EXPORT_SYMBOL_GPL(__ibp_barrier); From 474bd0b82e24e8b928910aba876c20c49098ae6d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 10 Feb 2018 23:39:24 +0000 Subject: [PATCH 493/770] KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 928a4c39484281f8ca366f53a1db79330d058401 upstream. With retpoline, tight loops of "call this function for every XXX" are very much pessimised by taking a prediction miss *every* time. This one is by far the biggest contributor to the guest launch time with retpoline. By marking the iterator slot_handle_…() functions always_inline, we can ensure that the indirect function call can be optimised away into a direct call and it actually generates slightly smaller code because some of the other conditionals can get optimised away too. Performance is now pretty close to what we see with nospectre_v2 on the command line. Suggested-by: Linus Torvalds Tested-by: Filippo Sironi Signed-off-by: David Woodhouse Reviewed-by: Filippo Sironi Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1518305967-31356-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index beb7f8795bc1..ca000fc644bc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5063,7 +5063,7 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ -static bool +static __always_inline bool slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) @@ -5093,7 +5093,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, return flush; } -static bool +static __always_inline bool slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool lock_flush_tlb) @@ -5104,7 +5104,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5112,7 +5112,7 @@ slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { @@ -5120,7 +5120,7 @@ slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } -static bool +static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { From 07a3a99ed7f99d207328fae0da73c83be0799722 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 10 Feb 2018 23:39:25 +0000 Subject: [PATCH 494/770] X86/nVMX: Properly set spec_ctrl and pred_cmd before merging MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 206587a9fb764d71f035dc7f6d3b6488f5d5b304 upstream. These two variables should check whether SPEC_CTRL and PRED_CMD are supposed to be passed through to L2 guests or not. While msr_write_intercepted_l01 would return 'true' if it is not passed through. So just invert the result of msr_write_intercepted_l01 to implement the correct semantics. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Reviewed-by: Jim Mattson Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: sironi@amazon.de Fixes: 086e7d4118cc ("KVM: VMX: Allow direct access to MSR_IA32_SPEC_CTRL") Link: http://lkml.kernel.org/r/1518305967-31356-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0ea909ca45c2..9d03bca5cd1c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10216,8 +10216,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, * updated to reflect this when L1 (or its L2s) actually write to * the MSR. */ - bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); - bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && !pred_cmd && !spec_ctrl) From a15bdf6579b23f62bb0130941158dd2f9b94f079 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 10 Feb 2018 23:39:26 +0000 Subject: [PATCH 495/770] KVM/nVMX: Set the CPU_BASED_USE_MSR_BITMAPS if we have a valid L02 MSR bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3712caeb14dcb33fb4d5114f14c0beef10aca101 upstream. We either clear the CPU_BASED_USE_MSR_BITMAPS and end up intercepting all MSR accesses or create a valid L02 MSR bitmap and use that. This decision has to be made every time we evaluate whether we are going to generate the L02 MSR bitmap. Before commit: d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL") ... this was probably OK since the decision was always identical. This is no longer the case now since the MSR bitmap might actually change once we decide to not intercept SPEC_CTRL and PRED_CMD. Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: kvm@vger.kernel.org Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-6-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9d03bca5cd1c..dd35c6c50516 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10127,7 +10127,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, if (cpu_has_vmx_msr_bitmap() && nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) - ; + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_USE_MSR_BITMAPS); else vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_USE_MSR_BITMAPS); From 2efd067aa4ef25386d4abbe2fe9b2c7f22cd9302 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 09:03:08 +0100 Subject: [PATCH 496/770] x86/speculation: Clean up various Spectre related details commit 21e433bdb95bdf3aa48226fd3d33af608437f293 upstream. Harmonize all the Spectre messages so that a: dmesg | grep -i spectre ... gives us most Spectre related kernel boot messages. Also fix a few other details: - clarify a comment about firmware speculation control - s/KPTI/PTI - remove various line-breaks that made the code uglier Acked-by: David Woodhouse Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 61152aa53377..4acf16a76d1e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); if (ret < 0) return SPECTRE_V2_CMD_AUTO; @@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } @@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void) goto retpoline_auto; break; } - pr_err("kernel not compiled with retpoline; no mitigation available!"); + pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); return; retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { retpoline_amd: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -281,7 +278,7 @@ retpoline_auto: pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP or KPTI are available, there is a risk of + * If neither SMEP nor PTI are available, there is a risk of * hitting userspace addresses in the RSB after a context switch * from a shallow call stack to a deeper one. To prevent this fill * the entire RSB, even when using IBRS. @@ -295,21 +292,20 @@ retpoline_auto: if ((!boot_cpu_has(X86_FEATURE_PTI) && !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); + pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); } /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } } #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return sprintf(buf, "Not affected\n"); @@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev, return sprintf(buf, "Vulnerable\n"); } -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); From c59cce7943dff0fbafcaa4e71149ee1da879d03f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 10 Feb 2018 19:13:58 +0100 Subject: [PATCH 497/770] PM / runtime: Update links_count also if !CONFIG_SRCU commit 433986c2c265d106d6a8e88006e0131fefc92b7b upstream. Commit baa8809f6097 (PM / runtime: Optimize the use of device links) added an invocation of pm_runtime_drop_link() to __device_link_del(). However there are two variants of that function, one for CONFIG_SRCU and another for !CONFIG_SRCU, and the commit only modified the former. Fixes: baa8809f6097 (PM / runtime: Optimize the use of device links) Cc: v4.10+ # v4.10+ Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 12ebd055724c..c8501cdb95f4 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -313,6 +313,9 @@ static void __device_link_del(struct device_link *link) dev_info(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_drop_link(link->consumer); + list_del(&link->s_node); list_del(&link->c_node); device_link_free(link); From e578fedba81e72e195fe71f893828d4597714db5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Feb 2018 11:34:22 +0100 Subject: [PATCH 498/770] PM: cpuidle: Fix cpuidle_poll_state_init() prototype commit d7212cfb05ba802bea4dd6c90d61cfe6366ea224 upstream. Commit f85942207516 (x86: PM: Make APM idle driver initialize polling state) made apm_init() call cpuidle_poll_state_init(), but that only is defined for CONFIG_CPU_IDLE set, so make the empty stub of it available for CONFIG_CPU_IDLE unset too to fix the resulting build issue. Fixes: f85942207516 (x86: PM: Make APM idle driver initialize polling state) Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 8f7788d23b57..a6989e02d0a0 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -225,7 +225,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, } #endif -#ifdef CONFIG_ARCH_HAS_CPU_RELAX +#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX) void cpuidle_poll_state_init(struct cpuidle_driver *drv); #else static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {} From d10d0bb86d974c41ccbbf1f22044e700597a84d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Feb 2018 17:18:11 -0800 Subject: [PATCH 499/770] x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface commit 3ac6d8c787b835b997eb23e43e09aa0895ef7d58 upstream. Clear the 'extra' registers on entering the 64-bit kernel for exceptions and interrupts. The common registers are not cleared since they are likely clobbered well before they can be exploited in a speculative execution attack. Originally-From: Andi Kleen Signed-off-by: Dan Williams Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151787989146.7847.15749181712358213254.stgit@dwillia2-desk3.amr.corp.intel.com [ Made small improvements to the changelog and the code comments. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 19 +++++++++++++++++++ arch/x86/entry/entry_64.S | 6 +++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3f48f695d5e6..f4b129d4af42 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -147,6 +147,25 @@ For 32-bit we have the following conventions - kernel is built with UNWIND_HINT_REGS offset=\offset .endm + /* + * Sanitize registers of values that a speculation attack + * might otherwise want to exploit. The lower registers are + * likely clobbered well before they could be put to use in + * a speculative execution gadget: + */ + .macro CLEAR_REGS_NOSPEC + xorl %ebp, %ebp + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + .endm + .macro POP_EXTRA_REGS popq %r15 popq %r14 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 62b381daadd2..33737a709538 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -571,6 +571,7 @@ END(irq_entries_start) ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS + CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER testb $3, CS(%rsp) @@ -1127,6 +1128,7 @@ ENTRY(xen_failsafe_callback) ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS + CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) @@ -1172,6 +1174,7 @@ ENTRY(paranoid_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 + CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx @@ -1224,8 +1227,8 @@ ENTRY(error_entry) cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 + CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER 8 - xorl %ebx, %ebx testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1422,6 +1425,7 @@ ENTRY(nmi) pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ UNWIND_HINT_REGS + CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER /* From 51209eec238ffd1b4edec999a024159bbae51a3d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:42 +0100 Subject: [PATCH 500/770] x86/entry/64: Merge SAVE_C_REGS and SAVE_EXTRA_REGS, remove unused extensions commit 2e3f0098bc45f710a2f4cbcc94b80a1fae7a99a1 upstream. All current code paths call SAVE_C_REGS and then immediately SAVE_EXTRA_REGS. Therefore, merge these two macros and order the MOV sequeneces properly. While at it, remove the macros to save all except specific registers, as these macros have been unused for a long time. Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 57 +++++++++++---------------------------- arch/x86/entry/entry_64.S | 12 +++------ 2 files changed, 19 insertions(+), 50 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index f4b129d4af42..8907a6593b42 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -101,49 +101,22 @@ For 32-bit we have the following conventions - kernel is built with addq $-(15*8), %rsp .endm - .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 - .if \r11 - movq %r11, 6*8+\offset(%rsp) - .endif - .if \r8910 - movq %r10, 7*8+\offset(%rsp) - movq %r9, 8*8+\offset(%rsp) - movq %r8, 9*8+\offset(%rsp) - .endif - .if \rax - movq %rax, 10*8+\offset(%rsp) - .endif - .if \rcx - movq %rcx, 11*8+\offset(%rsp) - .endif - movq %rdx, 12*8+\offset(%rsp) - movq %rsi, 13*8+\offset(%rsp) + .macro SAVE_REGS offset=0 movq %rdi, 14*8+\offset(%rsp) - UNWIND_HINT_REGS offset=\offset extra=0 - .endm - .macro SAVE_C_REGS offset=0 - SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 - SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_R891011 - SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RCX_R891011 - SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 - SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 - .endm - - .macro SAVE_EXTRA_REGS offset=0 - movq %r15, 0*8+\offset(%rsp) - movq %r14, 1*8+\offset(%rsp) - movq %r13, 2*8+\offset(%rsp) - movq %r12, 3*8+\offset(%rsp) - movq %rbp, 4*8+\offset(%rsp) + movq %rsi, 13*8+\offset(%rsp) + movq %rdx, 12*8+\offset(%rsp) + movq %rcx, 11*8+\offset(%rsp) + movq %rax, 10*8+\offset(%rsp) + movq %r8, 9*8+\offset(%rsp) + movq %r9, 8*8+\offset(%rsp) + movq %r10, 7*8+\offset(%rsp) + movq %r11, 6*8+\offset(%rsp) movq %rbx, 5*8+\offset(%rsp) + movq %rbp, 4*8+\offset(%rsp) + movq %r12, 3*8+\offset(%rsp) + movq %r13, 2*8+\offset(%rsp) + movq %r14, 1*8+\offset(%rsp) + movq %r15, 0*8+\offset(%rsp) UNWIND_HINT_REGS offset=\offset .endm @@ -197,7 +170,7 @@ For 32-bit we have the following conventions - kernel is built with * is just setting the LSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * - * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts + * NOTE: This macro must be used *after* SAVE_REGS because it corrupts * the original rbp. */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 33737a709538..acd912cbd2e5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -569,8 +569,7 @@ END(irq_entries_start) 1: ALLOC_PT_GPREGS_ON_STACK - SAVE_C_REGS - SAVE_EXTRA_REGS + SAVE_REGS CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER @@ -1126,8 +1125,7 @@ ENTRY(xen_failsafe_callback) UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK - SAVE_C_REGS - SAVE_EXTRA_REGS + SAVE_REGS CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER jmp error_exit @@ -1172,8 +1170,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld - SAVE_C_REGS 8 - SAVE_EXTRA_REGS 8 + SAVE_REGS 8 CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER 8 movl $1, %ebx @@ -1225,8 +1222,7 @@ END(paranoid_exit) ENTRY(error_entry) UNWIND_HINT_FUNC cld - SAVE_C_REGS 8 - SAVE_EXTRA_REGS 8 + SAVE_REGS 8 CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) From bb68735527648cc3439bcacab5e6a30ab8219c46 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:43 +0100 Subject: [PATCH 501/770] x86/entry/64: Merge the POP_C_REGS and POP_EXTRA_REGS macros into a single POP_REGS macro commit 502af0d70843c2a9405d7ba1f79b4b0305aaf5f5 upstream. The two special, opencoded cases for POP_C_REGS can be handled by ASM macros. Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 15 +++++++++++---- arch/x86/entry/entry_64.S | 26 ++++---------------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 8907a6593b42..3bda31736a7b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -139,25 +139,32 @@ For 32-bit we have the following conventions - kernel is built with xorq %r15, %r15 .endm - .macro POP_EXTRA_REGS + .macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .endm - - .macro POP_C_REGS + .if \skip_r11rcx + popq %rsi + .else popq %r11 + .endif popq %r10 popq %r9 popq %r8 popq %rax + .if \skip_r11rcx + popq %rsi + .else popq %rcx + .endif popq %rdx popq %rsi + .if \pop_rdi popq %rdi + .endif .endm .macro icebp diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index acd912cbd2e5..f4d72b81d9c3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -330,15 +330,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ UNWIND_HINT_EMPTY - POP_EXTRA_REGS - popq %rsi /* skip r11 */ - popq %r10 - popq %r9 - popq %r8 - popq %rax - popq %rsi /* skip rcx */ - popq %rdx - popq %rsi + POP_REGS pop_rdi=0 skip_r11rcx=1 /* * Now all regs are restored except RSP and RDI. @@ -631,15 +623,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) ud2 1: #endif - POP_EXTRA_REGS - popq %r11 - popq %r10 - popq %r9 - popq %r8 - popq %rax - popq %rcx - popq %rdx - popq %rsi + POP_REGS pop_rdi=0 /* * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. @@ -697,8 +681,7 @@ GLOBAL(restore_regs_and_return_to_kernel) ud2 1: #endif - POP_EXTRA_REGS - POP_C_REGS + POP_REGS addq $8, %rsp /* skip regs->orig_ax */ INTERRUPT_RETURN @@ -1655,8 +1638,7 @@ end_repeat_nmi: nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: - POP_EXTRA_REGS - POP_C_REGS + POP_REGS /* * Skip orig_ax and the "outermost" frame to point RSP at the "iret" From 47d9c905ae7af1c16b58a41be627a74027f5b601 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:44 +0100 Subject: [PATCH 502/770] x86/entry/64: Interleave XOR register clearing with PUSH instructions commit f7bafa2b05ef25eda1d9179fd930b0330cf2b7d1 upstream. Same as is done for syscalls, interleave XOR with PUSH instructions for exceptions/interrupts, in order to minimize the cost of the additional instructions required for register clearing. Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 40 +++++++++++++++++++-------------------- arch/x86/entry/entry_64.S | 30 ++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3bda31736a7b..a05cbb81268d 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -101,44 +101,42 @@ For 32-bit we have the following conventions - kernel is built with addq $-(15*8), %rsp .endm - .macro SAVE_REGS offset=0 + .macro SAVE_AND_CLEAR_REGS offset=0 + /* + * Save registers and sanitize registers of values that a + * speculation attack might otherwise want to exploit. The + * lower registers are likely clobbered well before they + * could be put to use in a speculative execution gadget. + * Interleave XOR with PUSH for better uop scheduling: + */ movq %rdi, 14*8+\offset(%rsp) movq %rsi, 13*8+\offset(%rsp) movq %rdx, 12*8+\offset(%rsp) movq %rcx, 11*8+\offset(%rsp) movq %rax, 10*8+\offset(%rsp) movq %r8, 9*8+\offset(%rsp) + xorq %r8, %r8 /* nospec r8 */ movq %r9, 8*8+\offset(%rsp) + xorq %r9, %r9 /* nospec r9 */ movq %r10, 7*8+\offset(%rsp) + xorq %r10, %r10 /* nospec r10 */ movq %r11, 6*8+\offset(%rsp) + xorq %r11, %r11 /* nospec r11 */ movq %rbx, 5*8+\offset(%rsp) + xorl %ebx, %ebx /* nospec rbx */ movq %rbp, 4*8+\offset(%rsp) + xorl %ebp, %ebp /* nospec rbp */ movq %r12, 3*8+\offset(%rsp) + xorq %r12, %r12 /* nospec r12 */ movq %r13, 2*8+\offset(%rsp) + xorq %r13, %r13 /* nospec r13 */ movq %r14, 1*8+\offset(%rsp) + xorq %r14, %r14 /* nospec r14 */ movq %r15, 0*8+\offset(%rsp) + xorq %r15, %r15 /* nospec r15 */ UNWIND_HINT_REGS offset=\offset .endm - /* - * Sanitize registers of values that a speculation attack - * might otherwise want to exploit. The lower registers are - * likely clobbered well before they could be put to use in - * a speculative execution gadget: - */ - .macro CLEAR_REGS_NOSPEC - xorl %ebp, %ebp - xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - .endm - .macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 @@ -177,7 +175,7 @@ For 32-bit we have the following conventions - kernel is built with * is just setting the LSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * - * NOTE: This macro must be used *after* SAVE_REGS because it corrupts + * NOTE: This macro must be used *after* SAVE_AND_CLEAR_REGS because it corrupts * the original rbp. */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f4d72b81d9c3..3a48fa6b6553 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -561,8 +561,7 @@ END(irq_entries_start) 1: ALLOC_PT_GPREGS_ON_STACK - SAVE_REGS - CLEAR_REGS_NOSPEC + SAVE_AND_CLEAR_REGS ENCODE_FRAME_POINTER testb $3, CS(%rsp) @@ -1108,8 +1107,7 @@ ENTRY(xen_failsafe_callback) UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK - SAVE_REGS - CLEAR_REGS_NOSPEC + SAVE_AND_CLEAR_REGS ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) @@ -1153,8 +1151,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld - SAVE_REGS 8 - CLEAR_REGS_NOSPEC + SAVE_AND_CLEAR_REGS 8 ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx @@ -1205,8 +1202,7 @@ END(paranoid_exit) ENTRY(error_entry) UNWIND_HINT_FUNC cld - SAVE_REGS 8 - CLEAR_REGS_NOSPEC + SAVE_AND_CLEAR_REGS 8 ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1393,18 +1389,34 @@ ENTRY(nmi) pushq (%rdx) /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq %rax /* pt_regs->ax */ + /* + * Sanitize registers of values that a speculation attack + * might otherwise want to exploit. The lower registers are + * likely clobbered well before they could be put to use in + * a speculative execution gadget. Interleave XOR with PUSH + * for better uop scheduling: + */ pushq %r8 /* pt_regs->r8 */ + xorq %r8, %r8 /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ + xorq %r9, %r9 /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ + xorq %r10, %r10 /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ + xorq %r11, %r11 /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15*/ UNWIND_HINT_REGS - CLEAR_REGS_NOSPEC ENCODE_FRAME_POINTER /* From 3ce4863a44ff4e7e8d48d480853eea79cfd27bf6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:45 +0100 Subject: [PATCH 503/770] x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro commit 3f01daecd545e818098d84fd1ad43e19a508d705 upstream. Those instances where ALLOC_PT_GPREGS_ON_STACK is called just before SAVE_AND_CLEAR_REGS can trivially be replaced by PUSH_AND_CLEAN_REGS. This macro uses PUSH instead of MOV and should therefore be faster, at least on newer CPUs. Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_64.S | 6 ++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a05cbb81268d..57b1b87a04f0 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -137,6 +137,42 @@ For 32-bit we have the following conventions - kernel is built with UNWIND_HINT_REGS offset=\offset .endm + .macro PUSH_AND_CLEAR_REGS + /* + * Push registers and sanitize registers of values that a + * speculation attack might otherwise want to exploit. The + * lower registers are likely clobbered well before they + * could be put to use in a speculative execution gadget. + * Interleave XOR with PUSH for better uop scheduling: + */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + xorq %r8, %r8 /* nospec r8 */ + pushq %r9 /* pt_regs->r9 */ + xorq %r9, %r9 /* nospec r9 */ + pushq %r10 /* pt_regs->r10 */ + xorq %r10, %r10 /* nospec r10 */ + pushq %r11 /* pt_regs->r11 */ + xorq %r11, %r11 /* nospec r11*/ + pushq %rbx /* pt_regs->rbx */ + xorl %ebx, %ebx /* nospec rbx*/ + pushq %rbp /* pt_regs->rbp */ + xorl %ebp, %ebp /* nospec rbp*/ + pushq %r12 /* pt_regs->r12 */ + xorq %r12, %r12 /* nospec r12*/ + pushq %r13 /* pt_regs->r13 */ + xorq %r13, %r13 /* nospec r13*/ + pushq %r14 /* pt_regs->r14 */ + xorq %r14, %r14 /* nospec r14*/ + pushq %r15 /* pt_regs->r15 */ + xorq %r15, %r15 /* nospec r15*/ + UNWIND_HINT_REGS + .endm + .macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3a48fa6b6553..cf8119c46c2a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -560,8 +560,7 @@ END(irq_entries_start) call switch_to_thread_stack 1: - ALLOC_PT_GPREGS_ON_STACK - SAVE_AND_CLEAR_REGS + PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER testb $3, CS(%rsp) @@ -1106,8 +1105,7 @@ ENTRY(xen_failsafe_callback) addq $0x30, %rsp UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ - ALLOC_PT_GPREGS_ON_STACK - SAVE_AND_CLEAR_REGS + PUSH_AND_CLEAR_REGS ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) From bd25388691c041903c084c68dc7da0fc45265d98 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:46 +0100 Subject: [PATCH 504/770] x86/entry/64: Use PUSH_AND_CLEAN_REGS in more cases commit 30907fd13bb593202574bb20af58d67c70a1ee14 upstream. entry_SYSCALL_64_after_hwframe() and nmi() can be converted to use PUSH_AND_CLEAN_REGS instead of opencoded variants thereof. Due to the interleaving, the additional XOR-based clearing of R8 and R9 in entry_SYSCALL_64_after_hwframe() should not have any noticeable negative implications. Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 6 ++-- arch/x86/entry/entry_64.S | 65 ++------------------------------------- 2 files changed, 6 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 57b1b87a04f0..d6a97e2945ee 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -137,7 +137,7 @@ For 32-bit we have the following conventions - kernel is built with UNWIND_HINT_REGS offset=\offset .endm - .macro PUSH_AND_CLEAR_REGS + .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax /* * Push registers and sanitize registers of values that a * speculation attack might otherwise want to exploit. The @@ -147,9 +147,9 @@ For 32-bit we have the following conventions - kernel is built with */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ + pushq \rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ - pushq %rax /* pt_regs->ax */ + pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ xorq %r8, %r8 /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cf8119c46c2a..02c9b54203b6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -223,35 +223,8 @@ ENTRY(entry_SYSCALL_64) pushq %rcx /* pt_regs->ip */ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %rax /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ - pushq %r9 /* pt_regs->r9 */ - pushq %r10 /* pt_regs->r10 */ - /* - * Clear extra registers that a speculation attack might - * otherwise want to exploit. Interleave XOR with PUSH - * for better uop scheduling: - */ - xorq %r10, %r10 /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ - xorq %r11, %r11 /* nospec r11 */ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx */ - pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp */ - pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12 */ - pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13 */ - pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14 */ - pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15 */ - UNWIND_HINT_REGS + + PUSH_AND_CLEAR_REGS rax=$-ENOSYS TRACE_IRQS_OFF @@ -1382,39 +1355,7 @@ ENTRY(nmi) pushq 1*8(%rdx) /* pt_regs->rip */ UNWIND_HINT_IRET_REGS pushq $-1 /* pt_regs->orig_ax */ - pushq %rdi /* pt_regs->di */ - pushq %rsi /* pt_regs->si */ - pushq (%rdx) /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ - pushq %rax /* pt_regs->ax */ - /* - * Sanitize registers of values that a speculation attack - * might otherwise want to exploit. The lower registers are - * likely clobbered well before they could be put to use in - * a speculative execution gadget. Interleave XOR with PUSH - * for better uop scheduling: - */ - pushq %r8 /* pt_regs->r8 */ - xorq %r8, %r8 /* nospec r8 */ - pushq %r9 /* pt_regs->r9 */ - xorq %r9, %r9 /* nospec r9 */ - pushq %r10 /* pt_regs->r10 */ - xorq %r10, %r10 /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ - xorq %r11, %r11 /* nospec r11*/ - pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx*/ - pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp*/ - pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12*/ - pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13*/ - pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14*/ - pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15*/ - UNWIND_HINT_REGS + PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER /* From de66c3a3b0a6403702d6d6ead20ccd38bae615d7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:47 +0100 Subject: [PATCH 505/770] x86/entry/64: Get rid of the ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS macros commit dde3036d62ba3375840b10ab9ec0d568fd773b07 upstream. Previously, error_entry() and paranoid_entry() saved the GP registers onto stack space previously allocated by its callers. Combine these two steps in the callers, and use the generic PUSH_AND_CLEAR_REGS macro for that. This adds a significant amount ot text size. However, Ingo Molnar points out that: "these numbers also _very_ significantly over-represent the extra footprint. The assumptions that resulted in us compressing the IRQ entry code have changed very significantly with the new x86 IRQ allocation code we introduced in the last year: - IRQ vectors are usually populated in tightly clustered groups. With our new vector allocator code the typical per CPU allocation percentage on x86 systems is ~3 device vectors and ~10 fixed vectors out of ~220 vectors - i.e. a very low ~6% utilization (!). [...] The days where we allocated a lot of vectors on every CPU and the compression of the IRQ entry code text mattered are over. - Another issue is that only a small minority of vectors is frequent enough to actually matter to cache utilization in practice: 3-4 key IPIs and 1-2 device IRQs at most - and those vectors tend to be tightly clustered as well into about two groups, and are probably already on 2-3 cache lines in practice. For the common case of 'cache cold' IRQs it's the depth of the call chain and the fragmentation of the resulting I$ that should be the main performance limit - not the overall size of it. - The CPU side cost of IRQ delivery is still very expensive even in the best, most cached case, as in 'over a thousand cycles'. So much stuff is done that maybe contemporary x86 IRQ entry microcode already prefetches the IDT entry and its expected call target address."[*] [*] http://lkml.kernel.org/r/20180208094710.qnjixhm6hybebdv7@gmail.com The "testb $3, CS(%rsp)" instruction in the idtentry macro does not need modification. Previously, %rsp was manually decreased by 15*8; with this patch, %rsp is decreased by 15 pushq instructions. [jpoimboe@redhat.com: unwind hint improvements] Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-7-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 42 +-------------------------------------- arch/x86/entry/entry_64.S | 20 +++++++++---------- 2 files changed, 10 insertions(+), 52 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index d6a97e2945ee..59675010c9a0 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,46 +97,6 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 - .macro ALLOC_PT_GPREGS_ON_STACK - addq $-(15*8), %rsp - .endm - - .macro SAVE_AND_CLEAR_REGS offset=0 - /* - * Save registers and sanitize registers of values that a - * speculation attack might otherwise want to exploit. The - * lower registers are likely clobbered well before they - * could be put to use in a speculative execution gadget. - * Interleave XOR with PUSH for better uop scheduling: - */ - movq %rdi, 14*8+\offset(%rsp) - movq %rsi, 13*8+\offset(%rsp) - movq %rdx, 12*8+\offset(%rsp) - movq %rcx, 11*8+\offset(%rsp) - movq %rax, 10*8+\offset(%rsp) - movq %r8, 9*8+\offset(%rsp) - xorq %r8, %r8 /* nospec r8 */ - movq %r9, 8*8+\offset(%rsp) - xorq %r9, %r9 /* nospec r9 */ - movq %r10, 7*8+\offset(%rsp) - xorq %r10, %r10 /* nospec r10 */ - movq %r11, 6*8+\offset(%rsp) - xorq %r11, %r11 /* nospec r11 */ - movq %rbx, 5*8+\offset(%rsp) - xorl %ebx, %ebx /* nospec rbx */ - movq %rbp, 4*8+\offset(%rsp) - xorl %ebp, %ebp /* nospec rbp */ - movq %r12, 3*8+\offset(%rsp) - xorq %r12, %r12 /* nospec r12 */ - movq %r13, 2*8+\offset(%rsp) - xorq %r13, %r13 /* nospec r13 */ - movq %r14, 1*8+\offset(%rsp) - xorq %r14, %r14 /* nospec r14 */ - movq %r15, 0*8+\offset(%rsp) - xorq %r15, %r15 /* nospec r15 */ - UNWIND_HINT_REGS offset=\offset - .endm - .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax /* * Push registers and sanitize registers of values that a @@ -211,7 +171,7 @@ For 32-bit we have the following conventions - kernel is built with * is just setting the LSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * - * NOTE: This macro must be used *after* SAVE_AND_CLEAR_REGS because it corrupts + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts * the original rbp. */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 02c9b54203b6..8a3280bf79a1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -867,7 +867,9 @@ ENTRY(\sym) pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif - ALLOC_PT_GPREGS_ON_STACK + /* Save all registers in pt_regs */ + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER .if \paranoid < 2 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ @@ -1115,15 +1117,12 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* - * Save all registers in pt_regs, and switch gs if needed. + * Switch gs if needed. * Use slow, but surefire "are we in kernel?" check. * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) - UNWIND_HINT_FUNC cld - SAVE_AND_CLEAR_REGS 8 - ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx rdmsr @@ -1136,7 +1135,7 @@ ENTRY(paranoid_entry) SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret -END(paranoid_entry) +ENDPROC(paranoid_entry) /* * "Paranoid" exit path from exception stack. This is invoked @@ -1167,14 +1166,12 @@ ENTRY(paranoid_exit) END(paranoid_exit) /* - * Save all registers in pt_regs, and switch gs if needed. + * Switch gs if needed. * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) - UNWIND_HINT_FUNC + UNWIND_HINT_REGS offset=8 cld - SAVE_AND_CLEAR_REGS 8 - ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1565,7 +1562,8 @@ end_repeat_nmi: * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ - ALLOC_PT_GPREGS_ON_STACK + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER /* * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit From a816dd2fa2dcbde206bc67ebbdf8bbf75a54936c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:49:48 +0100 Subject: [PATCH 506/770] x86/entry/64: Indent PUSH_AND_CLEAR_REGS and POP_REGS properly commit 92816f571af81e9a71cc6f3dc8ce1e2fcdf7b6b8 upstream. ... same as the other macros in arch/x86/entry/calling.h Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180211104949.12992-8-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 59675010c9a0..6985440c68fa 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 - .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax /* * Push registers and sanitize registers of values that a * speculation attack might otherwise want to exploit. The @@ -131,9 +131,9 @@ For 32-bit we have the following conventions - kernel is built with pushq %r15 /* pt_regs->r15 */ xorq %r15, %r15 /* nospec r15*/ UNWIND_HINT_REGS - .endm +.endm - .macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 popq %r13 @@ -163,7 +163,7 @@ For 32-bit we have the following conventions - kernel is built with .macro icebp .byte 0xf1 - .endm +.endm /* * This is a sneaky trick to help the unwinder find pt_regs on the stack. The From bdcf05c64a64743f512f12b17d4f9d8b14d2ad31 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 12 Feb 2018 11:45:03 -0600 Subject: [PATCH 507/770] x86/entry/64: Fix paranoid_entry() frame pointer warning commit b3ccefaed922529e6a67de7b30af5aa38c76ace9 upstream. With the following commit: f09d160992d1 ("x86/entry/64: Get rid of the ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS macros") ... one of my suggested improvements triggered a frame pointer warning: arch/x86/entry/entry_64.o: warning: objtool: paranoid_entry()+0x11: call without frame pointer save/setup The warning is correct for the build-time code, but it's actually not relevant at runtime because of paravirt patching. The paravirt swapgs call gets replaced with either a SWAPGS instruction or NOPs at runtime. Go back to the previous behavior by removing the ELF function annotation for paranoid_entry() and adding an unwind hint, which effectively silences the warning. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Dominik Brodowski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild-all@01.org Cc: tipbuild@zytor.com Fixes: f09d160992d1 ("x86/entry/64: Get rid of the ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS macros") Link: http://lkml.kernel.org/r/20180212174503.5acbymg5z6p32snu@treble Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8a3280bf79a1..c5ef704df069 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1122,6 +1122,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) + UNWIND_HINT_FUNC cld movl $1, %ebx movl $MSR_GS_BASE, %ecx @@ -1135,7 +1136,7 @@ ENTRY(paranoid_entry) SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret -ENDPROC(paranoid_entry) +END(paranoid_entry) /* * "Paranoid" exit path from exception stack. This is invoked From 5cf7b883bcbaca7b85002770915e39eb6d510309 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 12 Feb 2018 21:13:18 +0100 Subject: [PATCH 508/770] x86/entry/64: Remove the unused 'icebp' macro commit b498c261107461d5c42140dfddd05df83d8ca078 upstream. That macro was touched around 2.5.8 times, judging by the full history linux repo, but it was unused even then. Get rid of it already. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux@dominikbrodowski.net Link: http://lkml.kernel.org/r/20180212201318.GD14640@pd.tnic Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 6985440c68fa..dce7092ab24a 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -159,10 +159,6 @@ For 32-bit we have the following conventions - kernel is built with .if \pop_rdi popq %rdi .endif - .endm - - .macro icebp - .byte 0xf1 .endm /* From 301e6fe0915c8b2eab4da812de4f16c583e5b113 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:10 +0100 Subject: [PATCH 509/770] selftests/x86: Fix vDSO selftest segfault for vsyscall=none commit 198ee8e17502da2634f7366395db1d77630e0219 upstream. The vDSO selftest tries to execute a vsyscall unconditionally, even if it is not present on the test system (e.g. if booted with vsyscall=none or with CONFIG_LEGACY_VSYSCALL_NONE=y set. Fix this by copying (and tweaking) the vsyscall check from test_vsyscall.c Signed-off-by: Dominik Brodowski Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/test_vdso.c | 52 +++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 29973cde06d3..558c8207e7b9 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -28,18 +28,52 @@ int nerrs = 0; -#ifdef __x86_64__ -# define VSYS(x) (x) -#else -# define VSYS(x) 0 -#endif - typedef long (*getcpu_t)(unsigned *, unsigned *, void *); -const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +getcpu_t vgetcpu; getcpu_t vdso_getcpu; -void fill_function_pointers() +static void *vsyscall_getcpu(void) +{ +#ifdef __x86_64__ + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ + return NULL; + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + /* assume entries are OK, as we test vDSO here not vsyscall */ + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("Warning: failed to find vsyscall getcpu\n"); + return NULL; + } + return (void *) (0xffffffffff600800); +#else + return NULL; +#endif +} + + +static void fill_function_pointers() { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); @@ -54,6 +88,8 @@ void fill_function_pointers() vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); + + vgetcpu = (getcpu_t) vsyscall_getcpu(); } static long sys_getcpu(unsigned * cpu, unsigned * node, From 3eb95d5187a3df8c0aa1d5f531a6b9f567fcd9a7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 21:59:24 +0100 Subject: [PATCH 510/770] selftests/x86: Clean up and document sscanf() usage commit d8e92de8ef952bed88c56c7a44c02d8dcae0984e upstream. Replace a couple of magically connected buffer length literal constants with a common definition that makes their relationship obvious. Also document why our sscanf() usage is safe. No intended functional changes. Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211205924.GA23210@light.dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/test_vdso.c | 11 ++++++++--- tools/testing/selftests/x86/test_vsyscall.c | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 558c8207e7b9..235259011704 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -26,6 +26,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + int nerrs = 0; typedef long (*getcpu_t)(unsigned *, unsigned *, void *); @@ -37,17 +40,19 @@ static void *vsyscall_getcpu(void) { #ifdef __x86_64__ FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ return NULL; - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 6e0bd52ad53d..003b6c55b10e 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -33,6 +33,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -98,7 +101,7 @@ static int init_vsys(void) #ifdef __x86_64__ int nerrs = 0; FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); @@ -108,10 +111,12 @@ static int init_vsys(void) return 0; } - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; From e6eced764e1a18665212c349f100188a337d8fa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Feb 2018 08:26:17 +0100 Subject: [PATCH 511/770] selftests/x86/pkeys: Remove unused functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce676638fe7b284132a7d7d5e7e7ad81bab9947e upstream. This also gets rid of two build warnings: protection_keys.c: In function ‘dumpit’: protection_keys.c:419:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] write(1, buf, nr_read); ^~~~~~~~~~~~~~~~~~~~~~ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Dave Hansen Cc: Shuah Khan Cc: Andy Lutomirski Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 7a1cc0e56d2d..6cbb83b47150 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -393,34 +393,6 @@ pid_t fork_lazy_child(void) return forkret; } -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 From cf4db6342da4c433b814ccdad0461e1ca7477695 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 12:10:11 +0100 Subject: [PATCH 512/770] selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c commit 2cbc0d66de0480449c75636f55697c7ff3af61fc upstream. On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). Without this patch, the move test may succeed, but the "int $0x80" causes a segfault, resulting in a false negative output of this self-test. Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/test_mremap_vdso.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index bf0d687c7db7..64f11c8d9b76 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp) vdso_size += PAGE_SIZE; } +#ifdef __i386__ /* Glibc is likely to explode now - exit with raw syscall */ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); +#else /* __x86_64__ */ + syscall(SYS_exit, ret); +#endif } else { int status; From 8520ea2a043842b3b52d95210ee774ed16748ef5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Feb 2018 09:13:21 +0100 Subject: [PATCH 513/770] selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c commit 4105c69703cdeba76f384b901712c9397b04e9c2 upstream. On 64-bit builds, we should not rely on "int $0x80" working (it only does if CONFIG_IA32_EMULATION=y is enabled). To keep the "Set TF and check int80" test running on 64-bit installs with CONFIG_IA32_EMULATION=y enabled, build this test only if we can also build 32-bit binaries (which should be a good approximation for that). Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 2 ++ tools/testing/selftests/x86/single_step_syscall.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 91fbfa8fdc15..73b8ef665c98 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -30,11 +30,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +EXTRA_CFLAGS += -DCAN_BUILD_32 endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +EXTRA_CFLAGS += -DCAN_BUILD_64 endif all_32: $(BINARIES_32) diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index a48da95c18fd..ddfdd635de16 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -119,7 +119,9 @@ static void check_result(void) int main() { +#ifdef CAN_BUILD_32 int tmp; +#endif sethandler(SIGTRAP, sigtrap, 0); @@ -139,12 +141,13 @@ int main() : : "c" (post_nop) : "r11"); check_result(); #endif - +#ifdef CAN_BUILD_32 printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) : INT80_CLOBBERS); check_result(); +#endif /* * This test is particularly interesting if fast syscalls use From 410d273ecc8ed1f30ef0d06acb2d22ffa29cd732 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Feb 2018 09:15:19 +0100 Subject: [PATCH 514/770] selftests/x86: Disable tests requiring 32-bit support on pure 64-bit systems commit 9279ddf23ce78ff2676e8e8e19fec0f022c26d04 upstream. The ldt_gdt and ptrace_syscall selftests, even in their 64-bit variant, use hard-coded 32-bit syscall numbers and call "int $0x80". This will fail on 64-bit systems with CONFIG_IA32_EMULATION=y disabled. Therefore, do not build these tests if we cannot build 32-bit binaries (which should be a good approximation for CONFIG_IA32_EMULATION=y being enabled). Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Dmitry Safonov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Link: http://lkml.kernel.org/r/20180211111013.16888-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/Makefile | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 73b8ef665c98..aa6e2d7f6a1f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,16 +5,26 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ + check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ protection_keys test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +# Some selftests require 32bit support enabled also on 64bit systems +TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall -TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11) +TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED) +endif + BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) @@ -23,10 +33,6 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie -UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) - ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) From 842a0d95ef16d8e3f85cd5316efc8c25934ff3ce Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 8 Feb 2018 17:09:25 -0600 Subject: [PATCH 515/770] objtool: Fix segfault in ignore_unreachable_insn() commit fe24e27128252c230a34a6c628da2bf1676781ea upstream. Peter Zijlstra's patch for converting WARN() to use UD2 triggered a bunch of false "unreachable instruction" warnings, which then triggered a seg fault in ignore_unreachable_insn(). The seg fault happened when it tried to dereference a NULL 'insn->func' pointer. Thanks to static_cpu_has(), some functions can jump to a non-function area in the .altinstr_aux section. That breaks ignore_unreachable_insn()'s assumption that it's always inside the original function. Make sure ignore_unreachable_insn() only follows jumps within the current function. Reported-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild test robot Link: http://lkml.kernel.org/r/bace77a60d5af9b45eddb8f8fb9c776c8de657ef.1518130694.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2e458eb45586..c7fb5c2392ee 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1935,13 +1935,19 @@ static bool ignore_unreachable_insn(struct instruction *insn) if (is_kasan_insn(insn) || is_ubsan_insn(insn)) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { - insn = insn->jump_dest; - continue; + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + if (insn->jump_dest && + insn->jump_dest->func == insn->func) { + insn = insn->jump_dest; + continue; + } + + break; } if (insn->offset + insn->len >= insn->func->offset + insn->func->len) break; + insn = list_next_entry(insn, list); } From 4e0067c22d6b38a7b882e560b3db4c103166575e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 8 Feb 2018 17:09:26 -0600 Subject: [PATCH 516/770] x86/debug, objtool: Annotate WARN()-related UD2 as reachable commit 2b5db66862b95532cb6cca8165ae6eb73633cf85 upstream. By default, objtool assumes that a UD2 is a dead end. This is mainly because GCC 7+ sometimes inserts a UD2 when it detects a divide-by-zero condition. Now that WARN() is moving back to UD2, annotate the code after it as reachable so objtool can follow the code flow. Reported-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild test robot Link: http://lkml.kernel.org/r/0e483379275a42626ba8898117f918e1bf661e40.1518130694.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/bug.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 34d99af43994..71e6f4bf9161 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -77,7 +77,11 @@ do { \ unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)) +#define __WARN_FLAGS(flags) \ +do { \ + _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)); \ + annotate_reachable(); \ +} while (0) #include From 41d315482500e9612d1906b7a057fbe9c4b6c676 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Feb 2018 13:16:59 +0100 Subject: [PATCH 517/770] x86/debug: Use UD2 for WARN() commit 3b3a371cc9bc980429baabe0a8e5f307f3d1f463 upstream. Since the Intel SDM added an ModR/M byte to UD0 and binutils followed that specification, we now cannot disassemble our kernel anymore. This now means Intel and AMD disagree on the encoding of UD0. And instead of playing games with additional bytes that are valid ModR/M and single byte instructions (0xd6 for instance), simply use UD2 for both WARN() and BUG(). Requested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180208194406.GD25181@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/bug.h | 15 ++++++--------- arch/x86/kernel/traps.c | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 71e6f4bf9161..6804d6642767 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,23 +5,20 @@ #include /* - * Since some emulators terminate on UD2, we cannot use it for WARN. - * Since various instruction decoders disagree on the length of UD1, - * we cannot use it either. So use UD0 for WARN. + * Despite that some emulators terminate on UD2, we use it for WARN(). * - * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas - * our kernel decoder thinks it takes a ModRM byte, which seems consistent - * with various things like the Intel SDM instruction encoding rules) + * Since various instruction decoders/specs disagree on the encoding of + * UD0/UD1. */ -#define ASM_UD0 ".byte 0x0f, 0xff" +#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */ #define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ #define ASM_UD2 ".byte 0x0f, 0x0b" #define INSN_UD0 0xff0f #define INSN_UD2 0x0b0f -#define LEN_UD0 2 +#define LEN_UD2 2 #ifdef CONFIG_GENERIC_BUG @@ -79,7 +76,7 @@ do { \ #define __WARN_FLAGS(flags) \ do { \ - _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)); \ + _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ annotate_reachable(); \ } while (0) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b33e860d32fe..0ddaeb2184a6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr) break; case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD0; + regs->ip += LEN_UD2; return 1; } From 9d4cb4dc7a110ef253e99808b66b45c726ba5596 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Feb 2018 18:22:40 -0800 Subject: [PATCH 518/770] x86/speculation: Fix up array_index_nospec_mask() asm constraint commit be3233fbfcb8f5acb6e3bcd0895c3ef9e100d470 upstream. Allow the compiler to handle @size as an immediate value or memory directly rather than allocating a register. Reported-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151797010204.1289.1510000292250184993.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 1e7c955b6303..4db77731e130 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, asm ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) - :"r"(size),"r" (index) + :"g"(size),"r" (index) :"cc"); return mask; } From 8b4cdbbb29d4eb34f02d5e1419789b53c3cbb930 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Feb 2018 14:16:06 +0000 Subject: [PATCH 519/770] nospec: Move array_index_nospec() parameter checking into separate macro commit 8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 upstream. For architectures providing their own implementation of array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to complain about out-of-range parameters using WARN_ON() results in a mess of mutually-dependent include files. Rather than unpick the dependencies, simply have the core code in nospec.h perform the checking for us. Signed-off-by: Will Deacon Acked-by: Thomas Gleixner Cc: Dan Williams Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index b99bced39ac2..fbc98e2c8228 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -19,20 +19,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, unsigned long size) { - /* - * Warn developers about inappropriate array_index_nospec() usage. - * - * Even if the CPU speculates past the WARN_ONCE branch, the - * sign bit of @index is taken into account when generating the - * mask. - * - * This warning is compiled out when the compiler can infer that - * @index and @size are less than LONG_MAX. - */ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) - return 0; - /* * Always calculate and emit the mask even if the compiler * thinks the mask is not needed. The compiler does not take @@ -43,6 +29,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, } #endif +/* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ +#define array_index_mask_nospec_check(index, size) \ +({ \ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ + _mask = 0; \ + else \ + _mask = array_index_mask_nospec(index, size); \ + _mask; \ +}) + /* * array_index_nospec - sanitize an array index after a bounds check * @@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec(_i, _s); \ + unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ From 1fed58f610b57c1190862819eba6f7bc5396b08d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Feb 2018 14:28:19 +0100 Subject: [PATCH 520/770] x86/speculation: Add dependency commit ea00f301285ea2f07393678cd2b6057878320c9d upstream. Joe Konno reported a compile failure resulting from using an MSR without inclusion of , and while the current code builds fine (by accident) this needs fixing for future patches. Reported-by: Joe Konno Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan@linux.intel.com Cc: bp@alien8.de Cc: dan.j.williams@intel.com Cc: dave.hansen@linux.intel.com Cc: dwmw2@infradead.org Cc: dwmw@amazon.co.uk Cc: gregkh@linuxfoundation.org Cc: hpa@zytor.com Cc: jpoimboe@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: luto@kernel.org Fixes: 20ffa1caecca ("x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support") Link: http://lkml.kernel.org/r/20180213132819.GJ25201@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 300cc159b4a0..76b058533e47 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __ASSEMBLY__ From 2abfcdf8e77d3719aa1d37b1f9de800fa596eda3 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:51 -0800 Subject: [PATCH 521/770] kmemcheck: remove annotations commit 4950276672fce5c241857540f8561c440663673d upstream. Patch series "kmemcheck: kill kmemcheck", v2. As discussed at LSF/MM, kill kmemcheck. KASan is a replacement that is able to work without the limitation of kmemcheck (single CPU, slow). KASan is already upstream. We are also not aware of any users of kmemcheck (or users who don't consider KASan as a suitable replacement). The only objection was that since KASAN wasn't supported by all GCC versions provided by distros at that time we should hold off for 2 years, and try again. Now that 2 years have passed, and all distros provide gcc that supports KASAN, kill kmemcheck again for the very same reasons. This patch (of 4): Remove kmemcheck annotations, and calls to kmemcheck from the kernel. [alexander.levin@verizon.com: correctly remove kmemcheck call from dma_map_sg_attrs] Link: http://lkml.kernel.org/r/20171012192151.26531-1-alexander.levin@verizon.com Link: http://lkml.kernel.org/r/20171007030159.22241-2-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/dma-iommu.h | 1 - arch/openrisc/include/asm/dma-mapping.h | 1 - arch/x86/Makefile | 5 ----- arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/include/asm/xor.h | 5 +---- arch/x86/kernel/traps.c | 5 ----- arch/x86/mm/fault.c | 6 ------ drivers/char/random.c | 1 - drivers/misc/c2port/core.c | 2 -- fs/dcache.c | 2 -- include/linux/c2port.h | 4 ---- include/linux/dma-mapping.h | 8 +------- include/linux/filter.h | 2 -- include/linux/mm_types.h | 8 -------- include/linux/net.h | 3 --- include/linux/ring_buffer.h | 3 --- include/linux/skbuff.h | 3 --- include/net/inet_sock.h | 3 --- include/net/inet_timewait_sock.h | 4 ---- include/net/sock.h | 3 --- init/main.c | 1 - kernel/bpf/core.c | 6 ------ kernel/locking/lockdep.c | 3 --- kernel/trace/ring_buffer.c | 3 --- mm/kmemleak.c | 9 --------- mm/page_alloc.c | 14 -------------- mm/slab.c | 14 -------------- mm/slab.h | 2 -- mm/slub.c | 20 -------------------- net/core/skbuff.c | 5 ----- net/core/sock.c | 2 -- net/ipv4/inet_timewait_sock.c | 3 --- net/ipv4/tcp_input.c | 1 - net/socket.c | 1 - 34 files changed, 2 insertions(+), 152 deletions(-) diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 0722ec6be692..6821f1249300 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index f41bd3cb76d9..e212a1f0b6d2 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h @@ -23,7 +23,6 @@ */ #include -#include #include extern const struct dma_map_ops or1k_dma_map_ops; diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 504b1a4535ac..fad55160dcb9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -158,11 +158,6 @@ ifdef CONFIG_X86_X32 endif export CONFIG_X86_X32_ABI -# Don't unroll struct assignments with kmemcheck enabled -ifeq ($(CONFIG_KMEMCHECK),y) - KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) -endif - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 836ca1178a6a..69f16f0729d0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -7,7 +7,6 @@ * Documentation/DMA-API.txt for documentation. */ -#include #include #include #include diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 1f5c5161ead6..45c8605467f1 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,7 +1,4 @@ -#ifdef CONFIG_KMEMCHECK -/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ -# include -#elif !defined(_ASM_X86_XOR_H) +#ifndef _ASM_X86_XOR_H #define _ASM_X86_XOR_H /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0ddaeb2184a6..a66428dc92ae 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -42,7 +42,6 @@ #include #endif -#include #include #include #include @@ -764,10 +763,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions! */ - if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - goto exit; - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b264b590eeec..9150fe2c9b26 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -20,7 +20,6 @@ #include /* boot_cpu_has, ... */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ -#include /* kmemcheck_*(), ... */ #include /* VSYSCALL_ADDR */ #include /* emulate_vsyscall */ #include /* struct vm86 */ @@ -1257,8 +1256,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * Detect and handle instructions that would cause a page fault for * both a tracked kernel page and a userspace page. */ - if (kmemcheck_active(regs)) - kmemcheck_hide(regs); prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) @@ -1281,9 +1278,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { if (vmalloc_fault(address) >= 0) return; - - if (kmemcheck_fault(regs, address, error_code)) - return; } /* Can handle a stale RO->RW TLB: */ diff --git a/drivers/char/random.c b/drivers/char/random.c index 8ad92707e45f..ea0115cf5fc0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -259,7 +259,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index 1922cb8f6b88..1c5b7aec13d4 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -904,7 +903,6 @@ struct c2port_device *c2port_device_register(char *name, return ERR_PTR(-EINVAL); c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); - kmemcheck_annotate_bitfield(c2dev, flags); if (unlikely(!c2dev)) return ERR_PTR(-ENOMEM); diff --git a/fs/dcache.c b/fs/dcache.c index 34c852af215c..b8d999a5768b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2705,8 +2705,6 @@ static void swap_names(struct dentry *dentry, struct dentry *target) */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN); - kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 4efabcb51347..f2736348ca26 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h @@ -9,8 +9,6 @@ * the Free Software Foundation */ -#include - #define C2PORT_NAME_LEN 32 struct device; @@ -22,10 +20,8 @@ struct device; /* Main struct */ struct c2port_ops; struct c2port_device { - kmemcheck_bitfield_begin(flags); unsigned int access:1; unsigned int flash_access:1; - kmemcheck_bitfield_end(flags); int id; char name[C2PORT_NAME_LEN]; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 46930f82a988..7bf3b99e6fbb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -230,7 +229,6 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(ptr, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, @@ -263,11 +261,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - int i, ents; - struct scatterlist *s; + int ents; - for_each_sg(sg, s, nents, i) - kmemcheck_mark_initialized(sg_virt(s), s->length); BUG_ON(!valid_dma_direction(dir)); ents = ops->map_sg(dev, sg, nents, dir, attrs); BUG_ON(ents < 0); @@ -297,7 +292,6 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(page_address(page) + offset, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, page, offset, size, dir, attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, false); diff --git a/include/linux/filter.h b/include/linux/filter.h index 48ec57e70f9f..42197b16dd78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,13 +454,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ - kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c85f11dafd56..9f0bb908e2b5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -207,14 +207,6 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_KMEMCHECK - /* - * kmemcheck wants to track the status of each byte in a page; this - * is a pointer to such a status block. NULL if not tracked. - */ - void *shadow; -#endif - #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif diff --git a/include/linux/net.h b/include/linux/net.h index d97d80d7fdf8..caeb159abda5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -22,7 +22,6 @@ #include #include #include /* For O_CLOEXEC and O_NONBLOCK */ -#include #include #include #include @@ -111,9 +110,7 @@ struct socket_wq { struct socket { socket_state state; - kmemcheck_bitfield_begin(type); short type; - kmemcheck_bitfield_end(type); unsigned long flags; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa6ace66fea5..289e4d54e3e0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -2,7 +2,6 @@ #ifndef _LINUX_RING_BUFFER_H #define _LINUX_RING_BUFFER_H -#include #include #include #include @@ -14,9 +13,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - kmemcheck_bitfield_begin(bitfield); u32 type_len:5, time_delta:27; - kmemcheck_bitfield_end(bitfield); u32 array[]; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 051e0939ec19..be45224b01d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -15,7 +15,6 @@ #define _LINUX_SKBUFF_H #include -#include #include #include #include @@ -706,7 +705,6 @@ struct sk_buff { /* Following fields are _not_ copied in __copy_skb_header() * Note that queue_mapping is here mostly to fill a hole. */ - kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; /* if you move cloned around you also must adapt those constants */ @@ -725,7 +723,6 @@ struct sk_buff { head_frag:1, xmit_more:1, __unused:1; /* one bit hole */ - kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index db8162dd8c0b..8e51b4a69088 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -17,7 +17,6 @@ #define _INET_SOCK_H #include -#include #include #include #include @@ -84,7 +83,6 @@ struct inet_request_sock { #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family - kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, @@ -93,7 +91,6 @@ struct inet_request_sock { ecn_ok : 1, acked : 1, no_srccheck: 1; - kmemcheck_bitfield_end(flags); u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6a75d67a30fd..1356fa6a7566 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -15,8 +15,6 @@ #ifndef _INET_TIMEWAIT_SOCK_ #define _INET_TIMEWAIT_SOCK_ - -#include #include #include #include @@ -69,14 +67,12 @@ struct inet_timewait_sock { /* Socket demultiplex comparisons on incoming packets. */ /* these three are in inet_sock */ __be16 tw_sport; - kmemcheck_bitfield_begin(flags); /* And these are ours. */ unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; - kmemcheck_bitfield_end(flags); struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; diff --git a/include/net/sock.h b/include/net/sock.h index 006580155a87..9bd5d68076d9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -436,7 +436,6 @@ struct sock { #define SK_FL_TYPE_MASK 0xffff0000 #endif - kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, @@ -445,8 +444,6 @@ struct sock { sk_protocol : 8, sk_type : 16; #define SK_PROTOCOL_MAX U8_MAX - kmemcheck_bitfield_end(flags); - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; diff --git a/init/main.c b/init/main.c index b32ec72cdf3d..2d355a61dfc5 100644 --- a/init/main.c +++ b/init/main.c @@ -69,7 +69,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2246115365d9..d203a5d6b726 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -85,8 +85,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) if (fp == NULL) return NULL; - kmemcheck_annotate_bitfield(fp, meta); - aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); if (aux == NULL) { vfree(fp); @@ -127,8 +125,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, if (fp == NULL) { __bpf_prog_uncharge(fp_old->aux->user, delta); } else { - kmemcheck_annotate_bitfield(fp, meta); - memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = pages; fp->aux->prog = fp; @@ -662,8 +658,6 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); if (fp != NULL) { - kmemcheck_annotate_bitfield(fp, meta); - /* aux->prog still points to the fp_other one, so * when promoting the clone to the real program, * this still needs to be adapted. diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e36e652d996f..4d362d3e4571 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -3225,8 +3224,6 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name, { int i; - kmemcheck_mark_initialized(lock, sizeof(*lock)); - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) lock->class_cache[i] = NULL; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0476a9372014..39c221454186 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -13,7 +13,6 @@ #include #include #include /* for self test */ -#include #include #include #include @@ -2059,7 +2058,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, } event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); /* account for padding bytes */ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); @@ -2690,7 +2688,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* We reserved something on the buffer */ event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); rb_update_event(cpu_buffer, event, info); local_inc(&tail_page->entries); diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a1ba553816eb..bd1374f402cd 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -110,7 +110,6 @@ #include #include -#include #include #include @@ -1238,9 +1237,6 @@ static bool update_checksum(struct kmemleak_object *object) { u32 old_csum = object->checksum; - if (!kmemcheck_is_obj_initialized(object->pointer, object->size)) - return false; - kasan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); @@ -1314,11 +1310,6 @@ static void scan_block(void *_start, void *_end, if (scan_should_stop()) break; - /* don't scan uninitialized memory */ - if (!kmemcheck_is_obj_initialized((unsigned long)ptr, - BYTES_PER_POINTER)) - continue; - kasan_disable_current(); pointer = *ptr; kasan_enable_current(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2de080003693..6627caeeaf82 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -1022,7 +1021,6 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(PageTail(page), page); trace_mm_page_free(page, order); - kmemcheck_free_shadow(page, order); /* * Check tail pages before head page information is cleared to @@ -2674,15 +2672,6 @@ void split_page(struct page *page, unsigned int order) VM_BUG_ON_PAGE(PageCompound(page), page); VM_BUG_ON_PAGE(!page_count(page), page); -#ifdef CONFIG_KMEMCHECK - /* - * Split shadow pages too, because free(page[0]) would - * otherwise free the whole shadow. - */ - if (kmemcheck_page_is_tracked(page)) - split_page(virt_to_page(page[0].shadow), order); -#endif - for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, order); @@ -4228,9 +4217,6 @@ out: page = NULL; } - if (kmemcheck_enabled && page) - kmemcheck_pagealloc_alloc(page, order, gfp_mask); - trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); return page; diff --git a/mm/slab.c b/mm/slab.c index b7095884fd93..50713827b3ed 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -114,7 +114,6 @@ #include #include #include -#include #include #include #include @@ -1435,15 +1434,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, if (sk_memalloc_socks() && page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); - if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { - kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); - - if (cachep->ctor) - kmemcheck_mark_uninitialized_pages(page, nr_pages); - else - kmemcheck_mark_unallocated_pages(page, nr_pages); - } - return page; } @@ -1455,8 +1445,6 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) int order = cachep->gfporder; unsigned long nr_freed = (1 << order); - kmemcheck_free_shadow(page, order); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed); else @@ -3516,8 +3504,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); - kmemcheck_slab_free(cachep, objp, cachep->object_size); - /* * Skip calling cache_free_alien() when the platform is not numa. * This will avoid cache misses that happen while accessing slabp (which diff --git a/mm/slab.h b/mm/slab.h index 86d7c7d860f9..438e4ace4d9d 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -40,7 +40,6 @@ struct kmem_cache { #include #include -#include #include #include #include @@ -439,7 +438,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, for (i = 0; i < size; i++) { void *object = p[i]; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); kasan_slab_alloc(s, object, flags); diff --git a/mm/slub.c b/mm/slub.c index 8e1c027a30f4..2de2fc3adbd2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -1375,7 +1374,6 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) unsigned long flags; local_irq_save(flags); - kmemcheck_slab_free(s, x, s->object_size); debug_check_no_locks_freed(x, s->object_size); local_irq_restore(flags); } @@ -1596,22 +1594,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) stat(s, ORDER_FALLBACK); } - if (kmemcheck_enabled && - !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { - int pages = 1 << oo_order(oo); - - kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node); - - /* - * Objects from caches that have a constructor don't get - * cleared when they're allocated, so we need to do it here. - */ - if (s->ctor) - kmemcheck_mark_uninitialized_pages(page, pages); - else - kmemcheck_mark_unallocated_pages(page, pages); - } - page->objects = oo_objects(oo); order = compound_order(page); @@ -1687,8 +1669,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) check_object(s, page, p, SLUB_RED_INACTIVE); } - kmemcheck_free_shadow(page, compound_order(page)); - mod_lruvec_page_state(page, (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 15fa5baa8fae..cc811add68c6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); - kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); @@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); return skb; } @@ -1284,7 +1280,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (!n) return NULL; - kmemcheck_annotate_bitfield(n, flags1); n->fclone = SKB_FCLONE_UNAVAILABLE; } diff --git a/net/core/sock.c b/net/core/sock.c index beb1e299fed3..ec6eb546b228 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { - kmemcheck_annotate_bitfield(sk, flags); - if (security_sk_alloc(sk, family, priority)) goto out_free; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 5b039159e67a..d451b9f19b59 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -9,7 +9,6 @@ */ #include -#include #include #include #include @@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, if (tw) { const struct inet_sock *inet = inet_sk(sk); - kmemcheck_annotate_bitfield(tw, flags); - tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ff48ac654e5a..d9d215e27b8a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6204,7 +6204,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, if (req) { struct inet_request_sock *ireq = inet_rsk(req); - kmemcheck_annotate_bitfield(ireq, flags); ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; diff --git a/net/socket.c b/net/socket.c index d894c7c5fa54..43d2f17f5eea 100644 --- a/net/socket.c +++ b/net/socket.c @@ -568,7 +568,6 @@ struct socket *sock_alloc(void) sock = SOCKET_I(inode); - kmemcheck_annotate_bitfield(sock, type); inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); From ae63fd26b2d81eaaf3cf35693dc01c0148bd7a9b Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:54 -0800 Subject: [PATCH 522/770] kmemcheck: stop using GFP_NOTRACK and SLAB_NOTRACK commit 75f296d93bcebcfe375884ddac79e30263a31766 upstream. Convert all allocations that used a NOTRACK flag to stop using it. Link: http://lkml.kernel.org/r/20171007030159.22241-3-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/pgalloc.h | 2 +- arch/arm64/include/asm/pgalloc.h | 2 +- arch/powerpc/include/asm/pgalloc.h | 2 +- arch/sh/kernel/dwarf.c | 4 ++-- arch/sh/kernel/process.c | 2 +- arch/sparc/mm/init_64.c | 4 ++-- arch/unicore32/include/asm/pgalloc.h | 2 +- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/mm/init.c | 3 +-- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/pageattr.c | 10 +++++----- arch/x86/mm/pgtable.c | 2 +- arch/x86/platform/efi/efi_64.c | 2 +- crypto/xor.c | 7 +------ include/linux/thread_info.h | 5 ++--- init/do_mounts.c | 3 +-- kernel/fork.c | 12 ++++++------ kernel/signal.c | 3 +-- mm/kmemcheck.c | 2 +- mm/slab.c | 2 +- mm/slab.h | 5 ++--- mm/slab_common.c | 2 +- mm/slub.c | 4 +--- 23 files changed, 36 insertions(+), 48 deletions(-) diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index b2902a5cd780..2d7344f0e208 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -57,7 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) static inline void clean_pte_table(pte_t *pte) { diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index d25f4f137c2a..5ca6a573a701 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index a14203c005f1..e11f03007b57 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -18,7 +18,7 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) } #endif /* MODULE */ -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) #ifdef CONFIG_PPC_BOOK3S #include diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index e1d751ae2498..1a2526676a87 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -1172,11 +1172,11 @@ static int __init dwarf_unwinder_init(void) dwarf_frame_cachep = kmem_cache_create("dwarf_frames", sizeof(struct dwarf_frame), 0, - SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); + SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL); dwarf_reg_cachep = kmem_cache_create("dwarf_regs", sizeof(struct dwarf_reg), 0, - SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); + SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL); dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, dwarf_frame_cachep); diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index b2d9963d5978..68b1a67533ce 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -59,7 +59,7 @@ void arch_task_cache_init(void) task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); + SLAB_PANIC, NULL); } #ifdef CONFIG_SH_FPU_EMU diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a0cc1be767c8..984e9d65ea0d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2934,7 +2934,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2946,7 +2946,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 26775793c204..f0fdb268f8f2 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) /* * Allocate one PTE table. diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..cbded50ee601 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more virtual address space for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6b462a472a7b..fcf38a2a4e7e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -93,8 +93,7 @@ __ref void *alloc_low_pages(unsigned int num) unsigned int order; order = get_order((unsigned long)num << PAGE_SHIFT); - return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | - __GFP_ZERO, order); + return (void *)__get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); } if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index adcea90a2046..5fa3a58b5d78 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -184,7 +184,7 @@ static __ref void *spp_getpage(void) void *ptr; if (after_bootmem) - ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); + ptr = (void *) get_zeroed_page(GFP_ATOMIC); else ptr = alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dfb7d657cf43..3ed9a08885c5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -753,7 +753,7 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); + base = alloc_pages(GFP_KERNEL, 0); if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); if (!base) @@ -904,7 +904,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) static int alloc_pte_page(pmd_t *pmd) { - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL); if (!pte) return -1; @@ -914,7 +914,7 @@ static int alloc_pte_page(pmd_t *pmd) static int alloc_pmd_page(pud_t *pud) { - pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd) return -1; @@ -1120,7 +1120,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgd_entry = cpa->pgd + pgd_index(addr); if (pgd_none(*pgd_entry)) { - p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); if (!p4d) return -1; @@ -1132,7 +1132,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) */ p4d = p4d_offset(pgd_entry, addr); if (p4d_none(*p4d)) { - pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + pud = (pud_t *)get_zeroed_page(GFP_KERNEL); if (!pud) return -1; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 9b7bcbd33cc2..004abf9ebf12 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -7,7 +7,7 @@ #include #include -#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 61975b6bcb1a..ad5d9538f0f9 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -211,7 +211,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; diff --git a/crypto/xor.c b/crypto/xor.c index 263af9fb45ea..bce9fe7af40a 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -122,12 +122,7 @@ calibrate_xor_blocks(void) goto out; } - /* - * Note: Since the memory is not actually used for _anything_ but to - * test the XOR speed, we don't really want kmemcheck to warn about - * reading uninitialized bytes here. - */ - b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2); + b1 = (void *) __get_free_pages(GFP_KERNEL, 2); if (!b1) { printk(KERN_WARNING "xor: Yikes! No memory available.\n"); return -ENOMEM; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 4bcdf00c110f..34f053a150a9 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -44,10 +44,9 @@ enum { #endif #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ - __GFP_ZERO) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #else -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) #endif /* diff --git a/init/do_mounts.c b/init/do_mounts.c index f6d4dd764a52..7cf4f6dafd5f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -380,8 +380,7 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) void __init mount_block_root(char *name, int flags) { - struct page *page = alloc_page(GFP_KERNEL | - __GFP_NOTRACK_FALSE_POSITIVE); + struct page *page = alloc_page(GFP_KERNEL); char *fs_names = page_address(page); char *p; #ifdef CONFIG_BLOCK diff --git a/kernel/fork.c b/kernel/fork.c index 500ce64517d9..98c91bd341b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -469,7 +469,7 @@ void __init fork_init(void) /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); + SLAB_PANIC|SLAB_ACCOUNT, NULL); #endif /* do the arch specific task caches init */ @@ -2208,18 +2208,18 @@ void __init proc_caches_init(void) sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| - SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); + SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); /* * FIXME! The "sizeof(struct mm_struct)" currently includes the @@ -2230,7 +2230,7 @@ void __init proc_caches_init(void) */ mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); diff --git a/kernel/signal.c b/kernel/signal.c index 1facff1dbbae..6895f6bb98a7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1038,8 +1038,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, else override_rlimit = 0; - q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, - override_rlimit); + q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); if (q) { list_add_tail(&q->list, &pending->list); switch ((unsigned long) info) { diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 800d64b854ea..b3a4d61d341c 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -18,7 +18,7 @@ void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) * With kmemcheck enabled, we need to allocate a memory area for the * shadow bits as well. */ - shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order); + shadow = alloc_pages_node(node, flags, order); if (!shadow) { if (printk_ratelimit()) pr_err("kmemcheck: failed to allocate shadow bitmap\n"); diff --git a/mm/slab.c b/mm/slab.c index 50713827b3ed..966839a1ac2c 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1412,7 +1412,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, if (cachep->flags & SLAB_RECLAIM_ACCOUNT) flags |= __GFP_RECLAIMABLE; - page = __alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); + page = __alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) { slab_out_of_memory(cachep, flags, nodeid); return NULL; diff --git a/mm/slab.h b/mm/slab.h index 438e4ace4d9d..485d9fbb8802 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -141,10 +141,10 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, #if defined(CONFIG_SLAB) #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \ - SLAB_NOTRACK | SLAB_ACCOUNT) + SLAB_ACCOUNT) #elif defined(CONFIG_SLUB) #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ - SLAB_TEMPORARY | SLAB_NOTRACK | SLAB_ACCOUNT) + SLAB_TEMPORARY | SLAB_ACCOUNT) #else #define SLAB_CACHE_FLAGS (0) #endif @@ -163,7 +163,6 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, SLAB_NOLEAKTRACE | \ SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | \ - SLAB_NOTRACK | \ SLAB_ACCOUNT) int __kmem_cache_shutdown(struct kmem_cache *); diff --git a/mm/slab_common.c b/mm/slab_common.c index 0d7fe71ff5e4..65212caa1f2a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -44,7 +44,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ - SLAB_NOTRACK | SLAB_ACCOUNT) + SLAB_ACCOUNT) /* * Merge control. If this is set then no merging of slab caches will occur. diff --git a/mm/slub.c b/mm/slub.c index 2de2fc3adbd2..6eeffac30025 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1434,8 +1434,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, struct page *page; int order = oo_order(oo); - flags |= __GFP_NOTRACK; - if (node == NUMA_NO_NODE) page = alloc_pages(flags, order); else @@ -3772,7 +3770,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) struct page *page; void *ptr = NULL; - flags |= __GFP_COMP | __GFP_NOTRACK; + flags |= __GFP_COMP; page = alloc_pages_node(node, flags, get_order(size)); if (page) ptr = page_address(page); From b9870f85817ebb331d7496d50a0b99088e240e20 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:58 -0800 Subject: [PATCH 523/770] kmemcheck: remove whats left of NOTRACK flags commit d8be75663cec0069b85f80191abd2682ce4a512f upstream. Now that kmemcheck is gone, we don't need the NOTRACK flags. Link: http://lkml.kernel.org/r/20171007030159.22241-5-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 5 ----- arch/x86/include/asm/pgtable_types.h | 13 ------------- include/linux/gfp.h | 9 --------- include/linux/slab.h | 6 ------ include/trace/events/mmflags.h | 1 - mm/slub.c | 2 -- tools/perf/builtin-kmem.c | 1 - 7 files changed, 37 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 211368922cad..8b8f1f14a0bf 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -668,11 +668,6 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) return false; } -static inline int pte_hidden(pte_t pte) -{ - return pte_flags(pte) & _PAGE_HIDDEN; -} - static inline int pmd_present(pmd_t pmd) { /* diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 9e9b05fc4860..3696398a9475 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -32,7 +32,6 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 -#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 @@ -79,18 +78,6 @@ #define _PAGE_KNL_ERRATUM_MASK 0 #endif -#ifdef CONFIG_KMEMCHECK -#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) -#else -#define _PAGE_HIDDEN (_AT(pteval_t, 0)) -#endif - -/* - * The same hidden bit is used by kmemcheck, but since kmemcheck - * works on kernel pages while soft-dirty engine on user space, - * they do not conflict with each other. - */ - #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) #else diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 710143741eb5..b041f94678de 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -37,7 +37,6 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u #define ___GFP_ACCOUNT 0x100000u -#define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u @@ -201,19 +200,11 @@ struct vm_area_struct; * __GFP_COMP address compound page metadata. * * __GFP_ZERO returns a zeroed page on success. - * - * __GFP_NOTRACK avoids tracking with kmemcheck. - * - * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of - * distinguishing in the source between false positives and allocations that - * cannot be supported (e.g. page tables). */ #define __GFP_COLD ((__force gfp_t)___GFP_COLD) #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) -#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) -#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) diff --git a/include/linux/slab.h b/include/linux/slab.h index af5aa65c7c18..ae5ed6492d54 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -78,12 +78,6 @@ #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ -/* Don't track use of uninitialized memory */ -#ifdef CONFIG_KMEMCHECK -# define SLAB_NOTRACK 0x01000000UL -#else -# define SLAB_NOTRACK 0x00000000UL -#endif #ifdef CONFIG_FAILSLAB # define SLAB_FAILSLAB 0x02000000UL /* Fault injection mark */ #else diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 648cbf603736..72162f3a03fa 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -46,7 +46,6 @@ {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ - {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ diff --git a/mm/slub.c b/mm/slub.c index 6eeffac30025..38acb5c114ed 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5633,8 +5633,6 @@ static char *create_unique_id(struct kmem_cache *s) *p++ = 'a'; if (s->flags & SLAB_CONSISTENCY_CHECKS) *p++ = 'F'; - if (!(s->flags & SLAB_NOTRACK)) - *p++ = 't'; if (s->flags & SLAB_ACCOUNT) *p++ = 'A'; if (p != name + 1) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 35d4b9c9a9e8..9e693ce4b73b 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -655,7 +655,6 @@ static const struct { { "__GFP_RECLAIMABLE", "RC" }, { "__GFP_MOVABLE", "M" }, { "__GFP_ACCOUNT", "AC" }, - { "__GFP_NOTRACK", "NT" }, { "__GFP_WRITE", "WR" }, { "__GFP_RECLAIM", "R" }, { "__GFP_DIRECT_RECLAIM", "DR" }, From f369f1486116b0f3e9630a2481addde6854df541 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:36:02 -0800 Subject: [PATCH 524/770] kmemcheck: rip it out commit 4675ff05de2d76d167336b368bd07f3fef6ed5a6 upstream. Fix up makefiles, remove references, and git rm kmemcheck. Link: http://lkml.kernel.org/r/20171007030159.22241-4-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Steven Rostedt Cc: Vegard Nossum Cc: Pekka Enberg Cc: Michal Hocko Cc: Eric W. Biederman Cc: Alexander Potapenko Cc: Tim Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 7 - Documentation/dev-tools/index.rst | 1 - Documentation/dev-tools/kmemcheck.rst | 733 ------------------ MAINTAINERS | 10 - arch/x86/Kconfig | 3 +- arch/x86/include/asm/kmemcheck.h | 42 - arch/x86/include/asm/string_32.h | 9 - arch/x86/include/asm/string_64.h | 8 - arch/x86/kernel/cpu/intel.c | 15 - arch/x86/mm/Makefile | 2 - arch/x86/mm/init.c | 5 +- arch/x86/mm/kmemcheck/Makefile | 1 - arch/x86/mm/kmemcheck/error.c | 227 ------ arch/x86/mm/kmemcheck/error.h | 15 - arch/x86/mm/kmemcheck/kmemcheck.c | 658 ---------------- arch/x86/mm/kmemcheck/opcode.c | 106 --- arch/x86/mm/kmemcheck/opcode.h | 9 - arch/x86/mm/kmemcheck/pte.c | 22 - arch/x86/mm/kmemcheck/pte.h | 10 - arch/x86/mm/kmemcheck/selftest.c | 70 -- arch/x86/mm/kmemcheck/selftest.h | 6 - arch/x86/mm/kmemcheck/shadow.c | 173 ----- arch/x86/mm/kmemcheck/shadow.h | 18 - include/linux/interrupt.h | 15 - include/linux/kmemcheck.h | 171 ---- kernel/softirq.c | 10 - kernel/sysctl.c | 10 - lib/Kconfig.debug | 6 +- lib/Kconfig.kmemcheck | 94 --- mm/Kconfig.debug | 1 - mm/Makefile | 2 - mm/kmemcheck.c | 125 --- mm/slub.c | 5 +- scripts/kernel-doc | 2 - tools/include/linux/kmemcheck.h | 8 - 35 files changed, 7 insertions(+), 2592 deletions(-) delete mode 100644 Documentation/dev-tools/kmemcheck.rst delete mode 100644 arch/x86/mm/kmemcheck/Makefile delete mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c delete mode 100644 arch/x86/mm/kmemcheck/shadow.c delete mode 100644 lib/Kconfig.kmemcheck diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c76afdcafbef..fb385af482ff 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1841,13 +1841,6 @@ Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the default is off. - kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode - Valid arguments: 0, 1, 2 - kmemcheck=0 (disabled) - kmemcheck=1 (enabled) - kmemcheck=2 (one-shot mode) - Default: 2 (one-shot mode) - kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index a81787cd47d7..e313925fb0fa 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst @@ -21,7 +21,6 @@ whole; patches welcome! kasan ubsan kmemleak - kmemcheck gdb-kernel-debugging kgdb kselftest diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst deleted file mode 100644 index 7f3d1985de74..000000000000 --- a/Documentation/dev-tools/kmemcheck.rst +++ /dev/null @@ -1,733 +0,0 @@ -Getting started with kmemcheck -============================== - -Vegard Nossum - - -Introduction ------------- - -kmemcheck is a debugging feature for the Linux Kernel. More specifically, it -is a dynamic checker that detects and warns about some uses of uninitialized -memory. - -Userspace programmers might be familiar with Valgrind's memcheck. The main -difference between memcheck and kmemcheck is that memcheck works for userspace -programs only, and kmemcheck works for the kernel only. The implementations -are of course vastly different. Because of this, kmemcheck is not as accurate -as memcheck, but it turns out to be good enough in practice to discover real -programmer errors that the compiler is not able to find through static -analysis. - -Enabling kmemcheck on a kernel will probably slow it down to the extent that -the machine will not be usable for normal workloads such as e.g. an -interactive desktop. kmemcheck will also cause the kernel to use about twice -as much memory as normal. For this reason, kmemcheck is strictly a debugging -feature. - - -Downloading ------------ - -As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel. - - -Configuring and compiling -------------------------- - -kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of -configuration variables must have specific settings in order for the kmemcheck -menu to even appear in "menuconfig". These are: - -- ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n`` - This option is located under "General setup" / "Optimize for size". - - Without this, gcc will use certain optimizations that usually lead to - false positive warnings from kmemcheck. An example of this is a 16-bit - field in a struct, where gcc may load 32 bits, then discard the upper - 16 bits. kmemcheck sees only the 32-bit load, and may trigger a - warning for the upper 16 bits (if they're uninitialized). - -- ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y`` - This option is located under "General setup" / "Choose SLAB - allocator". - -- ``CONFIG_FUNCTION_TRACER=n`` - This option is located under "Kernel hacking" / "Tracers" / "Kernel - Function Tracer" - - When function tracing is compiled in, gcc emits a call to another - function at the beginning of every function. This means that when the - page fault handler is called, the ftrace framework will be called - before kmemcheck has had a chance to handle the fault. If ftrace then - modifies memory that was tracked by kmemcheck, the result is an - endless recursive page fault. - -- ``CONFIG_DEBUG_PAGEALLOC=n`` - This option is located under "Kernel hacking" / "Memory Debugging" - / "Debug page memory allocations". - -In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also -located under "Kernel hacking". With this, you will be able to get line number -information from the kmemcheck warnings, which is extremely valuable in -debugging a problem. This option is not mandatory, however, because it slows -down the compilation process and produces a much bigger kernel image. - -Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory -Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows -a description of the kmemcheck configuration variables: - -- ``CONFIG_KMEMCHECK`` - This must be enabled in order to use kmemcheck at all... - -- ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT`` - This option controls the status of kmemcheck at boot-time. "Enabled" - will enable kmemcheck right from the start, "disabled" will boot the - kernel as normal (but with the kmemcheck code compiled in, so it can - be enabled at run-time after the kernel has booted), and "one-shot" is - a special mode which will turn kmemcheck off automatically after - detecting the first use of uninitialized memory. - - If you are using kmemcheck to actively debug a problem, then you - probably want to choose "enabled" here. - - The one-shot mode is mostly useful in automated test setups because it - can prevent floods of warnings and increase the chances of the machine - surviving in case something is really wrong. In other cases, the one- - shot mode could actually be counter-productive because it would turn - itself off at the very first error -- in the case of a false positive - too -- and this would come in the way of debugging the specific - problem you were interested in. - - If you would like to use your kernel as normal, but with a chance to - enable kmemcheck in case of some problem, it might be a good idea to - choose "disabled" here. When kmemcheck is disabled, most of the run- - time overhead is not incurred, and the kernel will be almost as fast - as normal. - -- ``CONFIG_KMEMCHECK_QUEUE_SIZE`` - Select the maximum number of error reports to store in an internal - (fixed-size) buffer. Since errors can occur virtually anywhere and in - any context, we need a temporary storage area which is guaranteed not - to generate any other page faults when accessed. The queue will be - emptied as soon as a tasklet may be scheduled. If the queue is full, - new error reports will be lost. - - The default value of 64 is probably fine. If some code produces more - than 64 errors within an irqs-off section, then the code is likely to - produce many, many more, too, and these additional reports seldom give - any more information (the first report is usually the most valuable - anyway). - - This number might have to be adjusted if you are not using serial - console or similar to capture the kernel log. If you are using the - "dmesg" command to save the log, then getting a lot of kmemcheck - warnings might overflow the kernel log itself, and the earlier reports - will get lost in that way instead. Try setting this to 10 or so on - such a setup. - -- ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT`` - Select the number of shadow bytes to save along with each entry of the - error-report queue. These bytes indicate what parts of an allocation - are initialized, uninitialized, etc. and will be displayed when an - error is detected to help the debugging of a particular problem. - - The number entered here is actually the logarithm of the number of - bytes that will be saved. So if you pick for example 5 here, kmemcheck - will save 2^5 = 32 bytes. - - The default value should be fine for debugging most problems. It also - fits nicely within 80 columns. - -- ``CONFIG_KMEMCHECK_PARTIAL_OK`` - This option (when enabled) works around certain GCC optimizations that - produce 32-bit reads from 16-bit variables where the upper 16 bits are - thrown away afterwards. - - The default value (enabled) is recommended. This may of course hide - some real errors, but disabling it would probably produce a lot of - false positives. - -- ``CONFIG_KMEMCHECK_BITOPS_OK`` - This option silences warnings that would be generated for bit-field - accesses where not all the bits are initialized at the same time. This - may also hide some real bugs. - - This option is probably obsolete, or it should be replaced with - the kmemcheck-/bitfield-annotations for the code in question. The - default value is therefore fine. - -Now compile the kernel as usual. - - -How to use ----------- - -Booting -~~~~~~~ - -First some information about the command-line options. There is only one -option specific to kmemcheck, and this is called "kmemcheck". It can be used -to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT`` -option. Its possible settings are: - -- ``kmemcheck=0`` (disabled) -- ``kmemcheck=1`` (enabled) -- ``kmemcheck=2`` (one-shot mode) - -If SLUB debugging has been enabled in the kernel, it may take precedence over -kmemcheck in such a way that the slab caches which are under SLUB debugging -will not be tracked by kmemcheck. In order to ensure that this doesn't happen -(even though it shouldn't by default), use SLUB's boot option ``slub_debug``, -like this: ``slub_debug=-`` - -In fact, this option may also be used for fine-grained control over SLUB vs. -kmemcheck. For example, if the command line includes -``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only -for the "dentry" slab cache, and with kmemcheck tracking all the other -caches. This is advanced usage, however, and is not generally recommended. - - -Run-time enable/disable -~~~~~~~~~~~~~~~~~~~~~~~ - -When the kernel has booted, it is possible to enable or disable kmemcheck at -run-time. WARNING: This feature is still experimental and may cause false -positive warnings to appear. Therefore, try not to use this. If you find that -it doesn't work properly (e.g. you see an unreasonable amount of warnings), I -will be happy to take bug reports. - -Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.:: - - $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck - -The numbers are the same as for the ``kmemcheck=`` command-line option. - - -Debugging -~~~~~~~~~ - -A typical report will look something like this:: - - WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - ^ - - Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A - RIP: 0010:[] [] __dequeue_signal+0xc8/0x190 - RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 - RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 - RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 - RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 - R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e - R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 - FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 - CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 - CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 - DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 - [] dequeue_signal+0x8e/0x170 - [] get_signal_to_deliver+0x98/0x390 - [] do_notify_resume+0xad/0x7d0 - [] int_signal+0x12/0x17 - [] 0xffffffffffffffff - -The single most valuable information in this report is the RIP (or EIP on 32- -bit) value. This will help us pinpoint exactly which instruction that caused -the warning. - -If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do -is give this address to the addr2line program, like this:: - - $ addr2line -e vmlinux -i ffffffff8104ede8 - arch/x86/include/asm/string_64.h:12 - include/asm-generic/siginfo.h:287 - kernel/signal.c:380 - kernel/signal.c:410 - -The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:** -This must be the vmlinux of the kernel that produced the warning in the -first place! If not, the line number information will almost certainly be -wrong. - -The "``-i``" tells addr2line to also print the line numbers of inlined -functions. In this case, the flag was very important, because otherwise, -it would only have printed the first line, which is just a call to -``memcpy()``, which could be called from a thousand places in the kernel, and -is therefore not very useful. These inlined functions would not show up in -the stack trace above, simply because the kernel doesn't load the extra -debugging information. This technique can of course be used with ordinary -kernel oopses as well. - -In this case, it's the caller of ``memcpy()`` that is interesting, and it can be -found in ``include/asm-generic/siginfo.h``, line 287:: - - 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) - 282 { - 283 if (from->si_code < 0) - 284 memcpy(to, from, sizeof(*to)); - 285 else - 286 /* _sigchld is currently the largest know union member */ - 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); - 288 } - -Since this was a read (kmemcheck usually warns about reads only, though it can -warn about writes to unallocated or freed memory as well), it was probably the -"from" argument which contained some uninitialized bytes. Following the chain -of calls, we move upwards to see where "from" was allocated or initialized, -``kernel/signal.c``, line 380:: - - 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) - 360 { - ... - 367 list_for_each_entry(q, &list->list, list) { - 368 if (q->info.si_signo == sig) { - 369 if (first) - 370 goto still_pending; - 371 first = q; - ... - 377 if (first) { - 378 still_pending: - 379 list_del_init(&first->list); - 380 copy_siginfo(info, &first->info); - 381 __sigqueue_free(first); - ... - 392 } - 393 } - -Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The -variable ``first`` was found on a list -- passed in as the second argument to -``collect_signal()``. We continue our journey through the stack, to figure out -where the item on "list" was allocated or initialized. We move to line 410:: - - 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, - 396 siginfo_t *info) - 397 { - ... - 410 collect_signal(sig, pending, info); - ... - 414 } - -Now we need to follow the ``pending`` pointer, since that is being passed on to -``collect_signal()`` as ``list``. At this point, we've run out of lines from the -"addr2line" output. Not to worry, we just paste the next addresses from the -kmemcheck stack dump, i.e.:: - - [] dequeue_signal+0x8e/0x170 - [] get_signal_to_deliver+0x98/0x390 - [] do_notify_resume+0xad/0x7d0 - [] int_signal+0x12/0x17 - - $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ - ffffffff8100b87d ffffffff8100c7b5 - kernel/signal.c:446 - kernel/signal.c:1806 - arch/x86/kernel/signal.c:805 - arch/x86/kernel/signal.c:871 - arch/x86/kernel/entry_64.S:694 - -Remember that since these addresses were found on the stack and not as the -RIP value, they actually point to the _next_ instruction (they are return -addresses). This becomes obvious when we look at the code for line 446:: - - 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) - 423 { - ... - 431 signr = __dequeue_signal(&tsk->signal->shared_pending, - 432 mask, info); - 433 /* - 434 * itimer signal ? - 435 * - 436 * itimers are process shared and we restart periodic - 437 * itimers in the signal delivery path to prevent DoS - 438 * attacks in the high resolution timer case. This is - 439 * compliant with the old way of self restarting - 440 * itimers, as the SIGALRM is a legacy signal and only - 441 * queued once. Changing the restart behaviour to - 442 * restart the timer in the signal dequeue path is - 443 * reducing the timer noise on heavy loaded !highres - 444 * systems too. - 445 */ - 446 if (unlikely(signr == SIGALRM)) { - ... - 489 } - -So instead of looking at 446, we should be looking at 431, which is the line -that executes just before 446. Here we see that what we are looking for is -``&tsk->signal->shared_pending``. - -Our next task is now to figure out which function that puts items on this -``shared_pending`` list. A crude, but efficient tool, is ``git grep``:: - - $ git grep -n 'shared_pending' kernel/ - ... - kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; - kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; - ... - -There were more results, but none of them were related to list operations, -and these were the only assignments. We inspect the line numbers more closely -and find that this is indeed where items are being added to the list:: - - 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - 817 int group) - 818 { - ... - 828 pending = group ? &t->signal->shared_pending : &t->pending; - ... - 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && - 852 (is_si_special(info) || - 853 info->si_code >= 0))); - 854 if (q) { - 855 list_add_tail(&q->list, &pending->list); - ... - 890 } - -and:: - - 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) - 1310 { - .... - 1339 pending = group ? &t->signal->shared_pending : &t->pending; - 1340 list_add_tail(&q->list, &pending->list); - .... - 1347 } - -In the first case, the list element we are looking for, ``q``, is being -returned from the function ``__sigqueue_alloc()``, which looks like an -allocation function. Let's take a look at it:: - - 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, - 188 int override_rlimit) - 189 { - 190 struct sigqueue *q = NULL; - 191 struct user_struct *user; - 192 - 193 /* - 194 * We won't get problems with the target's UID changing under us - 195 * because changing it requires RCU be used, and if t != current, the - 196 * caller must be holding the RCU readlock (by way of a spinlock) and - 197 * we use RCU protection here - 198 */ - 199 user = get_uid(__task_cred(t)->user); - 200 atomic_inc(&user->sigpending); - 201 if (override_rlimit || - 202 atomic_read(&user->sigpending) <= - 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) - 204 q = kmem_cache_alloc(sigqueue_cachep, flags); - 205 if (unlikely(q == NULL)) { - 206 atomic_dec(&user->sigpending); - 207 free_uid(user); - 208 } else { - 209 INIT_LIST_HEAD(&q->list); - 210 q->flags = 0; - 211 q->user = user; - 212 } - 213 - 214 return q; - 215 } - -We see that this function initializes ``q->list``, ``q->flags``, and -``q->user``. It seems that now is the time to look at the definition of -``struct sigqueue``, e.g.:: - - 14 struct sigqueue { - 15 struct list_head list; - 16 int flags; - 17 siginfo_t info; - 18 struct user_struct *user; - 19 }; - -And, you might remember, it was a ``memcpy()`` on ``&first->info`` that -caused the warning, so this makes perfect sense. It also seems reasonable -to assume that it is the caller of ``__sigqueue_alloc()`` that has the -responsibility of filling out (initializing) this member. - -But just which fields of the struct were uninitialized? Let's look at -kmemcheck's report again:: - - WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - ^ - -These first two lines are the memory dump of the memory object itself, and -the shadow bytemap, respectively. The memory object itself is in this case -``&first->info``. Just beware that the start of this dump is NOT the start -of the object itself! The position of the caret (^) corresponds with the -address of the read (ffff88003e4a2024). - -The shadow bytemap dump legend is as follows: - -- i: initialized -- u: uninitialized -- a: unallocated (memory has been allocated by the slab layer, but has not - yet been handed off to anybody) -- f: freed (memory has been allocated by the slab layer, but has been freed - by the previous owner) - -In order to figure out where (relative to the start of the object) the -uninitialized memory was located, we have to look at the disassembly. For -that, we'll need the RIP address again:: - - RIP: 0010:[] [] __dequeue_signal+0xc8/0x190 - - $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: - ffffffff8104edc8: mov %r8,0x8(%r8) - ffffffff8104edcc: test %r10d,%r10d - ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> - ffffffff8104edd5: mov %rax,%rdx - ffffffff8104edd8: mov $0xc,%ecx - ffffffff8104eddd: mov %r13,%rdi - ffffffff8104ede0: mov $0x30,%eax - ffffffff8104ede5: mov %rdx,%rsi - ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) - ffffffff8104edea: test $0x2,%al - ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> - ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) - ffffffff8104edf0: test $0x1,%al - ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> - ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) - ffffffff8104edf5: mov %r8,%rdi - ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> - -As expected, it's the "``rep movsl``" instruction from the ``memcpy()`` -that causes the warning. We know about ``REP MOVSL`` that it uses the register -``RCX`` to count the number of remaining iterations. By taking a look at the -register dump again (from the kmemcheck report), we can figure out how many -bytes were left to copy:: - - RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 - -By looking at the disassembly, we also see that ``%ecx`` is being loaded -with the value ``$0xc`` just before (ffffffff8104edd8), so we are very -lucky. Keep in mind that this is the number of iterations, not bytes. And -since this is a "long" operation, we need to multiply by 4 to get the -number of bytes. So this means that the uninitialized value was encountered -at 4 * (0xc - 0x9) = 12 bytes from the start of the object. - -We can now try to figure out which field of the "``struct siginfo``" that -was not initialized. This is the beginning of the struct:: - - 40 typedef struct siginfo { - 41 int si_signo; - 42 int si_errno; - 43 int si_code; - 44 - 45 union { - .. - 92 } _sifields; - 93 } siginfo_t; - -On 64-bit, the int is 4 bytes long, so it must the union member that has -not been initialized. We can verify this using gdb:: - - $ gdb vmlinux - ... - (gdb) p &((struct siginfo *) 0)->_sifields - $1 = (union {...} *) 0x10 - -Actually, it seems that the union member is located at offset 0x10 -- which -means that gcc has inserted 4 bytes of padding between the members ``si_code`` -and ``_sifields``. We can now get a fuller picture of the memory dump:: - - _----------------------------=> si_code - / _--------------------=> (padding) - | / _------------=> _sifields(._kill._pid) - | | / _----=> _sifields(._kill._uid) - | | | / - -------|-------|-------|-------| - 80000000000000000000000000000000000000000088ffff0000000000000000 - i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u - -This allows us to realize another important fact: ``si_code`` contains the -value 0x80. Remember that x86 is little endian, so the first 4 bytes -"80000000" are really the number 0x00000080. With a bit of research, we -find that this is actually the constant ``SI_KERNEL`` defined in -``include/asm-generic/siginfo.h``:: - - 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ - -This macro is used in exactly one place in the x86 kernel: In ``send_signal()`` -in ``kernel/signal.c``:: - - 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - 817 int group) - 818 { - ... - 828 pending = group ? &t->signal->shared_pending : &t->pending; - ... - 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && - 852 (is_si_special(info) || - 853 info->si_code >= 0))); - 854 if (q) { - 855 list_add_tail(&q->list, &pending->list); - 856 switch ((unsigned long) info) { - ... - 865 case (unsigned long) SEND_SIG_PRIV: - 866 q->info.si_signo = sig; - 867 q->info.si_errno = 0; - 868 q->info.si_code = SI_KERNEL; - 869 q->info.si_pid = 0; - 870 q->info.si_uid = 0; - 871 break; - ... - 890 } - -Not only does this match with the ``.si_code`` member, it also matches the place -we found earlier when looking for where siginfo_t objects are enqueued on the -``shared_pending`` list. - -So to sum up: It seems that it is the padding introduced by the compiler -between two struct fields that is uninitialized, and this gets reported when -we do a ``memcpy()`` on the struct. This means that we have identified a false -positive warning. - -Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls -when both the source and destination addresses are tracked. (Instead, we copy -the shadow bytemap as well). In this case, the destination address clearly -was not tracked. We can dig a little deeper into the stack trace from above:: - - arch/x86/kernel/signal.c:805 - arch/x86/kernel/signal.c:871 - arch/x86/kernel/entry_64.S:694 - -And we clearly see that the destination siginfo object is located on the -stack:: - - 782 static void do_signal(struct pt_regs *regs) - 783 { - 784 struct k_sigaction ka; - 785 siginfo_t info; - ... - 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); - ... - 854 } - -And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the -destination argument. - -Now, even though we didn't find an actual error here, the example is still a -good one, because it shows how one would go about to find out what the report -was all about. - - -Annotating false positives -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are a few different ways to make annotations in the source code that -will keep kmemcheck from checking and reporting certain allocations. Here -they are: - -- ``__GFP_NOTRACK_FALSE_POSITIVE`` - This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()`` - (therefore also to other functions that end up calling one of - these) to indicate that the allocation should not be tracked - because it would lead to a false positive report. This is a "big - hammer" way of silencing kmemcheck; after all, even if the false - positive pertains to particular field in a struct, for example, we - will now lose the ability to find (real) errors in other parts of - the same struct. - - Example:: - - /* No warnings will ever trigger on accessing any part of x */ - x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); - -- ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and - ``kmemcheck_annotate_bitfield(ptr, name)`` - The first two of these three macros can be used inside struct - definitions to signal, respectively, the beginning and end of a - bitfield. Additionally, this will assign the bitfield a name, which - is given as an argument to the macros. - - Having used these markers, one can later use - kmemcheck_annotate_bitfield() at the point of allocation, to indicate - which parts of the allocation is part of a bitfield. - - Example:: - - struct foo { - int x; - - kmemcheck_bitfield_begin(flags); - int flag_a:1; - int flag_b:1; - kmemcheck_bitfield_end(flags); - - int y; - }; - - struct foo *x = kmalloc(sizeof *x); - - /* No warnings will trigger on accessing the bitfield of x */ - kmemcheck_annotate_bitfield(x, flags); - - Note that ``kmemcheck_annotate_bitfield()`` can be used even before the - return value of ``kmalloc()`` is checked -- in other words, passing NULL - as the first argument is legal (and will do nothing). - - -Reporting errors ----------------- - -As we have seen, kmemcheck will produce false positive reports. Therefore, it -is not very wise to blindly post kmemcheck warnings to mailing lists and -maintainers. Instead, I encourage maintainers and developers to find errors -in their own code. If you get a warning, you can try to work around it, try -to figure out if it's a real error or not, or simply ignore it. Most -developers know their own code and will quickly and efficiently determine the -root cause of a kmemcheck report. This is therefore also the most efficient -way to work with kmemcheck. - -That said, we (the kmemcheck maintainers) will always be on the lookout for -false positives that we can annotate and silence. So whatever you find, -please drop us a note privately! Kernel configs and steps to reproduce (if -available) are of course a great help too. - -Happy hacking! - - -Technical description ---------------------- - -kmemcheck works by marking memory pages non-present. This means that whenever -somebody attempts to access the page, a page fault is generated. The page -fault handler notices that the page was in fact only hidden, and so it calls -on the kmemcheck code to make further investigations. - -When the investigations are completed, kmemcheck "shows" the page by marking -it present (as it would be under normal circumstances). This way, the -interrupted code can continue as usual. - -But after the instruction has been executed, we should hide the page again, so -that we can catch the next access too! Now kmemcheck makes use of a debugging -feature of the processor, namely single-stepping. When the processor has -finished the one instruction that generated the memory access, a debug -exception is raised. From here, we simply hide the page again and continue -execution, this time with the single-stepping feature turned off. - -kmemcheck requires some assistance from the memory allocator in order to work. -The memory allocator needs to - - 1. Tell kmemcheck about newly allocated pages and pages that are about to - be freed. This allows kmemcheck to set up and tear down the shadow memory - for the pages in question. The shadow memory stores the status of each - byte in the allocation proper, e.g. whether it is initialized or - uninitialized. - - 2. Tell kmemcheck which parts of memory should be marked uninitialized. - There are actually a few more states, such as "not yet allocated" and - "recently freed". - -If a slab cache is set up using the SLAB_NOTRACK flag, it will never return -memory that can take page faults because of kmemcheck. - -If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still -request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. -This does not prevent the page faults from occurring, however, but marks the -object in question as being initialized so that no warnings will ever be -produced for this object. - -Currently, the SLAB and SLUB allocators are supported by kmemcheck. diff --git a/MAINTAINERS b/MAINTAINERS index 2811a211632c..76ea063d8083 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7670,16 +7670,6 @@ F: include/linux/kdb.h F: include/linux/kgdb.h F: kernel/debug/ -KMEMCHECK -M: Vegard Nossum -M: Pekka Enberg -S: Maintained -F: Documentation/dev-tools/kmemcheck.rst -F: arch/x86/include/asm/kmemcheck.h -F: arch/x86/mm/kmemcheck/ -F: include/linux/kmemcheck.h -F: mm/kmemcheck.c - KMEMLEAK M: Catalin Marinas S: Maintained diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 17de6acc0eab..559b37bf5a2e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,7 +111,6 @@ config X86 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KGDB - select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT @@ -1443,7 +1442,7 @@ config ARCH_DMA_ADDR_T_64BIT config X86_DIRECT_GBPAGES def_bool y - depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK + depends on X86_64 && !DEBUG_PAGEALLOC ---help--- Certain kernel features effectively disable kernel linear 1 GB mappings (even if the CPU otherwise diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h index 945a0337fbcf..ea32a7d3cf1b 100644 --- a/arch/x86/include/asm/kmemcheck.h +++ b/arch/x86/include/asm/kmemcheck.h @@ -1,43 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_KMEMCHECK_H -#define ASM_X86_KMEMCHECK_H - -#include -#include - -#ifdef CONFIG_KMEMCHECK -bool kmemcheck_active(struct pt_regs *regs); - -void kmemcheck_show(struct pt_regs *regs); -void kmemcheck_hide(struct pt_regs *regs); - -bool kmemcheck_fault(struct pt_regs *regs, - unsigned long address, unsigned long error_code); -bool kmemcheck_trap(struct pt_regs *regs); -#else -static inline bool kmemcheck_active(struct pt_regs *regs) -{ - return false; -} - -static inline void kmemcheck_show(struct pt_regs *regs) -{ -} - -static inline void kmemcheck_hide(struct pt_regs *regs) -{ -} - -static inline bool kmemcheck_fault(struct pt_regs *regs, - unsigned long address, unsigned long error_code) -{ - return false; -} - -static inline bool kmemcheck_trap(struct pt_regs *regs) -{ - return false; -} -#endif /* CONFIG_KMEMCHECK */ - -#endif diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 076502241eae..55d392c6bd29 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) * No 3D Now! */ -#ifndef CONFIG_KMEMCHECK - #if (__GNUC__ >= 4) #define memcpy(t, f, n) __builtin_memcpy(t, f, n) #else @@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) #endif -#else -/* - * kmemcheck becomes very happy if we use the REP instructions unconditionally, - * because it means that we know both memory operands in advance. - */ -#define memcpy(t, f, n) __memcpy((t), (f), (n)) -#endif #endif #endif /* !CONFIG_FORTIFY_SOURCE */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 0b1b4445f4c5..533f74c300c2 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_FORTIFY_SOURCE -#ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ @@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len); __ret; \ }) #endif -#else -/* - * kmemcheck becomes very happy if we use the REP instructions unconditionally, - * because it means that we know both memory operands in advance. - */ -#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f5347173157c..ef796f14f7ae 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -250,21 +250,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); -#ifdef CONFIG_KMEMCHECK - /* - * P4s have a "fast strings" feature which causes single- - * stepping REP instructions to only generate a #DB on - * cache-line boundaries. - * - * Ingo Molnar reported a Pentium D (model 6) and a Xeon - * (model 2) with the same problem. - */ - if (c->x86 == 15) - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) - pr_info("kmemcheck: Disabling fast string operations\n"); -#endif - /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 52906808e277..27e9e90a8d35 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_KMEMCHECK) += kmemcheck/ - KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fcf38a2a4e7e..82f5252c723a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -170,12 +170,11 @@ static void enable_global_pages(void) static void __init probe_page_size_mask(void) { /* - * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will - * use small pages. + * For pagealloc debugging, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) page_size_mask |= 1 << PG_LEVEL_2M; else direct_gbpages = 0; diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile deleted file mode 100644 index 520b3bce4095..000000000000 --- a/arch/x86/mm/kmemcheck/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 872ec4159a68..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -1,228 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -#include "error.h" -#include "shadow.h" - -enum kmemcheck_error_type { - KMEMCHECK_ERROR_INVALID_ACCESS, - KMEMCHECK_ERROR_BUG, -}; - -#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) - -struct kmemcheck_error { - enum kmemcheck_error_type type; - - union { - /* KMEMCHECK_ERROR_INVALID_ACCESS */ - struct { - /* Kind of access that caused the error */ - enum kmemcheck_shadow state; - /* Address and size of the erroneous read */ - unsigned long address; - unsigned int size; - }; - }; - - struct pt_regs regs; - struct stack_trace trace; - unsigned long trace_entries[32]; - - /* We compress it to a char. */ - unsigned char shadow_copy[SHADOW_COPY_SIZE]; - unsigned char memory_copy[SHADOW_COPY_SIZE]; -}; - -/* - * Create a ring queue of errors to output. We can't call printk() directly - * from the kmemcheck traps, since this may call the console drivers and - * result in a recursive fault. - */ -static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; -static unsigned int error_count; -static unsigned int error_rd; -static unsigned int error_wr; -static unsigned int error_missed_count; - -static struct kmemcheck_error *error_next_wr(void) -{ - struct kmemcheck_error *e; - - if (error_count == ARRAY_SIZE(error_fifo)) { - ++error_missed_count; - return NULL; - } - - e = &error_fifo[error_wr]; - if (++error_wr == ARRAY_SIZE(error_fifo)) - error_wr = 0; - ++error_count; - return e; -} - -static struct kmemcheck_error *error_next_rd(void) -{ - struct kmemcheck_error *e; - - if (error_count == 0) - return NULL; - - e = &error_fifo[error_rd]; - if (++error_rd == ARRAY_SIZE(error_fifo)) - error_rd = 0; - --error_count; - return e; -} - -void kmemcheck_error_recall(void) -{ - static const char *desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", - [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", - [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", - [KMEMCHECK_SHADOW_FREED] = "freed", - }; - - static const char short_desc[] = { - [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', - [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', - [KMEMCHECK_SHADOW_INITIALIZED] = 'i', - [KMEMCHECK_SHADOW_FREED] = 'f', - }; - - struct kmemcheck_error *e; - unsigned int i; - - e = error_next_rd(); - if (!e) - return; - - switch (e->type) { - case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", - 8 * e->size, e->state < ARRAY_SIZE(desc) ? - desc[e->state] : "(invalid shadow state)", - (void *) e->address); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk(KERN_CONT "%02x", e->memory_copy[i]); - printk(KERN_CONT "\n"); - - printk(KERN_WARNING); - for (i = 0; i < SHADOW_COPY_SIZE; ++i) { - if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); - else - printk(KERN_CONT " ?"); - } - printk(KERN_CONT "\n"); - printk(KERN_WARNING "%*c\n", 2 + 2 - * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); - break; - case KMEMCHECK_ERROR_BUG: - printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); - break; - } - - __show_regs(&e->regs, 1); - print_stack_trace(&e->trace, 0); -} - -static void do_wakeup(unsigned long data) -{ - while (error_count > 0) - kmemcheck_error_recall(); - - if (error_missed_count > 0) { - printk(KERN_WARNING "kmemcheck: Lost %d error reports because " - "the queue was too small\n", error_missed_count); - error_missed_count = 0; - } -} - -static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); - -/* - * Save the context of an error report. - */ -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs) -{ - static unsigned long prev_ip; - - struct kmemcheck_error *e; - void *shadow_copy; - void *memory_copy; - - /* Don't report several adjacent errors from the same EIP. */ - if (regs->ip == prev_ip) - return; - prev_ip = regs->ip; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_INVALID_ACCESS; - - e->state = state; - e->address = address; - e->size = size; - - /* Save regs */ - memcpy(&e->regs, regs, sizeof(*regs)); - - /* Save stack trace */ - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 0; - save_stack_trace_regs(regs, &e->trace); - - /* Round address down to nearest 16 bytes */ - shadow_copy = kmemcheck_shadow_lookup(address - & ~(SHADOW_COPY_SIZE - 1)); - BUG_ON(!shadow_copy); - - memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); - - kmemcheck_show_addr(address); - memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); - memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); - kmemcheck_hide_addr(address); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} - -/* - * Save the context of a kmemcheck bug. - */ -void kmemcheck_error_save_bug(struct pt_regs *regs) -{ - struct kmemcheck_error *e; - - e = error_next_wr(); - if (!e) - return; - - e->type = KMEMCHECK_ERROR_BUG; - - memcpy(&e->regs, regs, sizeof(*regs)); - - e->trace.nr_entries = 0; - e->trace.entries = e->trace_entries; - e->trace.max_entries = ARRAY_SIZE(e->trace_entries); - e->trace.skip = 1; - save_stack_trace(&e->trace); - - tasklet_hi_schedule_first(&kmemcheck_tasklet); -} diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h index 39f80d7a874d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/error.h +++ b/arch/x86/mm/kmemcheck/error.h @@ -1,16 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H -#define ARCH__X86__MM__KMEMCHECK__ERROR_H - -#include - -#include "shadow.h" - -void kmemcheck_error_save(enum kmemcheck_shadow state, - unsigned long address, unsigned int size, struct pt_regs *regs); - -void kmemcheck_error_save_bug(struct pt_regs *regs); - -void kmemcheck_error_recall(void); - -#endif diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c deleted file mode 100644 index 4515bae36bbe..000000000000 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ /dev/null @@ -1,658 +0,0 @@ -/** - * kmemcheck - a heavyweight memory checker for the linux kernel - * Copyright (C) 2007, 2008 Vegard Nossum - * (With a lot of help from Ingo Molnar and Pekka Enberg.) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2) as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "error.h" -#include "opcode.h" -#include "pte.h" -#include "selftest.h" -#include "shadow.h" - - -#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 0 -#endif - -#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT -# define KMEMCHECK_ENABLED 1 -#endif - -#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT -# define KMEMCHECK_ENABLED 2 -#endif - -int kmemcheck_enabled = KMEMCHECK_ENABLED; - -int __init kmemcheck_init(void) -{ -#ifdef CONFIG_SMP - /* - * Limit SMP to use a single CPU. We rely on the fact that this code - * runs before SMP is set up. - */ - if (setup_max_cpus > 1) { - printk(KERN_INFO - "kmemcheck: Limiting number of CPUs to 1.\n"); - setup_max_cpus = 1; - } -#endif - - if (!kmemcheck_selftest()) { - printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); - kmemcheck_enabled = 0; - return -EINVAL; - } - - printk(KERN_INFO "kmemcheck: Initialized\n"); - return 0; -} - -early_initcall(kmemcheck_init); - -/* - * We need to parse the kmemcheck= option before any memory is allocated. - */ -static int __init param_kmemcheck(char *str) -{ - int val; - int ret; - - if (!str) - return -EINVAL; - - ret = kstrtoint(str, 0, &val); - if (ret) - return ret; - kmemcheck_enabled = val; - return 0; -} - -early_param("kmemcheck", param_kmemcheck); - -int kmemcheck_show_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -int kmemcheck_hide_addr(unsigned long address) -{ - pte_t *pte; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return 0; - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - __flush_tlb_one(address); - return 1; -} - -struct kmemcheck_context { - bool busy; - int balance; - - /* - * There can be at most two memory operands to an instruction, but - * each address can cross a page boundary -- so we may need up to - * four addresses that must be hidden/revealed for each fault. - */ - unsigned long addr[4]; - unsigned long n_addrs; - unsigned long flags; - - /* Data size of the instruction that caused a fault. */ - unsigned int size; -}; - -static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); - -bool kmemcheck_active(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - return data->balance > 0; -} - -/* Save an address that needs to be shown/hidden */ -static void kmemcheck_save_addr(unsigned long addr) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); - data->addr[data->n_addrs++] = addr; -} - -static unsigned int kmemcheck_show_all(void) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_show_addr(data->addr[i]); - - return n; -} - -static unsigned int kmemcheck_hide_all(void) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - unsigned int i; - unsigned int n; - - n = 0; - for (i = 0; i < data->n_addrs; ++i) - n += kmemcheck_hide_addr(data->addr[i]); - - return n; -} - -/* - * Called from the #PF handler. - */ -void kmemcheck_show(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 0)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->balance = 0; - return; - } - - /* - * None of the addresses actually belonged to kmemcheck. Note that - * this is not an error. - */ - if (kmemcheck_show_all() == 0) - return; - - ++data->balance; - - /* - * The IF needs to be cleared as well, so that the faulting - * instruction can run "uninterrupted". Otherwise, we might take - * an interrupt and start executing that before we've had a chance - * to hide the page again. - * - * NOTE: In the rare case of multiple faults, we must not override - * the original flags: - */ - if (!(regs->flags & X86_EFLAGS_TF)) - data->flags = regs->flags; - - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; -} - -/* - * Called from the #DB handler. - */ -void kmemcheck_hide(struct pt_regs *regs) -{ - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - int n; - - BUG_ON(!irqs_disabled()); - - if (unlikely(data->balance != 1)) { - kmemcheck_show_all(); - kmemcheck_error_save_bug(regs); - data->n_addrs = 0; - data->balance = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; - return; - } - - if (kmemcheck_enabled) - n = kmemcheck_hide_all(); - else - n = kmemcheck_show_all(); - - if (n == 0) - return; - - --data->balance; - - data->n_addrs = 0; - - if (!(data->flags & X86_EFLAGS_TF)) - regs->flags &= ~X86_EFLAGS_TF; - if (data->flags & X86_EFLAGS_IF) - regs->flags |= X86_EFLAGS_IF; -} - -void kmemcheck_show_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -bool kmemcheck_page_is_tracked(struct page *p) -{ - /* This will also check the "hidden" flag of the PTE. */ - return kmemcheck_pte_lookup((unsigned long) page_address(p)); -} - -void kmemcheck_hide_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) { - unsigned long address; - pte_t *pte; - unsigned int level; - - address = (unsigned long) page_address(&p[i]); - pte = lookup_address(address, &level); - BUG_ON(!pte); - BUG_ON(level != PG_LEVEL_4K); - - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); - set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); - __flush_tlb_one(address); - } -} - -/* Access may NOT cross page boundary */ -static void kmemcheck_read_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - enum kmemcheck_shadow status; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; - - /* Don't warn about it again. */ - kmemcheck_shadow_set(shadow, size); -} - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - enum kmemcheck_shadow status; - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return true; - - status = kmemcheck_shadow_test_all(shadow, size); - - return status == KMEMCHECK_SHADOW_INITIALIZED; -} - -/* Access may cross page boundary */ -static void kmemcheck_read(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_read_strict(regs, addr, size); - return; - } - - /* - * What we do is basically to split the access across the - * two pages and handle each part separately. Yes, this means - * that we may now see reads that are 3 + 5 bytes, for - * example (and if both are uninitialized, there will be two - * reports), but it makes the code a lot simpler. - */ - kmemcheck_read_strict(regs, addr, next_page - addr); - kmemcheck_read_strict(regs, next_page, next_addr - next_page); -} - -static void kmemcheck_write_strict(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - void *shadow; - - shadow = kmemcheck_shadow_lookup(addr); - if (!shadow) - return; - - kmemcheck_save_addr(addr); - kmemcheck_shadow_set(shadow, size); -} - -static void kmemcheck_write(struct pt_regs *regs, - unsigned long addr, unsigned int size) -{ - unsigned long page = addr & PAGE_MASK; - unsigned long next_addr = addr + size - 1; - unsigned long next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - kmemcheck_write_strict(regs, addr, size); - return; - } - - /* See comment in kmemcheck_read(). */ - kmemcheck_write_strict(regs, addr, next_page - addr); - kmemcheck_write_strict(regs, next_page, next_addr - next_page); -} - -/* - * Copying is hard. We have two addresses, each of which may be split across - * a page (and each page will have different shadow addresses). - */ -static void kmemcheck_copy(struct pt_regs *regs, - unsigned long src_addr, unsigned long dst_addr, unsigned int size) -{ - uint8_t shadow[8]; - enum kmemcheck_shadow status; - - unsigned long page; - unsigned long next_addr; - unsigned long next_page; - - uint8_t *x; - unsigned int i; - unsigned int n; - - BUG_ON(size > sizeof(shadow)); - - page = src_addr & PAGE_MASK; - next_addr = src_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < size; ++i) - shadow[i] = x[i]; - } else { - for (i = 0; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } else { - n = next_page - src_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(src_addr); - if (x) { - kmemcheck_save_addr(src_addr); - for (i = 0; i < n; ++i) - shadow[i] = x[i]; - } else { - /* Not tracked */ - for (i = 0; i < n; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - - /* Second page */ - x = kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) - shadow[i] = x[i - n]; - } else { - /* Not tracked */ - for (i = n; i < size; ++i) - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - page = dst_addr & PAGE_MASK; - next_addr = dst_addr + size - 1; - next_page = next_addr & PAGE_MASK; - - if (likely(page == next_page)) { - /* Same page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < size; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } else { - n = next_page - dst_addr; - BUG_ON(n > sizeof(shadow)); - - /* First page */ - x = kmemcheck_shadow_lookup(dst_addr); - if (x) { - kmemcheck_save_addr(dst_addr); - for (i = 0; i < n; ++i) { - x[i] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - - /* Second page */ - x = kmemcheck_shadow_lookup(next_page); - if (x) { - kmemcheck_save_addr(next_page); - for (i = n; i < size; ++i) { - x[i - n] = shadow[i]; - shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; - } - } - } - - status = kmemcheck_shadow_test(shadow, size); - if (status == KMEMCHECK_SHADOW_INITIALIZED) - return; - - if (kmemcheck_enabled) - kmemcheck_error_save(status, src_addr, size, regs); - - if (kmemcheck_enabled == 2) - kmemcheck_enabled = 0; -} - -enum kmemcheck_method { - KMEMCHECK_READ, - KMEMCHECK_WRITE, -}; - -static void kmemcheck_access(struct pt_regs *regs, - unsigned long fallback_address, enum kmemcheck_method fallback_method) -{ - const uint8_t *insn; - const uint8_t *insn_primary; - unsigned int size; - - struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); - - /* Recursive fault -- ouch. */ - if (data->busy) { - kmemcheck_show_addr(fallback_address); - kmemcheck_error_save_bug(regs); - return; - } - - data->busy = true; - - insn = (const uint8_t *) regs->ip; - insn_primary = kmemcheck_opcode_get_primary(insn); - - kmemcheck_opcode_decode(insn, &size); - - switch (insn_primary[0]) { -#ifdef CONFIG_KMEMCHECK_BITOPS_OK - /* AND, OR, XOR */ - /* - * Unfortunately, these instructions have to be excluded from - * our regular checking since they access only some (and not - * all) bits. This clears out "bogus" bitfield-access warnings. - */ - case 0x80: - case 0x81: - case 0x82: - case 0x83: - switch ((insn_primary[1] >> 3) & 7) { - /* OR */ - case 1: - /* AND */ - case 4: - /* XOR */ - case 6: - kmemcheck_write(regs, fallback_address, size); - goto out; - - /* ADD */ - case 0: - /* ADC */ - case 2: - /* SBB */ - case 3: - /* SUB */ - case 5: - /* CMP */ - case 7: - break; - } - break; -#endif - - /* MOVS, MOVSB, MOVSW, MOVSD */ - case 0xa4: - case 0xa5: - /* - * These instructions are special because they take two - * addresses, but we only get one page fault. - */ - kmemcheck_copy(regs, regs->si, regs->di, size); - goto out; - - /* CMPS, CMPSB, CMPSW, CMPSD */ - case 0xa6: - case 0xa7: - kmemcheck_read(regs, regs->si, size); - kmemcheck_read(regs, regs->di, size); - goto out; - } - - /* - * If the opcode isn't special in any way, we use the data from the - * page fault handler to determine the address and type of memory - * access. - */ - switch (fallback_method) { - case KMEMCHECK_READ: - kmemcheck_read(regs, fallback_address, size); - goto out; - case KMEMCHECK_WRITE: - kmemcheck_write(regs, fallback_address, size); - goto out; - } - -out: - data->busy = false; -} - -bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - pte_t *pte; - - /* - * XXX: Is it safe to assume that memory accesses from virtual 86 - * mode or non-kernel code segments will _never_ access kernel - * memory (e.g. tracked pages)? For now, we need this to avoid - * invoking kmemcheck for PnP BIOS calls. - */ - if (regs->flags & X86_VM_MASK) - return false; - if (regs->cs != __KERNEL_CS) - return false; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return false; - - WARN_ON_ONCE(in_nmi()); - - if (error_code & 2) - kmemcheck_access(regs, address, KMEMCHECK_WRITE); - else - kmemcheck_access(regs, address, KMEMCHECK_READ); - - kmemcheck_show(regs); - return true; -} - -bool kmemcheck_trap(struct pt_regs *regs) -{ - if (!kmemcheck_active(regs)) - return false; - - /* We're done. */ - kmemcheck_hide(regs); - return true; -} diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c index df8109ddf7fe..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/opcode.c +++ b/arch/x86/mm/kmemcheck/opcode.c @@ -1,107 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include - -#include "opcode.h" - -static bool opcode_is_prefix(uint8_t b) -{ - return - /* Group 1 */ - b == 0xf0 || b == 0xf2 || b == 0xf3 - /* Group 2 */ - || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 - || b == 0x64 || b == 0x65 - /* Group 3 */ - || b == 0x66 - /* Group 4 */ - || b == 0x67; -} - -#ifdef CONFIG_X86_64 -static bool opcode_is_rex_prefix(uint8_t b) -{ - return (b & 0xf0) == 0x40; -} -#else -static bool opcode_is_rex_prefix(uint8_t b) -{ - return false; -} -#endif - -#define REX_W (1 << 3) - -/* - * This is a VERY crude opcode decoder. We only need to find the size of the - * load/store that caused our #PF and this should work for all the opcodes - * that we care about. Moreover, the ones who invented this instruction set - * should be shot. - */ -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) -{ - /* Default operand size */ - int operand_size_override = 4; - - /* prefixes */ - for (; opcode_is_prefix(*op); ++op) { - if (*op == 0x66) - operand_size_override = 2; - } - - /* REX prefix */ - if (opcode_is_rex_prefix(*op)) { - uint8_t rex = *op; - - ++op; - if (rex & REX_W) { - switch (*op) { - case 0x63: - *size = 4; - return; - case 0x0f: - ++op; - - switch (*op) { - case 0xb6: - case 0xbe: - *size = 1; - return; - case 0xb7: - case 0xbf: - *size = 2; - return; - } - - break; - } - - *size = 8; - return; - } - } - - /* escape opcode */ - if (*op == 0x0f) { - ++op; - - /* - * This is move with zero-extend and sign-extend, respectively; - * we don't have to think about 0xb6/0xbe, because this is - * already handled in the conditional below. - */ - if (*op == 0xb7 || *op == 0xbf) - operand_size_override = 2; - } - - *size = (*op & 1) ? operand_size_override : 1; -} - -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) -{ - /* skip prefixes */ - while (opcode_is_prefix(*op)) - ++op; - if (opcode_is_rex_prefix(*op)) - ++op; - return op; -} diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h index 51a1ce94c24a..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/opcode.h +++ b/arch/x86/mm/kmemcheck/opcode.h @@ -1,10 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H -#define ARCH__X86__MM__KMEMCHECK__OPCODE_H - -#include - -void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); -const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); - -#endif diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c index 8a03be90272a..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/pte.c +++ b/arch/x86/mm/kmemcheck/pte.c @@ -1,23 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include - -#include - -#include "pte.h" - -pte_t *kmemcheck_pte_lookup(unsigned long address) -{ - pte_t *pte; - unsigned int level; - - pte = lookup_address(address, &level); - if (!pte) - return NULL; - if (level != PG_LEVEL_4K) - return NULL; - if (!pte_hidden(*pte)) - return NULL; - - return pte; -} - diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h index b595612382c2..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/pte.h +++ b/arch/x86/mm/kmemcheck/pte.h @@ -1,11 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H -#define ARCH__X86__MM__KMEMCHECK__PTE_H - -#include - -#include - -pte_t *kmemcheck_pte_lookup(unsigned long address); - -#endif diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c index 7ce0be1f99eb..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/selftest.c +++ b/arch/x86/mm/kmemcheck/selftest.c @@ -1,71 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include "opcode.h" -#include "selftest.h" - -struct selftest_opcode { - unsigned int expected_size; - const uint8_t *insn; - const char *desc; -}; - -static const struct selftest_opcode selftest_opcodes[] = { - /* REP MOVS */ - {1, "\xf3\xa4", "rep movsb , "}, - {4, "\xf3\xa5", "rep movsl , "}, - - /* MOVZX / MOVZXD */ - {1, "\x66\x0f\xb6\x51\xf8", "movzwq , "}, - {1, "\x0f\xb6\x51\xf8", "movzwq , "}, - - /* MOVSX / MOVSXD */ - {1, "\x66\x0f\xbe\x51\xf8", "movswq , "}, - {1, "\x0f\xbe\x51\xf8", "movswq , "}, - -#ifdef CONFIG_X86_64 - /* MOVZX / MOVZXD */ - {1, "\x49\x0f\xb6\x51\xf8", "movzbq , "}, - {2, "\x49\x0f\xb7\x51\xf8", "movzbq , "}, - - /* MOVSX / MOVSXD */ - {1, "\x49\x0f\xbe\x51\xf8", "movsbq , "}, - {2, "\x49\x0f\xbf\x51\xf8", "movsbq , "}, - {4, "\x49\x63\x51\xf8", "movslq , "}, -#endif -}; - -static bool selftest_opcode_one(const struct selftest_opcode *op) -{ - unsigned size; - - kmemcheck_opcode_decode(op->insn, &size); - - if (size == op->expected_size) - return true; - - printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", - op->desc, op->expected_size, size); - return false; -} - -static bool selftest_opcodes_all(void) -{ - bool pass = true; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) - pass = pass && selftest_opcode_one(&selftest_opcodes[i]); - - return pass; -} - -bool kmemcheck_selftest(void) -{ - bool pass = true; - - pass = pass && selftest_opcodes_all(); - - return pass; -} diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h index 8d759aae453d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/selftest.h +++ b/arch/x86/mm/kmemcheck/selftest.h @@ -1,7 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H -#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H - -bool kmemcheck_selftest(void); - -#endif diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c deleted file mode 100644 index c2638a7d2c10..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.c +++ /dev/null @@ -1,173 +0,0 @@ -#include -#include -#include - -#include -#include - -#include "pte.h" -#include "shadow.h" - -/* - * Return the shadow address for the given address. Returns NULL if the - * address is not tracked. - * - * We need to be extremely careful not to follow any invalid pointers, - * because this function can be called for *any* possible address. - */ -void *kmemcheck_shadow_lookup(unsigned long address) -{ - pte_t *pte; - struct page *page; - - if (!virt_addr_valid(address)) - return NULL; - - pte = kmemcheck_pte_lookup(address); - if (!pte) - return NULL; - - page = virt_to_page(address); - if (!page->shadow) - return NULL; - return page->shadow + (address & (PAGE_SIZE - 1)); -} - -static void mark_shadow(void *address, unsigned int n, - enum kmemcheck_shadow status) -{ - unsigned long addr = (unsigned long) address; - unsigned long last_addr = addr + n - 1; - unsigned long page = addr & PAGE_MASK; - unsigned long last_page = last_addr & PAGE_MASK; - unsigned int first_n; - void *shadow; - - /* If the memory range crosses a page boundary, stop there. */ - if (page == last_page) - first_n = n; - else - first_n = page + PAGE_SIZE - addr; - - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, first_n); - - addr += first_n; - n -= first_n; - - /* Do full-page memset()s. */ - while (n >= PAGE_SIZE) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, PAGE_SIZE); - - addr += PAGE_SIZE; - n -= PAGE_SIZE; - } - - /* Do the remaining page, if any. */ - if (n > 0) { - shadow = kmemcheck_shadow_lookup(addr); - if (shadow) - memset(shadow, status, n); - } -} - -void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); -} - -void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); -} - -/* - * Fill the shadow memory of the given address such that the memory at that - * address is marked as being initialized. - */ -void kmemcheck_mark_initialized(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); -} -EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); - -void kmemcheck_mark_freed(void *address, unsigned int n) -{ - mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); -} - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); -} - -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; ++i) - kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); -} - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) -{ -#ifdef CONFIG_KMEMCHECK_PARTIAL_OK - uint8_t *x; - unsigned int i; - - x = shadow; - - /* - * Make sure _some_ bytes are initialized. Gcc frequently generates - * code to access neighboring bytes. - */ - for (i = 0; i < size; ++i) { - if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -#else - return kmemcheck_shadow_test_all(shadow, size); -#endif -} - -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - - /* All bytes must be initialized. */ - for (i = 0; i < size; ++i) { - if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) - return x[i]; - } - - return x[0]; -} - -void kmemcheck_shadow_set(void *shadow, unsigned int size) -{ - uint8_t *x; - unsigned int i; - - x = shadow; - for (i = 0; i < size; ++i) - x[i] = KMEMCHECK_SHADOW_INITIALIZED; -} diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index 49768dc18664..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h @@ -1,19 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H -#define ARCH__X86__MM__KMEMCHECK__SHADOW_H - -enum kmemcheck_shadow { - KMEMCHECK_SHADOW_UNALLOCATED, - KMEMCHECK_SHADOW_UNINITIALIZED, - KMEMCHECK_SHADOW_INITIALIZED, - KMEMCHECK_SHADOW_FREED, -}; - -void *kmemcheck_shadow_lookup(unsigned long address); - -enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); -enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, - unsigned int size); -void kmemcheck_shadow_set(void *shadow, unsigned int size); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index baeb872283d9..69c238210325 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } -extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); - -/* - * This version avoids touching any other tasklets. Needed for kmemcheck - * in order not to take any page faults while enqueueing this tasklet; - * consider VERY carefully whether you really need this or - * tasklet_hi_schedule()... - */ -static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) -{ - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - __tasklet_hi_schedule_first(t); -} - - static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 7b1d7bead7d9..ea32a7d3cf1b 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -1,172 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_KMEMCHECK_H -#define LINUX_KMEMCHECK_H - -#include -#include - -#ifdef CONFIG_KMEMCHECK -extern int kmemcheck_enabled; - -/* The slab-related functions. */ -void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); -void kmemcheck_free_shadow(struct page *page, int order); -void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size); -void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); - -void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, - gfp_t gfpflags); - -void kmemcheck_show_pages(struct page *p, unsigned int n); -void kmemcheck_hide_pages(struct page *p, unsigned int n); - -bool kmemcheck_page_is_tracked(struct page *p); - -void kmemcheck_mark_unallocated(void *address, unsigned int n); -void kmemcheck_mark_uninitialized(void *address, unsigned int n); -void kmemcheck_mark_initialized(void *address, unsigned int n); -void kmemcheck_mark_freed(void *address, unsigned int n); - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); - -int kmemcheck_show_addr(unsigned long address); -int kmemcheck_hide_addr(unsigned long address); - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; - -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ - -#else -#define kmemcheck_enabled 0 - -static inline void -kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) -{ -} - -static inline void -kmemcheck_free_shadow(struct page *page, int order) -{ -} - -static inline void -kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size) -{ -} - -static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, - size_t size) -{ -} - -static inline void kmemcheck_pagealloc_alloc(struct page *p, - unsigned int order, gfp_t gfpflags) -{ -} - -static inline bool kmemcheck_page_is_tracked(struct page *p) -{ - return false; -} - -static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_freed(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_unallocated_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - return true; -} - -#define kmemcheck_bitfield_begin(name) -#define kmemcheck_bitfield_end(name) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - } while (0) - -#endif /* CONFIG_KMEMCHECK */ - -#endif /* LINUX_KMEMCHECK_H */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 4e09821f9d9e..e89c3b0cff6d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -486,16 +486,6 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -void __tasklet_hi_schedule_first(struct tasklet_struct *t) -{ - BUG_ON(!irqs_disabled()); - - t->next = __this_cpu_read(tasklet_hi_vec.head); - __this_cpu_write(tasklet_hi_vec.head, t); - __raise_softirq_irqoff(HI_SOFTIRQ); -} -EXPORT_SYMBOL(__tasklet_hi_schedule_first); - static __latent_entropy void tasklet_action(struct softirq_action *a) { struct tasklet_struct *list; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 56aca862c4f5..069550540a39 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -1173,15 +1172,6 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_thousand, }, -#endif -#ifdef CONFIG_KMEMCHECK - { - .procname = "kmemcheck", - .data = &kmemcheck_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif { .procname = "panic_on_warn", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 00cb02daeddd..62d0e25c054c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -504,7 +504,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT config DEBUG_SLAB bool "Debug slab memory allocations" - depends on DEBUG_KERNEL && SLAB && !KMEMCHECK + depends on DEBUG_KERNEL && SLAB help Say Y here to have the kernel do limited verification on memory allocation as well as poisoning memory on free to catch use of freed @@ -516,7 +516,7 @@ config DEBUG_SLAB_LEAK config SLUB_DEBUG_ON bool "SLUB debugging on by default" - depends on SLUB && SLUB_DEBUG && !KMEMCHECK + depends on SLUB && SLUB_DEBUG default n help Boot with debugging on by default. SLUB boots by default with @@ -730,8 +730,6 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". -source "lib/Kconfig.kmemcheck" - source "lib/Kconfig.kasan" endmenu # "Memory Debugging" diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck deleted file mode 100644 index 846e039a86b4..000000000000 --- a/lib/Kconfig.kmemcheck +++ /dev/null @@ -1,94 +0,0 @@ -config HAVE_ARCH_KMEMCHECK - bool - -if HAVE_ARCH_KMEMCHECK - -menuconfig KMEMCHECK - bool "kmemcheck: trap use of uninitialized memory" - depends on DEBUG_KERNEL - depends on !X86_USE_3DNOW - depends on SLUB || SLAB - depends on !CC_OPTIMIZE_FOR_SIZE - depends on !FUNCTION_TRACER - select FRAME_POINTER - select STACKTRACE - default n - help - This option enables tracing of dynamically allocated kernel memory - to see if memory is used before it has been given an initial value. - Be aware that this requires half of your memory for bookkeeping and - will insert extra code at *every* read and write to tracked memory - thus slow down the kernel code (but user code is unaffected). - - The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable - or enable kmemcheck at boot-time. If the kernel is started with - kmemcheck=0, the large memory and CPU overhead is not incurred. - -choice - prompt "kmemcheck: default mode at boot" - depends on KMEMCHECK - default KMEMCHECK_ONESHOT_BY_DEFAULT - help - This option controls the default behaviour of kmemcheck when the - kernel boots and no kmemcheck= parameter is given. - -config KMEMCHECK_DISABLED_BY_DEFAULT - bool "disabled" - depends on KMEMCHECK - -config KMEMCHECK_ENABLED_BY_DEFAULT - bool "enabled" - depends on KMEMCHECK - -config KMEMCHECK_ONESHOT_BY_DEFAULT - bool "one-shot" - depends on KMEMCHECK - help - In one-shot mode, only the first error detected is reported before - kmemcheck is disabled. - -endchoice - -config KMEMCHECK_QUEUE_SIZE - int "kmemcheck: error queue size" - depends on KMEMCHECK - default 64 - help - Select the maximum number of errors to store in the queue. Since - errors can occur virtually anywhere and in any context, we need a - temporary storage area which is guarantueed not to generate any - other faults. The queue will be emptied as soon as a tasklet may - be scheduled. If the queue is full, new error reports will be - lost. - -config KMEMCHECK_SHADOW_COPY_SHIFT - int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" - depends on KMEMCHECK - range 2 8 - default 5 - help - Select the number of shadow bytes to save along with each entry of - the queue. These bytes indicate what parts of an allocation are - initialized, uninitialized, etc. and will be displayed when an - error is detected to help the debugging of a particular problem. - -config KMEMCHECK_PARTIAL_OK - bool "kmemcheck: allow partially uninitialized memory" - depends on KMEMCHECK - default y - help - This option works around certain GCC optimizations that produce - 32-bit reads from 16-bit variables where the upper 16 bits are - thrown away afterwards. This may of course also hide some real - bugs. - -config KMEMCHECK_BITOPS_OK - bool "kmemcheck: allow bit-field manipulation" - depends on KMEMCHECK - default n - help - This option silences warnings that would be generated for bit-field - accesses where not all the bits are initialized at the same time. - This may also hide some real bugs. - -endif diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5b0adf1435de..e5e606ee5f71 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -11,7 +11,6 @@ config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC - depends on !KMEMCHECK select PAGE_EXTENSION select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- diff --git a/mm/Makefile b/mm/Makefile index 4659b93cba43..e7ebd176fb93 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -17,7 +17,6 @@ KCOV_INSTRUMENT_slub.o := n KCOV_INSTRUMENT_page_alloc.o := n KCOV_INSTRUMENT_debug-pagealloc.o := n KCOV_INSTRUMENT_kmemleak.o := n -KCOV_INSTRUMENT_kmemcheck.o := n KCOV_INSTRUMENT_memcontrol.o := n KCOV_INSTRUMENT_mmzone.o := n KCOV_INSTRUMENT_vmstat.o := n @@ -70,7 +69,6 @@ obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += page_poison.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o -obj-$(CONFIG_KMEMCHECK) += kmemcheck.o obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index b3a4d61d341c..cec594032515 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -1,126 +1 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include "slab.h" -#include - -void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) -{ - struct page *shadow; - int pages; - int i; - - pages = 1 << order; - - /* - * With kmemcheck enabled, we need to allocate a memory area for the - * shadow bits as well. - */ - shadow = alloc_pages_node(node, flags, order); - if (!shadow) { - if (printk_ratelimit()) - pr_err("kmemcheck: failed to allocate shadow bitmap\n"); - return; - } - - for(i = 0; i < pages; ++i) - page[i].shadow = page_address(&shadow[i]); - - /* - * Mark it as non-present for the MMU so that our accesses to - * this memory will trigger a page fault and let us analyze - * the memory accesses. - */ - kmemcheck_hide_pages(page, pages); -} - -void kmemcheck_free_shadow(struct page *page, int order) -{ - struct page *shadow; - int pages; - int i; - - if (!kmemcheck_page_is_tracked(page)) - return; - - pages = 1 << order; - - kmemcheck_show_pages(page, pages); - - shadow = virt_to_page(page[0].shadow); - - for(i = 0; i < pages; ++i) - page[i].shadow = NULL; - - __free_pages(shadow, order); -} - -void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size) -{ - if (unlikely(!object)) /* Skip object if allocation failed */ - return; - - /* - * Has already been memset(), which initializes the shadow for us - * as well. - */ - if (gfpflags & __GFP_ZERO) - return; - - /* No need to initialize the shadow of a non-tracked slab. */ - if (s->flags & SLAB_NOTRACK) - return; - - if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { - /* - * Allow notracked objects to be allocated from - * tracked caches. Note however that these objects - * will still get page faults on access, they just - * won't ever be flagged as uninitialized. If page - * faults are not acceptable, the slab cache itself - * should be marked NOTRACK. - */ - kmemcheck_mark_initialized(object, size); - } else if (!s->ctor) { - /* - * New objects should be marked uninitialized before - * they're returned to the called. - */ - kmemcheck_mark_uninitialized(object, size); - } -} - -void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) -{ - /* TODO: RCU freeing is unsupported for now; hide false positives. */ - if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) - kmemcheck_mark_freed(object, size); -} - -void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, - gfp_t gfpflags) -{ - int pages; - - if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) - return; - - pages = 1 << order; - - /* - * NOTE: We choose to track GFP_ZERO pages too; in fact, they - * can become uninitialized by copying uninitialized memory - * into them. - */ - - /* XXX: Can use zone->node for node? */ - kmemcheck_alloc_shadow(page, order, gfpflags, -1); - - if (gfpflags & __GFP_ZERO) - kmemcheck_mark_initialized_pages(page, pages); - else - kmemcheck_mark_uninitialized_pages(page, pages); -} diff --git a/mm/slub.c b/mm/slub.c index 38acb5c114ed..41c01690d116 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1369,7 +1369,7 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) * So in order to make the debug calls that expect irqs to be * disabled we need to disable interrupts temporarily. */ -#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) +#ifdef CONFIG_LOCKDEP { unsigned long flags; @@ -1397,8 +1397,7 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s, * Compiler cannot detect this function can be removed if slab_free_hook() * evaluates to nothing. Thus, catch all relevant config debug options here. */ -#if defined(CONFIG_KMEMCHECK) || \ - defined(CONFIG_LOCKDEP) || \ +#if defined(CONFIG_LOCKDEP) || \ defined(CONFIG_DEBUG_KMEMLEAK) || \ defined(CONFIG_DEBUG_OBJECTS_FREE) || \ defined(CONFIG_KASAN) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 9d3eafea58f0..8323ff9dec71 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -2182,8 +2182,6 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; $nested =~ s/\/\*.*?\*\///gos; - # strip kmemcheck_bitfield_{begin,end}.*; - $members =~ s/kmemcheck_bitfield_.*?;//gos; # strip attributes $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; $members =~ s/__aligned\s*\([^;]*\)//gos; diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h index 2bccd2c7b897..ea32a7d3cf1b 100644 --- a/tools/include/linux/kmemcheck.h +++ b/tools/include/linux/kmemcheck.h @@ -1,9 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_ -#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_ - -static inline void kmemcheck_mark_initialized(void *address, unsigned int n) -{ -} - -#endif From d6d0c0a618a15ba4dadd7ad6131a814359a53973 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Dec 2017 11:27:57 +0100 Subject: [PATCH 525/770] kmemcheck: rip it out for real commit f335195adf043168ee69d78ea72ac3e30f0c57ce upstream. Commit 4675ff05de2d ("kmemcheck: rip it out") has removed the code but for some reason SPDX header stayed in place. This looks like a rebase mistake in the mmotm tree or the merge mistake. Let's drop those leftovers as well. Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kmemcheck.h | 1 - arch/x86/mm/kmemcheck/error.c | 1 - arch/x86/mm/kmemcheck/error.h | 1 - arch/x86/mm/kmemcheck/opcode.c | 1 - arch/x86/mm/kmemcheck/opcode.h | 1 - arch/x86/mm/kmemcheck/pte.c | 1 - arch/x86/mm/kmemcheck/pte.h | 1 - arch/x86/mm/kmemcheck/selftest.c | 1 - arch/x86/mm/kmemcheck/selftest.h | 1 - arch/x86/mm/kmemcheck/shadow.h | 1 - include/linux/kmemcheck.h | 1 - mm/kmemcheck.c | 1 - tools/include/linux/kmemcheck.h | 1 - 13 files changed, 13 deletions(-) delete mode 100644 arch/x86/include/asm/kmemcheck.h delete mode 100644 arch/x86/mm/kmemcheck/error.c delete mode 100644 arch/x86/mm/kmemcheck/error.h delete mode 100644 arch/x86/mm/kmemcheck/opcode.c delete mode 100644 arch/x86/mm/kmemcheck/opcode.h delete mode 100644 arch/x86/mm/kmemcheck/pte.c delete mode 100644 arch/x86/mm/kmemcheck/pte.h delete mode 100644 arch/x86/mm/kmemcheck/selftest.c delete mode 100644 arch/x86/mm/kmemcheck/selftest.h delete mode 100644 arch/x86/mm/kmemcheck/shadow.h delete mode 100644 include/linux/kmemcheck.h delete mode 100644 mm/kmemcheck.c delete mode 100644 tools/include/linux/kmemcheck.h diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/include/asm/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index cec594032515..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c deleted file mode 100644 index cec594032515..000000000000 --- a/mm/kmemcheck.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/tools/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ From 208beef6d8f223a0cc57e2fcecc9ca28fc778f50 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 31 Jan 2018 08:03:10 -0800 Subject: [PATCH 526/770] x86/mm: Rename flush_tlb_single() and flush_tlb_one() to __flush_tlb_one_[user|kernel]() commit 1299ef1d8870d2d9f09a5aadf2f8b2c887c2d033 upstream. flush_tlb_single() and flush_tlb_one() sound almost identical, but they really mean "flush one user translation" and "flush one kernel translation". Rename them to flush_tlb_one_user() and flush_tlb_one_kernel() to make the semantics more obvious. [ I was looking at some PTI-related code, and the flush-one-address code is unnecessarily hard to understand because the names of the helpers are uninformative. This came up during PTI review, but no one got around to doing it. ] Signed-off-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Eduardo Valentin Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Linux-MM Cc: Rik van Riel Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/3303b02e3c3d049dc5235d5651e0ae6d29a34354.1517414378.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/paravirt.h | 4 ++-- arch/x86/include/asm/paravirt_types.h | 2 +- arch/x86/include/asm/pgtable_32.h | 2 +- arch/x86/include/asm/tlbflush.h | 27 ++++++++++++++++++++------- arch/x86/kernel/acpi/apei.c | 2 +- arch/x86/kernel/paravirt.c | 6 +++--- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kmmio.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- arch/x86/mm/tlb.c | 6 +++--- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/xen/mmu_pv.c | 6 +++--- include/trace/events/xen.h | 2 +- 14 files changed, 40 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df375b615..554841fab717 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void) { PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); } -static inline void __flush_tlb_single(unsigned long addr) +static inline void __flush_tlb_one_user(unsigned long addr) { - PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); + PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr); } static inline void flush_tlb_others(const struct cpumask *cpumask, diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6ec54d01972d..f624f1f10316 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -217,7 +217,7 @@ struct pv_mmu_ops { /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_one_user)(unsigned long addr); void (*flush_tlb_others)(const struct cpumask *cpus, const struct flush_tlb_info *info); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c0620aec2..e55466760ff8 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -61,7 +61,7 @@ void paging_init(void); #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one((vaddr)); \ + __flush_tlb_one_kernel((vaddr)); \ } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 4405c4b308e8..704f31315dde 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) #else #define __flush_tlb() __native_flush_tlb() #define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif static inline bool tlb_defer_switch_to_init_mm(void) @@ -397,7 +397,7 @@ static inline void __native_flush_tlb_global(void) /* * flush one page in the user mapping */ -static inline void __native_flush_tlb_single(unsigned long addr) +static inline void __native_flush_tlb_one_user(unsigned long addr) { u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -434,18 +434,31 @@ static inline void __flush_tlb_all(void) /* * flush one page in the kernel mapping */ -static inline void __flush_tlb_one(unsigned long addr) +static inline void __flush_tlb_one_kernel(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); + + /* + * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its + * paravirt equivalent. Even with PCID, this is sufficient: we only + * use PCID if we also use global PTEs for the kernel mapping, and + * INVLPG flushes global translations across all address spaces. + * + * If PTI is on, then the kernel is mapped with non-global PTEs, and + * __flush_tlb_one_user() will flush the given address for the current + * kernel address space and for its usermode counterpart, but it does + * not flush it for other address spaces. + */ + __flush_tlb_one_user(addr); if (!static_cpu_has(X86_FEATURE_PTI)) return; /* - * __flush_tlb_single() will have cleared the TLB entry for this ASID, - * but since kernel space is replicated across all, we must also - * invalidate all others. + * See above. We need to propagate the flush to all other address + * spaces. In principle, we only need to propagate it to kernelmode + * address spaces, but the extra bookkeeping we would need is not + * worth it. */ invalidate_other_asid(); } diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index ea3046e0b0cf..28d70ac93faf 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -55,5 +55,5 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) void arch_apei_flush_tlb_one(unsigned long addr) { - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 19a3e8f961c7..e1df9ef5d78c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -190,9 +190,9 @@ static void native_flush_tlb_global(void) __native_flush_tlb_global(); } -static void native_flush_tlb_single(unsigned long addr) +static void native_flush_tlb_one_user(unsigned long addr) { - __native_flush_tlb_single(addr); + __native_flush_tlb_one_user(addr); } struct static_key paravirt_steal_enabled; @@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, + .flush_tlb_one_user = native_flush_tlb_one_user, .flush_tlb_others = native_flush_tlb_others, .pgd_alloc = __paravirt_pgd_alloc, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5fa3a58b5d78..fe85d1204db8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 34f0e1847dd6..bb120e59c597 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -749,5 +749,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx, set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c21c2ed04612..aa44c3aa4cd5 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->addr); + __flush_tlb_one_kernel(f->addr); return 0; } diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index c3c5274410a9..9bb7f0ab9fe6 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } unsigned long __FIXADDR_TOP = 0xfffff000; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 012d02624848..0c936435ea93 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -492,7 +492,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, * flush that changes context.tlb_gen from 2 to 3. If they get * processed on this CPU in reverse order, we'll see * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. - * If we were to use __flush_tlb_single() and set local_tlb_gen to + * If we were to use __flush_tlb_one_user() and set local_tlb_gen to * 3, we'd be break the invariant: we'd update local_tlb_gen above * 1 without the full flush that's needed for tlb_gen 2. * @@ -513,7 +513,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, addr = f->start; while (addr < f->end) { - __flush_tlb_single(addr); + __flush_tlb_one_user(addr); addr += PAGE_SIZE; } if (local) @@ -660,7 +660,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 8538a6723171..7d5d53f36a7a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_single(msg->address); + __flush_tlb_one_user(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index a0e2b8c6e5c7..d0b943a6b117 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void) preempt_enable(); } -static void xen_flush_tlb_single(unsigned long addr) +static void xen_flush_tlb_one_user(unsigned long addr) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_single(addr); + trace_xen_mmu_flush_tlb_one_user(addr); preempt_disable(); @@ -2360,7 +2360,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_others = xen_flush_tlb_others, .pgd_alloc = xen_pgd_alloc, diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index a7c8b452aab9..d791863b62fc 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -365,7 +365,7 @@ TRACE_EVENT(xen_mmu_flush_tlb, TP_printk("%s", "") ); -TRACE_EVENT(xen_mmu_flush_tlb_single, +TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_PROTO(unsigned long addr), TP_ARGS(addr), TP_STRUCT__entry( From 73f231c7ee696cdc53ee0922e6d3bf0480a5ae90 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Mon, 18 Dec 2017 16:34:10 +0800 Subject: [PATCH 527/770] selftests/x86/mpx: Fix incorrect bounds with old _sigfault commit 961888b1d76d84efc66a8f5604b06ac12ac2f978 upstream. For distributions with old userspace header files, the _sigfault structure is different. mpx-mini-test fails with the following error: [root@Purley]# mpx-mini-test_64 tabletest XSAVE is supported by HW & OS XSAVE processor supported state mask: 0x2ff XSAVE OS supported state mask: 0x2ff BNDREGS: size: 64 user: 1 supervisor: 0 aligned: 0 BNDCSR: size: 64 user: 1 supervisor: 0 aligned: 0 starting mpx bounds table test ERROR: siginfo bounds do not match shadow bounds for register 0 Fix it by using the correct offset of _lower/_upper in _sigfault. RHEL needs this patch to work. Signed-off-by: Rui Wang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@linux.intel.com Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test") Link: http://lkml.kernel.org/r/1513586050-1641-1-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/mpx-mini-test.c | 32 +++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index ec0f6b45ce8b..9c0325e1ea68 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si) return si->si_upper; } #else + +/* + * This deals with old version of _sigfault in some distros: + * + +old _sigfault: + struct { + void *si_addr; + } _sigfault; + +new _sigfault: + struct { + void __user *_addr; + int _trapno; + short _addr_lsb; + union { + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + __u32 _pkey; + }; + } _sigfault; + * + */ + static inline void **__si_bounds_hack(siginfo_t *si) { void *sigfault = &si->_sifields._sigfault; void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - void **__si_lower = end_sigfault; + int *trapno = (int*)end_sigfault; + /* skip _trapno and _addr_lsb */ + void **__si_lower = (void**)(trapno + 2); return __si_lower; } @@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si) static inline void *__si_bounds_upper(siginfo_t *si) { - return (*__si_bounds_hack(si)) + sizeof(void *); + return *(__si_bounds_hack(si) + 1); } #endif From 325cbb04dc61692ba01445c34fd551ad91df9e6d Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Mon, 1 Jan 2018 09:52:10 +0800 Subject: [PATCH 528/770] x86/cpu: Rename cpu_data.x86_mask to cpu_data.x86_stepping commit b399151cb48db30ad1e0e93dd40d68c6d007b637 upstream. x86_mask is a confusing name which is hard to associate with the processor's stepping. Additionally, correct an indent issue in lib/cpu.c. Signed-off-by: Jia Zhang [ Updated it to more recent kernels. ] Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1514771530-70829-1-git-send-email-qianyue.zj@alibaba-inc.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/lbr.c | 2 +- arch/x86/events/intel/p6.c | 2 +- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/amd_nb.c | 2 +- arch/x86/kernel/apic/apic.c | 6 +++--- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/cpu/amd.c | 28 +++++++++++++-------------- arch/x86/kernel/cpu/centaur.c | 4 ++-- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/cpu/intel.c | 18 ++++++++--------- arch/x86/kernel/cpu/intel_rdt.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- arch/x86/kernel/cpu/proc.c | 4 ++-- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/lib/cpu.c | 2 +- drivers/char/hw_random/via-rng.c | 2 +- drivers/cpufreq/acpi-cpufreq.c | 2 +- drivers/cpufreq/longhaul.c | 6 +++--- drivers/cpufreq/p4-clockmod.c | 2 +- drivers/cpufreq/powernow-k7.c | 2 +- drivers/cpufreq/speedstep-centrino.c | 4 ++-- drivers/cpufreq/speedstep-lib.c | 6 +++--- drivers/crypto/padlock-aes.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/hwmon/coretemp.c | 6 +++--- drivers/hwmon/hwmon-vid.c | 2 +- drivers/hwmon/k10temp.c | 2 +- drivers/hwmon/k8temp.c | 2 +- drivers/video/fbdev/geode/video_gx.c | 2 +- 35 files changed, 73 insertions(+), 73 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4f139c..1c2558430cf0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu) break; case INTEL_FAM6_SANDYBRIDGE_X: - switch (cpu_data(cpu).x86_mask) { + switch (cpu_data(cpu).x86_stepping) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index ae64d0b69729..cf372b90557e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void) * on PMU interrupt */ if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { + && boot_cpu_data.x86_stepping < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a5604c352930..408879b0c0d4 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = { static __init void p6_pmu_rdpmc_quirk(void) { - if (boot_cpu_data.x86_mask < 9) { + if (boot_cpu_data.x86_stepping < 9) { /* * PPro erratum 26; fixed in stepping 9 and above. */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d0ec9df1cbe..f077401869ee 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) if (boot_cpu_data.x86 == 0x0F && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86_model <= 0x05 && - boot_cpu_data.x86_mask < 0x0A) + boot_cpu_data.x86_stepping < 0x0A) return 1; else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) return 1; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index db3e68bcfd9b..18852c2535ce 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -91,7 +91,7 @@ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; - __u8 x86_mask; + __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6db28f17ff28..c88e0b127810 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -235,7 +235,7 @@ int amd_cache_northbridges(void) if (boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model >= 0x8 && (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) + boot_cpu_data.x86_stepping >= 0x1)) amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 89c7c8569e5e..5942aa5f569b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -553,7 +553,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static u32 hsx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ case 0x04: return 0x0f; /* EX */ } @@ -563,7 +563,7 @@ static u32 hsx_deadline_rev(void) static u32 bdx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; case 0x03: return 0x0700000e; case 0x04: return 0x0f00000c; @@ -575,7 +575,7 @@ static u32 bdx_deadline_rev(void) static u32 skx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; case 0x04: return 0x02000014; } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fa1261eefa16..f91ba53e06c8 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,7 +18,7 @@ void foo(void) OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ea831c858195..e7d5a7883632 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); @@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", @@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) if (cpu_has(c, X86_FEATURE_MP)) return; @@ -583,7 +583,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -769,7 +769,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c) * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. */ - if (c->x86_model <= 1 && c->x86_mask <= 1) + if (c->x86_model <= 1 && c->x86_stepping <= 1) set_cpu_cap(c, X86_FEATURE_CPB); } @@ -880,11 +880,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; @@ -1021,7 +1021,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_stepping; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 68bc6d9b3132..595be776727d 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -136,7 +136,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: - switch (c->x86_mask) { + switch (c->x86_stepping) { default: name = "2"; break; @@ -211,7 +211,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) * - Note, it seems this may only be in engineering samples. */ if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) + (c->x86_stepping == 1) && (size == 65)) size -= 1; return size; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 92b66e21bae5..74dcb2127fd8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -707,7 +707,7 @@ void cpu_detect(struct cpuinfo_x86 *c) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = x86_family(tfms); c->x86_model = x86_model(tfms); - c->x86_mask = x86_stepping(tfms); + c->x86_stepping = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -1162,7 +1162,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; @@ -1353,8 +1353,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - if (c->x86_mask || c->cpuid_level >= 0) - pr_cont(", stepping: 0x%x)\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + pr_cont(", stepping: 0x%x)\n", c->x86_stepping); else pr_cont(")\n"); } diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6b4bb335641f..8949b7ae6d92 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* common case step number/rev -- exceptions handled below */ c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; + c->x86_stepping = dir1 & 0xf; /* Now cook; the original recipe is by Channing Corn, from Cyrix. * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ef796f14f7ae..d19e903214b4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -146,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_mask == spectre_bad_microcodes[i].stepping) + c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; @@ -193,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -209,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -307,7 +307,7 @@ int ppro_with_ram_bug(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { + boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } @@ -324,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs @@ -367,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -385,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -400,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Specification Update"). */ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -647,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c) case 6: if (l2 == 128) p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) + else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 99442370de40..18dd8f22e353 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -771,7 +771,7 @@ static __init void rdt_quirks(void) cache_alloc_hsw_probe(); break; case INTEL_FAM6_SKYLAKE_X: - if (boot_cpu_data.x86_mask <= 4) + if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); } } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f7c55b0e753a..b94279bb5c04 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu) */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && - c->x86_mask == 0x01 && + c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); + c->x86, c->x86_model, c->x86_stepping); if (request_firmware_direct(&firmware, name, device)) { pr_debug("data file %s load failed\n", name); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fdc55215d44d..e12ee86906c6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { + boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 40d5a8a75212..7468de429087 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -711,8 +711,8 @@ void __init mtrr_bp_init(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) + (boot_cpu_data.x86_stepping == 0x3 || + boot_cpu_data.x86_stepping == 0x4)) phys_addr = 36; size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7ecedafa1c8..ee4cc388e8d3 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->x86_model, c->x86_model_id[0] ? c->x86_model_id : "unknown"); - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_stepping); else seq_puts(m, "stepping\t: unknown\n"); if (c->microcode) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c29020907886..b59e4fb40fd9 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -37,7 +37,7 @@ #define X86 new_cpu_data+CPUINFO_x86 #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor #define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability @@ -332,7 +332,7 @@ ENTRY(startup_32_smp) shrb $4,%al movb %al,X86_MODEL andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK + movb %cl,X86_STEPPING movl %edx,X86_CAPABILITY .Lis486: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3a4b12809ab5..bc6bc6689e68 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -407,7 +407,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index d6f848d1211d..2dd1fe13a37b 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) { unsigned int fam, model; - fam = x86_family(sig); + fam = x86_family(sig); model = (sig >> 4) & 0xf; diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index d1f5bb534e0e..6e9df558325b 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -162,7 +162,7 @@ static int via_rng_init(struct hwrng *rng) /* Enable secondary noise source on CPUs where it is present. */ /* Nehemiah stepping 8 and higher */ - if ((c->x86_model == 9) && (c->x86_mask > 7)) + if ((c->x86_model == 9) && (c->x86_stepping > 7)) lo |= VIA_NOISESRC2; /* Esther */ diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 3a2ca0f79daf..d0c34df0529c 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -629,7 +629,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8)) { + (c->x86_stepping == 8)) { pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); return -ENODEV; } diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c46a12df40dd..d5e27bc7585a 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 7: - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0: longhaul_version = TYPE_LONGHAUL_V1; cpu_model = CPU_SAMUEL2; @@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 1 ... 15: longhaul_version = TYPE_LONGHAUL_V2; - if (c->x86_mask < 8) { + if (c->x86_stepping < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; } else { @@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) numscales = 32; memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0 ... 1: cpu_model = CPU_NEHEMIAH; cpuname = "C3 'Nehemiah A' [C5XLOE]"; diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index fd77812313f3..a25741b1281b 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) #endif /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping; switch (cpuid) { case 0x0f07: case 0x0f0a: diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 80ac313e6c59..302e9ce793a0 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -131,7 +131,7 @@ static int check_powernow(void) return 0; } - if ((c->x86_model == 6) && (c->x86_mask == 0)) { + if ((c->x86_model == 6) && (c->x86_stepping == 0)) { pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); have_a0 = 1; } diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 41bc5397f4bb..4fa5adf16c70 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -37,7 +37,7 @@ struct cpu_id { __u8 x86; /* CPU family */ __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ + __u8 x86_stepping; /* stepping */ }; enum { @@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, { if ((c->x86 == x->x86) && (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) + (c->x86_stepping == x->x86_stepping)) return 1; return 0; } diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index ccab452a4ef5..dd7bb00991f4 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void) ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 4: /* * B-stepping [M-P4-M] @@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void) msr_lo, msr_hi); if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { + if (c->x86_stepping == 0x01) { pr_debug("early PIII version\n"); return SPEEDSTEP_CPU_PIII_C_EARLY; } else diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index b3869748cc6b..c939f18f70cc 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -512,7 +512,7 @@ static int __init padlock_init(void) printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); - if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { + if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) { ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ac2f30295efe..59ce32e405ac 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3147,7 +3147,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) struct amd64_family_type *fam_type = NULL; pvt->ext_model = boot_cpu_data.x86_model >> 4; - pvt->stepping = boot_cpu_data.x86_mask; + pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index c13a4fd86b3c..a42744c7665b 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -268,13 +268,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { const struct tjmax_model *tm = &tjmax_model_table[i]; if (c->x86_model == tm->model && - (tm->mask == ANY || c->x86_mask == tm->mask)) + (tm->mask == ANY || c->x86_stepping == tm->mask)) return tm->tjmax; } /* Early chips have no MSR for TjMax */ - if (c->x86_model == 0xf && c->x86_mask < 4) + if (c->x86_model == 0xf && c->x86_stepping < 4) usemsr_ee = 0; if (c->x86_model > 0xe && usemsr_ee) { @@ -425,7 +425,7 @@ static int chk_ucode_version(unsigned int cpu) * Readings might stop update when processor visited too deep sleep, * fixed for stepping D0 (6EC). */ - if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { + if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); return -ENODEV; } diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index ef91b8a67549..84e91286fc4f 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -293,7 +293,7 @@ u8 vid_which_vrm(void) if (c->x86 < 6) /* Any CPU with family lower than 6 */ return 0; /* doesn't have VID */ - vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); + vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor); if (vrm_ret == 134) vrm_ret = get_via_model_d_vrm(); if (vrm_ret == 0) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index ce3b91f22e30..5c740996aa62 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -179,7 +179,7 @@ static bool has_erratum_319(struct pci_dev *pdev) * and AM3 formats, but that's the best we can do. */ return boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); + (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); } static int k10temp_probe(struct pci_dev *pdev, diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index 5a632bcf869b..e59f9113fb93 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev, return -ENOMEM; model = boot_cpu_data.x86_model; - stepping = boot_cpu_data.x86_mask; + stepping = boot_cpu_data.x86_stepping; /* feature available since SH-C0, exclude older revisions */ if ((model == 4 && stepping == 0) || diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c index 6082f653c68a..67773e8bbb95 100644 --- a/drivers/video/fbdev/geode/video_gx.c +++ b/drivers/video/fbdev/geode/video_gx.c @@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info) int timeout = 1000; /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ - if (cpu_data(0).x86_mask == 1) { + if (cpu_data(0).x86_stepping == 1) { pll_table = gx_pll_table_14MHz; pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); } else { From 7d7ebee6ce113fff252f1b923fbbbad83b1221b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Feb 2018 10:14:17 +0300 Subject: [PATCH 529/770] x86/spectre: Fix an error message commit 9de29eac8d2189424d81c0d840cd0469aa3d41c8 upstream. If i == ARRAY_SIZE(mitigation_options) then we accidentally print garbage from one space beyond the end of the mitigation_options[] array. Signed-off-by: Dan Carpenter Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Fixes: 9005c6834c0f ("x86/spectre: Simplify spectre_v2 command line parsing") Link: http://lkml.kernel.org/r/20180214071416.GA26677@mwanda Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4acf16a76d1e..d71c8b54b696 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -174,7 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", arg); return SPECTRE_V2_CMD_AUTO; } } From 231d0c70be748bba8982b6bda3daa613bf5c8a29 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Feb 2018 13:22:08 -0600 Subject: [PATCH 530/770] x86/cpu: Change type of x86_cache_size variable to unsigned int commit 24dbc6000f4b9b0ef5a9daecb161f1907733765a upstream. Currently, x86_cache_size is of type int, which makes no sense as we will never have a valid cache size equal or less than 0. So instead of initializing this variable to -1, it can perfectly be initialized to 0 and use it as an unsigned variable instead. Suggested-by: Thomas Gleixner Signed-off-by: Gustavo A. R. Silva Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Addresses-Coverity-ID: 1464429 Link: http://lkml.kernel.org/r/20180213192208.GA26414@embeddedor.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/proc.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 18852c2535ce..15fc074bd628 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -109,7 +109,7 @@ struct cpuinfo_x86 { char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ - int x86_cache_size; + unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values: */ int x86_cache_max_rmid; /* max index */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 74dcb2127fd8..651b7afed4da 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1160,7 +1160,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) int i; c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; + c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index b94279bb5c04..a15db2b4e0d6 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = { static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) { - u64 llc_size = c->x86_cache_size * 1024; + u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index ee4cc388e8d3..2c8522a39ed5 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + if (c->x86_cache_size) + seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); show_cpuinfo_core(m, c, cpu); show_cpuinfo_misc(m, c); From 6c0398cfebf5d3f6d5ebc6924a3faf26a12935f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 Feb 2018 08:39:11 +0100 Subject: [PATCH 531/770] x86/entry/64: Fix CR3 restore in paranoid_exit() commit e48657573481a5dff7cfdc3d57005c80aa816500 upstream. Josh Poimboeuf noticed the following bug: "The paranoid exit code only restores the saved CR3 when it switches back to the user GS. However, even in the kernel GS case, it's possible that it needs to restore a user CR3, if for example, the paranoid exception occurred in the syscall exit path between SWITCH_TO_USER_CR3_STACK and SWAPGS." Josh also confirmed via targeted testing that it's possible to hit this bug. Fix the bug by also restoring CR3 in the paranoid_exit_no_swapgs branch. The reason we haven't seen this bug reported by users yet is probably because "paranoid" entry points are limited to the following cases: idtentry double_fault do_double_fault has_error_code=1 paranoid=2 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry machine_check do_mce has_error_code=0 paranoid=1 Amongst those entry points only machine_check is one that will interrupt an IRQS-off critical section asynchronously - and machine check events are rare. The other main asynchronous entries are NMI entries, which can be very high-freq with perf profiling, but they are special: they don't use the 'idtentry' macro but are open coded and restore user CR3 unconditionally so don't have this bug. Reported-and-tested-by: Josh Poimboeuf Reviewed-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180214073910.boevmg65upbk3vqb@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c5ef704df069..68a2d76e4f8f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1162,6 +1162,7 @@ ENTRY(paranoid_exit) jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel END(paranoid_exit) From f02c3f7f6d30fb7d0f245594c7d4a033763baa67 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 17 Jan 2018 23:52:03 -0500 Subject: [PATCH 532/770] drm/ttm: Don't add swapped BOs to swap-LRU list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fd5002d6a3c602664b07668a24df4ef7a43bf078 upstream. A BO that's already swapped would be added back to the swap-LRU list for example if its validation failed under high memory pressure. This could later lead to swapping it out again and leaking previous swap storage. This commit adds a condition to prevent that from happening. v2: Check page_flags instead of swap_storage Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c088703777e2..68eed684dff5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -175,7 +175,8 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) list_add_tail(&bo->lru, &man->lru[bo->priority]); kref_get(&bo->list_kref); - if (bo->ttm && !(bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { + if (bo->ttm && !(bo->ttm->page_flags & + (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) { list_add_tail(&bo->swap, &bo->glob->swap_lru[bo->priority]); kref_get(&bo->list_kref); From 15cdc016f109e2b8b7ede8c68c413c9b97c09fa8 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 26 Jan 2018 09:32:29 -0500 Subject: [PATCH 533/770] drm/ttm: Fix 'buf' pointer update in ttm_bo_vm_access_kmap() (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 95244db2d3f743f37e69446a2807dd1a42750542 upstream. The buf pointer was not being incremented inside the loop meaning the same block of data would be read or written repeatedly. (v2) Change 'buf' pointer to uint8_t* type Cc: stable@vger.kernel.org Fixes: 09ac4fcb3f25 ("drm/ttm: Implement vm_operations_struct.access v2") Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index c8ebb757e36b..b17d0d38f290 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -299,7 +299,7 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma) static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, unsigned long offset, - void *buf, int len, int write) + uint8_t *buf, int len, int write) { unsigned long page = offset >> PAGE_SHIFT; unsigned long bytes_left = len; @@ -328,6 +328,7 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, ttm_bo_kunmap(&map); page++; + buf += bytes; bytes_left -= bytes; offset = 0; } while (bytes_left); From b51521c9a191083033d3d8d7174e41d33b71a9bf Mon Sep 17 00:00:00 2001 From: Ray Strode Date: Mon, 27 Nov 2017 16:50:09 -0500 Subject: [PATCH 534/770] drm/qxl: unref cursor bo when finished with it commit 16c6db3688734b27487a42d0c2a1062d0b2bad03 upstream. qxl_cursor_atomic_update allocs a bo for the cursor that it never frees up at the end of the function. This commit fixes that. Signed-off-by: Ray Strode Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index afbf50d0c08f..ced6564b5814 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -564,7 +564,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, struct qxl_cursor_cmd *cmd; struct qxl_cursor *cursor; struct drm_gem_object *obj; - struct qxl_bo *cursor_bo, *user_bo = NULL; + struct qxl_bo *cursor_bo = NULL, *user_bo = NULL; int ret; void *user_ptr; int size = 64*64*4; @@ -634,6 +634,8 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + qxl_bo_unref(&cursor_bo); + return; out_backoff: From dc2b0e41fc5e6d11fe4539317e57cf84c67cd18d Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 17 Jan 2018 17:24:13 -0500 Subject: [PATCH 535/770] drm/amd/powerplay: Fix smu_table_entry.handle type commit adab595d16abe48e9c097f000bf8921d35b28fb7 upstream. The handle describes kernel logical address, should be unsigned long and not uint32_t. Fixes KASAN error and GFP on driver unload. Reviewed-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Andrey Grodzovsky Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h index 262c8ded87c0..dafc9c4b1e6f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h @@ -40,7 +40,7 @@ struct smu_table_entry { uint32_t table_addr_high; uint32_t table_addr_low; uint8_t *table; - uint32_t handle; + unsigned long handle; }; struct smu_table_array { From e4a6d687e5d9e603f7188ca2ee88040c5fdd03dd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 18 Jan 2018 16:40:16 +0100 Subject: [PATCH 536/770] drm/ast: Load lut in crtc_commit commit 24b8ef699e8221d2b7f813adaab13eec053e1507 upstream. In the past the ast driver relied upon the fbdev emulation helpers to call ->load_lut at boot-up. But since commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette that's cleaned up and drivers are expected to boot into a consistent lut state. This patch fixes that. Fixes: b8e2b0199cc3 ("drm/fb-helper: factor out pseudo-palette") Cc: Peter Rosin Cc: Daniel Vetter Cc: # v4.14+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Cc: Bill Fraser Reported-and-Tested-by: Bill Fraser Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_mode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 6f3849ec0c1d..e9f1e6fe7b94 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -644,6 +644,7 @@ static void ast_crtc_commit(struct drm_crtc *crtc) { struct ast_private *ast = crtc->dev->dev_private; ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0); + ast_crtc_load_lut(crtc); } From 254d48c8cc3b1f478f2c75349930da6e12abdd39 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Sun, 11 Feb 2018 19:16:15 -0600 Subject: [PATCH 537/770] arm64: Add missing Falkor part number for branch predictor hardening commit 16e574d762ac5512eb922ac0ac5eed360b7db9d8 upstream. References to CPU part number MIDR_QCOM_FALKOR were dropped from the mailing list patch due to mainline/arm64 branch dependency. So this patch adds the missing part number. Fixes: ec82b567a74f ("arm64: Implement branch predictor hardening for Falkor") Acked-by: Marc Zyngier Signed-off-by: Shanker Donthineni Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/kvm/hyp/switch.c | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 07823595b7f0..52f15cd896e1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + .enable = qcom_enable_link_stack_sanitization, + }, + { + .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 79364d3455c0..e08ae6b6b63e 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -371,8 +371,10 @@ again: u32 midr = read_cpuid_id(); /* Apply BTAC predictors mitigation to all Falkor chips */ - if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1) + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { __qcom_hyp_sanitize_btac_predictors(); + } } fp_enabled = __fpsimd_enabled(); From 808700475218f382591313acabd4812c30ec7fa3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Nov 2017 12:09:38 -0500 Subject: [PATCH 538/770] drm/radeon: Add dpm quirk for Jet PRO (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 239b5f64e12b1f09f506c164dff0374924782979 upstream. Fixes stability issues. v2: clamp sclk to 600 Mhz Bug: https://bugs.freedesktop.org/show_bug.cgi?id=103370 Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index ee3e74266a13..97a0a639dad9 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2984,6 +2984,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6667)) { max_sclk = 75000; } + if ((rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6665)) { + max_sclk = 60000; + max_mclk = 80000; + } } else if (rdev->family == CHIP_OLAND) { if ((rdev->pdev->revision == 0xC7) || (rdev->pdev->revision == 0x80) || From a29adc04cb6d5ec4ac71c63ceeb67efc011a4b65 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 27 Jan 2018 15:28:15 +0100 Subject: [PATCH 539/770] drm/radeon: adjust tested variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3a61b527b4e1f285d21b6e9e623dc45cf8bb391f upstream. Check the variable that was most recently initialized. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression x, y, f, g, e, m; statement S1,S2,S3,S4; @@ x = f(...); if (\(<+...x...+>\&e\)) S1 else S2 ( x = g(...); | m = g(...,&x,...); | y = g(...); *if (e) S3 else S4 ) // Signed-off-by: Julia Lawall Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index d34d1cf33895..95f4db70dd22 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -995,7 +995,7 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, /* calc dclk divider with current vco freq */ dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, pd_min, pd_even); - if (vclk_div > pd_max) + if (dclk_div > pd_max) break; /* vco is too big, it has to stop */ /* calc score with current vco freq */ From c3bdd547042fa2e71825d7cc5e871b558d4153e9 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Tue, 2 Aug 2016 11:50:16 +1000 Subject: [PATCH 540/770] rtc-opal: Fix handling of firmware error codes, prevent busy loops commit 5b8b58063029f02da573120ef4dc9079822e3cda upstream. According to the OPAL docs: skiboot-5.2.5/doc/opal-api/opal-rtc-read-3.txt skiboot-5.2.5/doc/opal-api/opal-rtc-write-4.txt OPAL_HARDWARE may be returned from OPAL_RTC_READ or OPAL_RTC_WRITE and this indicates either a transient or permanent error. Prior to this patch, Linux was not dealing with OPAL_HARDWARE being a permanent error particularly well, in that you could end up in a busy loop. This was not too hard to trigger on an AMI BMC based OpenPOWER machine doing a continuous "ipmitool mc reset cold" to the BMC, the result of that being that we'd get stuck in an infinite loop in opal_get_rtc_time(). We now retry a few times before returning the error higher up the stack. Fixes: 16b1d26e77b1 ("rtc/tpo: Driver to support rtc and wakeup on PowerNV platform") Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-opal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index e2a946c0e667..304e891e35fc 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -58,6 +58,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) { long rc = OPAL_BUSY; + int retries = 10; u32 y_m_d; u64 h_m_s_ms; __be32 __y_m_d; @@ -67,8 +68,11 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); - else + else if (retries-- && (rc == OPAL_HARDWARE + || rc == OPAL_INTERNAL_ERROR)) msleep(10); + else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) + break; } if (rc != OPAL_SUCCESS) @@ -84,6 +88,7 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) { long rc = OPAL_BUSY; + int retries = 10; u32 y_m_d = 0; u64 h_m_s_ms = 0; @@ -92,8 +97,11 @@ static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) rc = opal_rtc_write(y_m_d, h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); - else + else if (retries-- && (rc == OPAL_HARDWARE + || rc == OPAL_INTERNAL_ERROR)) msleep(10); + else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) + break; } return rc == OPAL_SUCCESS ? 0 : -EIO; From e96a219899740432662b58c60a93505ab3dced5c Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Sun, 7 Jan 2018 16:22:35 -0500 Subject: [PATCH 541/770] mbcache: initialize entry->e_referenced in mb_cache_entry_create() commit 3876bbe27d04b848750d5310a37d6b76b593f648 upstream. KMSAN reported use of uninitialized |entry->e_referenced| in a condition in mb_cache_shrink(): ================================================================== BUG: KMSAN: use of uninitialized memory in mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 CPU: 2 PID: 816 Comm: kswapd1 Not tainted 4.11.0-rc5+ #2877 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 mb_cache_shrink+0x3b4/0xc50 fs/mbcache.c:287 mb_cache_scan+0x67/0x80 fs/mbcache.c:321 do_shrink_slab mm/vmscan.c:397 [inline] shrink_slab+0xc3d/0x12d0 mm/vmscan.c:500 shrink_node+0x208f/0x2fd0 mm/vmscan.c:2603 kswapd_shrink_node mm/vmscan.c:3172 [inline] balance_pgdat mm/vmscan.c:3289 [inline] kswapd+0x160f/0x2850 mm/vmscan.c:3478 kthread+0x46c/0x5f0 kernel/kthread.c:230 ret_from_fork+0x29/0x40 arch/x86/entry/entry_64.S:430 chained origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_save_stack mm/kmsan/kmsan.c:317 [inline] kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 __msan_store_shadow_origin_1+0xac/0x110 mm/kmsan/kmsan_instr.c:257 mb_cache_entry_create+0x3b3/0xc60 fs/mbcache.c:95 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 [inline] kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 mb_cache_entry_create+0x283/0xc60 fs/mbcache.c:86 ext4_xattr_cache_insert fs/ext4/xattr.c:1647 [inline] ext4_xattr_block_set+0x4c82/0x5530 fs/ext4/xattr.c:1022 ext4_xattr_set_handle+0x1332/0x20a0 fs/ext4/xattr.c:1252 ext4_xattr_set+0x4d2/0x680 fs/ext4/xattr.c:1306 ext4_xattr_trusted_set+0x8d/0xa0 fs/ext4/xattr_trusted.c:36 __vfs_setxattr+0x703/0x790 fs/xattr.c:149 __vfs_setxattr_noperm+0x27a/0x6f0 fs/xattr.c:180 vfs_setxattr fs/xattr.c:223 [inline] setxattr+0x6ae/0x790 fs/xattr.c:449 path_setxattr+0x1eb/0x380 fs/xattr.c:468 SYSC_lsetxattr+0x8d/0xb0 fs/xattr.c:490 SyS_lsetxattr+0x77/0xa0 fs/xattr.c:486 entry_SYSCALL_64_fastpath+0x13/0x94 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org # v4.6 Signed-off-by: Greg Kroah-Hartman --- fs/mbcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/mbcache.c b/fs/mbcache.c index d818fd236787..49c5b25bfa8c 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -94,6 +94,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, entry->e_key = key; entry->e_value = value; entry->e_reusable = reusable; + entry->e_referenced = 0; head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { From a1341cac93fa9d54f49f5fedf7a577bfc7a7665c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 29 Jan 2018 00:44:53 +0100 Subject: [PATCH 542/770] mmc: sdhci: Implement an SDHCI-specific bounce buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd9b902798ab14d19ca116b10bde581ddff8f905 upstream. The bounce buffer is gone from the MMC core, and now we found out that there are some (crippled) i.MX boards out there that have broken ADMA (cannot do scatter-gather), and also broken PIO so they must use SDMA. Closer examination shows a less significant slowdown also on SDMA-only capable Laptop hosts. SDMA sets down the number of segments to one, so that each segment gets turned into a singular request that ping-pongs to the block layer before the next request/segment is issued. Apparently it happens a lot that the block layer send requests that include a lot of physically discontiguous segments. My guess is that this phenomenon is coming from the file system. These devices that cannot handle scatterlists in hardware can see major benefits from a DMA-contiguous bounce buffer. This patch accumulates those fragmented scatterlists in a physically contiguous bounce buffer so that we can issue bigger DMA data chunks to/from the card. When tested with a PCI-integrated host (1217:8221) that only supports SDMA: 0b:00.0 SD Host controller: O2 Micro, Inc. OZ600FJ0/OZ900FJ0/OZ600FJS SD/MMC Card Reader Controller (rev 05) This patch gave ~1Mbyte/s improved throughput on large reads and writes when testing using iozone than without the patch. dmesg: sdhci-pci 0000:0b:00.0: SDHCI controller found [1217:8221] (rev 5) mmc0 bounce up to 128 segments into one, max segment size 65536 bytes mmc0: SDHCI controller on PCI [0000:0b:00.0] using DMA On the i.MX SDHCI controllers on the crippled i.MX 25 and i.MX 35 the patch restores the performance to what it was before we removed the bounce buffers. Cc: Pierre Ossman Cc: Benoît Thébaudeau Cc: Fabio Estevam Cc: Benjamin Beckmeyer Cc: stable@vger.kernel.org # v4.14+ Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling") Tested-by: Benjamin Beckmeyer Acked-by: Adrian Hunter Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 164 +++++++++++++++++++++++++++++++++++++-- drivers/mmc/host/sdhci.h | 3 + 2 files changed, 159 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6152e83ff935..90cc1977b792 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -502,8 +503,35 @@ static int sdhci_pre_dma_transfer(struct sdhci_host *host, if (data->host_cookie == COOKIE_PRE_MAPPED) return data->sg_count; - sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - mmc_get_dma_dir(data)); + /* Bounce write requests to the bounce buffer */ + if (host->bounce_buffer) { + unsigned int length = data->blksz * data->blocks; + + if (length > host->bounce_buffer_size) { + pr_err("%s: asked for transfer of %u bytes exceeds bounce buffer %u bytes\n", + mmc_hostname(host->mmc), length, + host->bounce_buffer_size); + return -EIO; + } + if (mmc_get_dma_dir(data) == DMA_TO_DEVICE) { + /* Copy the data to the bounce buffer */ + sg_copy_to_buffer(data->sg, data->sg_len, + host->bounce_buffer, + length); + } + /* Switch ownership to the DMA */ + dma_sync_single_for_device(host->mmc->parent, + host->bounce_addr, + host->bounce_buffer_size, + mmc_get_dma_dir(data)); + /* Just a dummy value */ + sg_count = 1; + } else { + /* Just access the data directly from memory */ + sg_count = dma_map_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, + mmc_get_dma_dir(data)); + } if (sg_count == 0) return -ENOSPC; @@ -673,6 +701,14 @@ static void sdhci_adma_table_post(struct sdhci_host *host, } } +static u32 sdhci_sdma_address(struct sdhci_host *host) +{ + if (host->bounce_buffer) + return host->bounce_addr; + else + return sg_dma_address(host->data->sg); +} + static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) { u8 count; @@ -858,8 +894,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) SDHCI_ADMA_ADDRESS_HI); } else { WARN_ON(sg_cnt != 1); - sdhci_writel(host, sg_dma_address(data->sg), - SDHCI_DMA_ADDRESS); + sdhci_writel(host, sdhci_sdma_address(host), + SDHCI_DMA_ADDRESS); } } @@ -2248,7 +2284,12 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq) mrq->data->host_cookie = COOKIE_UNMAPPED; - if (host->flags & SDHCI_REQ_USE_DMA) + /* + * No pre-mapping in the pre hook if we're using the bounce buffer, + * for that we would need two bounce buffers since one buffer is + * in flight when this is getting called. + */ + if (host->flags & SDHCI_REQ_USE_DMA && !host->bounce_buffer) sdhci_pre_dma_transfer(host, mrq->data, COOKIE_PRE_MAPPED); } @@ -2352,8 +2393,45 @@ static bool sdhci_request_done(struct sdhci_host *host) struct mmc_data *data = mrq->data; if (data && data->host_cookie == COOKIE_MAPPED) { - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - mmc_get_dma_dir(data)); + if (host->bounce_buffer) { + /* + * On reads, copy the bounced data into the + * sglist + */ + if (mmc_get_dma_dir(data) == DMA_FROM_DEVICE) { + unsigned int length = data->bytes_xfered; + + if (length > host->bounce_buffer_size) { + pr_err("%s: bounce buffer is %u bytes but DMA claims to have transferred %u bytes\n", + mmc_hostname(host->mmc), + host->bounce_buffer_size, + data->bytes_xfered); + /* Cap it down and continue */ + length = host->bounce_buffer_size; + } + dma_sync_single_for_cpu( + host->mmc->parent, + host->bounce_addr, + host->bounce_buffer_size, + DMA_FROM_DEVICE); + sg_copy_from_buffer(data->sg, + data->sg_len, + host->bounce_buffer, + length); + } else { + /* No copying, just switch ownership */ + dma_sync_single_for_cpu( + host->mmc->parent, + host->bounce_addr, + host->bounce_buffer_size, + mmc_get_dma_dir(data)); + } + } else { + /* Unmap the raw data */ + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, + mmc_get_dma_dir(data)); + } data->host_cookie = COOKIE_UNMAPPED; } } @@ -2636,7 +2714,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) */ if (intmask & SDHCI_INT_DMA_END) { u32 dmastart, dmanow; - dmastart = sg_dma_address(host->data->sg); + + dmastart = sdhci_sdma_address(host); dmanow = dmastart + host->data->bytes_xfered; /* * Force update to the next DMA block boundary. @@ -3217,6 +3296,68 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps, u32 *caps1) } EXPORT_SYMBOL_GPL(__sdhci_read_caps); +static int sdhci_allocate_bounce_buffer(struct sdhci_host *host) +{ + struct mmc_host *mmc = host->mmc; + unsigned int max_blocks; + unsigned int bounce_size; + int ret; + + /* + * Cap the bounce buffer at 64KB. Using a bigger bounce buffer + * has diminishing returns, this is probably because SD/MMC + * cards are usually optimized to handle this size of requests. + */ + bounce_size = SZ_64K; + /* + * Adjust downwards to maximum request size if this is less + * than our segment size, else hammer down the maximum + * request size to the maximum buffer size. + */ + if (mmc->max_req_size < bounce_size) + bounce_size = mmc->max_req_size; + max_blocks = bounce_size / 512; + + /* + * When we just support one segment, we can get significant + * speedups by the help of a bounce buffer to group scattered + * reads/writes together. + */ + host->bounce_buffer = devm_kmalloc(mmc->parent, + bounce_size, + GFP_KERNEL); + if (!host->bounce_buffer) { + pr_err("%s: failed to allocate %u bytes for bounce buffer, falling back to single segments\n", + mmc_hostname(mmc), + bounce_size); + /* + * Exiting with zero here makes sure we proceed with + * mmc->max_segs == 1. + */ + return 0; + } + + host->bounce_addr = dma_map_single(mmc->parent, + host->bounce_buffer, + bounce_size, + DMA_BIDIRECTIONAL); + ret = dma_mapping_error(mmc->parent, host->bounce_addr); + if (ret) + /* Again fall back to max_segs == 1 */ + return 0; + host->bounce_buffer_size = bounce_size; + + /* Lie about this since we're bouncing */ + mmc->max_segs = max_blocks; + mmc->max_seg_size = bounce_size; + mmc->max_req_size = bounce_size; + + pr_info("%s bounce up to %u segments into one, max segment size %u bytes\n", + mmc_hostname(mmc), max_blocks, bounce_size); + + return 0; +} + int sdhci_setup_host(struct sdhci_host *host) { struct mmc_host *mmc; @@ -3713,6 +3854,13 @@ int sdhci_setup_host(struct sdhci_host *host) */ mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535; + if (mmc->max_segs == 1) { + /* This may alter mmc->*_blk_* parameters */ + ret = sdhci_allocate_bounce_buffer(host); + if (ret) + return ret; + } + return 0; unreg: diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 54bc444c317f..1d7d61e25dbf 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -440,6 +440,9 @@ struct sdhci_host { int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ + char *bounce_buffer; /* For packing SDMA reads/writes */ + dma_addr_t bounce_addr; + unsigned int bounce_buffer_size; const struct sdhci_ops *ops; /* Low level hw interface */ From e8012ff8779378a650aab3d088239053bfbdc57f Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 12 Feb 2018 21:13:44 +0100 Subject: [PATCH 543/770] mmc: bcm2835: Don't overwrite max frequency unconditionally commit 118032be389009b07ecb5a03ffe219a89d421def upstream. The optional DT parameter max-frequency could init the max bus frequency. So take care of this, before setting the max bus frequency. Fixes: 660fc733bd74 ("mmc: bcm2835: Add new driver for the sdhost controller.") Signed-off-by: Phil Elwell Signed-off-by: Stefan Wahren Cc: # 4.12+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/bcm2835.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 229dc18f0581..768972af8b85 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1265,7 +1265,8 @@ static int bcm2835_add_host(struct bcm2835_host *host) char pio_limit_string[20]; int ret; - mmc->f_max = host->max_clk; + if (!mmc->f_max || mmc->f_max > host->max_clk) + mmc->f_max = host->max_clk; mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV; mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000); From 723cc3aeba9be034c7509860dbf445e1bd25467f Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 12 Feb 2018 14:13:59 +0100 Subject: [PATCH 544/770] Revert "mmc: meson-gx: include tx phase in the tuning process" commit fe0e58048f005fdce315eb4d185e5c160be4ac01 upstream. This reverts commit 0a44697627d17a66d7dc98f17aeca07ca79c5c20. This commit was initially intended to fix problems with hs200 and hs400 on some boards, mainly the odroid-c2. The OC2 (Rev 0.2) I have performs well in this modes, so I could not confirm these issues. We've had several reports about the issues being still present on (some) OC2, so apparently, this change does not do what it was supposed to do. Maybe the eMMC signal quality is on the edge on the board. This may explain the variability we see in term of stability, but this is just a guess. Lowering the max_frequency to 100Mhz seems to do trick for those affected by the issue Worse, the commit created new issues (CRC errors and hangs) on other boards, such as the kvim 1 and 2, the p200 or the libretech-cc. According to amlogic, the Tx phase should not be tuned and left in its default configuration, so it is best to just revert the commit. Fixes: 0a44697627d1 ("mmc: meson-gx: include tx phase in the tuning process") Cc: # 4.14+ Signed-off-by: Jerome Brunet Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/meson-gx-mmc.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 85745ef179e2..08a55c2e96e1 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -716,22 +716,6 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode, static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct meson_host *host = mmc_priv(mmc); - int ret; - - /* - * If this is the initial tuning, try to get a sane Rx starting - * phase before doing the actual tuning. - */ - if (!mmc->doing_retune) { - ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); - - if (ret) - return ret; - } - - ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk); - if (ret) - return ret; return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); } @@ -762,9 +746,8 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); - /* Reset phases */ + /* Reset rx phase */ clk_set_phase(host->rx_clk, 0); - clk_set_phase(host->tx_clk, 270); break; From e7cedb56ae9a1c00b76df98d2adebb6208d2b71e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 5 Feb 2018 16:24:52 +0200 Subject: [PATCH 545/770] mlx5: fix mlx5_get_vector_affinity to start from completion vector 0 commit 2572cf57d75a7f91835d9a38771e9e76d575d122 upstream. The consumers of this routine expects the affinity map of of vector index relative to the first completion vector. The upper layers are not aware of internal/private completion vectors that mlx5 allocates for its own usage. Hence, return the affinity map of vector index relative to the first completion vector. Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function") Reported-by: Logan Gunthorpe Tested-by: Max Gurtovoy Reviewed-by: Max Gurtovoy Cc: # v4.15 Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a13525daf09b..ae15864c8708 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1201,7 +1201,7 @@ mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) int eqn; int err; - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); + err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq); if (err) return NULL; From 598b21708ec09ad33cd66d162d2fea2dd66d9f64 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 24 Jan 2018 19:35:45 +0100 Subject: [PATCH 546/770] Revert "apple-gmux: lock iGP IO to protect from vgaarb changes" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d6fa7588fd7a8def4c747c0c574ce85d453e3788 upstream. Commit 4eebd5a4e726 ("apple-gmux: lock iGP IO to protect from vgaarb changes") amended this driver's ->probe hook to lock decoding of normal (non-legacy) I/O space accesses to the integrated GPU on dual-GPU MacBook Pros. The lock stays in place until the driver is unbound. The change was made to work around an issue with the out-of-tree nvidia graphics driver (available at http://www.nvidia.com/object/unix.html). It contains the following sequence in nvidia/nv.c: #if defined(CONFIG_VGA_ARB) && !defined(NVCPU_PPC64LE) #if defined(VGA_DEFAULT_DEVICE) vga_tryget(VGA_DEFAULT_DEVICE, VGA_RSRC_LEGACY_MASK); #endif vga_set_legacy_decoding(dev, VGA_RSRC_NONE); #endif This code was reported to cause deadlocks with VFIO already in 2013: https://devtalk.nvidia.com/default/topic/545560 I've reported the issue to Nvidia developers once more in 2017: https://www.spinics.net/lists/dri-devel/msg138754.html On the MacBookPro10,1, this code apparently breaks backlight control (which is handled by apple-gmux via an I/O region starting at 0x700), as reported by Petri Hodju: https://bugzilla.kernel.org/show_bug.cgi?id=86121 I tried to replicate Petri's observations on my MacBook9,1, which uses the same Intel Ivy Bridge + Nvidia GeForce GT 650M architecture, to no avail. On my machine apple-gmux' I/O region remains accessible even with the nvidia driver loaded and commit 4eebd5a4e726 reverted. Petri reported that apple-gmux becomes accessible again after a suspend/resume cycle because the BIOS changed the VGA routing on the root port to the Nvidia GPU. Perhaps this is a BIOS issue after all that can be fixed with an update? In any case, the change made by commit 4eebd5a4e726 has turned out to cause two new issues: * Wilfried Klaebe reports a deadlock when launching Xorg because it opens /dev/vga_arbiter and calls vga_get(), but apple-gmux is holding a lock on I/O space indefinitely. It looks like apple-gmux' current behavior is an abuse of the vgaarb API as locks are not meant to be held for longer periods: https://bugzilla.kernel.org/show_bug.cgi?id=88861#c11 https://bugzilla.kernel.org/attachment.cgi?id=217541 * On dual GPU MacBook Pros introduced since 2013, the integrated GPU is powergated on boot und thus becomes invisible to Linux unless a custom EFI protocol is used to leave it powered on. (A patch exists but is not in mainline yet due to several negative side effects.) On these machines, locking I/O to the integrated GPU (as done by 4eebd5a4e726) fails and backlight control is therefore broken: https://bugzilla.kernel.org/show_bug.cgi?id=105051 So let's revert commit 4eebd5a4e726 please. Users experiencing the issue with the proprietary nvidia driver can comment out the above- quoted problematic code as a workaround (or try updating the BIOS). Cc: Petri Hodju Cc: Bjorn Helgaas Cc: Bruno Prémont Cc: Andy Ritger Cc: Ronald Tschalär Tested-by: Wilfried Klaebe Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/apple-gmux.c | 48 +------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 623d322447a2..7c4eb86c851e 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,6 @@ struct apple_gmux_data { bool indexed; struct mutex index_lock; - struct pci_dev *pdev; struct backlight_device *bdev; /* switcheroo data */ @@ -599,23 +597,6 @@ static int gmux_resume(struct device *dev) return 0; } -static struct pci_dev *gmux_get_io_pdev(void) -{ - struct pci_dev *pdev = NULL; - - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) { - u16 cmd; - - pci_read_config_word(pdev, PCI_COMMAND, &cmd); - if (!(cmd & PCI_COMMAND_IO)) - continue; - - return pdev; - } - - return NULL; -} - static int is_thunderbolt(struct device *dev, void *data) { return to_pci_dev(dev)->is_thunderbolt; @@ -631,7 +612,6 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) int ret = -ENXIO; acpi_status status; unsigned long long gpe; - struct pci_dev *pdev = NULL; if (apple_gmux_data) return -EBUSY; @@ -682,7 +662,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) ver_minor = (version >> 16) & 0xff; ver_release = (version >> 8) & 0xff; } else { - pr_info("gmux device not present or IO disabled\n"); + pr_info("gmux device not present\n"); ret = -ENODEV; goto err_release; } @@ -690,23 +670,6 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) pr_info("Found gmux version %d.%d.%d [%s]\n", ver_major, ver_minor, ver_release, (gmux_data->indexed ? "indexed" : "classic")); - /* - * Apple systems with gmux are EFI based and normally don't use - * VGA. In addition changing IO+MEM ownership between IGP and dGPU - * disables IO/MEM used for backlight control on some systems. - * Lock IO+MEM to GPU with active IO to prevent switch. - */ - pdev = gmux_get_io_pdev(); - if (pdev && vga_tryget(pdev, - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM)) { - pr_err("IO+MEM vgaarb-locking for PCI:%s failed\n", - pci_name(pdev)); - ret = -EBUSY; - goto err_release; - } else if (pdev) - pr_info("locked IO for PCI:%s\n", pci_name(pdev)); - gmux_data->pdev = pdev; - memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_PLATFORM; props.max_brightness = gmux_read32(gmux_data, GMUX_PORT_MAX_BRIGHTNESS); @@ -822,10 +785,6 @@ err_enable_gpe: err_notify: backlight_device_unregister(bdev); err_release: - if (gmux_data->pdev) - vga_put(gmux_data->pdev, - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM); - pci_dev_put(pdev); release_region(gmux_data->iostart, gmux_data->iolen); err_free: kfree(gmux_data); @@ -845,11 +804,6 @@ static void gmux_remove(struct pnp_dev *pnp) &gmux_notify_handler); } - if (gmux_data->pdev) { - vga_put(gmux_data->pdev, - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM); - pci_dev_put(gmux_data->pdev); - } backlight_device_unregister(gmux_data->bdev); release_region(gmux_data->iostart, gmux_data->iolen); From 879bcbe0913fb6f938157af821d10f48235c166c Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 10 Jan 2018 00:27:29 -0500 Subject: [PATCH 547/770] jbd2: fix sphinx kernel-doc build warnings commit f69120ce6c024aa634a8fc25787205e42f0ccbe6 upstream. Sphinx emits various (26) warnings when building make target 'htmldocs'. Currently struct definitions contain duplicate documentation, some as kernel-docs and some as standard c89 comments. We can reduce duplication while cleaning up the kernel docs. Move all kernel-docs to right above each struct member. Use the set of all existing comments (kernel-doc and c89). Add documentation for missing struct members and function arguments. Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 5 +- include/linux/jbd2.h | 431 ++++++++++++++++++++++++++---------------- 2 files changed, 272 insertions(+), 164 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8b08044b3120..c0681814c379 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -495,8 +495,10 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * int jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start + * @type: for handle statistics + * @line_no: for handle statistics * * Start handle that has been previously reserved with jbd2_journal_reserve(). * This attaches @handle to the running transaction (or creates one if there's @@ -626,6 +628,7 @@ error_out: * int jbd2_journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested + * @gfp_mask: memory allocation flags (for start_this_handle) * * Restart a handle for a multi-transaction filesystem * operation. diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 606b6bce3a5b..29290bfb94a8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -418,26 +418,41 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode is the structure linking inodes in ordered mode - * present in a transaction so that we can sync them during commit. + * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { - /* Which transaction does this inode belong to? Either the running - * transaction or the committing one. [j_list_lock] */ + /** + * @i_transaction: + * + * Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] + */ transaction_t *i_transaction; - /* Pointer to the running transaction modifying inode's data in case - * there is already a committing transaction touching it. [j_list_lock] */ + /** + * @i_next_transaction: + * + * Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. [j_list_lock] + */ transaction_t *i_next_transaction; - /* List of inodes in the i_transaction [j_list_lock] */ + /** + * @i_list: List of inodes in the i_transaction [j_list_lock] + */ struct list_head i_list; - /* VFS inode this inode belongs to [constant during the lifetime - * of the structure] */ + /** + * @i_vfs_inode: + * + * VFS inode this inode belongs to [constant for lifetime of structure] + */ struct inode *i_vfs_inode; - /* Flags of inode [j_list_lock] */ + /** + * @i_flags: Flags of inode [j_list_lock] + */ unsigned long i_flags; }; @@ -447,12 +462,20 @@ struct jbd2_revoke_table_s; * struct handle_s - The handle_s type is the concrete type associated with * handle_t. * @h_transaction: Which compound transaction is this update a part of? + * @h_journal: Which journal handle belongs to - used iff h_reserved set. + * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle + * @h_ref: Reference count on this handle. + * @h_err: Field for caller's use to track errors through large fs operations. + * @h_sync: Flag for sync-on-close. + * @h_jdata: Flag to force data journaling. + * @h_reserved: Flag for handle for reserved credits. + * @h_aborted: Flag indicating fatal error on handle. + * @h_type: For handle statistics. + * @h_line_no: For handle statistics. + * @h_start_jiffies: Handle Start time. + * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @saved_alloc_context: Saved context while transaction is open. **/ /* Docbook can't yet cope with the bit fields, but will leave the documentation @@ -462,32 +485,23 @@ struct jbd2_revoke_table_s; struct jbd2_journal_handle { union { - /* Which compound transaction is this update a part of? */ transaction_t *h_transaction; /* Which journal handle belongs to - used iff h_reserved set */ journal_t *h_journal; }; - /* Handle reserved for finishing the logical operation */ handle_t *h_rsv_handle; - - /* Number of remaining buffers we are allowed to dirty: */ int h_buffer_credits; - - /* Reference count on this handle */ int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ int h_err; /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_reserved: 1; /* handle with reserved credits */ - unsigned int h_aborted: 1; /* fatal error on handle */ - unsigned int h_type: 8; /* for handle statistics */ - unsigned int h_line_no: 16; /* for handle statistics */ + unsigned int h_sync: 1; + unsigned int h_jdata: 1; + unsigned int h_reserved: 1; + unsigned int h_aborted: 1; + unsigned int h_type: 8; + unsigned int h_line_no: 16; unsigned long h_start_jiffies; unsigned int h_requested_credits; @@ -729,228 +743,253 @@ jbd2_time_diff(unsigned long start, unsigned long end) /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself - * @j_running_transaction: The current running transaction.. - * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_reserved_credits: Number of buffers reserved from the running transaction - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). - * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_history_lock: Protect the transactions statistics history - * @j_proc_entry: procfs entry for the jbd statistics directory - * @j_stats: Overall statistics - * @j_private: An opaque pointer to fs-private information. - * @j_trans_commit_map: Lockdep entity to track transaction commit dependencies */ - struct journal_s { - /* General journaling state flags [j_state_lock] */ + /** + * @j_flags: General journaling state flags [j_state_lock] + */ unsigned long j_flags; - /* + /** + * @j_errno: + * * Is there an outstanding uncleared error on the journal (from a prior * abort)? [j_state_lock] */ int j_errno; - /* The superblock buffer */ + /** + * @j_sb_buffer: The first part of the superblock buffer. + */ struct buffer_head *j_sb_buffer; + + /** + * @j_superblock: The second part of the superblock buffer. + */ journal_superblock_t *j_superblock; - /* Version of the superblock format */ + /** + * @j_format_version: Version of the superblock format. + */ int j_format_version; - /* - * Protect the various scalars in the journal + /** + * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; - /* + /** + * @j_barrier_count: + * * Number of processes waiting to create a barrier lock [j_state_lock] */ int j_barrier_count; - /* The barrier lock itself */ + /** + * @j_barrier: The barrier lock itself. + */ struct mutex j_barrier; - /* + /** + * @j_running_transaction: + * * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] */ transaction_t *j_running_transaction; - /* + /** + * @j_committing_transaction: + * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; - /* + /** + * @j_checkpoint_transactions: + * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; - /* + /** + * @j_wait_transaction_locked: + * * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released + * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for commit to complete */ + /** + * @j_wait_done_commit: Wait queue for waiting for commit to complete. + */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger commit */ + /** + * @j_wait_commit: Wait queue to trigger commit. + */ wait_queue_head_t j_wait_commit; - /* Wait queue to wait for updates to complete */ + /** + * @j_wait_updates: Wait queue to wait for updates to complete. + */ wait_queue_head_t j_wait_updates; - /* Wait queue to wait for reserved buffer credits to drop */ + /** + * @j_wait_reserved: + * + * Wait queue to wait for reserved buffer credits to drop. + */ wait_queue_head_t j_wait_reserved; - /* Semaphore for locking against concurrent checkpoints */ + /** + * @j_checkpoint_mutex: + * + * Semaphore for locking against concurrent checkpoints. + */ struct mutex j_checkpoint_mutex; - /* + /** + * @j_chkpt_bhs: + * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the - * j_checkpoint_mutex. [j_checkpoint_mutex] + * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; - - /* + + /** + * @j_head: + * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; - /* + /** + * @j_tail: + * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; - /* + /** + * @j_free: + * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] + /** + * @j_first: + * + * The block number of the first usable block in the journal + * [j_state_lock]. */ unsigned long j_first; + + /** + * @j_last: + * + * The block number one beyond the last usable block in the journal + * [j_state_lock]. + */ unsigned long j_last; - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. + /** + * @j_dev: Device where we store the journal. */ struct block_device *j_dev; + + /** + * @j_blocksize: Block size for the location where we store the journal. + */ int j_blocksize; + + /** + * @j_blk_offset: + * + * Starting block offset into the device where we store the journal. + */ unsigned long long j_blk_offset; + + /** + * @j_devname: Journal device name. + */ char j_devname[BDEVNAME_SIZE+24]; - /* + /** + * @j_fs_dev: + * * Device which holds the client fs. For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; - /* Total maximum capacity of the journal region on disk. */ + /** + * @j_maxlen: Total maximum capacity of the journal region on disk. + */ unsigned int j_maxlen; - /* Number of buffers reserved from the running transaction */ + /** + * @j_reserved_credits: + * + * Number of buffers reserved from the running transaction. + */ atomic_t j_reserved_credits; - /* - * Protects the buffer lists and internal buffer state. + /** + * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ + /** + * @j_inode: + * + * Optional inode where we store the journal. If present, all + * journal block numbers are mapped into this inode via bmap(). + */ struct inode *j_inode; - /* + /** + * @j_tail_sequence: + * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; - /* + /** + * @j_transaction_sequence: + * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; - /* + /** + * @j_commit_sequence: + * * Sequence number of the most recently committed transaction * [j_state_lock]. */ tid_t j_commit_sequence; - /* + /** + * @j_commit_request: + * * Sequence number of the most recent transaction wanting commit * [j_state_lock] */ tid_t j_commit_request; - /* + /** + * @j_uuid: + * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -958,85 +997,151 @@ struct journal_s */ __u8 j_uuid[16]; - /* Pointer to the current commit thread for this journal */ + /** + * @j_task: Pointer to the current commit thread for this journal. + */ struct task_struct *j_task; - /* + /** + * @j_max_transaction_buffers: + * * Maximum number of metadata buffers to allow in a single compound - * commit transaction + * commit transaction. */ int j_max_transaction_buffers; - /* + /** + * @j_commit_interval: + * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; - /* The timer used to wakeup the commit thread: */ + /** + * @j_commit_timer: The timer used to wakeup the commit thread. + */ struct timer_list j_commit_timer; - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] + /** + * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; + + /** + * @j_revoke: + * + * The revoke table - maintains the list of revoked blocks in the + * current transaction. + */ struct jbd2_revoke_table_s *j_revoke; + + /** + * @j_revoke_table: Alternate revoke tables for j_revoke. + */ struct jbd2_revoke_table_s *j_revoke_table[2]; - /* - * array of bhs for jbd2_journal_commit_transaction + /** + * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; + + /** + * @j_wbufsize: + * + * Size of @j_wbuf array. + */ int j_wbufsize; - /* - * this is the pid of hte last person to run a synchronous operation - * through the journal + /** + * @j_last_sync_writer: + * + * The pid of the last person to run a synchronous operation + * through the journal. */ pid_t j_last_sync_writer; - /* - * the average amount of time in nanoseconds it takes to commit a + /** + * @j_average_commit_time: + * + * The average amount of time in nanoseconds it takes to commit a * transaction to disk. [j_state_lock] */ u64 j_average_commit_time; - /* - * minimum and maximum times that we should wait for - * additional filesystem operations to get batched into a - * synchronous handle in microseconds + /** + * @j_min_batch_time: + * + * Minimum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; + + /** + * @j_max_batch_time: + * + * Maximum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. + */ u32 j_max_batch_time; - /* This function is called when a transaction is closed */ + /** + * @j_commit_callback: + * + * This function is called when a transaction is closed. + */ void (*j_commit_callback)(journal_t *, transaction_t *); /* * Journal statistics */ + + /** + * @j_history_lock: Protect the transactions statistics history. + */ spinlock_t j_history_lock; + + /** + * @j_proc_entry: procfs entry for the jbd statistics directory. + */ struct proc_dir_entry *j_proc_entry; + + /** + * @j_stats: Overall statistics. + */ struct transaction_stats_s j_stats; - /* Failed journal commit ID */ + /** + * @j_failed_commit: Failed journal commit ID. + */ unsigned int j_failed_commit; - /* + /** + * @j_private: + * * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here + * superblock pointer here. */ void *j_private; - /* Reference to checksum algorithm driver via cryptoapi */ + /** + * @j_chksum_driver: + * + * Reference to checksum algorithm driver via cryptoapi. + */ struct crypto_shash *j_chksum_driver; - /* Precomputed journal UUID checksum for seeding other checksums */ + /** + * @j_csum_seed: + * + * Precomputed journal UUID checksum for seeding other checksums. + */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC - /* + /** + * @j_trans_commit_map: + * * Lockdep entity to track transaction commit dependencies. Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. This matches the properties of jbd2 journalling From 4d4d103a1b7e2990bcdbf83a24a83b3e351b6838 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 10 Jan 2018 00:13:13 -0500 Subject: [PATCH 548/770] ext4: fix a race in the ext4 shutdown path commit abbc3f9395c76d554a9ed27d4b1ebfb5d9b0e4ca upstream. This patch fixes a race between the shutdown path and bio completion handling. In the ext4 direct io path with async io, after submitting a bio to the block layer, if journal starting fails, ext4_direct_IO_write() would bail out pretending that the IO failed. The caller would have had no way of knowing whether or not the IO was successfully submitted. So instead, we return -EIOCBQUEUED in this case. Now, the caller knows that the IO was submitted. The bio completion handler takes care of the error. Tested: Ran the shutdown xfstest test 461 in loop for over 2 hours across 4 machines resulting in over 400 runs. Verified that the race didn't occur. Usually the race was seen in about 20-30 iterations. Signed-off-by: Harshad Shirwadkar Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea2ccc524bd9..0b9f3f284799 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3724,10 +3724,18 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) /* Credits for sb + inode write */ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); + /* + * We wrote the data but cannot extend + * i_size. Bail out. In async io case, we do + * not return error here because we have + * already submmitted the corresponding + * bio. Returning error here makes the caller + * think that this IO is done and failed + * resulting in race with bio's completion + * handler. + */ + if (!ret) + ret = PTR_ERR(handle); if (inode->i_nlink) ext4_orphan_del(NULL, inode); From 2e38988253c165a8b81d3ceea55fb68cb2d19e1c Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 10 Jan 2018 00:34:19 -0500 Subject: [PATCH 549/770] ext4: save error to disk in __ext4_grp_locked_error() commit 06f29cc81f0350261f59643a505010531130eea0 upstream. In the function __ext4_grp_locked_error(), __save_error_info() is called to save error info in super block block, but does not sync that information to disk to info the subsequence fsck after reboot. This patch writes the error information to disk. After this patch, I think there is no obvious EXT4 error handle branches which leads to "Remounting filesystem read-only" will leave the disk partition miss the subsequence fsck. Signed-off-by: Zhouyi Zhou Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f29351c66610..16d247f056e2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -742,6 +742,7 @@ __acquires(bitlock) } ext4_unlock_group(sb, grp); + ext4_commit_super(sb, 1); ext4_handle_error(sb); /* * We only get here in the ERRORS_RO case; relocking the group From ef7fd28b1137c4023a5fc9fc0497a92838d81910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Thu, 11 Jan 2018 13:43:33 -0500 Subject: [PATCH 550/770] ext4: correct documentation for grpid mount option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9f0372488cc9243018a812e8cfbf27de650b187b upstream. The grpid option is currently described as being the same as nogrpid. Signed-off-by: Ernesto A. Fernández Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/ext4.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 5a8f7f4d2bca..7449893dc039 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -233,7 +233,7 @@ data_err=ignore(*) Just print an error message if an error occurs data_err=abort Abort the journal if an error occurs in a file data buffer in ordered mode. -grpid Give objects the same group ID as their creator. +grpid New objects have the group ID of their parent. bsdgroups nogrpid (*) New objects have the group ID of their creator. From 568c61facce6b0bddbfa04ec634b34904c6e2f1c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2018 16:25:53 +0100 Subject: [PATCH 551/770] mm: hide a #warning for COMPILE_TEST commit af27d9403f5b80685b79c88425086edccecaf711 upstream. We get a warning about some slow configurations in randconfig kernels: mm/memory.c:83:2: error: #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. [-Werror=cpp] The warning is reasonable by itself, but gets in the way of randconfig build testing, so I'm hiding it whenever CONFIG_COMPILE_TEST is set. The warning was added in 2013 in commit 75980e97dacc ("mm: fold page->_last_nid into page->flags where possible"). Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index a728bed16c20..fc7779165dcf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -81,7 +81,7 @@ #include "internal.h" -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif From 1f21cd46cacae0fe7429d27c8781370317fe4a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 19 Jan 2018 16:27:54 -0800 Subject: [PATCH 552/770] mm: Fix memory size alignment in devm_memremap_pages_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 10a0cd6e4932b5078215b1ec2c896597eec0eff9 upstream. The functions devm_memremap_pages() and devm_memremap_pages_release() use different ways to calculate the section-aligned amount of memory. The latter function may use an incorrect size if the memory region is small but straddles a section border. Use the same code for both. Cc: Fixes: 5f29a77cd957 ("mm: fix mixed zone detection in devm_memremap_pages") Signed-off-by: Jan H. Schönherr Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 403ab9cdb949..4712ce646e04 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -301,7 +301,8 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) + - align_start; mem_hotplug_begin(); arch_remove_memory(align_start, align_size); From f4f261974c6bf60f6b4cf3b6f1ea836504dc95b3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 17 Jan 2018 19:56:38 +0100 Subject: [PATCH 553/770] MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN commit 2e6522c565522a2e18409c315c49d78c8b74807b upstream. MIPS_GENERIC selects some options conditional on BIG_ENDIAN which does not exist. Replace BIG_ENDIAN with CPU_BIG_ENDIAN which is the correct kconfig name. Note that BMIPS_GENERIC does the same which confirms that this patch is needed. Fixes: eed0eabd12ef0 ("MIPS: generic: Introduce generic DT-based board support") Signed-off-by: Corentin Labbe Reviewed-by: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.9+ Patchwork: https://patchwork.linux-mips.org/patch/18495/ [jhogan@kernel.org: Clean up commit message] Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c3d798b44030..c82457b0e733 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -119,12 +119,12 @@ config MIPS_GENERIC select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_RELOCATABLE select SYS_SUPPORTS_SMARTMIPS - select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF help Select this to build a kernel which aims to support multiple boards, From 524a886aa8c98e6220e844afffa47439343db612 Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Thu, 1 Feb 2018 12:37:21 +0100 Subject: [PATCH 554/770] MIPS: Fix incorrect mem=X@Y handling commit 67a3ba25aa955198196f40b76b329b3ab9ad415a upstream. Commit 73fbc1eba7ff ("MIPS: fix mem=X@Y commandline processing") added a fix to ensure that the memory range between PHYS_OFFSET and low memory address specified by mem= cmdline argument is not later processed by free_all_bootmem. This change was incorrect for systems where the commandline specifies more than 1 mem argument, as it will cause all memory between PHYS_OFFSET and each of the memory offsets to be marked as reserved, which results in parts of the RAM marked as reserved (Creator CI20's u-boot has a default commandline argument 'mem=256M@0x0 mem=768M@0x30000000'). Change the behaviour to ensure that only the range between PHYS_OFFSET and the lowest start address of the memories is marked as protected. This change also ensures that the range is marked protected even if it's only defined through the devicetree and not only via commandline arguments. Reported-by: Mathieu Malaterre Signed-off-by: Marcin Nowakowski Fixes: 73fbc1eba7ff ("MIPS: fix mem=X@Y commandline processing") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # v4.11+ Tested-by: Mathieu Malaterre Patchwork: https://patchwork.linux-mips.org/patch/18562/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/setup.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index fe3939726765..795caa763da3 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -374,6 +374,7 @@ static void __init bootmem_init(void) unsigned long reserved_end; unsigned long mapstart = ~0UL; unsigned long bootmap_size; + phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX; bool bootmap_valid = false; int i; @@ -394,7 +395,8 @@ static void __init bootmem_init(void) max_low_pfn = 0; /* - * Find the highest page frame number we have available. + * Find the highest page frame number we have available + * and the lowest used RAM address */ for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long start, end; @@ -406,6 +408,8 @@ static void __init bootmem_init(void) end = PFN_DOWN(boot_mem_map.map[i].addr + boot_mem_map.map[i].size); + ramstart = min(ramstart, boot_mem_map.map[i].addr); + #ifndef CONFIG_HIGHMEM /* * Skip highmem here so we get an accurate max_low_pfn if low @@ -435,6 +439,13 @@ static void __init bootmem_init(void) mapstart = max(reserved_end, start); } + /* + * Reserve any memory between the start of RAM and PHYS_OFFSET + */ + if (ramstart > PHYS_OFFSET) + add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET, + BOOT_MEM_RESERVED); + if (min_low_pfn >= max_low_pfn) panic("Incorrect memory mapping !!!"); if (min_low_pfn > ARCH_PFN_OFFSET) { @@ -663,9 +674,6 @@ static int __init early_parse_mem(char *p) add_memory_region(start, size, BOOT_MEM_RAM); - if (start && start > PHYS_OFFSET) - add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET, - BOOT_MEM_RESERVED); return 0; } early_param("mem", early_parse_mem); From 990bb6eb9ec21bbf5925c9def1db53958041ce3b Mon Sep 17 00:00:00 2001 From: Dongdong Liu Date: Thu, 28 Dec 2017 17:53:32 +0800 Subject: [PATCH 555/770] PCI: Disable MSI for HiSilicon Hip06/Hip07 only in Root Port mode commit deb86999323661c019ef2740eb9d479d1e526b5c upstream. HiSilicon Hip06/Hip07 can operate as either a Root Port or an Endpoint. It always advertises an MSI capability, but it can only generate MSIs when in Endpoint mode. The device has the same Vendor and Device IDs in both modes, so check the Class Code and disable MSI only when operating as a Root Port. [bhelgaas: changelog] Fixes: 72f2ff0deb87 ("PCI: Disable MSI for HiSilicon Hip06/Hip07 Root Ports") Signed-off-by: Dongdong Liu Signed-off-by: Bjorn Helgaas Reviewed-by: Zhou Wang Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f66f9375177c..4c3feb96f391 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1636,8 +1636,8 @@ static void quirk_pcie_mch(struct pci_dev *pdev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_pcie_mch); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_pcie_mch); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_pcie_mch); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, quirk_pcie_mch); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_HUAWEI, 0x1610, PCI_CLASS_BRIDGE_PCI, 8, quirk_pcie_mch); /* * It's possible for the MSI to get corrupted if shpc and acpi From 8c125f391333a1e4fb1f760f5ed14657a1894bcf Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Thu, 11 Jan 2018 12:36:16 -0800 Subject: [PATCH 556/770] PCI: iproc: Fix NULL pointer dereference for BCMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b65ca50d24ce33cb92d88840e289135c92b40ed upstream. With the inbound DMA mapping supported added, the iProc PCIe driver parses DT property "dma-ranges" through call to "of_pci_dma_range_parser_init()". In the case of BCMA, this results in a NULL pointer deference due to a missing of_node. Fix this by adding a guard in pcie-iproc-platform.c to only enable the inbound DMA mapping logic when DT property "dma-ranges" is present. Fixes: dd9d4e7498de3 ("PCI: iproc: Add inbound DMA mapping support") Reported-by: Rafał Miłecki Signed-off-by: Ray Jui [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Tested-by: Rafał Miłecki cc: # 4.10+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-iproc-platform.c | 7 +++++++ drivers/pci/host/pcie-iproc.c | 8 +++++--- drivers/pci/host/pcie-iproc.h | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c index a5073a921a04..32228d41f746 100644 --- a/drivers/pci/host/pcie-iproc-platform.c +++ b/drivers/pci/host/pcie-iproc-platform.c @@ -92,6 +92,13 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) pcie->need_ob_cfg = true; } + /* + * DT nodes are not used by all platforms that use the iProc PCIe + * core driver. For platforms that require explict inbound mapping + * configuration, "dma-ranges" would have been present in DT + */ + pcie->need_ib_cfg = of_property_read_bool(np, "dma-ranges"); + /* PHY use is optional */ pcie->phy = devm_phy_get(dev, "pcie-phy"); if (IS_ERR(pcie->phy)) { diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index 3a8b9d20ee57..c0ecc9f35667 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -1396,9 +1396,11 @@ int iproc_pcie_setup(struct iproc_pcie *pcie, struct list_head *res) } } - ret = iproc_pcie_map_dma_ranges(pcie); - if (ret && ret != -ENOENT) - goto err_power_off_phy; + if (pcie->need_ib_cfg) { + ret = iproc_pcie_map_dma_ranges(pcie); + if (ret && ret != -ENOENT) + goto err_power_off_phy; + } #ifdef CONFIG_ARM pcie->sysdata.private_data = pcie; diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h index a6b55cec9a66..4ac6282f2bfd 100644 --- a/drivers/pci/host/pcie-iproc.h +++ b/drivers/pci/host/pcie-iproc.h @@ -74,6 +74,7 @@ struct iproc_msi; * @ob: outbound mapping related parameters * @ob_map: outbound mapping related parameters specific to the controller * + * @need_ib_cfg: indicates SW needs to configure the inbound mapping window * @ib: inbound mapping related parameters * @ib_map: outbound mapping region related parameters * @@ -101,6 +102,7 @@ struct iproc_pcie { struct iproc_pcie_ob ob; const struct iproc_pcie_ob_map *ob_map; + bool need_ib_cfg; struct iproc_pcie_ib ib; const struct iproc_pcie_ib_map *ib_map; From c77b388550368068a1615e164b9441937786a1de Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2017 14:38:31 +0100 Subject: [PATCH 557/770] PCI: keystone: Fix interrupt-controller-node lookup commit eac56aa3bc8af3d9b9850345d0f2da9d83529134 upstream. Fix child-node lookup during initialisation which was using the wrong OF-helper and ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent pci node could end up being prematurely freed as of_find_node_by_name() drops a reference to its first argument. Any matching child interrupt-controller node was also leaked. Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver") Cc: stable # 3.18 Acked-by: Murali Karicheri Signed-off-by: Johan Hovold [lorenzo.pieralisi@arm.com: updated commit subject] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-keystone.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 5bee3af47588..39405598b22d 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -178,7 +178,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, } /* interrupt controller is in a child node */ - *np_temp = of_find_node_by_name(np_pcie, controller); + *np_temp = of_get_child_by_name(np_pcie, controller); if (!(*np_temp)) { dev_err(dev, "Node for %s is absent\n", controller); return -EINVAL; @@ -187,6 +187,7 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, temp = of_irq_count(*np_temp); if (!temp) { dev_err(dev, "No IRQ entries in %s\n", controller); + of_node_put(*np_temp); return -EINVAL; } @@ -204,6 +205,8 @@ static int ks_pcie_get_irq_controller_info(struct keystone_pcie *ks_pcie, break; } + of_node_put(*np_temp); + if (temp) { *num_irqs = temp; return 0; From 6e6fd5b4ea9d568ac989a61b8d78ae0c5ce19bcf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Dec 2017 19:48:43 +0100 Subject: [PATCH 558/770] video: fbdev: atmel_lcdfb: fix display-timings lookup commit 9cb18db0701f6b74f0c45c23ad767b3ebebe37f6 upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent display node was also prematurely freed. Note that the display and timings node references are never put after a successful dt-initialisation so the nodes would leak on later probe deferrals and on driver unbind. Fixes: b985172b328a ("video: atmel_lcdfb: add device tree suport") Cc: stable # 3.13 Cc: Jean-Christophe PLAGNIOL-VILLARD Cc: Nicolas Ferre Cc: Alexandre Belloni Signed-off-by: Johan Hovold Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/atmel_lcdfb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index e06358da4b99..3dee267d7c75 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -1119,7 +1119,7 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) goto put_display_node; } - timings_np = of_find_node_by_name(display_np, "display-timings"); + timings_np = of_get_child_by_name(display_np, "display-timings"); if (!timings_np) { dev_err(dev, "failed to find display-timings node\n"); ret = -ENODEV; @@ -1140,6 +1140,12 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) fb_add_videomode(&fb_vm, &info->modelist); } + /* + * FIXME: Make sure we are not referencing any fields in display_np + * and timings_np and drop our references to them before returning to + * avoid leaking the nodes on probe deferral and driver unbind. + */ + return 0; put_timings_node: From 3711b5c568cd651ca78d6623d5c0fc839eac9499 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Jan 2018 17:04:22 +0100 Subject: [PATCH 559/770] console/dummy: leave .con_font_get set to NULL commit 724ba8b30b044aa0d94b1cd374fc15806cdd6f18 upstream. When this method is set, the caller expects struct console_font fields to be properly initialized when it returns. Leave it unset otherwise nonsensical (leaked kernel stack) values are returned to user space. Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/dummycon.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c index 9269d5685239..b90ef96e43d6 100644 --- a/drivers/video/console/dummycon.c +++ b/drivers/video/console/dummycon.c @@ -67,7 +67,6 @@ const struct consw dummy_con = { .con_switch = DUMMY, .con_blank = DUMMY, .con_font_set = DUMMY, - .con_font_get = DUMMY, .con_font_default = DUMMY, .con_font_copy = DUMMY, }; From 0569dd9beef44d33e4866bf2db2e03fd3644de4f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 16 Jan 2018 15:41:54 +0100 Subject: [PATCH 560/770] rbd: whitelist RBD_FEATURE_OPERATIONS feature bit commit e573427a440fd67d3f522357d7ac901d59281948 upstream. This feature bit restricts older clients from performing certain maintenance operations against an image (e.g. clone, snap create). krbd does not perform maintenance operations. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 609227211295..fe4fd8aee19f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -124,11 +124,13 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_FEATURE_STRIPINGV2 (1ULL<<1) #define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) #define RBD_FEATURE_DATA_POOL (1ULL<<7) +#define RBD_FEATURE_OPERATIONS (1ULL<<8) #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2 | \ RBD_FEATURE_EXCLUSIVE_LOCK | \ - RBD_FEATURE_DATA_POOL) + RBD_FEATURE_DATA_POOL | \ + RBD_FEATURE_OPERATIONS) /* Features supported by this (client software) implementation. */ From a2fd6c0950337a39abd79046edb43c24ba91d033 Mon Sep 17 00:00:00 2001 From: Simon Gaiser Date: Wed, 7 Feb 2018 21:47:40 +0100 Subject: [PATCH 561/770] xen: Fix {set,clear}_foreign_p2m_mapping on autotranslating guests commit 781198f1f373c3e350dbeb3af04a7d4c81c1b8d7 upstream. Commit 82616f9599a7 ("xen: remove tests for pvh mode in pure pv paths") removed the check for autotranslation from {set,clear}_foreign_p2m_mapping but those are called by grant-table.c also on PVH/HVM guests. Cc: # 4.14 Fixes: 82616f9599a7 ("xen: remove tests for pvh mode in pure pv paths") Signed-off-by: Simon Gaiser Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/p2m.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6083ba462f35..15812e553b95 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -694,6 +694,9 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i, ret = 0; pte_t *pte; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + if (kmap_ops) { ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, kmap_ops, count); @@ -736,6 +739,9 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, { int i, ret = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + for (i = 0; i < count; i++) { unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]); From 0f0fd00739118302568ff8b57ddfc5898c1a3796 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2018 17:42:33 +0000 Subject: [PATCH 562/770] xenbus: track caller request id commit 29fee6eed2811ff1089b30fc579a2d19d78016ab upstream. Commit fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") optimized xenbus concurrent accesses but in doing so broke UABI of /dev/xen/xenbus. Through /dev/xen/xenbus applications are in charge of xenbus message exchange with the correct header and body. Now, after the mentioned commit the replies received by application will no longer have the header req_id echoed back as it was on request (see specification below for reference), because that particular field is being overwritten by kernel. struct xsd_sockmsg { uint32_t type; /* XS_??? */ uint32_t req_id;/* Request identifier, echoed in daemon's response. */ uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ uint32_t len; /* Length of data following this. */ /* Generally followed by nul-terminated string(s). */ }; Before there was only one request at a time so req_id could simply be forwarded back and forth. To allow simultaneous requests we need a different req_id for each message thus kernel keeps a monotonic increasing counter for this field and is written on every request irrespective of userspace value. Forwarding again the req_id on userspace requests is not a solution because we would open the possibility of userspace-generated req_id colliding with kernel ones. So this patch instead takes another route which is to artificially keep user req_id while keeping the xenbus logic as is. We do that by saving the original req_id before xs_send(), use the private kernel counter as req_id and then once reply comes and was validated, we restore back the original req_id. Cc: # 4.11 Fixes: fd8aa9095a ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") Reported-by: Bhavesh Davda Signed-off-by: Joao Martins Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xenbus/xenbus.h | 1 + drivers/xen/xenbus/xenbus_comms.c | 1 + drivers/xen/xenbus/xenbus_xs.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 149c5e7efc89..092981171df1 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -76,6 +76,7 @@ struct xb_req_data { struct list_head list; wait_queue_head_t wq; struct xsd_sockmsg msg; + uint32_t caller_req_id; enum xsd_sockmsg_type type; char *body; const struct kvec *vec; diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 5b081a01779d..d239fc3c5e3d 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -309,6 +309,7 @@ static int process_msg(void) goto out; if (req->state == xb_req_state_wait_reply) { + req->msg.req_id = req->caller_req_id; req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3e59590c7254..3f3b29398ab8 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -227,6 +227,8 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) req->state = xb_req_state_queued; init_waitqueue_head(&req->wq); + /* Save the caller req_id and restore it later in the reply */ + req->caller_req_id = req->msg.req_id; req->msg.req_id = xs_request_enter(req); mutex_lock(&xb_write_mutex); @@ -310,6 +312,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->num_vecs = num_vecs; req->cb = xs_wake_up; + msg.req_id = 0; msg.tx_id = t.id; msg.type = type; msg.len = 0; From 29b4af70409cfca62d045cd4e0170912ab262c66 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Nov 2017 11:34:58 +0100 Subject: [PATCH 563/770] seq_file: fix incomplete reset on read from zero offset commit cf5eebae2cd28d37581507668605f4d23cd7218d upstream. When resetting iterator on a zero offset we need to discard any data already in the buffer (count), and private state of the iterator (version). For example this bug results in first line being repeated in /proc/mounts if doing a zero size read before a non-zero size read. Reported-by: Rich Felker Signed-off-by: Miklos Szeredi Fixes: e522751d605d ("seq_file: reset iterator to first record for zero offset") Cc: # v4.10 Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/seq_file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 4be761c1a03d..eea09f6d8830 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -181,8 +181,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) * if request is to read from zero offset, reset iterator to first * record as it might have been already advanced by previous requests */ - if (*ppos == 0) + if (*ppos == 0) { m->index = 0; + m->version = 0; + m->count = 0; + } /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { From 294975841483c08e84572713f348cd51b8408021 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Feb 2018 22:18:11 -0500 Subject: [PATCH 564/770] tracing: Fix parsing of globs with a wildcard at the beginning commit 07234021410bbc27b7c86c18de98616c29fbe667 upstream. Al Viro reported: For substring - sure, but what about something like "*a*b" and "a*b"? AFAICS, filter_parse_regex() ends up with identical results in both cases - MATCH_GLOB and *search = "a*b". And no way for the caller to tell one from another. Testing this with the following: # cd /sys/kernel/tracing # echo '*raw*lock' > set_ftrace_filter bash: echo: write error: Invalid argument With this patch: # echo '*raw*lock' > set_ftrace_filter # cat set_ftrace_filter _raw_read_trylock _raw_write_trylock _raw_read_unlock _raw_spin_unlock _raw_write_unlock _raw_spin_trylock _raw_spin_lock _raw_write_lock _raw_read_lock Al recommended not setting the search buffer to skip the first '*' unless we know we are not using MATCH_GLOB. This implements his suggested logic. Link: http://lkml.kernel.org/r/20180127170748.GF13338@ZenIV.linux.org.uk Cc: stable@vger.kernel.org Fixes: 60f1d5e3bac44 ("ftrace: Support full glob matching") Reviewed-by: Masami Hiramatsu Reported-by: Al Viro Suggsted-by: Al Viro Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 61e7f0678d33..a764aec3c9a1 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -400,7 +400,6 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) for (i = 0; i < len; i++) { if (buff[i] == '*') { if (!i) { - *search = buff + 1; type = MATCH_END_ONLY; } else if (i == len - 1) { if (type == MATCH_END_ONLY) @@ -410,14 +409,14 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) buff[i] = 0; break; } else { /* pattern continues, use full glob */ - type = MATCH_GLOB; - break; + return MATCH_GLOB; } } else if (strchr("[?\\", buff[i])) { - type = MATCH_GLOB; - break; + return MATCH_GLOB; } } + if (buff[0] == '*') + *search = buff + 1; return type; } From 753fc48e595a24eac4d4476a421c36307a7fbcb5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 7 Feb 2018 22:34:24 -0800 Subject: [PATCH 565/770] mpls, nospec: Sanitize array index in mpls_label_ok() commit 3968523f855050b8195134da951b87c20bd66130 upstream. mpls_label_ok() validates that the 'platform_label' array index from a userspace netlink message payload is valid. Under speculation the mpls_label_ok() result may not resolve in the CPU pipeline until after the index is used to access an array element. Sanitize the index to zero to prevent userspace-controlled arbitrary out-of-bounds speculation, a precursor for a speculative execution side channel vulnerability. Cc: Cc: "David S. Miller" Cc: Eric W. Biederman Signed-off-by: Dan Williams Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c5b9ce41d66f..aee385eb72e7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -904,24 +905,27 @@ errout: return err; } -static bool mpls_label_ok(struct net *net, unsigned int index, +static bool mpls_label_ok(struct net *net, unsigned int *index, struct netlink_ext_ack *extack) { + bool is_ok = true; + /* Reserved labels may not be set */ - if (index < MPLS_LABEL_FIRST_UNRESERVED) { + if (*index < MPLS_LABEL_FIRST_UNRESERVED) { NL_SET_ERR_MSG(extack, "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher"); - return false; + is_ok = false; } /* The full 20 bit range may not be supported. */ - if (index >= net->mpls.platform_labels) { + if (is_ok && *index >= net->mpls.platform_labels) { NL_SET_ERR_MSG(extack, "Label >= configured maximum in platform_labels"); - return false; + is_ok = false; } - return true; + *index = array_index_nospec(*index, net->mpls.platform_labels); + return is_ok; } static int mpls_route_add(struct mpls_route_config *cfg, @@ -944,7 +948,7 @@ static int mpls_route_add(struct mpls_route_config *cfg, index = find_free_label(net); } - if (!mpls_label_ok(net, index, extack)) + if (!mpls_label_ok(net, &index, extack)) goto errout; /* Append makes no sense with mpls */ @@ -1021,7 +1025,7 @@ static int mpls_route_del(struct mpls_route_config *cfg, index = cfg->rc_label; - if (!mpls_label_ok(net, index, extack)) + if (!mpls_label_ok(net, &index, extack)) goto errout; mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); @@ -1779,7 +1783,7 @@ static int rtm_to_route_config(struct sk_buff *skb, goto errout; if (!mpls_label_ok(cfg->rc_nlinfo.nl_net, - cfg->rc_label, extack)) + &cfg->rc_label, extack)) goto errout; break; } @@ -2106,7 +2110,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, goto errout; } - if (!mpls_label_ok(net, in_label, extack)) { + if (!mpls_label_ok(net, &in_label, extack)) { err = -EINVAL; goto errout; } From ff59e379234bb4b479deca97e29f685eabe22954 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 5 Feb 2018 12:38:11 -0600 Subject: [PATCH 566/770] rtlwifi: rtl8821ae: Fix connection lost problem correctly commit c713fb071edc0efc01a955f65a006b0e1795d2eb upstream. There has been a coding error in rtl8821ae since it was first introduced, namely that an 8-bit register was read using a 16-bit read in _rtl8821ae_dbi_read(). This error was fixed with commit 40b368af4b75 ("rtlwifi: Fix alignment issues"); however, this change led to instability in the connection. To restore stability, this change was reverted in commit b8b8b16352cd ("rtlwifi: rtl8821ae: Fix connection lost problem"). Unfortunately, the unaligned access causes machine checks in ARM architecture, and we were finally forced to find the actual cause of the problem on x86 platforms. Following a suggestion from Pkshih , it was found that increasing the ASPM L1 latency from 0 to 7 fixed the instability. This parameter was varied to see if a smaller value would work; however, it appears that 7 is the safest value. A new symbol is defined for this quantity, thus it can be easily changed if necessary. Fixes: b8b8b16352cd ("rtlwifi: rtl8821ae: Fix connection lost problem") Cc: Stable # 4.14+ Fix-suggested-by: Pkshih Signed-off-by: Larry Finger Tested-by: James Cameron # x86_64 OLPC NL3 Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 5 +++-- drivers/net/wireless/realtek/rtlwifi/wifi.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 9ac1511de7ba..b82e5b363c05 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1122,7 +1122,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_word(rtlpriv, read_addr); + ret = rtl_read_byte(rtlpriv, read_addr); } return ret; } @@ -1164,7 +1164,8 @@ static void _rtl8821ae_enable_aspm_back_door(struct ieee80211_hw *hw) } tmp = _rtl8821ae_dbi_read(rtlpriv, 0x70f); - _rtl8821ae_dbi_write(rtlpriv, 0x70f, tmp | BIT(7)); + _rtl8821ae_dbi_write(rtlpriv, 0x70f, tmp | BIT(7) | + ASPM_L1_LATENCY << 3); tmp = _rtl8821ae_dbi_read(rtlpriv, 0x719); _rtl8821ae_dbi_write(rtlpriv, 0x719, tmp | BIT(3) | BIT(4)); diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 1ab1024330fb..25c4e3e55921 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -99,6 +99,7 @@ #define RTL_USB_MAX_RX_COUNT 100 #define QBSS_LOAD_SIZE 5 #define MAX_WMMELE_LENGTH 64 +#define ASPM_L1_LATENCY 7 #define TOTAL_CAM_ENTRY 32 From 851e2ea82303f3a543b32f5cc81adbc1929d053a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Feb 2018 13:14:09 +0000 Subject: [PATCH 567/770] arm64: proc: Set PTE_NG for table entries to avoid traversing them twice commit 2ce77f6d8a9ae9ce6d80397d88bdceb84a2004cd upstream. When KASAN is enabled, the swapper page table contains many identical mappings of the zero page, which can lead to a stall during boot whilst the G -> nG code continually walks the same page table entries looking for global mappings. This patch sets the nG bit (bit 11, which is IGNORED) in table entries after processing the subtree so we can easily skip them if we see them a second time. Tested-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/proc.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 27058f3fd132..329a1c43365e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -190,7 +190,8 @@ ENDPROC(idmap_cpu_replace_ttbr1) dc cvac, cur_\()\type\()p // Ensure any existing dirty dmb sy // lines are written back before ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, next_\()\type // Skip invalid entries + tbz \type, #0, skip_\()\type // Skip invalid and + tbnz \type, #11, skip_\()\type // non-global entries .endm .macro __idmap_kpti_put_pgtable_ent_ng, type @@ -249,8 +250,9 @@ ENTRY(idmap_kpti_install_ng_mappings) add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) do_pgd: __idmap_kpti_get_pgtable_ent pgd tbnz pgd, #1, walk_puds - __idmap_kpti_put_pgtable_ent_ng pgd next_pgd: + __idmap_kpti_put_pgtable_ent_ng pgd +skip_pgd: add cur_pgdp, cur_pgdp, #8 cmp cur_pgdp, end_pgdp b.ne do_pgd @@ -278,8 +280,9 @@ walk_puds: add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) do_pud: __idmap_kpti_get_pgtable_ent pud tbnz pud, #1, walk_pmds - __idmap_kpti_put_pgtable_ent_ng pud next_pud: + __idmap_kpti_put_pgtable_ent_ng pud +skip_pud: add cur_pudp, cur_pudp, 8 cmp cur_pudp, end_pudp b.ne do_pud @@ -298,8 +301,9 @@ walk_pmds: add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) do_pmd: __idmap_kpti_get_pgtable_ent pmd tbnz pmd, #1, walk_ptes - __idmap_kpti_put_pgtable_ent_ng pmd next_pmd: + __idmap_kpti_put_pgtable_ent_ng pmd +skip_pmd: add cur_pmdp, cur_pmdp, #8 cmp cur_pmdp, end_pmdp b.ne do_pmd @@ -317,7 +321,7 @@ walk_ptes: add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) do_pte: __idmap_kpti_get_pgtable_ent pte __idmap_kpti_put_pgtable_ent_ng pte -next_pte: +skip_pte: add cur_ptep, cur_ptep, #8 cmp cur_ptep, end_ptep b.ne do_pte From dc0b764a7c1a3a03b56c5582bf9a9c4554bf5e96 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 19 Oct 2017 08:21:50 +0200 Subject: [PATCH 568/770] qxl: alloc & use shadow for dumb buffers commit 62676d10b483a2ff6e8b08c5e7c7d63a831343f5 upstream. This patch changes the way the primary surface is used for dumb framebuffers. Instead of configuring the bo itself as primary surface a shadow bo is created and used instead. Framebuffers can share the shadow bo in case they have the same format and resolution. On atomic plane updates we don't have to update the primary surface in case we pageflip from one framebuffer to another framebuffer which shares the same shadow. This in turn avoids the flicker caused by the primary-destroy + primary-create cycle, which is very annonying when running wayland on qxl. The qxl driver never actually writes to the shadow bo. It sends qxl blit commands which update it though, and the spice server might actually execute them (and thereby write to the shadow) in case the local rendering is kicked for some reason. This happens for example in case qemu is asked to write out a dump of the guest display (screendump monitor command). Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20171019062150.28090-3-kraxel@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_cmd.c | 6 +++- drivers/gpu/drm/qxl/qxl_display.c | 49 +++++++++++++++++++++++++++++-- drivers/gpu/drm/qxl/qxl_drv.h | 2 ++ drivers/gpu/drm/qxl/qxl_dumb.c | 1 + 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 74fc9362ecf9..3eb920851141 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -388,7 +388,11 @@ void qxl_io_create_primary(struct qxl_device *qdev, create->width = bo->surf.width; create->height = bo->surf.height; create->stride = bo->surf.stride; - create->mem = qxl_bo_physical_address(qdev, bo, offset); + if (bo->shadow) { + create->mem = qxl_bo_physical_address(qdev, bo->shadow, offset); + } else { + create->mem = qxl_bo_physical_address(qdev, bo, offset); + } QXL_INFO(qdev, "%s: mem = %llx, from %p\n", __func__, create->mem, bo->kptr); diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index ced6564b5814..7335d99244d5 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -305,7 +305,9 @@ static const struct drm_crtc_funcs qxl_crtc_funcs = { void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct qxl_framebuffer *qxl_fb = to_qxl_framebuffer(fb); + struct qxl_bo *bo = gem_to_qxl_bo(qxl_fb->obj); + WARN_ON(bo->shadow); drm_gem_object_unreference_unlocked(qxl_fb->obj); drm_framebuffer_cleanup(fb); kfree(qxl_fb); @@ -508,6 +510,7 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, .x2 = qfb->base.width, .y2 = qfb->base.height }; + bool same_shadow = false; if (old_state->fb) { qfb_old = to_qxl_framebuffer(old_state->fb); @@ -519,15 +522,23 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, if (bo == bo_old) return; + if (bo_old && bo_old->shadow && bo->shadow && + bo_old->shadow == bo->shadow) { + same_shadow = true; + } + if (bo_old && bo_old->is_primary) { - qxl_io_destroy_primary(qdev); + if (!same_shadow) + qxl_io_destroy_primary(qdev); bo_old->is_primary = false; } if (!bo->is_primary) { - qxl_io_create_primary(qdev, 0, bo); + if (!same_shadow) + qxl_io_create_primary(qdev, 0, bo); bo->is_primary = true; } + qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1); } @@ -681,8 +692,9 @@ static void qxl_cursor_atomic_disable(struct drm_plane *plane, static int qxl_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { + struct qxl_device *qdev = plane->dev->dev_private; struct drm_gem_object *obj; - struct qxl_bo *user_bo; + struct qxl_bo *user_bo, *old_bo = NULL; int ret; if (!new_state->fb) @@ -691,6 +703,32 @@ static int qxl_plane_prepare_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(new_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + user_bo->is_dumb && !user_bo->shadow) { + if (plane->state->fb) { + obj = to_qxl_framebuffer(plane->state->fb)->obj; + old_bo = gem_to_qxl_bo(obj); + } + if (old_bo && old_bo->shadow && + user_bo->gem_base.size == old_bo->gem_base.size && + plane->state->crtc == new_state->crtc && + plane->state->crtc_w == new_state->crtc_w && + plane->state->crtc_h == new_state->crtc_h && + plane->state->src_x == new_state->src_x && + plane->state->src_y == new_state->src_y && + plane->state->src_w == new_state->src_w && + plane->state->src_h == new_state->src_h && + plane->state->rotation == new_state->rotation && + plane->state->zpos == new_state->zpos) { + drm_gem_object_get(&old_bo->shadow->gem_base); + user_bo->shadow = old_bo->shadow; + } else { + qxl_bo_create(qdev, user_bo->gem_base.size, + true, true, QXL_GEM_DOMAIN_VRAM, NULL, + &user_bo->shadow); + } + } + ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL); if (ret) return ret; @@ -715,6 +753,11 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane, obj = to_qxl_framebuffer(old_state->fb)->obj; user_bo = gem_to_qxl_bo(obj); qxl_bo_unpin(user_bo); + + if (user_bo->shadow && !user_bo->is_primary) { + drm_gem_object_put_unlocked(&user_bo->shadow->gem_base); + user_bo->shadow = NULL; + } } static const uint32_t qxl_cursor_plane_formats[] = { diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 3397a1907336..8f3667de1537 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -113,6 +113,8 @@ struct qxl_bo { /* Constant after initialization */ struct drm_gem_object gem_base; bool is_primary; /* is this now a primary surface */ + bool is_dumb; + struct qxl_bo *shadow; bool hw_surf_alloc; struct qxl_surface surf; uint32_t surface_id; diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c index 5e65d5d2d937..11085ab01374 100644 --- a/drivers/gpu/drm/qxl/qxl_dumb.c +++ b/drivers/gpu/drm/qxl/qxl_dumb.c @@ -63,6 +63,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv, &handle); if (r) return r; + qobj->is_dumb = true; args->pitch = pitch; args->handle = handle; return 0; From 848dd9bf51544c8e5436960124f3d1f9c51cdb99 Mon Sep 17 00:00:00 2001 From: Ray Strode Date: Mon, 27 Nov 2017 16:50:10 -0500 Subject: [PATCH 569/770] drm/qxl: reapply cursor after resetting primary commit 9428088c90b6f7d5edd2a1b0d742c75339b36f6e upstream. QXL associates mouse state with its primary plane. Destroying a primary plane and putting a new one in place has the side effect of destroying the cursor as well. This commit changes the driver to reapply the cursor any time a new primary is created. It achieves this by keeping a reference to the cursor bo on the qxl_crtc struct. This fix is very similar to commit 4532b241a4b7 ("drm/qxl: reapply cursor after SetCrtc calls") which got implicitly reverted as part of implementing the atomic modeset feature. Cc: Gerd Hoffmann Cc: Dave Airlie Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1512097 Fixes: 1277eed5fecb ("drm: qxl: Atomic phase 1: convert cursor to universal plane") Cc: stable@vger.kernel.org Signed-off-by: Ray Strode Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 59 +++++++++++++++++++++++++++++++ drivers/gpu/drm/qxl/qxl_drv.h | 2 ++ 2 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 7335d99244d5..9a9214ae0fb5 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -289,6 +289,7 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc) { struct qxl_crtc *qxl_crtc = to_qxl_crtc(crtc); + qxl_bo_unref(&qxl_crtc->cursor_bo); drm_crtc_cleanup(crtc); kfree(qxl_crtc); } @@ -495,6 +496,53 @@ static int qxl_primary_atomic_check(struct drm_plane *plane, return 0; } +static int qxl_primary_apply_cursor(struct drm_plane *plane) +{ + struct drm_device *dev = plane->dev; + struct qxl_device *qdev = dev->dev_private; + struct drm_framebuffer *fb = plane->state->fb; + struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc); + struct qxl_cursor_cmd *cmd; + struct qxl_release *release; + int ret = 0; + + if (!qcrtc->cursor_bo) + return 0; + + ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), + QXL_RELEASE_CURSOR_CMD, + &release, NULL); + if (ret) + return ret; + + ret = qxl_release_list_add(release, qcrtc->cursor_bo); + if (ret) + goto out_free_release; + + ret = qxl_release_reserve_list(release, false); + if (ret) + goto out_free_release; + + cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); + cmd->type = QXL_CURSOR_SET; + cmd->u.set.position.x = plane->state->crtc_x + fb->hot_x; + cmd->u.set.position.y = plane->state->crtc_y + fb->hot_y; + + cmd->u.set.shape = qxl_bo_physical_address(qdev, qcrtc->cursor_bo, 0); + + cmd->u.set.visible = 1; + qxl_release_unmap(qdev, release, &cmd->release_info); + + qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); + qxl_release_fence_buffer_objects(release); + + return ret; + +out_free_release: + qxl_release_free(qdev, release); + return ret; +} + static void qxl_primary_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -510,6 +558,7 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, .x2 = qfb->base.width, .y2 = qfb->base.height }; + int ret; bool same_shadow = false; if (old_state->fb) { @@ -531,6 +580,11 @@ static void qxl_primary_atomic_update(struct drm_plane *plane, if (!same_shadow) qxl_io_destroy_primary(qdev); bo_old->is_primary = false; + + ret = qxl_primary_apply_cursor(plane); + if (ret) + DRM_ERROR( + "could not set cursor after creating primary"); } if (!bo->is_primary) { @@ -571,6 +625,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct qxl_device *qdev = dev->dev_private; struct drm_framebuffer *fb = plane->state->fb; + struct qxl_crtc *qcrtc = to_qxl_crtc(plane->state->crtc); struct qxl_release *release; struct qxl_cursor_cmd *cmd; struct qxl_cursor *cursor; @@ -628,6 +683,10 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); cmd->type = QXL_CURSOR_SET; + + qxl_bo_unref(&qcrtc->cursor_bo); + qcrtc->cursor_bo = cursor_bo; + cursor_bo = NULL; } else { ret = qxl_release_reserve_list(release, true); diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 8f3667de1537..c0a927efa653 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -135,6 +135,8 @@ struct qxl_bo_list { struct qxl_crtc { struct drm_crtc base; int index; + + struct qxl_bo *cursor_bo; }; struct qxl_output { From 84b41e3708ac9869190dd09d47b548392b7a656a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Jan 2018 12:34:05 -0500 Subject: [PATCH 570/770] xprtrdma: Fix calculation of ri_max_send_sges commit 1179e2c27efe21167ec9d882b14becefba2ee990 upstream. Commit 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") introduced the rpcrdma_ia::ri_max_send_sges field. This fixes a problem where xprtrdma would not work if the device's max_sge capability was small (low single digits). At least RPCRDMA_MIN_SEND_SGES are needed for the inline parts of each RPC. ri_max_send_sges is set to this value: ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; Then when marshaling each RPC, rpcrdma_args_inline uses that value to determine whether the device has enough Send SGEs to convey an NFS WRITE payload inline, or whether instead a Read chunk is required. More recently, commit ae72950abf99 ("xprtrdma: Add data structure to manage RDMA Send arguments") used the ri_max_send_sges value to calculate the size of an array, but that commit erroneously assumed ri_max_send_sges contains a value similar to the device's max_sge, and not one that was reduced by the minimum SGE count. This assumption results in the calculated size of the sendctx's Send SGE array to be too small. When the array is used to marshal an RPC, the code can write Send SGEs into the following sendctx element in that array, corrupting it. When the device's max_sge is large, this issue is entirely harmless; but it results in an oops in the provider's post_send method, if dev.attrs.max_sge is small. So let's straighten this out: ri_max_send_sges will now contain a value with the same meaning as dev.attrs.max_sge, which makes the code easier to understand, and enables rpcrdma_sendctx_create to calculate the size of the SGE array correctly. Reported-by: Michal Kalderon Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") Signed-off-by: Chuck Lever Tested-by: Michal Kalderon Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index f1889f4d4803..491ae9fc561f 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -142,7 +142,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, if (xdr->page_len) { remaining = xdr->page_len; offset = offset_in_page(xdr->page_base); - count = 0; + count = RPCRDMA_MIN_SEND_SGES; while (remaining) { remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 11a1fbf7e59e..cdf51c272c62 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -523,7 +523,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } - ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; + ia->ri_max_send_sges = max_sge; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", From 67154fb8012152aed14dbd70e5b7fc79dcfd53f4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 Jan 2018 12:34:13 -0500 Subject: [PATCH 571/770] xprtrdma: Fix BUG after a device removal commit e89e8d8fcdc6751e86ccad794b052fe67e6ad619 upstream. Michal Kalderon reports a BUG that occurs just after device removal: [ 169.112490] rpcrdma: removing device qedr0 for 192.168.110.146:20049 [ 169.143909] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 169.181837] IP: rpcrdma_dma_unmap_regbuf+0xa/0x60 [rpcrdma] The RPC/RDMA client transport attempts to allocate some resources on demand. Registered buffers are one such resource. These are allocated (or re-allocated) by xprt_rdma_allocate to hold RPC Call and Reply messages. A hardware resource is associated with each of these buffers, as they can be used for a Send or Receive Work Request. If a device is removed from under an NFS/RDMA mount, the transport layer is responsible for releasing all hardware resources before the device can be finally unplugged. A BUG results when the NFS mount hasn't yet seen much activity: the transport tries to release resources that haven't yet been allocated. rpcrdma_free_regbuf() already checks for this case, so just move that check to cover the DEVICE_REMOVAL case as well. Reported-by: Michal Kalderon Fixes: bebd031866ca ("xprtrdma: Support unplugging an HCA ...") Signed-off-by: Chuck Lever Tested-by: Michal Kalderon Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cdf51c272c62..9e8e1de19b2e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1331,6 +1331,9 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) { + if (!rb) + return; + if (!rpcrdma_regbuf_is_mapped(rb)) return; @@ -1346,9 +1349,6 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) void rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) { - if (!rb) - return; - rpcrdma_dma_unmap_regbuf(rb); kfree(rb); } From 0528a533f368c93efe752dbd52698dcea214554a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Feb 2018 13:16:56 -0700 Subject: [PATCH 572/770] blk-wbt: account flush requests correctly commit 5235553d821433e1f4fa720fd025d2c4b7ee9994 upstream. Mikulas reported a workload that saw bad performance, and figured out what it was due to various other types of requests being accounted as reads. Flush requests, for instance. Due to the high latency of those, we heavily throttle the writes to keep the latencies in balance. But they really should be accounted as writes. Fix this by checking the exact type of the request. If it's a read, account as a read, if it's a write or a flush, account as a write. Any other request we disregard. Previously everything would have been mistakenly accounted as reads. Reported-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-wbt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index e59d59c11ebb..5c105514bca7 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -698,7 +698,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q) static int wbt_data_dir(const struct request *rq) { - return rq_data_dir(rq); + const int op = req_op(rq); + + if (op == REQ_OP_READ) + return READ; + else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH) + return WRITE; + + /* don't account */ + return -1; } int wbt_init(struct request_queue *q) From 4cbb9fdf133cc5a5dcb1ac7153791792976c62c9 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 13 Dec 2017 18:22:30 +0100 Subject: [PATCH 573/770] target/iscsi: avoid NULL dereference in CHAP auth error path commit ce512d79d0466a604793addb6b769d12ee326822 upstream. If chap_server_compute_md5() fails early, e.g. via CHAP_N mismatch, then crypto_free_shash() is called with a NULL pointer which gets dereferenced in crypto_shash_tfm(). Fixes: 69110e3cedbb ("iscsi-target: Use shash and ahash") Suggested-by: Markus Elfring Signed-off-by: David Disseldorp Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_auth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index f9bc8ec6fb6b..9518ffd8b8ba 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -421,7 +421,8 @@ static int chap_server_compute_md5( auth_ret = 0; out: kzfree(desc); - crypto_free_shash(tfm); + if (tfm) + crypto_free_shash(tfm); kfree(challenge); kfree(challenge_binhex); return auth_ret; From c8d0f63c15851e7d6f41b455d74e4b2b0a862a09 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2018 14:36:29 +0100 Subject: [PATCH 574/770] iscsi-target: make sure to wake up sleeping login worker commit 1c130ae00b769a2e2df41bad3d6051ee8234b636 upstream. Mike Christie reports: Starting in 4.14 iscsi logins will fail around 50% of the time. Problem appears to be that iscsi_target_sk_data_ready() callback may return without doing anything in case it finds the login work queue is still blocked in sock_recvmsg(). Nicholas Bellinger says: It would indicate users providing their own ->sk_data_ready() callback must be responsible for waking up a kthread context blocked on sock_recvmsg(..., MSG_WAITALL), when a second ->sk_data_ready() is received before the first sock_recvmsg(..., MSG_WAITALL) completes. So, do this and invoke the original data_ready() callback -- in case of tcp sockets this takes care of waking the thread. Disclaimer: I do not understand why this problem did not show up before tcp prequeue removal. (Drop WARN_ON usage - nab) Reported-by: Mike Christie Bisected-by: Mike Christie Tested-by: Mike Christie Diagnosed-by: Nicholas Bellinger Fixes: e7942d0633c4 ("tcp: remove prequeue support") Signed-off-by: Florian Westphal Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_nego.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 7a6751fecd32..87248a2512e5 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -432,6 +432,9 @@ static void iscsi_target_sk_data_ready(struct sock *sk) if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { write_unlock_bh(&sk->sk_callback_lock); pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn); + if (iscsi_target_sk_data_ready == conn->orig_data_ready) + return; + conn->orig_data_ready(sk); return; } From f6318abd3a5458be821eb417969726622e1e09c4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 15 Feb 2018 20:00:15 +1100 Subject: [PATCH 575/770] dm: correctly handle chained bios in dec_pending() commit 8dd601fa8317243be887458c49f6c29c2f3d719f upstream. dec_pending() is given an error status (possibly 0) to be recorded against a bio. It can be called several times on the one 'struct dm_io', and it is careful to only assign a non-zero error to io->status. However when it then assigned io->status to bio->bi_status, it is not careful and could overwrite a genuine error status with 0. This can happen when chained bios are in use. If a bio is chained beneath the bio that this dm_io is handling, the child bio might complete and set bio->bi_status before the dm_io completes. This has been possible since chained bios were introduced in 3.14, and has become a lot easier to trigger with commit 18a25da84354 ("dm: ensure bio submission follows a depth-first tree walk") as that commit caused dm to start using chained bios itself. A particular failure mode is that if a bio spans an 'error' target and a working target, the 'error' fragment will complete instantly and set the ->bi_status, and the other fragment will normally complete a little later, and will clear ->bi_status. The fix is simply to only assign io_error to bio->bi_status when io_error is not zero. Reported-and-tested-by: Milan Broz Cc: stable@vger.kernel.org (v3.14+) Signed-off-by: NeilBrown Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 804419635cc7..1dfc855ac708 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -815,7 +815,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - bio->bi_status = io_error; + if (io_error) + bio->bi_status = io_error; bio_endio(bio); } } From c766cb48771b7604fb5beca646bfa9904d0a2b17 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:50 -0700 Subject: [PATCH 576/770] Btrfs: fix deadlock in run_delalloc_nocow commit e89166990f11c3f21e1649d760dd35f9e410321c upstream. @cur_offset is not set back to what it should be (@cow_start) if btrfs_next_leaf() returns something wrong, and the range [cow_start, cur_offset) remains locked forever. cc: Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5eaedff28a32..db3f67b58104 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1330,8 +1330,11 @@ next_slot: leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); - if (ret < 0) + if (ret < 0) { + if (cow_start != (u64)-1) + cur_offset = cow_start; goto error; + } if (ret > 0) break; leaf = path->nodes[0]; From fda3bb933b332a206730300a40ee71023548d0b0 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:51 -0700 Subject: [PATCH 577/770] Btrfs: fix crash due to not cleaning up tree log block's dirty bits commit 1846430c24d66e85cc58286b3319c82cd54debb2 upstream. In cases that the whole fs flips into readonly status due to failures in critical sections, then log tree's blocks are still dirty, and this leads to a crash during umount time, the crash is about use-after-free, umount -> close_ctree -> stop workers -> iput(btree_inode) -> iput_final -> write_inode_now -> ... -> queue job on stop'd workers cc: v3.12+ Fixes: 681ae50917df ("Btrfs: cleanup reserved space when freeing tree log on error") Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d3002842d7f6..6e1a146f6ab4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2494,6 +2494,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(root_owner != @@ -2574,6 +2577,9 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); @@ -2652,6 +2658,9 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); + } else { + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) + clear_extent_buffer_dirty(next); } WARN_ON(log->root_key.objectid != From 9a701c4fa563d3102b291de2531e0288ac4d7ee1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:52 -0700 Subject: [PATCH 578/770] Btrfs: fix extent state leak from tree log commit 55237a5f2431a72435e3ed39e4306e973c0446b7 upstream. It's possible that btrfs_sync_log() bails out after one of the two btrfs_write_marked_extents() which convert extent state's state bit into EXTENT_NEED_WAIT from EXTENT_DIRTY/EXTENT_NEW, however only EXTENT_DIRTY and EXTENT_NEW are searched by free_log_tree() so that those extent states with EXTENT_NEED_WAIT lead to memory leak. cc: Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6e1a146f6ab4..75b964ff335b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3047,13 +3047,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, - 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, + 0, &start, &end, + EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT, NULL); if (ret) break; clear_extent_bits(&log->dirty_log_pages, start, end, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); } /* From 1371798b92c84ddb06e0630fc1e2a9fcdeec2b57 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:53 -0700 Subject: [PATCH 579/770] Btrfs: fix btrfs_evict_inode to handle abnormal inodes correctly commit e8f1bc1493855e32b7a2a019decc3c353d94daf6 upstream. This regression is introduced in commit 3d48d9810de4 ("btrfs: Handle uninitialised inode eviction"). There are two problems, a) it is ->destroy_inode() that does the final free on inode, not ->evict_inode(), b) clear_inode() must be called before ->evict_inode() returns. This could end up hitting BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); in evict() because I_CLEAR is set in clear_inode(). Fixes: commit 3d48d9810de4 ("btrfs: Handle uninitialised inode eviction") Cc: # v4.7-rc6+ Signed-off-by: Liu Bo Reviewed-by: Nikolay Borisov Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index db3f67b58104..dec7eea8aa29 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5318,7 +5318,7 @@ void btrfs_evict_inode(struct inode *inode) trace_btrfs_inode_evict(inode); if (!root) { - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + clear_inode(inode); return; } From f30c7d95b4eb1fefe9ac0b31f4a8a3e9d4b04bb1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:54 -0700 Subject: [PATCH 580/770] Btrfs: fix use-after-free on root->orphan_block_rsv commit 1a932ef4e47984dee227834667b5ff5a334e4805 upstream. I got these from running generic/475, WARNING: CPU: 0 PID: 26384 at fs/btrfs/inode.c:3326 btrfs_orphan_commit_root+0x1ac/0x2b0 [btrfs] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: btrfs_block_rsv_release+0x1c/0x70 [btrfs] Call Trace: btrfs_orphan_release_metadata+0x9f/0x200 [btrfs] btrfs_orphan_del+0x10d/0x170 [btrfs] btrfs_setattr+0x500/0x640 [btrfs] notify_change+0x7ae/0x870 do_truncate+0xca/0x130 vfs_truncate+0x2ee/0x3d0 do_sys_truncate+0xaf/0xf0 SyS_truncate+0xe/0x10 entry_SYSCALL_64_fastpath+0x1f/0x96 The race is between btrfs_orphan_commit_root and btrfs_orphan_del, t1 t2 btrfs_orphan_commit_root btrfs_orphan_del spin_lock check (&root->orphan_inodes) root->orphan_block_rsv = NULL; spin_unlock atomic_dec(&root->orphan_inodes); access root->orphan_block_rsv Accessing root->orphan_block_rsv must be done before decreasing root->orphan_inodes. cc: v3.12+ Fixes: 703c88e03524 ("Btrfs: fix tracking of orphan inode count") Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dec7eea8aa29..1ae61f82e54b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3371,6 +3371,11 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, ret = btrfs_orphan_reserve_metadata(trans, inode); ASSERT(!ret); if (ret) { + /* + * dec doesn't need spin_lock as ->orphan_block_rsv + * would be released only if ->orphan_inodes is + * zero. + */ atomic_dec(&root->orphan_inodes); clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &inode->runtime_flags); @@ -3385,12 +3390,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { - atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &inode->runtime_flags); btrfs_orphan_release_metadata(inode); } + /* + * btrfs_orphan_commit_root may race with us and set + * ->orphan_block_rsv to zero, in order to avoid that, + * decrease ->orphan_inodes after everything is done. + */ + atomic_dec(&root->orphan_inodes); if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &inode->runtime_flags); @@ -3422,29 +3432,27 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, { struct btrfs_root *root = inode->root; int delete_item = 0; - int release_rsv = 0; int ret = 0; - spin_lock(&root->orphan_lock); if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &inode->runtime_flags)) delete_item = 1; + if (delete_item && trans) + ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); + if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &inode->runtime_flags)) - release_rsv = 1; - spin_unlock(&root->orphan_lock); - - if (delete_item) { - atomic_dec(&root->orphan_inodes); - if (trans) - ret = btrfs_del_orphan_item(trans, root, - btrfs_ino(inode)); - } - - if (release_rsv) btrfs_orphan_release_metadata(inode); + /* + * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv + * to zero, in order to avoid that, decrease ->orphan_inodes after + * everything is done. + */ + if (delete_item) + atomic_dec(&root->orphan_inodes); + return ret; } From 61c07810bf2e53f348dbdba751ddf7ba31aba4aa Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Jan 2018 11:02:56 -0700 Subject: [PATCH 581/770] Btrfs: fix unexpected -EEXIST when creating new inode commit 900c9981680067573671ecc5cbfa7c5770be3a40 upstream. The highest objectid, which is assigned to new inode, is decided at the time of initializing fs roots. However, in cases where log replay gets processed, the btree which fs root owns might be changed, so we have to search it again for the highest objectid, otherwise creating new inode would end up with -EEXIST. cc: v4.4-rc6+ Fixes: f32e48e92596 ("Btrfs: Initialize btrfs_root->highest_objectid when loading tree root and subvolume roots") Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 75b964ff335b..b6dfe7af7a1f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -28,6 +28,7 @@ #include "hash.h" #include "compression.h" #include "qgroup.h" +#include "inode-map.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -5715,6 +5716,23 @@ again: path); } + if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { + struct btrfs_root *root = wc.replay_dest; + + btrfs_release_path(path); + + /* + * We have just replayed everything, and the highest + * objectid of fs roots probably has changed in case + * some inode_item's got replayed. + * + * root->objectid_mutex is not acquired as log replay + * could only happen during mount. + */ + ret = btrfs_find_highest_objectid(root, + &root->highest_objectid); + } + key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); From 8a8c9588c289bcd2dbaf7a198144293978833a7c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 22 Jan 2018 22:02:05 +0100 Subject: [PATCH 582/770] 9p/trans_virtio: discard zero-length reply commit 26d99834f89e76514076d9cd06f61e56e6a509b8 upstream. When a 9p request is successfully flushed, the server is expected to just mark it as used without sending a 9p reply (ie, without writing data into the buffer). In this case, virtqueue_get_buf() will return len == 0 and we must not report a REQ_STATUS_RCVD status to the client, otherwise the client will erroneously assume the request has not been flushed. Cc: stable@vger.kernel.org Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index f3a4efcf1456..3aa5a93ad107 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -160,7 +160,8 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ wake_up(chan->vc_wq); - p9_client_cb(chan->client, req, REQ_STATUS_RCVD); + if (len) + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } From bc74262f3a6524b5122cf1388965bd0c7d9cb5cb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 9 Feb 2018 13:21:42 +0100 Subject: [PATCH 583/770] mtd: nand: vf610: set correct ooblayout commit ea56fb282368ea08c2a313af6b55cb597aec4db1 upstream. With commit 3cf32d180227 ("mtd: nand: vf610: switch to mtd_ooblayout_ops") the driver started to use the NAND cores default large page ooblayout. However, shortly after commit 6a623e076944 ("mtd: nand: add ooblayout for old hamming layout") changed the default layout to the old hamming layout, which is not what vf610_nfc is using. Specify the default large page layout explicitly. Fixes: 6a623e076944 ("mtd: nand: add ooblayout for old hamming layout") Cc: # v4.12+ Signed-off-by: Stefan Agner Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/vf610_nfc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c index 8037d4b48a05..e2583a539b41 100644 --- a/drivers/mtd/nand/vf610_nfc.c +++ b/drivers/mtd/nand/vf610_nfc.c @@ -752,10 +752,8 @@ static int vf610_nfc_probe(struct platform_device *pdev) if (mtd->oobsize > 64) mtd->oobsize = 64; - /* - * mtd->ecclayout is not specified here because we're using the - * default large page ECC layout defined in NAND core. - */ + /* Use default large page ECC layout defined in NAND core */ + mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); if (chip->ecc.strength == 32) { nfc->ecc_mode = ECC_60_BYTE; chip->ecc.bytes = 60; From c0cf529a8f4df5d063816c59af8ea927d277c799 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 29 Jan 2018 14:23:15 +0800 Subject: [PATCH 584/770] ALSA: hda - Fix headset mic detection problem for two Dell machines commit 3f2f7c553d077be6a30cb96b2976a2c940bf5335 upstream. One of them has the codec of alc256 and the other one has the codec of alc289. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b2d039537d5e..bd6d562fe716 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6584,6 +6584,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0xb7a60130}, {0x14, 0x90170110}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x14, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, @@ -6653,6 +6658,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60120}, {0x14, 0x90170110}, {0x21, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0xb7a60130}, + {0x14, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0290, 0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1, ALC290_STANDARD_PINS, {0x15, 0x04211040}, From 8f65cd77c97c033bade459958611b1c9126c553c Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Mon, 29 Jan 2018 06:37:55 +0100 Subject: [PATCH 585/770] ALSA: usb-audio: Fix UAC2 get_ctl request with a RANGE attribute commit 447cae58cecd69392b74a4a42cd0ab9cabd816af upstream. The layout of the UAC2 Control request and response varies depending on the request type. With the current implementation, only the Layout 2 Parameter Block (with the 2-byte sized RANGE attribute) is handled properly. For the Control requests with the 1-byte sized RANGE attribute (Bass Control, Mid Control, Tremble Control), the response is parsed incorrectly. This commit: * fixes the wLength field value in the request * fixes parsing the range values from the response Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0") Signed-off-by: Kirill Marinushkin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 75bce127d768..89efec891e68 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -347,17 +347,20 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) { struct snd_usb_audio *chip = cval->head.mixer->chip; - unsigned char buf[4 + 3 * sizeof(__u32)]; /* enough space for one range */ + /* enough space for one range */ + unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)]; unsigned char *val; - int idx = 0, ret, size; + int idx = 0, ret, val_size, size; __u8 bRequest; + val_size = uac2_ctl_value_size(cval->val_type); + if (request == UAC_GET_CUR) { bRequest = UAC2_CS_CUR; - size = uac2_ctl_value_size(cval->val_type); + size = val_size; } else { bRequest = UAC2_CS_RANGE; - size = sizeof(buf); + size = sizeof(__u16) + 3 * val_size; } memset(buf, 0, sizeof(buf)); @@ -390,16 +393,17 @@ error: val = buf + sizeof(__u16); break; case UAC_GET_MAX: - val = buf + sizeof(__u16) * 2; + val = buf + sizeof(__u16) + val_size; break; case UAC_GET_RES: - val = buf + sizeof(__u16) * 3; + val = buf + sizeof(__u16) + val_size * 2; break; default: return -EINVAL; } - *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16))); + *value_ret = convert_signed_value(cval, + snd_usb_combine_bytes(val, val_size)); return 0; } From 5640397f06ada0800d734b98fced5e74fbd4b217 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 2 Feb 2018 15:13:09 +0800 Subject: [PATCH 586/770] ALSA: hda/realtek - Add headset mode support for Dell laptop commit 40e2c4e5a7efcd50983aacbddd3c617e776018bf upstream. This platform had two Dmic and single Dmic. This update was for single Dmic. This commit was for two Dmic. Fixes: 75ee94b20b46 ("ALSA: hda - fix headset mic problem for Dell machines...") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bd6d562fe716..41acae02991d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6176,6 +6176,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 81ae4f7479f96a5123ace58cc3bdc11303de8a46 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 2 Feb 2018 15:26:46 +0800 Subject: [PATCH 587/770] ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform commit 61fcf8ece9b6b09450250c4ca40cc3b81a96a68d upstream. Thinkpad Dock device support for ALC298 platform. It need to use SSID for the quirk table. Because IdeaPad also has ALC298 platform. Use verb for the quirk table will confuse. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 41acae02991d..80de62d89314 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4827,6 +4827,28 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, } } +static void alc_fixup_tpt470_dock(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x17, 0x21211010 }, /* dock headphone */ + { 0x19, 0x21a11010 }, /* dock mic */ + { } + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + /* Enable DOCK device */ + snd_hda_codec_write(codec, 0x17, 0, + AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); + /* Enable DOCK device */ + snd_hda_codec_write(codec, 0x19, 0, + AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -5301,6 +5323,7 @@ enum { ALC700_FIXUP_INTEL_REFERENCE, ALC274_FIXUP_DELL_BIND_DACS, ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, + ALC298_FIXUP_TPT470_DOCK, }; static const struct hda_fixup alc269_fixups[] = { @@ -6126,6 +6149,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC274_FIXUP_DELL_BIND_DACS }, + [ALC298_FIXUP_TPT470_DOCK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt470_dock, + .chained = true, + .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6307,8 +6336,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x222d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -6329,7 +6366,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ From de3e8191756434f6fa4bd69d38ceb5bacd0f1028 Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Wed, 14 Feb 2018 11:29:15 +0100 Subject: [PATCH 588/770] ALSA: hda/realtek: PCI quirk for Fujitsu U7x7 commit fdcc968a3b290407bcba9d4c90e2fba6d8d928f1 upstream. These laptops have a combined jack to attach headsets, the U727 on the left, the U757 on the right, but a headsets microphone doesn't work. Using hdajacksensetest I found that pin 0x19 changed the present state when plugging the headset, in addition to 0x21, but didn't have the correct configuration (shown as "Not connected"). So this sets the configuration to the same values as the headphone pin 0x21 except for the device type microphone, which makes it work correctly. With the patch the configured pins for U727 are Pin 0x12 (Internal Mic, Mobile-In): present = No Pin 0x14 (Internal Speaker): present = No Pin 0x19 (Black Mic, Left side): present = No Pin 0x1d (Internal Aux): present = No Pin 0x21 (Black Headphone, Left side): present = No Signed-off-by: Jan-Marek Glogowski Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 80de62d89314..b7acffdf16a4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3355,6 +3355,19 @@ static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; } +static void alc269_fixup_pincfg_U7x7_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + unsigned int cfg_headphone = snd_hda_codec_get_pincfg(codec, 0x21); + unsigned int cfg_headset_mic = snd_hda_codec_get_pincfg(codec, 0x19); + + if (cfg_headphone && cfg_headset_mic == 0x411111f0) + snd_hda_codec_set_pincfg(codec, 0x19, + (cfg_headphone & ~AC_DEFCFG_DEVICE) | + (AC_JACK_MIC_IN << AC_DEFCFG_DEVICE_SHIFT)); +} + static void alc269_fixup_hweq(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5228,6 +5241,7 @@ enum { ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, + ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -5434,6 +5448,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_pincfg_no_hp_to_lineout, }, + [ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_U7x7_headset_mic, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6308,6 +6326,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), + SND_PCI_QUIRK(0x10cf, 0x1629, "Lifebook U7x7", ALC255_FIXUP_LIFEBOOK_U7x7_HEADSET_MIC), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), From e1b13eb16c98eb289751098f8f79ff220b8bfdff Mon Sep 17 00:00:00 2001 From: Lassi Ylikojola Date: Fri, 9 Feb 2018 16:51:36 +0200 Subject: [PATCH 589/770] ALSA: usb-audio: add implicit fb quirk for Behringer UFX1204 commit 5e35dc0338d85ccebacf3f77eca1e5dea73155e8 upstream. Add quirk to ensure a sync endpoint is properly configured. This patch is a fix for same symptoms on Behringer UFX1204 as patch from Albertto Aquirre on Dec 8 2016 for Axe-Fx II. Signed-off-by: Lassi Ylikojola Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index b9c9a19f9588..3cbfae6604f9 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -352,6 +352,15 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x86; iface = usb_ifnum_to_if(dev, 2); + if (!iface || iface->num_altsetting == 0) + return -EINVAL; + + alts = &iface->altsetting[1]; + goto add_sync_ep; + case USB_ID(0x1397, 0x0002): + ep = 0x81; + iface = usb_ifnum_to_if(dev, 1); + if (!iface || iface->num_altsetting == 0) return -EINVAL; From ec5a08abefe6aa239ada078e6041f222eac52196 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 11 Feb 2018 09:50:27 -0400 Subject: [PATCH 590/770] ALSA: usb: add more device quirks for USB DSD devices commit 7c74866baef1827e18f8269aec85030063520bd4 upstream. Add some more devices that need quirks to handle DSD modes correctly. Signed-off-by: Daniel Mack Reported-and-tested-by: Thomas Gresens Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8d7db7cd4f88..ed56cd307059 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1369,8 +1369,11 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; - /* Amanero Combo384 USB interface with native DSD support */ - case USB_ID(0x16d0, 0x071a): + /* Amanero Combo384 USB based DACs with native DSD support */ + case USB_ID(0x16d0, 0x071a): /* Amanero - Combo384 */ + case USB_ID(0x2ab6, 0x0004): /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */ + case USB_ID(0x2ab6, 0x0005): /* T+A USB HD Audio 1 */ + case USB_ID(0x2ab6, 0x0006): /* T+A USB HD Audio 2 */ if (fp->altsetting == 2) { switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) { case 0x199: From 7466294dad8963e29f051d8ab2f4d3f67d2f62a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 12 Feb 2018 15:20:51 +0100 Subject: [PATCH 591/770] ALSA: seq: Fix racy pool initializations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d15d662e89fc667b90cd294b0eb45694e33144da upstream. ALSA sequencer core initializes the event pool on demand by invoking snd_seq_pool_init() when the first write happens and the pool is empty. Meanwhile user can reset the pool size manually via ioctl concurrently, and this may lead to UAF or out-of-bound accesses since the function tries to vmalloc / vfree the buffer. A simple fix is to just wrap the snd_seq_pool_init() call with the recently introduced client->ioctl_mutex; as the calls for snd_seq_pool_init() from other side are always protected with this mutex, we can avoid the race. Reported-by: 范龙飞 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index ac30fc1ab98b..dea11d1babf5 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -999,7 +999,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf, { struct snd_seq_client *client = file->private_data; int written = 0, len; - int err = -EINVAL; + int err; struct snd_seq_event event; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT)) @@ -1014,11 +1014,15 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf, /* allocate the pool now if the pool is not allocated yet */ if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) { - if (snd_seq_pool_init(client->pool) < 0) + mutex_lock(&client->ioctl_mutex); + err = snd_seq_pool_init(client->pool); + mutex_unlock(&client->ioctl_mutex); + if (err < 0) return -ENOMEM; } /* only process whole events */ + err = -EINVAL; while (count >= sizeof(struct snd_seq_event)) { /* Read in the event header from the user */ len = sizeof(event); From ac98d5a624fed603233421ddd808f2623f331ad8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 11 Feb 2018 18:10:28 -0500 Subject: [PATCH 592/770] mvpp2: fix multicast address filter commit 7ac8ff95f48cbfa609a060fd6a1e361dd62feeb3 upstream. IPv6 doesn't work on the MacchiatoBIN board. It is caused by broken multicast address filter in the mvpp2 driver. The driver loads doesn't load any multicast entries if "allmulti" is not set. This condition should be reversed. The condition !netdev_mc_empty(dev) is useless (because netdev_for_each_mc_addr is nop if the list is empty). This patch also fixes a possible overflow of the multicast list - if mvpp2_prs_mac_da_accept fails, we set the allmulti flag and retry. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 1dd3a1264a53..06f3fe429d82 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6888,6 +6888,7 @@ static void mvpp2_set_rx_mode(struct net_device *dev) int id = port->id; bool allmulti = dev->flags & IFF_ALLMULTI; +retry: mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti); @@ -6895,9 +6896,13 @@ static void mvpp2_set_rx_mode(struct net_device *dev) /* Remove all port->id's mcast enries */ mvpp2_prs_mcast_del_all(priv, id); - if (allmulti && !netdev_mc_empty(dev)) { - netdev_for_each_mc_addr(ha, dev) - mvpp2_prs_mac_da_accept(priv, id, ha->addr, true); + if (!allmulti) { + netdev_for_each_mc_addr(ha, dev) { + if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) { + allmulti = true; + goto retry; + } + } } } From e4ea7c12229c5fec232120713abe74bd35164157 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 31 Jan 2018 22:24:45 +0000 Subject: [PATCH 593/770] usb: Move USB_UHCI_BIG_ENDIAN_* out of USB_SUPPORT commit ec897569ad7dbc6d595873a487c3fac23f463f76 upstream. Move the Kconfig symbols USB_UHCI_BIG_ENDIAN_MMIO and USB_UHCI_BIG_ENDIAN_DESC out of drivers/usb/host/Kconfig, which is conditional upon USB && USB_SUPPORT, so that it can be freely selected by platform Kconfig symbols in architecture code. For example once the MIPS_GENERIC platform selects are fixed in commit 2e6522c56552 ("MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN"), the MIPS 32r6_defconfig warns like so: warning: (MIPS_GENERIC) selects USB_UHCI_BIG_ENDIAN_MMIO which has unmet direct dependencies (USB_SUPPORT && USB) warning: (MIPS_GENERIC) selects USB_UHCI_BIG_ENDIAN_DESC which has unmet direct dependencies (USB_SUPPORT && USB) Fixes: 2e6522c56552 ("MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN") Signed-off-by: James Hogan Cc: Corentin Labbe Cc: Ralf Baechle Cc: Paul Burton Cc: linux-usb@vger.kernel.org Cc: linux-mips@linux-mips.org Acked-by: Greg Kroah-Hartman Patchwork: https://patchwork.linux-mips.org/patch/18559/ Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Kconfig | 8 ++++++++ drivers/usb/host/Kconfig | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 939a63bca82f..72eb3e41e3b6 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -19,6 +19,14 @@ config USB_EHCI_BIG_ENDIAN_MMIO config USB_EHCI_BIG_ENDIAN_DESC bool +config USB_UHCI_BIG_ENDIAN_MMIO + bool + default y if SPARC_LEON + +config USB_UHCI_BIG_ENDIAN_DESC + bool + default y if SPARC_LEON + menuconfig USB_SUPPORT bool "USB support" depends on HAS_IOMEM diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index fa5692dec832..92b19721b595 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -637,14 +637,6 @@ config USB_UHCI_ASPEED bool default y if ARCH_ASPEED -config USB_UHCI_BIG_ENDIAN_MMIO - bool - default y if SPARC_LEON - -config USB_UHCI_BIG_ENDIAN_DESC - bool - default y if SPARC_LEON - config USB_FHCI_HCD tristate "Freescale QE USB Host Controller support" depends on OF_GPIO && QE_GPIO && QUICC_ENGINE From 26f8c38bb466c1a2d232d7609fb4bfb4bc121678 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 25 Jan 2018 14:23:48 -0800 Subject: [PATCH 594/770] x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages commit fd0e786d9d09024f67bd71ec094b110237dc3840 upstream. In the following commit: ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") ... we added code to memory_failure() to unmap the page from the kernel 1:1 virtual address space to avoid speculative access to the page logging additional errors. But memory_failure() may not always succeed in taking the page offline, especially if the page belongs to the kernel. This can happen if there are too many corrected errors on a page and either mcelog(8) or drivers/ras/cec.c asks to take a page offline. Since we remove the 1:1 mapping early in memory_failure(), we can end up with the page unmapped, but still in use. On the next access the kernel crashes :-( There are also various debug paths that call memory_failure() to simulate occurrence of an error. Since there is no actual error in memory, we don't need to map out the page for those cases. Revert most of the previous attempt and keep the solution local to arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when: 1) there is a real error 2) memory_failure() succeeds. All of this only applies to 64-bit systems. 32-bit kernel doesn't map all of memory into kernel space. It isn't worth adding the code to unmap the piece that is mapped because nobody would run a 32-bit kernel on a machine that has recoverable machine checks. Signed-off-by: Tony Luck Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Robert (Persistent Memory) Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org #v4.14 Fixes: ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_64.h | 4 ---- arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 +++++++++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 17 +++++++++++------ include/linux/mm_inline.h | 6 ------ mm/memory-failure.c | 2 -- 5 files changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bceb232..d652a3808065 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -52,10 +52,6 @@ static inline void clear_page(void *page) void copy_page(void *to, void *from); -#ifdef CONFIG_X86_MCE -#define arch_unmap_kpfn arch_unmap_kpfn -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index aa0d5df9dc60..e956eb267061 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } extern struct mca_config mca_cfg; +#ifndef CONFIG_X86_64 +/* + * On 32-bit systems it would be difficult to safely unmap a poison page + * from the kernel 1:1 map because there are no non-canonical addresses that + * we can use to refer to the address without risking a speculative access. + * However, this isn't much of an issue because: + * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which + * are only mapped into the kernel as needed + * 2) Few people would run a 32-bit kernel on a machine that supports + * recoverable errors because they have too much memory to boot 32-bit. + */ +static inline void mce_unmap_kpfn(unsigned long pfn) {} +#define mce_unmap_kpfn mce_unmap_kpfn +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a9e898b71208..73237aa271ea 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -106,6 +106,10 @@ static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn); +#endif + /* * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. @@ -582,7 +586,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, MCE_VECTOR, 0); + if (memory_failure(pfn, MCE_VECTOR, 0)) + mce_unmap_kpfn(pfn); } return NOTIFY_OK; @@ -1049,12 +1054,13 @@ static int do_memory_failure(struct mce *m) ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); if (ret) pr_err("Memory error not recovered"); + else + mce_unmap_kpfn(m->addr >> PAGE_SHIFT); return ret; } -#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) - -void arch_unmap_kpfn(unsigned long pfn) +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn) { unsigned long decoy_addr; @@ -1065,7 +1071,7 @@ void arch_unmap_kpfn(unsigned long pfn) * We would like to just call: * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a - * speculative access to the posion page because we'd have + * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address @@ -1090,7 +1096,6 @@ void arch_unmap_kpfn(unsigned long pfn) if (set_memory_np(decoy_addr, 1)) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - } #endif diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c30b32e3c862..10191c28fc04 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) -#ifdef arch_unmap_kpfn -extern void arch_unmap_kpfn(unsigned long pfn); -#else -static __always_inline void arch_unmap_kpfn(unsigned long pfn) { } -#endif - #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 88366626c0b7..1cd3b3569af8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1146,8 +1146,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } - arch_unmap_kpfn(pfn); - orig_head = hpage = compound_head(p); num_poisoned_pages_inc(); From 875758d79d8f6182aa2c76c09eb810a7c54249ec Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 13 Dec 2017 14:21:37 +0100 Subject: [PATCH 595/770] scsi: core: check for device state in __scsi_remove_target() commit 81b6c999897919d5a16fedc018fe375dbab091c5 upstream. As it turned out device_get() doesn't use kref_get_unless_zero(), so we will be always getting a device pointer. Consequently, we need to check for the device state in __scsi_remove_target() to avoid tripping over deleted objects. Fixes: fbce4d97fd43 ("scsi: fixup kernel warning during rmmod()") Reported-by: Jason Yan Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Cc: Max Ivanov Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index f796bd61f3f0..40406c162d0d 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1383,7 +1383,10 @@ static void __scsi_remove_target(struct scsi_target *starget) * check. */ if (sdev->channel != starget->channel || - sdev->id != starget->id || + sdev->id != starget->id) + continue; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); From dcdc01c2edd4baf8096c70837fd583e1cc00d023 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:47:54 +0200 Subject: [PATCH 596/770] Bluetooth: BT_HCIUART now depends on SERIAL_DEV_BUS commit 05e89fb576f580ac95e7a5d00bdb34830b09671a upstream. It is no longer possible to build BT_HCIUART into the kernel when SERIAL_DEV_BUS is a loadable module, even if none of the SERIAL_DEV_BUS based implementations are selected: drivers/bluetooth/hci_ldisc.o: In function `hci_uart_set_flow_control': hci_ldisc.c:(.text+0xb40): undefined reference to `serdev_device_set_flow_control' hci_ldisc.c:(.text+0xb5c): undefined reference to `serdev_device_set_tiocm' This adds a dependency to avoid the broken configuration. Fixes: 7841d554809b ("Bluetooth: hci_uart_set_flow_control: Fix NULL deref when using serdev") Signed-off-by: Arnd Bergmann Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 98a60db8e5d1..b33c8d6eb8c7 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -66,6 +66,7 @@ config BT_HCIBTSDIO config BT_HCIUART tristate "HCI UART driver" + depends on SERIAL_DEV_BUS || !SERIAL_DEV_BUS depends on TTY help Bluetooth HCI UART driver. @@ -80,7 +81,6 @@ config BT_HCIUART config BT_HCIUART_SERDEV bool depends on SERIAL_DEV_BUS && BT_HCIUART - depends on SERIAL_DEV_BUS=y || SERIAL_DEV_BUS=BT_HCIUART default y config BT_HCIUART_H4 From f5eab7c3d1003d0d8df8beefa531187452212c59 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Dec 2017 22:30:07 +0100 Subject: [PATCH 597/770] ARM: dts: exynos: fix RTC interrupt for exynos5410 commit 5628a8ca14149ba4226e3bdce3a04c3b688435ad upstream. According to the comment added to exynos_dt_pmu_match[] in commit 8b283c025443 ("ARM: exynos4/5: convert pmu wakeup to stacked domains"), the RTC is not able to wake up the system through the PMU on Exynos5410, unlike Exynos5420. However, when the RTC DT node got added, it was a straight copy of the Exynos5420 node, which now causes a warning from dtc. This removes the incorrect interrupt-parent, which should get the interrupt working and avoid the warning. Fixes: e1e146b1b062 ("ARM: dts: exynos: Add RTC and I2C to Exynos5410") Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5410.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi index 7eab4bc07cec..7628bbb02324 100644 --- a/arch/arm/boot/dts/exynos5410.dtsi +++ b/arch/arm/boot/dts/exynos5410.dtsi @@ -333,7 +333,6 @@ &rtc { clocks = <&clock CLK_RTC>; clock-names = "rtc"; - interrupt-parent = <&pmu_system_controller>; status = "disabled"; }; From 361bd5be7b845f10f6f282987088c1e62dd671d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 15:40:37 +0100 Subject: [PATCH 598/770] ARM: pxa/tosa-bt: add MODULE_LICENSE tag commit 3343647813fdf0f2409fbf5816ee3e0622168079 upstream. Without this tag, we get a build warning: WARNING: modpost: missing MODULE_LICENSE() in arch/arm/mach-pxa/tosa-bt.o For completeness, I'm also adding author and description fields. Acked-by: Robert Jarzmik Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/tosa-bt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c index 107f37210fb9..83606087edc7 100644 --- a/arch/arm/mach-pxa/tosa-bt.c +++ b/arch/arm/mach-pxa/tosa-bt.c @@ -132,3 +132,7 @@ static struct platform_driver tosa_bt_driver = { }, }; module_platform_driver(tosa_bt_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dmitry Baryshkov"); +MODULE_DESCRIPTION("Bluetooth built-in chip control"); From 34aac3426553aed01e59ec30467e42d165af2142 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 7 Nov 2017 19:45:01 -0800 Subject: [PATCH 599/770] arm64: dts: msm8916: Add missing #phy-cells commit b0ab681285aa66064f2de5b74191c0cabba381ff upstream. Add a missing #phy-cells to the dsi-phy, to silence dtc warning. Cc: Archit Taneja Fixes: 305410ffd1b2 ("arm64: dts: msm8916: Add display support") Signed-off-by: Bjorn Andersson Reviewed-by: Archit Taneja Signed-off-by: Andy Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 8e5421a0b43d..61da6e65900b 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -901,6 +901,7 @@ "dsi_phy_regulator"; #clock-cells = <1>; + #phy-cells = <0>; clocks = <&gcc GCC_MDSS_AHB_CLK>; clock-names = "iface_clk"; From 7367af9cf0e4e1502151d42535cae779046d77e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:10:11 +0100 Subject: [PATCH 600/770] ARM: dts: s5pv210: add interrupt-parent for ohci commit 5c1037196b9ee75897c211972de370ed1336ec8f upstream. The ohci-hcd node has an interrupt number but no interrupt-parent, leading to a warning with current dtc versions: arch/arm/boot/dts/s5pv210-aquila.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-goni.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-smdkc110.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-smdkv210.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 arch/arm/boot/dts/s5pv210-torbreck.dtb: Warning (interrupts_property): Missing interrupt-parent for /soc/ohci@ec300000 As seen from the related exynos dts files, the ohci and ehci controllers always share one interrupt number, and the number is the same here as well, so setting the same interrupt-parent is the reasonable solution here. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/s5pv210.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 726c5d0dbd5b..b290a5abb901 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -463,6 +463,7 @@ compatible = "samsung,exynos4210-ohci"; reg = <0xec300000 0x100>; interrupts = <23>; + interrupt-parent = <&vic1>; clocks = <&clocks CLK_USB_HOST>; clock-names = "usbhost"; #address-cells = <1>; From 011c19041424dabd87e0bfa26b5ce20c1a0119e1 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 1 Dec 2017 13:07:07 +0100 Subject: [PATCH 601/770] arm: dts: mt7623: Update ethsys binding commit 76a09ce214addb8ddc0f6d50dc1106a5f829e713 upstream. The ethsys binding misses the reset-cells, this patch adds this property. Reviewed-by: Rob Herring Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt7623.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi index ec8a07415cb3..36983a7d7cfd 100644 --- a/arch/arm/boot/dts/mt7623.dtsi +++ b/arch/arm/boot/dts/mt7623.dtsi @@ -753,6 +753,7 @@ "syscon"; reg = <0 0x1b000000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; eth: ethernet@1b100000 { From 5782df0a58b6748cca77cd0a7684c8c7e2401af2 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 1 Dec 2017 13:07:08 +0100 Subject: [PATCH 602/770] arm: dts: mt2701: Add reset-cells commit ae72e95b5e4ded145bfc6926ad9457b74e3af41a upstream. The hifsys and ethsys needs the definition of the reset-cells property. Fix this. Reviewed-by: Rob Herring Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt2701.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi index afe12e5b51f9..f936000f0699 100644 --- a/arch/arm/boot/dts/mt2701.dtsi +++ b/arch/arm/boot/dts/mt2701.dtsi @@ -593,6 +593,7 @@ compatible = "mediatek,mt2701-hifsys", "syscon"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; usb0: usb@1a1c0000 { @@ -677,6 +678,7 @@ compatible = "mediatek,mt2701-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; eth: ethernet@1b100000 { From bbd577fec485647aaad0e2ba6142f2fec363b946 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 2 Jan 2018 08:57:17 +0100 Subject: [PATCH 603/770] ARM: dts: Delete bogus reference to the charlcd commit 586b2a4befad88cd87b372a1cea01e58c6811ea9 upstream. The EB MP board probably has a character LCD but the board manual does not really state which IRQ it has assigned to this device. The invalid assignment was a mistake by me during submission of the DTSI where I was looking for the reference, didn't find it and didn't fill it in. Delete this for now: it can probably be fixed but that requires access to the actual board for some trial-and-error experiments. Reported-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/arm-realview-eb-mp.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi index 7b8d90b7aeea..29b636fce23f 100644 --- a/arch/arm/boot/dts/arm-realview-eb-mp.dtsi +++ b/arch/arm/boot/dts/arm-realview-eb-mp.dtsi @@ -150,11 +150,6 @@ interrupts = <0 8 IRQ_TYPE_LEVEL_HIGH>; }; -&charlcd { - interrupt-parent = <&intc>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; -}; - &serial0 { interrupt-parent = <&intc>; interrupts = <0 4 IRQ_TYPE_LEVEL_HIGH>; From 11785a9ece1b10d43156ae79e24f709053644bc5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Nov 2017 06:08:05 -0500 Subject: [PATCH 604/770] media: r820t: fix r820t_write_reg for KASAN commit 16c3ada89cff9a8c2a0eea34ffa1aa20af3f6008 upstream. With CONFIG_KASAN, we get an overly long stack frame due to inlining the register access functions: drivers/media/tuners/r820t.c: In function 'generic_set_freq.isra.7': drivers/media/tuners/r820t.c:1334:1: error: the frame size of 2880 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] This is caused by a gcc bug that has now been fixed in gcc-8. To work around the problem, we can pass the register data through a local variable that older gcc versions can optimize out as well. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/r820t.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/tuners/r820t.c b/drivers/media/tuners/r820t.c index ba80376a3b86..d097eb04a0e9 100644 --- a/drivers/media/tuners/r820t.c +++ b/drivers/media/tuners/r820t.c @@ -396,9 +396,11 @@ static int r820t_write(struct r820t_priv *priv, u8 reg, const u8 *val, return 0; } -static int r820t_write_reg(struct r820t_priv *priv, u8 reg, u8 val) +static inline int r820t_write_reg(struct r820t_priv *priv, u8 reg, u8 val) { - return r820t_write(priv, reg, &val, 1); + u8 tmp = val; /* work around GCC PR81715 with asan-stack=1 */ + + return r820t_write(priv, reg, &tmp, 1); } static int r820t_read_cache_reg(struct r820t_priv *priv, int reg) @@ -411,17 +413,18 @@ static int r820t_read_cache_reg(struct r820t_priv *priv, int reg) return -EINVAL; } -static int r820t_write_reg_mask(struct r820t_priv *priv, u8 reg, u8 val, +static inline int r820t_write_reg_mask(struct r820t_priv *priv, u8 reg, u8 val, u8 bit_mask) { + u8 tmp = val; int rc = r820t_read_cache_reg(priv, reg); if (rc < 0) return rc; - val = (rc & ~bit_mask) | (val & bit_mask); + tmp = (rc & ~bit_mask) | (tmp & bit_mask); - return r820t_write(priv, reg, &val, 1); + return r820t_write(priv, reg, &tmp, 1); } static int r820t_read(struct r820t_priv *priv, u8 reg, u8 *val, int len) From ffe075e233bb8049ba7d3e362b37bc97ecf7e6fa Mon Sep 17 00:00:00 2001 From: yangbo lu Date: Thu, 21 Sep 2017 16:43:31 +0800 Subject: [PATCH 605/770] mmc: sdhci-of-esdhc: disable SD clock for clock value 0 commit dd3f6983b4a468efca9e8caa0e2b4aa20946d801 upstream. SD clock should be disabled for clock value 0. It's not right to just return. This may cause failure of signal voltage switching. Signed-off-by: Yangbo Lu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: Rasmus Villemoes Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 58 ++++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index d96a057a7db8..1f424374bbbb 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -458,6 +458,33 @@ static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host) return clock / 256 / 16; } +static void esdhc_clock_enable(struct sdhci_host *host, bool enable) +{ + u32 val; + ktime_t timeout; + + val = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); + + if (enable) + val |= ESDHC_CLOCK_SDCLKEN; + else + val &= ~ESDHC_CLOCK_SDCLKEN; + + sdhci_writel(host, val, ESDHC_SYSTEM_CONTROL); + + /* Wait max 20 ms */ + timeout = ktime_add_ms(ktime_get(), 20); + val = ESDHC_CLOCK_STABLE; + while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) { + if (ktime_after(ktime_get(), timeout)) { + pr_err("%s: Internal clock never stabilised.\n", + mmc_hostname(host->mmc)); + break; + } + udelay(10); + } +} + static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -469,8 +496,10 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; - if (clock == 0) + if (clock == 0) { + esdhc_clock_enable(host, false); return; + } /* Workaround to start pre_div at 2 for VNN < VENDOR_V_23 */ if (esdhc->vendor_ver < VENDOR_V_23) @@ -558,33 +587,6 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width) sdhci_writel(host, ctrl, ESDHC_PROCTL); } -static void esdhc_clock_enable(struct sdhci_host *host, bool enable) -{ - u32 val; - ktime_t timeout; - - val = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); - - if (enable) - val |= ESDHC_CLOCK_SDCLKEN; - else - val &= ~ESDHC_CLOCK_SDCLKEN; - - sdhci_writel(host, val, ESDHC_SYSTEM_CONTROL); - - /* Wait max 20 ms */ - timeout = ktime_add_ms(ktime_get(), 20); - val = ESDHC_CLOCK_STABLE; - while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) { - if (ktime_after(ktime_get(), timeout)) { - pr_err("%s: Internal clock never stabilised.\n", - mmc_hostname(host->mmc)); - break; - } - udelay(10); - } -} - static void esdhc_reset(struct sdhci_host *host, u8 mask) { sdhci_reset(host, mask); From 44e47693e3a680114332420a92281861bba5a9a4 Mon Sep 17 00:00:00 2001 From: "yinbo.zhu" Date: Wed, 8 Nov 2017 17:09:50 +0800 Subject: [PATCH 606/770] mmc: sdhci-of-esdhc: fix eMMC couldn't work after kexec commit 97618aca1440b5addc5c3d78659d3e176be23b80 upstream. The bit eSDHC_TBCTL[TB_EN] couldn't be reset by eSDHC_SYSCTL[RSTA] which is used to reset for all. The driver should make sure it's cleared before card initialization, otherwise the initialization would fail. Signed-off-by: yinbo.zhu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: Rasmus Villemoes Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 1f424374bbbb..d74030f3bd12 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -785,6 +785,10 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) pltfm_host = sdhci_priv(host); esdhc = sdhci_pltfm_priv(pltfm_host); + val = sdhci_readl(host, ESDHC_TBCTL); + val &= ~ESDHC_TB_EN; + sdhci_writel(host, val, ESDHC_TBCTL); + host_ver = sdhci_readw(host, SDHCI_HOST_VERSION); esdhc->vendor_ver = (host_ver & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; From 9a2b3777bd5335e34ec585ecc8e29d575dfc8c13 Mon Sep 17 00:00:00 2001 From: "yinbo.zhu" Date: Fri, 1 Dec 2017 15:09:34 +0800 Subject: [PATCH 607/770] mmc: sdhci-of-esdhc: fix the mmc error after sleep on ls1046ardb commit f2bc600008bd6f7f5d0b6b56238d14f95cd454d2 upstream. When system wakes up from sleep on ls1046ardb, the SD operation fails with mmc error messages since ESDHC_TB_EN bit couldn't be cleaned by eSDHC_SYSCTL[RSTA]. It's proper to clean this bit in esdhc_reset() rather than in probe. Signed-off-by: yinbo.zhu Acked-by: Yangbo Lu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: Rasmus Villemoes Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index d74030f3bd12..4ffa6b173a21 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -589,10 +589,18 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width) static void esdhc_reset(struct sdhci_host *host, u8 mask) { + u32 val; + sdhci_reset(host, mask); sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + if (mask & SDHCI_RESET_ALL) { + val = sdhci_readl(host, ESDHC_TBCTL); + val &= ~ESDHC_TB_EN; + sdhci_writel(host, val, ESDHC_TBCTL); + } } /* The SCFG, Supplemental Configuration Unit, provides SoC specific @@ -785,10 +793,6 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) pltfm_host = sdhci_priv(host); esdhc = sdhci_pltfm_priv(pltfm_host); - val = sdhci_readl(host, ESDHC_TBCTL); - val &= ~ESDHC_TB_EN; - sdhci_writel(host, val, ESDHC_TBCTL); - host_ver = sdhci_readw(host, SDHCI_HOST_VERSION); esdhc->vendor_ver = (host_ver & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; From f1a81c0eab38a1cf8887cd10731c77ad547cc17c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 5 Jan 2018 14:55:33 -0600 Subject: [PATCH 608/770] ASoC: acpi: fix machine driver selection based on quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c256045b87b8aa8e5bc9d2e2fdc0802351c1f99 upstream. The ACPI/machine-driver code refactoring introduced in 4.13 introduced a regression for cases where we need a DMI-based quirk to select the machine driver (the BIOS reports an invalid HID). The fix is just to make sure the results of the quirk are actually used. Fixes: 54746dabf770 ('ASoC: Improve machine driver selection based on quirk data') Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96691 Tested-by: Nicole Færber Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/common/sst-match-acpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/common/sst-match-acpi.c b/sound/soc/intel/common/sst-match-acpi.c index 56d26f36a3cb..b4a929562218 100644 --- a/sound/soc/intel/common/sst-match-acpi.c +++ b/sound/soc/intel/common/sst-match-acpi.c @@ -83,11 +83,9 @@ struct sst_acpi_mach *sst_acpi_find_machine(struct sst_acpi_mach *machines) for (mach = machines; mach->id[0]; mach++) { if (sst_acpi_check_hid(mach->id) == true) { - if (mach->machine_quirk == NULL) - return mach; - - if (mach->machine_quirk(mach) != NULL) - return mach; + if (mach->machine_quirk) + mach = mach->machine_quirk(mach); + return mach; } } return NULL; From e506ac1dab86c88b03f62404055a35262dda310d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 14 Jan 2018 18:35:40 +0200 Subject: [PATCH 609/770] ovl: hash directory inodes for fsnotify commit 31747eda41ef3c30c09c5c096b380bf54013746a upstream. fsnotify pins a watched directory inode in cache, but if directory dentry is released, new lookup will allocate a new dentry and a new inode. Directory events will be notified on the new inode, while fsnotify listener is watching the old pinned inode. Hash all directory inodes to reuse the pinned inode on lookup. Pure upper dirs are hashes by real upper inode, merge and lower dirs are hashed by real lower inode. The reference to lower inode was being held by the lower dentry object in the overlay dentry (oe->lowerstack[0]). Releasing the overlay dentry may drop lower inode refcount to zero. Add a refcount on behalf of the overlay inode to prevent that. As a by-product, hashing directory inodes also detects multiple redirected dirs to the same lower dir and uncovered redirected dir target on and returns -ESTALE on lookup. The reported issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13. Cc: #v4.13 Reported-by: Niklas Cassel Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Tested-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 37 +++++++++++++++++++++++++++---------- fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 4 ++-- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 321511ed8c42..d60900b615f9 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -579,6 +579,16 @@ static int ovl_inode_set(struct inode *inode, void *data) static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, struct dentry *upperdentry) { + if (S_ISDIR(inode->i_mode)) { + /* Real lower dir moved to upper layer under us? */ + if (!lowerdentry && ovl_inode_lower(inode)) + return false; + + /* Lookup of an uncovered redirect origin? */ + if (!upperdentry && ovl_inode_upper(inode)) + return false; + } + /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed @@ -606,6 +616,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct inode *inode; /* Already indexed or could be indexed on copy up? */ bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + struct dentry *origin = indexed ? lowerdentry : NULL; + bool is_dir; if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO); @@ -614,15 +626,19 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed upper, but we must - * not use lower as hash key in that case. - * Hash inodes that are or could be indexed by origin inode and - * non-indexed upper inodes that could be hard linked by upper inode. + * Copy up origin (lower) may exist for non-indexed non-dir upper, but + * we must not use lower as hash key in that case. + * Hash non-dir that is or could be indexed by origin inode. + * Hash dir that is or could be merged by origin inode. + * Hash pure upper and non-indexed non-dir by upper inode. */ - if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { - struct inode *key = d_inode(indexed ? lowerdentry : - upperdentry); - unsigned int nlink; + is_dir = S_ISDIR(realinode->i_mode); + if (is_dir) + origin = lowerdentry; + + if (upperdentry || origin) { + struct inode *key = d_inode(origin ?: upperdentry); + unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); @@ -643,8 +659,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; } - nlink = ovl_get_nlink(lowerdentry, upperdentry, - realinode->i_nlink); + /* Recalculate nlink for non-dir due to indexing */ + if (!is_dir) + nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { inode = new_inode(dentry->d_sb); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f5738e96a052..b8f8d666e8d4 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -200,6 +200,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); + iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b9b239fa5cfd..f60ce2e04df0 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -253,7 +253,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) - OVL_I(inode)->lower = d_inode(lowerdentry); + OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } @@ -269,7 +269,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; - if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } From 58056a531e440061142a4481358ebb365193df5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 Feb 2018 15:42:33 +0100 Subject: [PATCH 610/770] Linux 4.14.21 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 33176140f133..68d70485b088 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 20 +SUBLEVEL = 21 EXTRAVERSION = NAME = Petit Gorille From ebf7d035c39a70756a9b68a6a670ee8a39ecc586 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Oct 2017 16:15:59 +0200 Subject: [PATCH 611/770] usb: core: Add a helper function to check the validity of EP type in URB commit e901b9873876ca30a09253731bd3a6b00c44b5b0 upstream. This patch adds a new helper function to perform a sanity check of the given URB to see whether it contains a valid endpoint. It's a light- weight version of what usb_submit_urb() does, but without the kernel warning followed by the stack trace, just returns an error code. Especially for a driver that doesn't parse the descriptor but fills the URB with the fixed endpoint (e.g. some quirks for non-compliant devices), this kind of check is preferable at the probe phase before actually submitting the urb. Tested-by: Andrey Konovalov Acked-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/urb.c | 30 ++++++++++++++++++++++++++---- include/linux/usb.h | 2 ++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 47903d510955..8b800e34407b 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -187,6 +187,31 @@ EXPORT_SYMBOL_GPL(usb_unanchor_urb); /*-------------------------------------------------------------------*/ +static const int pipetypes[4] = { + PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT +}; + +/** + * usb_urb_ep_type_check - sanity check of endpoint in the given urb + * @urb: urb to be checked + * + * This performs a light-weight sanity check for the endpoint in the + * given urb. It returns 0 if the urb contains a valid endpoint, otherwise + * a negative error code. + */ +int usb_urb_ep_type_check(const struct urb *urb) +{ + const struct usb_host_endpoint *ep; + + ep = usb_pipe_endpoint(urb->dev, urb->pipe); + if (!ep) + return -EINVAL; + if (usb_pipetype(urb->pipe) != pipetypes[usb_endpoint_type(&ep->desc)]) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(usb_urb_ep_type_check); + /** * usb_submit_urb - issue an asynchronous transfer request for an endpoint * @urb: pointer to the urb describing the request @@ -326,9 +351,6 @@ EXPORT_SYMBOL_GPL(usb_unanchor_urb); */ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) { - static int pipetypes[4] = { - PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT - }; int xfertype, max; struct usb_device *dev; struct usb_host_endpoint *ep; @@ -444,7 +466,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) */ /* Check that the pipe's type matches the endpoint's type */ - if (usb_pipetype(urb->pipe) != pipetypes[xfertype]) + if (usb_urb_ep_type_check(urb)) dev_WARN(&dev->dev, "BOGUS urb xfer, pipe %x != type %x\n", usb_pipetype(urb->pipe), pipetypes[xfertype]); diff --git a/include/linux/usb.h b/include/linux/usb.h index 9c63792a8134..4192a1755ccb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1729,6 +1729,8 @@ static inline int usb_urb_dir_out(struct urb *urb) return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT; } +int usb_urb_ep_type_check(const struct urb *urb); + void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); void usb_free_coherent(struct usb_device *dev, size_t size, From ed25667f33032b7715bc4af67e6728020fe75f5e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Jan 2018 17:27:25 +0800 Subject: [PATCH 612/770] vhost: use mutex_lock_nested() in vhost_dev_lock_vqs() commit e9cb4239134c860e5f92c75bf5321bd377bb505b upstream. We used to call mutex_lock() in vhost_dev_lock_vqs() which tries to hold mutexes of all virtqueues. This may confuse lockdep to report a possible deadlock because of trying to hold locks belong to same class. Switch to use mutex_lock_nested() to avoid false positive. Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API") Reported-by: syzbot+dbb7c1161485e61b0241@syzkaller.appspotmail.com Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d6dbb28245e6..a827c1a684a9 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -904,7 +904,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) - mutex_lock(&d->vqs[i]->mutex); + mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) From 44890e9ff771ef11777b2d1ebf8589255eb12502 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:41 -0800 Subject: [PATCH 613/770] kcm: Check if sk_user_data already set in kcm_attach commit e5571240236c5652f3e079b1d5866716a7ad819c upstream. This is needed to prevent sk_user_data being overwritten. The check is done under the callback lock. This should prevent a socket from being attached twice to a KCM mux. It also prevents a socket from being attached for other use cases of sk_user_data as long as the other cases set sk_user_data under the lock. Followup work is needed to unify all the use cases of sk_user_data to use the same locking. Reported-by: syzbot+114b15f2be420a8886c3@syzkaller.appspotmail.com Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c5fa634e63ca..51a5e39693a7 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1405,9 +1405,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, return err; } - sock_hold(csk); - write_lock_bh(&csk->sk_callback_lock); + + /* Check if sk_user_data is aready by KCM or someone else. + * Must be done under lock to prevent race conditions. + */ + if (csk->sk_user_data) { + write_unlock_bh(&csk->sk_callback_lock); + strp_done(&psock->strp); + kmem_cache_free(kcm_psockp, psock); + return -EALREADY; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; @@ -1415,8 +1424,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, csk->sk_data_ready = psock_data_ready; csk->sk_write_space = psock_write_space; csk->sk_state_change = psock_state_change; + write_unlock_bh(&csk->sk_callback_lock); + sock_hold(csk); + /* Finished initialization, now add the psock to the MUX. */ spin_lock_bh(&mux->lock); head = &mux->psocks; From 6c0b71202f83f7851f173caee9059f830e3b0dfd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:40 -0800 Subject: [PATCH 614/770] kcm: Only allow TCP sockets to be attached to a KCM mux commit 581e7226a5d43f629eb6399a121f85f6a15f81be upstream. TCP sockets for IPv4 and IPv6 that are not listeners or in closed stated are allowed to be attached to a KCM mux. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+8865eaff7f9acd593945@syzkaller.appspotmail.com Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/kcm/kcmsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 51a5e39693a7..58d53b907d53 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; - /* We must prevent loops or risk deadlock ! */ - if (csk->sk_family == PF_KCM) + /* Only allow TCP sockets to be attached for now */ + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || + csk->sk_protocol != IPPROTO_TCP) + return -EOPNOTSUPP; + + /* Don't allow listeners or closed sockets */ + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) return -EOPNOTSUPP; psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); From ddf0936b9eefe0af6d046cd7d6a9212478812c9a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Jan 2018 01:15:21 +0100 Subject: [PATCH 615/770] bpf: mark dst unknown on inconsistent {s, u}bounds adjustments commit 6f16101e6a8b4324c36e58a29d9e0dbb287cdedb upstream. syzkaller generated a BPF proglet and triggered a warning with the following: 0: (b7) r0 = 0 1: (d5) if r0 s<= 0x0 goto pc+0 R0=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0 2: (1f) r0 -= r1 R0=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0 verifier internal error: known but bad sbounds What happens is that in the first insn, r0's min/max value are both 0 due to the immediate assignment, later in the jsle test the bounds are updated for the min value in the false path, meaning, they yield smin_val = 1, smax_val = 0, and when ctx pointer is subtracted from r0, verifier bails out with the internal error and throwing a WARN since smin_val != smax_val for the known constant. For min_val > max_val scenario it means that reg_set_min_max() and reg_set_min_max_inv() (which both refine existing bounds) demonstrated that such branch cannot be taken at runtime. In above scenario for the case where it will be taken, the existing [0, 0] bounds are kept intact. Meaning, the rejection is not due to a verifier internal error, and therefore the WARN() is not necessary either. We could just reject such cases in adjust_{ptr,scalar}_min_max_vals() when either known scalars have smin_val != smax_val or umin_val != umax_val or any scalar reg with bounds smin_val > smax_val or umin_val > umax_val. However, there may be a small risk of breakage of buggy programs, so handle this more gracefully and in adjust_{ptr,scalar}_min_max_vals() just taint the dst reg as unknown scalar when we see ops with such kind of src reg. Reported-by: syzbot+6d362cadd45dc0a12ba4@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 25 ++-- tools/testing/selftests/bpf/test_verifier.c | 123 +++++++++++++++++++- 2 files changed, 138 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9339c3219bc..b5ae6488b890 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1865,15 +1865,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg = ®s[dst]; - if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: known but bad sbounds\n"); - return -EINVAL; - } - if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(&env->cur_state); - verbose("verifier internal error: known but bad ubounds\n"); - return -EINVAL; + if ((known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ + __mark_reg_unknown(dst_reg); + return 0; } if (BPF_CLASS(insn->code) != BPF_ALU64) { @@ -2075,6 +2073,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, src_known = tnum_is_const(src_reg.var_off); dst_known = tnum_is_const(dst_reg->var_off); + if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ + __mark_reg_unknown(dst_reg); + return 0; + } + if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { __mark_reg_unknown(dst_reg); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 16299939d3ff..c55d265489ca 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6534,7 +6534,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr = "unbounded min value", + .errstr = "R0 invalid mem access 'inv'", .result = REJECT, }, { @@ -7714,6 +7714,127 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, + { + "check deducing bounds from const, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + /* Marks reg as unknown. */ + BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between ctx pointer and register with unbounded min value is not allowed", + }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", .insns = { From b9e441e2e6346284120c9e8e24c4050c245b8895 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 09:58:27 +0100 Subject: [PATCH 616/770] cfg80211: check dev_set_name() return value commit 59b179b48ce2a6076448a44531242ac2b3f6cef2 upstream. syzbot reported a warning from rfkill_alloc(), and after a while I think that the reason is that it was doing fault injection and the dev_set_name() failed, leaving the name NULL, and we didn't check the return value and got to rfkill_alloc() with a NULL name. Since we really don't want a NULL name, we ought to check the return value. Fixes: fb28ad35906a ("net: struct device - replace bus_id with dev_name(), dev_set_name()") Reported-by: syzbot+1ddfb3357e1d7bb5b5d3@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 7b33e8c366bc..33ce0484b2a0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, if (rv) goto use_default_name; } else { + int rv; + use_default_name: /* NOTE: This is *probably* safe w/out holding rtnl because of * the restrictions on phy names. Probably this call could @@ -446,7 +448,11 @@ use_default_name: * phyX. But, might should add some locking and check return * value, and use a different name if this one exists? */ - dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + if (rv < 0) { + kfree(rdev); + return NULL; + } } INIT_LIST_HEAD(&rdev->wiphy.wdev_list); From 447f1170c23bea791c3b5efdc0264519fd22f565 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 09:32:36 +0100 Subject: [PATCH 617/770] mac80211_hwsim: validate number of different channels commit 51a1aaa631c90223888d8beac4d649dc11d2ca55 upstream. When creating a new radio on the fly, hwsim allows this to be done with an arbitrary number of channels, but cfg80211 only supports a limited number of simultaneous channels, leading to a warning. Fix this by validating the number - this requires moving the define for the maximum out to a visible header file. Reported-by: syzbot+8dd9051ff19940290931@syzkaller.appspotmail.com Fixes: b59ec8dd4394 ("mac80211_hwsim: fix number of channels in interface combinations") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ include/net/cfg80211.h | 2 ++ net/wireless/core.h | 2 -- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index a59b54328c07..052e67bce6b3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3119,6 +3119,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_CHANNELS]) param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]); + if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) { + GENL_SET_ERR_MSG(info, "too many channels specified"); + return -EINVAL; + } + if (info->attrs[HWSIM_ATTR_NO_VIF]) param.no_vif = true; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f12fa5245a45..ea0ed58db97e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -815,6 +815,8 @@ struct cfg80211_csa_settings { u8 count; }; +#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 + /** * struct iface_combination_params - input parameters for interface combinations * diff --git a/net/wireless/core.h b/net/wireless/core.h index 705835047f98..90f90c7d8bf9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -502,8 +502,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); -#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 - #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else From 201941360944afbde6ab6f5ff70f96f16d334971 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 5 Jan 2018 08:35:47 +0100 Subject: [PATCH 618/770] esp: Fix GRO when the headers not fully in the linear part of the skb. commit 374d1b5a81f7f9cc5e7f095ac3d5aff3f6600376 upstream. The GRO layer does not necessarily pull the complete headers into the linear part of the skb, a part may remain on the first page fragment. This can lead to a crash if we try to pull the headers, so make sure we have them on the linear part before pulling. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Reported-by: syzbot+82bbd65569c49c6c0c4d@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/esp4_offload.c | 3 ++- net/ipv6/esp6_offload.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 56c49623bb9d..29b333a62ab0 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, __be32 spi; int err; - skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL; if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 1ea9d794447e..f52c314d4c97 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, int nhoff; int err; - skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL; if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; From 85c31887a20da06fe2803b4ec1031b9af89c1899 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 6 Jan 2018 01:13:08 +0100 Subject: [PATCH 619/770] xfrm: don't call xfrm_policy_cache_flush while holding spinlock commit b1bdcb59b64f806ef08d25a85c39ffb3ad841ce6 upstream. xfrm_policy_cache_flush can sleep, so it cannot be called while holding a spinlock. We could release the lock first, but I don't see why we need to invoke this function here in first place, the packet path won't reuse an xdst entry unless its still valid. While at it, add an annotation to xfrm_policy_cache_flush, it would have probably caught this bug sooner. Fixes: ec30d78c14a813 ("xfrm: add xdst pcpu cache") Reported-by: syzbot+e149f7d1328c26f9c12f@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 688ed34f0671..27b914000221 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -975,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; - else - xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1738,6 +1736,8 @@ void xfrm_policy_cache_flush(void) bool found = 0; int cpu; + might_sleep(); + local_bh_disable(); rcu_read_lock(); for_each_possible_cpu(cpu) { From e8370258ca0bc9e630043157db862be2c5c341a6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sun, 31 Dec 2017 16:18:56 +0100 Subject: [PATCH 620/770] xfrm: fix rcu usage in xfrm_get_type_offload commit 2f10a61cee8fdb9f8da90f5db687e1862b22cf06 upstream. request_module can sleep, thus we cannot hold rcu_read_lock() while calling it. The function also jumps back and takes rcu_read_lock() again (in xfrm_state_get_afinfo()), resulting in an imbalance. This codepath is triggered whenever a new offloaded state is created. Fixes: ffdb5211da1c ("xfrm: Auto-load xfrm offload modules") Reported-by: syzbot+ca425f44816d749e8eb49755567a75ee48cf4a30@syzkaller.appspotmail.com Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1f5cee2269af..58be0e7f4c7d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -313,13 +313,14 @@ retry: if ((type && !try_module_get(type->owner))) type = NULL; + rcu_read_unlock(); + if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); try_load = 0; goto retry; } - rcu_read_unlock(); return type; } From 8e754b4ec589f14ef6efa454ee872580ba1802fa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Dec 2017 23:25:45 +0100 Subject: [PATCH 621/770] xfrm: skip policies marked as dead while rehashing commit 862591bf4f519d1b8d859af720fafeaebdd0162a upstream. syzkaller triggered following KASAN splat: BUG: KASAN: slab-out-of-bounds in xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 read of size 2 at addr ffff8801c8e92fe4 by task kworker/1:1/23 [..] Workqueue: events xfrm_hash_rebuild [..] __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:428 xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 process_one_work+0xbbf/0x1b10 kernel/workqueue.c:2112 worker_thread+0x223/0x1990 kernel/workqueue.c:2246 [..] The reproducer triggers: 1016 if (error) { 1017 list_move_tail(&walk->walk.all, &x->all); 1018 goto out; 1019 } in xfrm_policy_walk() via pfkey (it sets tiny rcv space, dump callback returns -ENOBUFS). In this case, *walk is located the pfkey socket struct, so this socket becomes visible in the global policy list. It looks like this is intentional -- phony walker has walk.dead set to 1 and all other places skip such "policies". Ccing original authors of the two commits that seem to expose this issue (first patch missed ->dead check, second patch adds pfkey sockets to policies dumper list). Fixes: 880a6fab8f6ba5b ("xfrm: configure policy hash table thresholds by netlink") Fixes: 12a169e7d8f4b1c ("ipsec: Put dumpers on the dump list") Cc: Herbert Xu Cc: Timo Teras Cc: Christophe Gouault Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 27b914000221..9dcfb7e080e1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -610,7 +610,8 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { + if (policy->walk.dead || + xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { /* skip socket policies */ continue; } From 7880fc541566166d140954825fc83c826534e622 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 18 Dec 2017 20:31:41 +0900 Subject: [PATCH 622/770] mm,vmscan: Make unregister_shrinker() no-op if register_shrinker() failed. commit bb422a738f6566f7439cd347d54e321e4fe92a9f upstream. Syzbot caught an oops at unregister_shrinker() because combination of commit 1d3d4437eae1bb29 ("vmscan: per-node deferred work") and fault injection made register_shrinker() fail and the caller of register_shrinker() did not check for failure. ---------- [ 554.881422] FAULT_INJECTION: forcing a failure. [ 554.881422] name failslab, interval 1, probability 0, space 0, times 0 [ 554.881438] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.881443] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.881445] Call Trace: [ 554.881459] dump_stack+0x194/0x257 [ 554.881474] ? arch_local_irq_restore+0x53/0x53 [ 554.881486] ? find_held_lock+0x35/0x1d0 [ 554.881507] should_fail+0x8c0/0xa40 [ 554.881522] ? fault_create_debugfs_attr+0x1f0/0x1f0 [ 554.881537] ? check_noncircular+0x20/0x20 [ 554.881546] ? find_next_zero_bit+0x2c/0x40 [ 554.881560] ? ida_get_new_above+0x421/0x9d0 [ 554.881577] ? find_held_lock+0x35/0x1d0 [ 554.881594] ? __lock_is_held+0xb6/0x140 [ 554.881628] ? check_same_owner+0x320/0x320 [ 554.881634] ? lock_downgrade+0x990/0x990 [ 554.881649] ? find_held_lock+0x35/0x1d0 [ 554.881672] should_failslab+0xec/0x120 [ 554.881684] __kmalloc+0x63/0x760 [ 554.881692] ? lock_downgrade+0x990/0x990 [ 554.881712] ? register_shrinker+0x10e/0x2d0 [ 554.881721] ? trace_event_raw_event_module_request+0x320/0x320 [ 554.881737] register_shrinker+0x10e/0x2d0 [ 554.881747] ? prepare_kswapd_sleep+0x1f0/0x1f0 [ 554.881755] ? _down_write_nest_lock+0x120/0x120 [ 554.881765] ? memcpy+0x45/0x50 [ 554.881785] sget_userns+0xbcd/0xe20 (...snipped...) [ 554.898693] kasan: CONFIG_KASAN_INLINE enabled [ 554.898724] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 554.898732] general protection fault: 0000 [#1] SMP KASAN [ 554.898737] Dumping ftrace buffer: [ 554.898741] (ftrace buffer empty) [ 554.898743] Modules linked in: [ 554.898752] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.898755] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.898760] task: ffff8801d1dbe5c0 task.stack: ffff8801c9e38000 [ 554.898772] RIP: 0010:__list_del_entry_valid+0x7e/0x150 [ 554.898775] RSP: 0018:ffff8801c9e3f108 EFLAGS: 00010246 [ 554.898780] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 554.898784] RDX: 0000000000000000 RSI: ffff8801c53c6f98 RDI: ffff8801c53c6fa0 [ 554.898788] RBP: ffff8801c9e3f120 R08: 1ffff100393c7d55 R09: 0000000000000004 [ 554.898791] R10: ffff8801c9e3ef70 R11: 0000000000000000 R12: 0000000000000000 [ 554.898795] R13: dffffc0000000000 R14: 1ffff100393c7e45 R15: ffff8801c53c6f98 [ 554.898800] FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 [ 554.898804] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 554.898807] CR2: 00000000dbc23000 CR3: 00000001c7269000 CR4: 00000000001406e0 [ 554.898813] DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 [ 554.898816] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 [ 554.898818] Call Trace: [ 554.898828] unregister_shrinker+0x79/0x300 [ 554.898837] ? perf_trace_mm_vmscan_writepage+0x750/0x750 [ 554.898844] ? down_write+0x87/0x120 [ 554.898851] ? deactivate_super+0x139/0x1b0 [ 554.898857] ? down_read+0x150/0x150 [ 554.898864] ? check_same_owner+0x320/0x320 [ 554.898875] deactivate_locked_super+0x64/0xd0 [ 554.898883] deactivate_super+0x141/0x1b0 ---------- Since allowing register_shrinker() callers to call unregister_shrinker() when register_shrinker() failed can simplify error recovery path, this patch makes unregister_shrinker() no-op when register_shrinker() failed. Also, reset shrinker->nr_deferred in case unregister_shrinker() was by error called twice. Signed-off-by: Tetsuo Handa Signed-off-by: Aliaksei Karaliou Reported-by: syzbot Cc: Glauber Costa Cc: Al Viro Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index eb2f0315b8c0..441f346fb5e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); From 846666fad8d7de79b47a4147e39330fe96e1bdc3 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 14 Dec 2017 03:01:52 -0500 Subject: [PATCH 623/770] KVM/x86: Check input paging mode when cs.l is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f29810335965ac1f7bcb501ee2af5f039f792416 upstream. Reported by syzkaller: WARNING: CPU: 0 PID: 27962 at arch/x86/kvm/emulate.c:5631 x86_emulate_insn+0x557/0x15f0 [kvm] Modules linked in: kvm_intel kvm [last unloaded: kvm] CPU: 0 PID: 27962 Comm: syz-executor Tainted: G B W 4.15.0-rc2-next-20171208+ #32 Hardware name: Intel Corporation S1200SP/S1200SP, BIOS S1200SP.86B.01.03.0006.040720161253 04/07/2016 RIP: 0010:x86_emulate_insn+0x557/0x15f0 [kvm] RSP: 0018:ffff8807234476d0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88072d0237a0 RCX: ffffffffa0065c4d RDX: 1ffff100e5a046f9 RSI: 0000000000000003 RDI: ffff88072d0237c8 RBP: ffff880723447728 R08: ffff88072d020000 R09: ffffffffa008d240 R10: 0000000000000002 R11: ffffed00e7d87db3 R12: ffff88072d0237c8 R13: ffff88072d023870 R14: ffff88072d0238c2 R15: ffffffffa008d080 FS: 00007f8a68666700(0000) GS:ffff880802200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000002009506c CR3: 000000071fec4005 CR4: 00000000003626f0 Call Trace: x86_emulate_instruction+0x3bc/0xb70 [kvm] ? reexecute_instruction.part.162+0x130/0x130 [kvm] vmx_handle_exit+0x46d/0x14f0 [kvm_intel] ? trace_event_raw_event_kvm_entry+0xe7/0x150 [kvm] ? handle_vmfunc+0x2f0/0x2f0 [kvm_intel] ? wait_lapic_expire+0x25/0x270 [kvm] vcpu_enter_guest+0x720/0x1ef0 [kvm] ... When CS.L is set, vcpu should run in the 64 bit paging mode. Current kvm set_sregs function doesn't have such check when userspace inputs sreg values. This will lead unexpected behavior. This patch is to add checks for CS.L, EFER.LME, EFER.LMA and CR4.PAE when get SREG inputs from userspace in order to avoid unexpected behavior. Suggested-by: Paolo Bonzini Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: Jim Mattson Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f97358423f9c..8b7ede04e8a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7474,6 +7474,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); +int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in + * 64-bit mode (though maybe in a 32-bit code segment). + * CR4.PAE and EFER.LMA must be set. + */ + if (!(sregs->cr4 & X86_CR4_PAE_BIT) + || !(sregs->efer & EFER_LMA)) + return -EINVAL; + } else { + /* + * Not in 64-bit mode: EFER.LMA is clear and the code + * segment cannot be 64-bit. + */ + if (sregs->efer & EFER_LMA || sregs->cs.l) + return -EINVAL; + } + + return 0; +} + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -7486,6 +7509,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, (sregs->cr4 & X86_CR4_OSXSAVE)) return -EINVAL; + if (kvm_valid_sregs(vcpu, sregs)) + return -EINVAL; + apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) From d6b36a2616ab94040b24ce622bd6befb1a28e4aa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 5 Dec 2017 22:30:04 +0200 Subject: [PATCH 624/770] RDMA/netlink: Fix general protection fault commit d0e312fe3d34c1bc014a7f8ec6540d05e8077483 upstream. The RDMA netlink core code checks validity of messages by ensuring that type and operand are in range. It works well for almost all clients except NLDEV, which has cb_table less than number of operands. Request to access such operand will trigger the following kernel panic. This patch updates all places where cb_table is declared for the consistency, but only NLDEV is actually need it. general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 0 PID: 522 Comm: syz-executor6 Not tainted 4.13.0+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 task: ffff8800657799c0 task.stack: ffff8800695d000 RIP: 0010:rdma_nl_rcv_msg+0x13a/0x4c0 RSP: 0018:ffff8800695d7838 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: 1ffff1000d2baf0b RCX: 00000000704ff4d7 RDX: 0000000000000000 RSI: ffffffff81ddb03c RDI: 00000003827fa6bc RBP: ffff8800695d7900 R08: ffffffff82ec0578 R09: 0000000000000000 R10: ffff8800695d7900 R11: 0000000000000001 R12: 000000000000001c R13: ffff880069d31e00 R14: 00000000ffffffff R15: ffff880069d357c0 FS: 00007fee6acb8700(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000201a9000 CR3: 0000000059766000 CR4: 00000000000006b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? rdma_nl_multicast+0x80/0x80 rdma_nl_rcv+0x36b/0x4d0 ? ibnl_put_attr+0xc0/0xc0 netlink_unicast+0x4bd/0x6d0 ? netlink_sendskb+0x50/0x50 ? drop_futex_key_refs.isra.4+0x68/0xb0 netlink_sendmsg+0x9ab/0xbd0 ? nlmsg_notify+0x140/0x140 ? wake_up_q+0xa1/0xf0 ? drop_futex_key_refs.isra.4+0x68/0xb0 sock_sendmsg+0x88/0xd0 sock_write_iter+0x228/0x3c0 ? sock_sendmsg+0xd0/0xd0 ? do_futex+0x3e5/0xb20 ? iov_iter_init+0xaf/0x1d0 __vfs_write+0x46e/0x640 ? sched_clock_cpu+0x1b/0x190 ? __vfs_read+0x620/0x620 ? __fget+0x23a/0x390 ? rw_verify_area+0xca/0x290 vfs_write+0x192/0x490 SyS_write+0xde/0x1c0 ? SyS_read+0x1c0/0x1c0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0xad RIP: 0033:0x7fee6a74a219 RSP: 002b:00007fee6acb7d58 EFLAGS: 00000212 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000638000 RCX: 00007fee6a74a219 RDX: 0000000000000078 RSI: 0000000020141000 RDI: 0000000000000006 RBP: 0000000000000046 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: ffff8800695d7f98 R13: 0000000020141000 R14: 0000000000000006 R15: 00000000ffffffff Code: d6 48 b8 00 00 00 00 00 fc ff df 66 41 81 e4 ff 03 44 8d 72 ff 4a 8d 3c b5 c0 a6 7f 82 44 89 b5 4c ff ff ff 48 89 f9 48 c1 e9 03 <0f> b6 0c 01 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: rdma_nl_rcv_msg+0x13a/0x4c0 RSP: ffff8800695d7838 ---[ end trace ba085d123959c8ec ]--- Kernel panic - not syncing: Fatal exception Cc: syzkaller Fixes: b4c598a67ea1 ("RDMA/netlink: Implement nldev device dumpit calback") Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/iwcm.c | 2 +- drivers/infiniband/core/nldev.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fa79c7076ccd..c07d0451cd4a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4461,7 +4461,7 @@ out: return skb->len; } -static const struct rdma_nl_cbs cma_cb_table[] = { +static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = { [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, }; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ebfdb5503701..fb01f5f13495 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1154,7 +1154,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); -static const struct rdma_nl_cbs ibnl_ls_cb_table[] = { +static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, .flags = RDMA_NL_ADMIN_PERM, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index fcf42f6bb82a..30d7277249b8 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason) } EXPORT_SYMBOL(iwcm_reject_msg); -static struct rdma_nl_cbs iwcm_nl_cb_table[] = { +static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2fae850a3eff..9a05245a1acf 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -303,7 +303,7 @@ out: cb->args[0] = idx; return skb->len; } -static const struct rdma_nl_cbs nldev_cb_table[] = { +static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, From 70c5e41f4734ee5cbafb5c49576bdc914f02c6b8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Nov 2017 06:53:55 +0100 Subject: [PATCH 625/770] xfrm: Fix stack-out-of-bounds read on socket policy lookup. commit ddc47e4404b58f03e98345398fb12d38fe291512 upstream. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by catching address family missmatches of the policy and the flow already before we do the lookup. Reported-by: syzbot Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9dcfb7e080e1..22f5da66357b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, family); + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; From 4d4d55665f32c20117ec1d1a96dc807f6d04fc09 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 27 Nov 2017 11:15:16 -0800 Subject: [PATCH 626/770] xfrm: check id proto in validate_tmpl() commit 6a53b7593233ab9e4f96873ebacc0f653a55c3e1 upstream. syzbot reported a kernel warning in xfrm_state_fini(), which indicates that we have entries left in the list net->xfrm.state_all whose proto is zero. And xfrm_id_proto_match() doesn't consider them as a match with IPSEC_PROTO_ANY in this case. Proto with value 0 is probably not a valid value, at least verify_newsa_info() doesn't consider it valid either. This patch fixes it by checking the proto value in validate_tmpl() and rejecting invalid ones, like what iproute2 does in xfrm_xfrmproto_getbyname(). Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e44a0fed48dd..487bec4851a3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1443,6 +1443,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) default: return -EINVAL; } + + switch (ut[i].id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + case IPSEC_PROTO_ANY: + break; + default: + return -EINVAL; + } + } return 0; From 2df0d6de5eff25b8fb4e858c9d77c98cae8a0bc0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Nov 2017 14:11:11 +0800 Subject: [PATCH 627/770] sctp: set frag_point in sctp_setsockopt_maxseg correctly commit ecca8f88da5c4260cc2bccfefd2a24976704c366 upstream. Now in sctp_setsockopt_maxseg user_frag or frag_point can be set with val >= 8 and val <= SCTP_MAX_CHUNK_LEN. But both checks are incorrect. val >= 8 means frag_point can even be less than SCTP_DEFAULT_MINSEGMENT. Then in sctp_datamsg_from_user(), when it's value is greater than cookie echo len and trying to bundle with cookie echo chunk, the first_len will overflow. The worse case is when it's value is equal as cookie echo len, first_len becomes 0, it will go into a dead loop for fragment later on. In Hangbin syzkaller testing env, oom was even triggered due to consecutive memory allocation in that loop. Besides, SCTP_MAX_CHUNK_LEN is the max size of the whole chunk, it should deduct the data header for frag_point or user_frag check. This patch does a proper check with SCTP_DEFAULT_MINSEGMENT subtracting the sctphdr and datahdr, SCTP_MAX_CHUNK_LEN subtracting datahdr when setting frag_point via sockopt. It also improves sctp_setsockopt_maxseg codes. Suggested-by: Marcelo Ricardo Leitner Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/sctp.h | 3 ++- net/sctp/socket.c | 29 +++++++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d7d8cba01469..749a42882437 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -444,7 +444,8 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - + sizeof(struct sctp_data_chunk))); return frag; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1c08d86efe94..3c8b92667866 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3136,9 +3136,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign */ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; struct sctp_association *asoc; - struct sctp_sock *sp = sctp_sk(sk); int val; if (optlen == sizeof(int)) { @@ -3154,26 +3154,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; val = params.assoc_value; - } else + } else { return -EINVAL; + } - if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) - return -EINVAL; + if (val) { + int min_len, max_len; + + min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len; + min_len -= sizeof(struct sctphdr) + + sizeof(struct sctp_data_chunk); + + max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk); + + if (val < min_len || val > max_len) + return -EINVAL; + } asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; - if (asoc) { if (val == 0) { - val = asoc->pathmtu; - val -= sp->pf->af->net_header_len; + val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sizeof(struct sctp_data_chunk); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); } else { + if (params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; sp->user_frag = val; } From 90f9a1ff1e8eaa15c1e33fefa44705707db4de2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 5 Nov 2017 09:16:09 -0700 Subject: [PATCH 628/770] blktrace: fix unlocked registration of tracepoints commit a6da0024ffc19e0d47712bb5ca4fd083f76b07df upstream. We need to ensure that tracepoints are registered and unregistered with the users of them. The existing atomic count isn't enough for that. Add a lock around the tracepoints, so we serialize access to them. This fixes cases where we have multiple users setting up and tearing down tracepoints, like this: CPU: 0 PID: 2995 Comm: syzkaller857118 Not tainted 4.14.0-rc5-next-20171018+ #36 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1c4/0x1e0 kernel/panic.c:546 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:177 do_trap_no_signal arch/x86/kernel/traps.c:211 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:260 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:297 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:310 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:tracepoint_add_func kernel/tracepoint.c:210 [inline] RIP: 0010:tracepoint_probe_register_prio+0x397/0x9a0 kernel/tracepoint.c:283 RSP: 0018:ffff8801d1d1f6c0 EFLAGS: 00010293 RAX: ffff8801d22e8540 RBX: 00000000ffffffef RCX: ffffffff81710f07 RDX: 0000000000000000 RSI: ffffffff85b679c0 RDI: ffff8801d5f19818 RBP: ffff8801d1d1f7c8 R08: ffffffff81710c10 R09: 0000000000000004 R10: ffff8801d1d1f6b0 R11: 0000000000000003 R12: ffffffff817597f0 R13: 0000000000000000 R14: 00000000ffffffff R15: ffff8801d1d1f7a0 tracepoint_probe_register+0x2a/0x40 kernel/tracepoint.c:304 register_trace_block_rq_insert include/trace/events/block.h:191 [inline] blk_register_tracepoints+0x1e/0x2f0 kernel/trace/blktrace.c:1043 do_blk_trace_setup+0xa10/0xcf0 kernel/trace/blktrace.c:542 blk_trace_setup+0xbd/0x180 kernel/trace/blktrace.c:564 sg_ioctl+0xc71/0x2d90 drivers/scsi/sg.c:1089 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x444339 RSP: 002b:00007ffe05bb5b18 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000006d66c0 RCX: 0000000000444339 RDX: 000000002084cf90 RSI: 00000000c0481273 RDI: 0000000000000009 RBP: 0000000000000082 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: ffffffffffffffff R13: 00000000c0481273 R14: 0000000000000000 R15: 0000000000000000 since we can now run these in parallel. Ensure that the exported helpers for doing this are grabbing the queue trace mutex. Reported-by: Steven Rostedt Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- kernel/trace/blktrace.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 45a3928544ce..e73dcab8e9f0 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -66,7 +66,8 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static atomic_t blk_probes_ref = ATOMIC_INIT(0); +static DEFINE_MUTEX(blk_probe_mutex); +static int blk_probes_ref; static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); @@ -329,11 +330,26 @@ static void blk_trace_free(struct blk_trace *bt) kfree(bt); } +static void get_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (++blk_probes_ref == 1) + blk_register_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +static void put_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (!--blk_probes_ref) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + static void blk_trace_cleanup(struct blk_trace *bt) { blk_trace_free(bt); - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); + put_probe_ref(); } int blk_trace_remove(struct request_queue *q) @@ -538,8 +554,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); ret = 0; err: @@ -1558,9 +1573,7 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); - + put_probe_ref(); blk_trace_free(bt); return 0; } @@ -1591,8 +1604,7 @@ static int blk_trace_setup_queue(struct request_queue *q, if (cmpxchg(&q->blk_trace, NULL, bt)) goto free_bt; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); return 0; free_bt: From 3587188ad596da05a8dd691fc28ee56d335f41b8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 31 Oct 2017 09:53:28 +0100 Subject: [PATCH 629/770] dnotify: Handle errors from fsnotify_add_mark_locked() in fcntl_dirnotify() commit b3a0066005821acdc0cdb092cb72587182ab583f upstream. fsnotify_add_mark_locked() can fail but we do not check its return value. This didn't matter before commit 9dd813c15b2c "fsnotify: Move mark list head from object into dedicated structure" as none of possible failures could happen for dnotify but after that commit -ENOMEM can be returned. Handle this error properly in fcntl_dirnotify() as otherwise we just hit BUG_ON(dn_mark->dn) in dnotify_free_mark(). Reviewed-by: Amir Goldstein Reported-by: syzkaller Fixes: 9dd813c15b2c101168808d4f5941a29985758973 Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/dnotify/dnotify.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index cba328315929..63a1ca4b9dee 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -319,7 +319,11 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + if (error) { + mutex_unlock(&dnotify_group->mark_mutex); + goto out_err; + } spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; @@ -345,6 +349,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) */ if (dn_mark == new_dn_mark) destroy = 1; + error = 0; goto out; } From 8c666e6471acb1fe33b6b88d95486ed2d548d944 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Oct 2017 11:55:35 +0000 Subject: [PATCH 630/770] drm: Require __GFP_NOFAIL for the legacy drm_modeset_lock_all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d18d1a5ac811d12f7ebc1129230312b5f2c50cb8 upstream. To acquire all modeset locks requires a ww_ctx to be allocated. As this is the legacy path and the allocation small, to reduce the changes required (and complex untested error handling) to the legacy drivers, we simply assume that the allocation succeeds. At present, it relies on the too-small-to-fail rule, but syzbot found that by injecting a failure here we would hit the WARN. Document that this allocation must succeed with __GFP_NOFAIL. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reported-by: syzbot Signed-off-by: Daniel Vetter Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171031115535.15166-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_modeset_lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index af4e906c630d..56b9f9b1c3ae 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -88,7 +88,7 @@ void drm_modeset_lock_all(struct drm_device *dev) struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); if (WARN_ON(!ctx)) return; From 90bca3712acb0ddffbb85beb099eb7eeafba6cd3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Oct 2017 15:09:24 +0200 Subject: [PATCH 631/770] ALSA: line6: Add a sanity check for invalid EPs commit 2a4340c57717162c6bf07a0860d05711d4de994b upstream. As syzkaller spotted, currently line6 drivers submit a URB with the fixed EP without checking whether it's actually available, which may result in a kernel warning like: usb 1-1: BOGUS urb xfer, pipe 3 != type 1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 24 at drivers/usb/core/urb.c:449 usb_submit_urb+0xf8a/0x11d0 Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc2-42613-g1488251d1a98 #238 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: line6_start_listen+0x55f/0x9e0 sound/usb/line6/driver.c:82 line6_init_cap_control sound/usb/line6/driver.c:690 line6_probe+0x7c9/0x1310 sound/usb/line6/driver.c:764 podhd_probe+0x64/0x70 sound/usb/line6/podhd.c:474 usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361 .... This patch adds a sanity check of validity of EPs at the device initialization phase for avoiding the call with an invalid EP. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/line6/driver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index c8f723c3a033..167aebf8276e 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -78,6 +78,13 @@ static int line6_start_listen(struct usb_line6 *line6) line6->buffer_listen, LINE6_BUFSIZE_LISTEN, line6_data_received, line6); } + + /* sanity checks of EP before actually submitting */ + if (usb_urb_ep_type_check(line6->urb_listen)) { + dev_err(line6->ifcdev, "invalid control EP\n"); + return -EINVAL; + } + line6->urb_listen->actual_length = 0; err = usb_submit_urb(line6->urb_listen, GFP_ATOMIC); return err; From b43a3e21937c869706e94efb53ad166478ad1a93 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Oct 2017 15:07:21 +0200 Subject: [PATCH 632/770] ALSA: caiaq: Add a sanity check for invalid EPs commit 58fc7f73a85d45a47057dad2af53502fdf6cf778 upstream. As syzkaller spotted, currently caiaq driver submits a URB with the fixed EP without checking whether it's actually available, which may result in a kernel warning like: usb 1-1: BOGUS urb xfer, pipe 3 != type 1 ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1150 at drivers/usb/core/urb.c:449 usb_submit_urb+0xf8a/0x11d0 Modules linked in: CPU: 1 PID: 1150 Comm: kworker/1:1 Not tainted 4.14.0-rc2-42660-g24b7bd59eec0 #277 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: init_card sound/usb/caiaq/device.c:467 snd_probe+0x81c/0x1150 sound/usb/caiaq/device.c:525 usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361 .... This patch adds a sanity check of validity of EPs at the device initialization phase for avoiding the call with an invalid EP. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/caiaq/device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index d8409d9ae55b..d55ca48de3ea 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -461,6 +461,13 @@ static int init_card(struct snd_usb_caiaqdev *cdev) cdev->midi_out_buf, EP1_BUFSIZE, snd_usb_caiaq_midi_output_done, cdev); + /* sanity checks of EPs before actually submitting */ + if (usb_urb_ep_type_check(&cdev->ep1_in_urb) || + usb_urb_ep_type_check(&cdev->midi_out_urb)) { + dev_err(dev, "invalid EPs\n"); + return -EINVAL; + } + init_waitqueue_head(&cdev->ep1_wait_queue); init_waitqueue_head(&cdev->prepare_wait_queue); From 73e6a383d1509d8bee58579041e76e5d36793c18 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Oct 2017 15:03:40 +0200 Subject: [PATCH 633/770] ALSA: bcd2000: Add a sanity check for invalid EPs commit 6815a0b444572527256f0d0efd8efe3ddede6018 upstream. As syzkaller spotted, currently bcd2000 driver submits a URB with the fixed EP without checking whether it's actually available, which may result in a kernel warning like: usb 1-1: BOGUS urb xfer, pipe 1 != type 3 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1846 at drivers/usb/core/urb.c:449 usb_submit_urb+0xf8a/0x11d0 Modules linked in: CPU: 0 PID: 1846 Comm: kworker/0:2 Not tainted 4.14.0-rc2-42613-g1488251d1a98 #238 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: bcd2000_init_device sound/usb/bcd2000/bcd2000.c:289 bcd2000_init_midi sound/usb/bcd2000/bcd2000.c:345 bcd2000_probe+0xe64/0x19e0 sound/usb/bcd2000/bcd2000.c:406 usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361 .... This patch adds a sanity check of validity of EPs at the device initialization phase for avoiding the call with an invalid EP. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/bcd2000/bcd2000.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c index 7371e5b06035..a6408209d7f1 100644 --- a/sound/usb/bcd2000/bcd2000.c +++ b/sound/usb/bcd2000/bcd2000.c @@ -342,6 +342,13 @@ static int bcd2000_init_midi(struct bcd2000 *bcd2k) bcd2k->midi_out_buf, BUFSIZE, bcd2000_output_complete, bcd2k, 1); + /* sanity checks of EPs before actually submitting */ + if (usb_urb_ep_type_check(bcd2k->midi_in_urb) || + usb_urb_ep_type_check(bcd2k->midi_out_urb)) { + dev_err(&bcd2k->dev->dev, "invalid MIDI EP\n"); + return -EINVAL; + } + bcd2000_init_device(bcd2k); return 0; From 6688494804d87dcc8c936941f3f9969797af5419 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Feb 2018 17:45:49 +0800 Subject: [PATCH 634/770] ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE commit 6e6e41c3112276288ccaf80c70916779b84bb276 upstream. To avoid slab to warn about exceeded size, fail early if queue occupies more than KMALLOC_MAX_SIZE. Reported-by: syzbot+e4d4f9ddd4295539735d@syzkaller.appspotmail.com Fixes: 2e0ab8ca83c12 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6866df4f31b5..bb515163bac3 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -447,6 +447,8 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { + if (size * sizeof(void *) > KMALLOC_MAX_SIZE) + return NULL; return kcalloc(size, sizeof(void *), gfp); } From b517942f5126c04fff8ff341cdd6a28b0a8de84f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Feb 2018 17:45:50 +0800 Subject: [PATCH 635/770] ptr_ring: try vmalloc() when kmalloc() fails commit 0bf7800f1799b5b1fd7d4f024e9ece53ac489011 upstream. This patch switch to use kvmalloc_array() for using a vmalloc() fallback to help in case kmalloc() fails. Reported-by: syzbot+e4d4f9ddd4295539735d@syzkaller.appspotmail.com Fixes: 2e0ab8ca83c12 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ptr_ring.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index bb515163bac3..35d125569e68 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -445,11 +445,14 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, __PTR_RING_PEEK_CALL_v; \ }) +/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See + * documentation for vmalloc for which of them are legal. + */ static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { if (size * sizeof(void *) > KMALLOC_MAX_SIZE) return NULL; - return kcalloc(size, sizeof(void *), gfp); + return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -582,7 +585,7 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, spin_unlock(&(r)->producer_lock); spin_unlock_irqrestore(&(r)->consumer_lock, flags); - kfree(old); + kvfree(old); return 0; } @@ -622,7 +625,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, } for (i = 0; i < nrings; ++i) - kfree(queues[i]); + kvfree(queues[i]); kfree(queues); @@ -630,7 +633,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, nomem: while (--i >= 0) - kfree(queues[i]); + kvfree(queues[i]); kfree(queues); @@ -645,7 +648,7 @@ static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) if (destroy) while ((ptr = ptr_ring_consume(r))) destroy(ptr); - kfree(r->queue); + kvfree(r->queue); } #endif /* _LINUX_PTR_RING_H */ From 116df867dbc12297d0e04cd6540ecad401f4c060 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Nov 2017 18:51:12 -0500 Subject: [PATCH 636/770] selinux: ensure the context is NUL terminated in security_context_to_sid_core() commit ef28df55ac27e1e5cd122e19fa311d886d47a756 upstream. The syzbot/syzkaller automated tests found a problem in security_context_to_sid_core() during early boot (before we load the SELinux policy) where we could potentially feed context strings without NUL terminators into the strcmp() function. We already guard against this during normal operation (after the SELinux policy has been loaded) by making a copy of the context strings and explicitly adding a NUL terminator to the end. The patch extends this protection to the early boot case (no loaded policy) by moving the context copy earlier in security_context_to_sid_core(). Reported-by: syzbot Signed-off-by: Paul Moore Reviewed-By: William Roberts Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e4a1c0dc561a..338af05e06fa 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1413,27 +1413,25 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!scontext_len) return -EINVAL; + /* Copy the string to allow changes and ensure a NUL terminator */ + scontext2 = kmemdup_nul(scontext, scontext_len, gfp_flags); + if (!scontext2) + return -ENOMEM; + if (!ss_initialized) { int i; for (i = 1; i < SECINITSID_NUM; i++) { - if (!strcmp(initial_sid_to_string[i], scontext)) { + if (!strcmp(initial_sid_to_string[i], scontext2)) { *sid = i; - return 0; + goto out; } } *sid = SECINITSID_KERNEL; - return 0; + goto out; } *sid = SECSID_NULL; - /* Copy the string so that we can modify the copy as we parse it. */ - scontext2 = kmalloc(scontext_len + 1, gfp_flags); - if (!scontext2) - return -ENOMEM; - memcpy(scontext2, scontext, scontext_len); - scontext2[scontext_len] = 0; - if (force) { /* Save another copy for storing in uninterpreted form */ rc = -ENOMEM; From ca181454e726e47434566d9a126e9cfc29db48ae Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 5 Dec 2017 17:17:43 -0500 Subject: [PATCH 637/770] selinux: skip bounded transition processing if the policy isn't loaded commit 4b14752ec4e0d87126e636384cf37c8dd9df157c upstream. We can't do anything reasonable in security_bounded_transition() if we don't have a policy loaded, and in fact we could run into problems with some of the code inside expecting a policy. Fix these problems like we do many others in security/selinux/ss/services.c by checking to see if the policy is loaded (ss_initialized) and returning quickly if it isn't. Reported-by: syzbot Signed-off-by: Paul Moore Acked-by: Stephen Smalley Reviewed-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 338af05e06fa..c9c031e3d1ae 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -867,6 +867,9 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) int index; int rc; + if (!ss_initialized) + return 0; + read_lock(&policy_rwlock); rc = -EINVAL; From 9bae74042eb17eeaed782f85438a7a131fe68282 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 2 Nov 2017 09:52:27 -0400 Subject: [PATCH 638/770] media: pvrusb2: properly check endpoint types commit 72c27a68a2a3f650f0dc7891ee98f02283fc11af upstream. As syzkaller detected, pvrusb2 driver submits bulk urb withount checking the the endpoint type is actually blunk. Add a check. usb 1-1: BOGUS urb xfer, pipe 3 != type 1 ------------[ cut here ]------------ WARNING: CPU: 1 PID: 2713 at drivers/usb/core/urb.c:449 usb_submit_urb+0xf8a/0x11d0 Modules linked in: CPU: 1 PID: 2713 Comm: pvrusb2-context Not tainted 4.14.0-rc1-42251-gebb2c2437d80 #210 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b7a18c0 task.stack: ffff880069978000 RIP: 0010:usb_submit_urb+0xf8a/0x11d0 drivers/usb/core/urb.c:448 RSP: 0018:ffff88006997f990 EFLAGS: 00010286 RAX: 0000000000000029 RBX: ffff880063661900 RCX: 0000000000000000 RDX: 0000000000000029 RSI: ffffffff86876d60 RDI: ffffed000d32ff24 RBP: ffff88006997fa90 R08: 1ffff1000d32fdca R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff1000d32ff39 R13: 0000000000000001 R14: 0000000000000003 R15: ffff880068bbed68 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000001032000 CR3: 000000006a0ff000 CR4: 00000000000006f0 Call Trace: pvr2_send_request_ex+0xa57/0x1d80 drivers/media/usb/pvrusb2/pvrusb2-hdw.c:3645 pvr2_hdw_check_firmware drivers/media/usb/pvrusb2/pvrusb2-hdw.c:1812 pvr2_hdw_setup_low drivers/media/usb/pvrusb2/pvrusb2-hdw.c:2107 pvr2_hdw_setup drivers/media/usb/pvrusb2/pvrusb2-hdw.c:2250 pvr2_hdw_initialize+0x548/0x3c10 drivers/media/usb/pvrusb2/pvrusb2-hdw.c:2327 pvr2_context_check drivers/media/usb/pvrusb2/pvrusb2-context.c:118 pvr2_context_thread_func+0x361/0x8c0 drivers/media/usb/pvrusb2/pvrusb2-context.c:167 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 48 8b 85 30 ff ff ff 48 8d b8 98 00 00 00 e8 ee 82 89 fe 45 89 e8 44 89 f1 4c 89 fa 48 89 c6 48 c7 c7 40 c0 ea 86 e8 30 1b dc fc <0f> ff e9 9b f7 ff ff e8 aa 95 25 fd e9 80 f7 ff ff e8 50 74 f3 ---[ end trace 6919030503719da6 ]--- Signed-off-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/pvrusb2/pvrusb2-hdw.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index ad5b25b89699..44975061b953 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -3642,6 +3642,12 @@ static int pvr2_send_request_ex(struct pvr2_hdw *hdw, hdw); hdw->ctl_write_urb->actual_length = 0; hdw->ctl_write_pend_flag = !0; + if (usb_urb_ep_type_check(hdw->ctl_write_urb)) { + pvr2_trace( + PVR2_TRACE_ERROR_LEGS, + "Invalid write control endpoint"); + return -EINVAL; + } status = usb_submit_urb(hdw->ctl_write_urb,GFP_KERNEL); if (status < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, @@ -3666,6 +3672,12 @@ status); hdw); hdw->ctl_read_urb->actual_length = 0; hdw->ctl_read_pend_flag = !0; + if (usb_urb_ep_type_check(hdw->ctl_read_urb)) { + pvr2_trace( + PVR2_TRACE_ERROR_LEGS, + "Invalid read control endpoint"); + return -EINVAL; + } status = usb_submit_urb(hdw->ctl_read_urb,GFP_KERNEL); if (status < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, From 392e03283a3ddddf7c48dcc002b8668e1612a578 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Dec 2017 16:40:26 -0800 Subject: [PATCH 639/770] crypto: x86/twofish-3way - Fix %rbp usage commit d8c7fe9f2a486a6e5f0d5229ca43807af5ab22c6 upstream. Using %rbp as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. In twofish-3way, we can't simply replace %rbp with another register because there are none available. Instead, we use the stack to hold the values that %rbp, %r11, and %r12 were holding previously. Each of these values represents the half of the output from the previous Feistel round that is being passed on unchanged to the following round. They are only used once per round, when they are exchanged with %rax, %rbx, and %rcx. As a result, we free up 3 registers (one per block) and can reassign them so that %rbp is not used, and additionally %r14 and %r15 are not used so they do not need to be saved/restored. There may be a small overhead caused by replacing 'xchg REG, REG' with the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per round. But, counterintuitively, when I tested "ctr-twofish-3way" on a Haswell processor, the new version was actually about 2% faster. (Perhaps 'xchg' is not as well optimized as plain moves.) Reported-by: syzbot Signed-off-by: Eric Biggers Reviewed-by: Josh Poimboeuf Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 112 ++++++++++--------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 1c3b7ceb36d2..e7273a606a07 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -55,29 +55,31 @@ #define RAB1bl %bl #define RAB2bl %cl +#define CD0 0x0(%rsp) +#define CD1 0x8(%rsp) +#define CD2 0x10(%rsp) + +# used only before/after all rounds #define RCD0 %r8 #define RCD1 %r9 #define RCD2 %r10 -#define RCD0d %r8d -#define RCD1d %r9d -#define RCD2d %r10d +# used only during rounds +#define RX0 %r8 +#define RX1 %r9 +#define RX2 %r10 -#define RX0 %rbp -#define RX1 %r11 -#define RX2 %r12 +#define RX0d %r8d +#define RX1d %r9d +#define RX2d %r10d -#define RX0d %ebp -#define RX1d %r11d -#define RX2d %r12d +#define RY0 %r11 +#define RY1 %r12 +#define RY2 %r13 -#define RY0 %r13 -#define RY1 %r14 -#define RY2 %r15 - -#define RY0d %r13d -#define RY1d %r14d -#define RY2d %r15d +#define RY0d %r11d +#define RY1d %r12d +#define RY2d %r13d #define RT0 %rdx #define RT1 %rsi @@ -85,6 +87,8 @@ #define RT0d %edx #define RT1d %esi +#define RT1bl %sil + #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ movzbl ab ## bl, tmp2 ## d; \ movzbl ab ## bh, tmp1 ## d; \ @@ -92,6 +96,11 @@ op1##l T0(CTX, tmp2, 4), dst ## d; \ op2##l T1(CTX, tmp1, 4), dst ## d; +#define swap_ab_with_cd(ab, cd, tmp) \ + movq cd, tmp; \ + movq ab, cd; \ + movq tmp, ab; + /* * Combined G1 & G2 function. Reordered with help of rotates to have moves * at begining. @@ -110,15 +119,15 @@ /* G1,2 && G2,2 */ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ - xchgq cd ## 0, ab ## 0; \ + swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ - xchgq cd ## 1, ab ## 1; \ + swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ - xchgq cd ## 2, ab ## 2; + swap_ab_with_cd(ab ## 2, cd ## 2, RT0); #define enc_round_end(ab, x, y, n) \ addl y ## d, x ## d; \ @@ -168,6 +177,16 @@ decrypt_round3(ba, dc, (n*2)+1); \ decrypt_round3(ba, dc, (n*2)); +#define push_cd() \ + pushq RCD2; \ + pushq RCD1; \ + pushq RCD0; + +#define pop_cd() \ + popq RCD0; \ + popq RCD1; \ + popq RCD2; + #define inpack3(in, n, xy, m) \ movq 4*(n)(in), xy ## 0; \ xorq w+4*m(CTX), xy ## 0; \ @@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way) * %rdx: src, RIO * %rcx: bool, if true: xor output */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rcx; /* bool xor */ @@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way) inpack_enc3(); - encrypt_cycle3(RAB, RCD, 0); - encrypt_cycle3(RAB, RCD, 1); - encrypt_cycle3(RAB, RCD, 2); - encrypt_cycle3(RAB, RCD, 3); - encrypt_cycle3(RAB, RCD, 4); - encrypt_cycle3(RAB, RCD, 5); - encrypt_cycle3(RAB, RCD, 6); - encrypt_cycle3(RAB, RCD, 7); + push_cd(); + encrypt_cycle3(RAB, CD, 0); + encrypt_cycle3(RAB, CD, 1); + encrypt_cycle3(RAB, CD, 2); + encrypt_cycle3(RAB, CD, 3); + encrypt_cycle3(RAB, CD, 4); + encrypt_cycle3(RAB, CD, 5); + encrypt_cycle3(RAB, CD, 6); + encrypt_cycle3(RAB, CD, 7); + pop_cd(); popq RIO; /* dst */ - popq %rbp; /* bool xor */ + popq RT1; /* bool xor */ - testb %bpl, %bpl; + testb RT1bl, RT1bl; jnz .L__enc_xor3; outunpack_enc3(mov); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; .L__enc_xor3: outunpack_enc3(xor); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(__twofish_enc_blk_3way) @@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way) * %rsi: dst * %rdx: src, RIO */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rsi; /* dst */ inpack_dec3(); - decrypt_cycle3(RAB, RCD, 7); - decrypt_cycle3(RAB, RCD, 6); - decrypt_cycle3(RAB, RCD, 5); - decrypt_cycle3(RAB, RCD, 4); - decrypt_cycle3(RAB, RCD, 3); - decrypt_cycle3(RAB, RCD, 2); - decrypt_cycle3(RAB, RCD, 1); - decrypt_cycle3(RAB, RCD, 0); + push_cd(); + decrypt_cycle3(RAB, CD, 7); + decrypt_cycle3(RAB, CD, 6); + decrypt_cycle3(RAB, CD, 5); + decrypt_cycle3(RAB, CD, 4); + decrypt_cycle3(RAB, CD, 3); + decrypt_cycle3(RAB, CD, 2); + decrypt_cycle3(RAB, CD, 1); + decrypt_cycle3(RAB, CD, 0); + pop_cd(); popq RIO; /* dst */ outunpack_dec3(); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(twofish_dec_blk_3way) From 747ad3d315923647ca4d79d92b28b512e85cd96e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Jan 2018 11:14:08 -0800 Subject: [PATCH 640/770] staging: android: ion: Add __GFP_NOWARN for system contig heap commit 0c75f10312a35b149b2cebb1832316b35c2337ca upstream. syzbot reported a warning from Ion: WARNING: CPU: 1 PID: 3485 at mm/page_alloc.c:3926 ... __alloc_pages_nodemask+0x9fb/0xd80 mm/page_alloc.c:4252 alloc_pages_current+0xb6/0x1e0 mm/mempolicy.c:2036 alloc_pages include/linux/gfp.h:492 [inline] ion_system_contig_heap_allocate+0x40/0x2c0 drivers/staging/android/ion/ion_system_heap.c:374 ion_buffer_create drivers/staging/android/ion/ion.c:93 [inline] ion_alloc+0x2c1/0x9e0 drivers/staging/android/ion/ion.c:420 ion_ioctl+0x26d/0x380 drivers/staging/android/ion/ion-ioctl.c:84 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686 SYSC_ioctl fs/ioctl.c:701 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692 This is a warning about attempting to allocate order > MAX_ORDER. This is coming from a userspace Ion allocation request. Since userspace is free to request however much memory it wants (and the kernel is free to deny its allocation), silence the allocation attempt with __GFP_NOWARN in case it fails. Reported-by: syzbot+76e7efc4748495855a4d@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index 4dc5d7a589c2..b6ece18e6a88 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -371,7 +371,7 @@ static int ion_system_contig_heap_allocate(struct ion_heap *heap, unsigned long i; int ret; - page = alloc_pages(low_order_gfp_flags, order); + page = alloc_pages(low_order_gfp_flags | __GFP_NOWARN, order); if (!page) return -ENOMEM; From 2c565a9538a13e0b427ff7c3fbd2ee8e8cd59709 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 5 Jan 2018 11:14:09 -0800 Subject: [PATCH 641/770] staging: android: ion: Switch from WARN to pr_warn commit e4e179a844f52e907e550f887d0a2171f1508af1 upstream. Syzbot reported a warning with Ion: WARNING: CPU: 0 PID: 3502 at drivers/staging/android/ion/ion-ioctl.c:73 ion_ioctl+0x2db/0x380 drivers/staging/android/ion/ion-ioctl.c:73 Kernel panic - not syncing: panic_on_warn set ... This is a warning that validation of the ioctl fields failed. This was deliberately added as a warning to make it very obvious to developers that something needed to be fixed. In reality, this is overkill and disturbs fuzzing. Switch to pr_warn for a message instead. Reported-by: syzbot+fa2d5f63ee5904a0115a@syzkaller.appspotmail.com Reported-by: syzbot Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion-ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion-ioctl.c b/drivers/staging/android/ion/ion-ioctl.c index d9f8b1424da1..021a956db1a8 100644 --- a/drivers/staging/android/ion/ion-ioctl.c +++ b/drivers/staging/android/ion/ion-ioctl.c @@ -71,8 +71,10 @@ long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; ret = validate_ioctl_arg(cmd, &data); - if (WARN_ON_ONCE(ret)) + if (ret) { + pr_warn_once("%s: ioctl validate failed\n", __func__); return ret; + } if (!(dir & _IOC_WRITE)) memset(&data, 0, sizeof(data)); From 2cc50a1946e69400399575f8ea63c2131baa83b7 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sun, 14 Jan 2018 17:00:48 -0500 Subject: [PATCH 642/770] blk_rq_map_user_iov: fix error override commit 69e0927b3774563c19b5fb32e91d75edc147fb62 upstream. During stress tests by syzkaller on the sg driver the block layer infrequently returns EINVAL. Closer inspection shows the block layer was trying to return ENOMEM (which is much more understandable) but for some reason overroad that useful error. Patch below does not show this (unchanged) line: ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); That 'ret' was being overridden when that function failed. Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 368daa02714e..e31be14da8ea 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -126,7 +126,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; - int ret; + int ret = -EINVAL; if (!iter_is_iovec(iter)) goto fail; @@ -155,7 +155,7 @@ unmap_rq: __blk_rq_unmap_user(bio); fail: rq->bio = NULL; - return -EINVAL; + return ret; } EXPORT_SYMBOL(blk_rq_map_user_iov); From 7b5ec5505945ee2ba6f6dbab51483a1102720623 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 13 Dec 2017 10:46:40 +0100 Subject: [PATCH 643/770] KVM: x86: fix escape of guest dr6 to the host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit efdab992813fb2ed825745625b83c05032e9cda2 upstream. syzkaller reported: WARNING: CPU: 0 PID: 12927 at arch/x86/kernel/traps.c:780 do_debug+0x222/0x250 CPU: 0 PID: 12927 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #16 RIP: 0010:do_debug+0x222/0x250 Call Trace: <#DB> debug+0x3e/0x70 RIP: 0010:copy_user_enhanced_fast_string+0x10/0x20 _copy_from_user+0x5b/0x90 SyS_timer_create+0x33/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The testcase sets a watchpoint (with perf_event_open) on a buffer that is passed to timer_create() as the struct sigevent argument. In timer_create(), copy_from_user()'s rep movsb triggers the BP. The testcase also sets the debug registers for the guest. However, KVM only restores host debug registers when the host has active watchpoints, which triggers a race condition when running the testcase with multiple threads. The guest's DR6.BS bit can escape to the host before another thread invokes timer_create(), and do_debug() complains. The fix is to respect do_debug()'s dr6 invariant when leaving KVM. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: David Hildenbrand Cc: Dmitry Vyukov Reviewed-by: David Hildenbrand Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8b7ede04e8a3..0dcd7bf45dc1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2926,6 +2926,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); + /* + * If userspace has set any breakpoints or watchpoints, dr6 is restored + * on every vmexit, but if not, we might have a stale dr6 from the + * guest. do_debug expects dr6 to be cleared after it runs, do the same. + */ + set_debugreg(0, 6); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, From 4249e8af8108afadc6316d9e4815f3333c9770d0 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 6 Feb 2018 15:40:28 -0800 Subject: [PATCH 644/770] kcov: detect double association with a single task commit a77660d231f8b3d84fd23ed482e0964f7aa546d6 upstream. Currently KCOV_ENABLE does not check if the current task is already associated with another kcov descriptor. As the result it is possible to associate a single task with more than one kcov descriptor, which later leads to a memory leak of the old descriptor. This relation is really meant to be one-to-one (task has only one back link). Extend validation to detect such misuse. Link: http://lkml.kernel.org/r/20180122082520.15716-1-dvyukov@google.com Fixes: 5c9a8750a640 ("kernel: add kcov code coverage") Signed-off-by: Dmitry Vyukov Reported-by: Shankara Pailoor Cc: Dmitry Vyukov Cc: syzbot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index fc6af9e1308b..b11ef6e51f7e 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -225,9 +225,9 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || kcov->area == NULL) return -EINVAL; - if (kcov->t != NULL) - return -EBUSY; t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; From 6609d112229fff5dbccf57bbc31facffc1ba06dd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 28 Dec 2017 09:48:54 +0100 Subject: [PATCH 645/770] netfilter: x_tables: fix int overflow in xt_alloc_table_info() commit 889c604fd0b5f6d3b8694ade229ee44124de1127 upstream. syzkaller triggered OOM kills by passing ipt_replace.size = -1 to IPT_SO_SET_REPLACE. The root cause is that SMP_ALIGN() in xt_alloc_table_info() causes int overflow and the size check passes when it should not. SMP_ALIGN() is no longer needed leftover. Remove SMP_ALIGN() call in xt_alloc_table_info(). Reported-by: syzbot+4396883fa8c4f64e0175@syzkaller.appspotmail.com Signed-off-by: Dmitry Vyukov Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d8571f414208..20f7856fec16 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -39,7 +39,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); -#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define XT_PCPU_BLOCK_SIZE 4096 struct compat_delta { @@ -1000,7 +999,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) return NULL; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) + if ((size >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; info = kvmalloc(sz, GFP_KERNEL); From f25f048d3bde992239b7e019467b4fb4832bac99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Jan 2018 17:16:09 -0800 Subject: [PATCH 646/770] netfilter: x_tables: avoid out-of-bounds reads in xt_request_find_{match|target} commit da17c73b6eb74aad3c3c0654394635675b623b3e upstream. It looks like syzbot found its way into netfilter territory. Issue here is that @name comes from user space and might not be null terminated. Out-of-bound reads happen, KASAN is not happy. v2 added similar fix for xt_request_find_target(), as Florian advised. Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/x_tables.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 20f7856fec16..60c92158a2cd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -209,6 +209,9 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + match = xt_find_match(nfproto, name, revision); if (IS_ERR(match)) { request_module("%st_%s", xt_prefix[nfproto], name); @@ -251,6 +254,9 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + target = xt_find_target(af, name, revision); if (IS_ERR(target)) { request_module("%st_%s", xt_prefix[af], name); From 2852a7dd15fe311e96be2d36a7361faac471b7a9 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 30 Jan 2018 15:21:34 +0100 Subject: [PATCH 647/770] netfilter: ipt_CLUSTERIP: fix out-of-bounds accesses in clusterip_tg_check() commit 1a38956cce5eabd7b74f94bab70265e4df83165e upstream. Commit 136e92bbec0a switched local_nodes from an array to a bitmask but did not add proper bounds checks. As the result clusterip_config_init_nodelist() can both over-read ipt_clusterip_tgt_info.local_nodes and over-write clusterip_config.local_nodes. Add bounds checks for both. Fixes: 136e92bbec0a ("[NETFILTER] CLUSTERIP: use a bitmap to store node responsibility data") Signed-off-by: Dmitry Vyukov Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 17b4ca562944..24a8c2e63e3d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -431,7 +431,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; - int ret; + int ret, i; if (par->nft_compat) { pr_err("cannot use CLUSTERIP target from nftables compat\n"); @@ -450,8 +450,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) pr_info("Please specify destination IP\n"); return -EINVAL; } - - /* FIXME: further sanity checks */ + if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) { + pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes); + return -EINVAL; + } + for (i = 0; i < cipinfo->num_local_nodes; i++) { + if (cipinfo->local_nodes[i] - 1 >= + sizeof(config->local_nodes) * 8) { + pr_info("bad local_nodes[%d] %u\n", + i, cipinfo->local_nodes[i]); + return -EINVAL; + } + } config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); if (!config) { From 516c855cf51462678eb50ec52da845c10734d0bf Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2018 19:01:40 +0100 Subject: [PATCH 648/770] netfilter: on sockopt() acquire sock lock only in the required scope commit 3f34cfae1238848fd53f25e5c8fd59da57901f4b upstream. Syzbot reported several deadlocks in the netfilter area caused by rtnl lock and socket lock being acquired with a different order on different code paths, leading to backtraces like the following one: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc9+ #212 Not tainted ------------------------------------------------------ syzkaller041579/3682 is trying to acquire lock: (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] lock_sock include/net/sock.h:1463 [inline] (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 but task is already holding lock: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 register_netdevice_notifier+0xad/0x860 net/core/dev.c:1607 tee_tg_check+0x1a0/0x280 net/netfilter/xt_TEE.c:106 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:845 check_target net/ipv6/netfilter/ip6_tables.c:538 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:580 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:749 do_replace net/ipv6/netfilter/ip6_tables.c:1165 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1691 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x115/0x150 net/ipv6/ipv6_sockglue.c:928 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 -> #0 (sk_lock-AF_INET6){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3914 lock_sock_nested+0xc2/0x110 net/core/sock.c:2780 lock_sock include/net/sock.h:1463 [inline] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 ipv6_setsockopt+0xd7/0x150 net/ipv6/ipv6_sockglue.c:922 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(sk_lock-AF_INET6); lock(rtnl_mutex); lock(sk_lock-AF_INET6); *** DEADLOCK *** 1 lock held by syzkaller041579/3682: #0: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 The problem, as Florian noted, is that nf_setsockopt() is always called with the socket held, even if the lock itself is required only for very tight scopes and only for some operation. This patch addresses the issues moving the lock_sock() call only where really needed, namely in ipv*_getorigdst(), so that nf_setsockopt() does not need anymore to acquire both locks. Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Reported-by: syzbot+a4c2dc980ac1af699b36@syzkaller.appspotmail.com Suggested-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 14 ++++---------- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +++++- net/ipv6/ipv6_sockglue.c | 17 +++++------------ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 29 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 60fb1eb7d7d8..c7df4969f80a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1251,11 +1251,8 @@ int ip_setsockopt(struct sock *sk, int level, if (err == -ENOPROTOOPT && optname != IP_HDRINCL && optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) { - lock_sock(sk); + !ip_mroute_opt(optname)) err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); - release_sock(sk); - } #endif return err; } @@ -1280,12 +1277,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, if (err == -ENOPROTOOPT && optname != IP_HDRINCL && optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) { - lock_sock(sk); - err = compat_nf_setsockopt(sk, PF_INET, optname, - optval, optlen); - release_sock(sk); - } + !ip_mroute_opt(optname)) + err = compat_nf_setsockopt(sk, PF_INET, optname, optval, + optlen); #endif return err; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fe374da4bc13..997a96896f1a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -218,15 +218,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple; memset(&tuple, 0, sizeof(tuple)); + + lock_sock(sk); tuple.src.u3.ip = inet->inet_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; tuple.dst.u3.ip = inet->inet_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.src.l3num = PF_INET; tuple.dst.protonum = sk->sk_protocol; + release_sock(sk); /* We only do TCP and SCTP at the moment: is there a better way? */ - if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { + if (tuple.dst.protonum != IPPROTO_TCP && + tuple.dst.protonum != IPPROTO_SCTP) { pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); return -ENOPROTOOPT; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3b251760cb8c..24b69384bdbf 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -915,12 +915,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && - optname != IPV6_XFRM_POLICY) { - lock_sock(sk); - err = nf_setsockopt(sk, PF_INET6, optname, optval, - optlen); - release_sock(sk); - } + optname != IPV6_XFRM_POLICY) + err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } @@ -950,12 +946,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && - optname != IPV6_XFRM_POLICY) { - lock_sock(sk); - err = compat_nf_setsockopt(sk, PF_INET6, optname, - optval, optlen); - release_sock(sk); - } + optname != IPV6_XFRM_POLICY) + err = compat_nf_setsockopt(sk, PF_INET6, optname, optval, + optlen); #endif return err; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fe01dc953c56..b807478c4f7f 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -226,20 +226,27 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = { static int ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - const struct inet_sock *inet = inet_sk(sk); + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; const struct ipv6_pinfo *inet6 = inet6_sk(sk); + const struct inet_sock *inet = inet_sk(sk); const struct nf_conntrack_tuple_hash *h; struct sockaddr_in6 sin6; - struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; + __be32 flow_label; + int bound_dev_if; + lock_sock(sk); tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; + bound_dev_if = sk->sk_bound_dev_if; + flow_label = inet6->flow_label; + release_sock(sk); - if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) + if (tuple.dst.protonum != IPPROTO_TCP && + tuple.dst.protonum != IPPROTO_SCTP) return -ENOPROTOOPT; if (*len < 0 || (unsigned int) *len < sizeof(sin6)) @@ -257,14 +264,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) sin6.sin6_family = AF_INET6; sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; - sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; + sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; memcpy(&sin6.sin6_addr, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, sizeof(sin6.sin6_addr)); nf_ct_put(ct); - sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, - sk->sk_bound_dev_if); + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; } From be6c08bf9aea46775232e48241b55e768362b9e0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 31 Jan 2018 15:02:47 -0800 Subject: [PATCH 649/770] netfilter: xt_cgroup: initialize info->priv in cgroup_mt_check_v1() commit ba7cd5d95f25cc6005f687dabdb4e7a6063adda9 upstream. xt_cgroup_info_v1->priv is an internal pointer only used for kernel, we should not trust what user-space provides. Reported-by: Fixes: c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 1db1ce59079f..891f4e7e8ea7 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return -EINVAL; } + info->priv = NULL; if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { From 6e12516df1367e80f25fc204b6002db2a700a6ad Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 5 Feb 2018 14:41:45 -0800 Subject: [PATCH 650/770] netfilter: xt_RATEEST: acquire xt_rateest_mutex for hash insert commit 7dc68e98757a8eccf8ca7a53a29b896f1eef1f76 upstream. rateest_hash is supposed to be protected by xt_rateest_mutex, and, as suggested by Eric, lookup and insert should be atomic, so we should acquire the xt_rateest_mutex once for both. So introduce a non-locking helper for internal use and keep the locking one for external. Reported-by: Fixes: 5859034d7eb8 ("[NETFILTER]: x_tables: add RATEEST target") Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_RATEEST.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 498b54fd04d7..141c295191f6 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) hlist_add_head(&est->list, &rateest_hash[h]); } -struct xt_rateest *xt_rateest_lookup(const char *name) +static struct xt_rateest *__xt_rateest_lookup(const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); - mutex_lock(&xt_rateest_mutex); hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; - mutex_unlock(&xt_rateest_mutex); return est; } } - mutex_unlock(&xt_rateest_mutex); + return NULL; } + +struct xt_rateest *xt_rateest_lookup(const char *name) +{ + struct xt_rateest *est; + + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(name); + mutex_unlock(&xt_rateest_mutex); + return est; +} EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct xt_rateest *est) @@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); - est = xt_rateest_lookup(info->name); + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(info->name); if (est) { + mutex_unlock(&xt_rateest_mutex); /* * If estimator parameters are specified, they must match the * existing estimator. @@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) info->est = est; xt_rateest_hash_insert(est); + mutex_unlock(&xt_rateest_mutex); return 0; err2: kfree(est); err1: + mutex_unlock(&xt_rateest_mutex); return ret; } From c64534f82608a8f9d58d7c74d4a12122bdb2c3fe Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Nov 2017 11:11:28 -0800 Subject: [PATCH 651/770] rds: tcp: correctly sequence cleanup on netns deletion. commit 681648e67d43cf269c5590ecf021ed481f4551fc upstream. Commit 8edc3affc077 ("rds: tcp: Take explicit refcounts on struct net") introduces a regression in rds-tcp netns cleanup. The cleanup_net(), (and thus rds_tcp_dev_event notification) is only called from put_net() when all netns refcounts go to 0, but this cannot happen if the rds_connection itself is holding a c_net ref that it expects to release in rds_tcp_kill_sock. Instead, the rds_tcp_kill_sock callback should make sure to tear down state carefully, ensuring that the socket teardown is only done after all data-structures and workqs that depend on it are quiesced. The original motivation for commit 8edc3affc077 ("rds: tcp: Take explicit refcounts on struct net") was to resolve a race condition reported by syzkaller where workqs for tx/rx/connect were triggered after the namespace was deleted. Those worker threads should have been cancelled/flushed before socket tear-down and indeed, rds_conn_path_destroy() does try to sequence this by doing /* cancel cp_send_w */ /* cancel cp_recv_w */ /* flush cp_down_w */ /* free data structures */ Here the "flush cp_down_w" will trigger rds_conn_shutdown and thus invoke rds_tcp_conn_path_shutdown() to close the tcp socket, so that we ought to have satisfied the requirement that "socket-close is done after all other dependent state is quiesced". However, rds_conn_shutdown has a bug in that it *always* triggers the reconnect workq (and if connection is successful, we always restart tx/rx workqs so with the right timing, we risk the race conditions reported by syzkaller). Netns deletion is like module teardown- no need to restart a reconnect in this case. We can use the c_destroy_in_prog bit to avoid restarting the reconnect. Fixes: 8edc3affc077 ("rds: tcp: Take explicit refcounts on struct net") Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/connection.c | 3 ++- net/rds/rds.h | 6 +++--- net/rds/tcp.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 7ee2d5d68b78..9efc82c665b5 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -366,6 +366,8 @@ void rds_conn_shutdown(struct rds_conn_path *cp) * to the conn hash, so we never trigger a reconnect on this * conn - the reconnect is always triggered by the active peer. */ cancel_delayed_work_sync(&cp->cp_conn_w); + if (conn->c_destroy_in_prog) + return; rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); @@ -445,7 +447,6 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); - put_net(conn->c_net); kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); diff --git a/net/rds/rds.h b/net/rds/rds.h index c349c71babff..d09f6c1facb4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -150,7 +150,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - struct net *c_net; + possible_net_t c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -165,13 +165,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return conn->c_net; + return read_pnet(&conn->c_net); } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - conn->c_net = get_net(net); + write_pnet(&conn->c_net, net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 6b7ee71f40c6..85bea7a76230 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -527,7 +527,7 @@ static void rds_tcp_kill_sock(struct net *net) rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; @@ -586,7 +586,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; From 83ee02de2c8d958b598498a81323635f02bc4b1e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Nov 2017 11:11:29 -0800 Subject: [PATCH 652/770] rds: tcp: atomically purge entries from rds_tcp_conn_list during netns delete commit f10b4cff98c6977668434fbf5dd58695eeca2897 upstream. The rds_tcp_kill_sock() function parses the rds_tcp_conn_list to find the rds_connection entries marked for deletion as part of the netns deletion under the protection of the rds_tcp_conn_lock. Since the rds_tcp_conn_list tracks rds_tcp_connections (which have a 1:1 mapping with rds_conn_path), multiple tc entries in the rds_tcp_conn_list will map to a single rds_connection, and will be deleted as part of the rds_conn_destroy() operation that is done outside the rds_tcp_conn_lock. The rds_tcp_conn_list traversal done under the protection of rds_tcp_conn_lock should not leave any doomed tc entries in the list after the rds_tcp_conn_lock is released, else another concurrently executiong netns delete (for a differnt netns) thread may trip on these entries. Reported-by: syzbot Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/tcp.c | 9 +++++++-- net/rds/tcp.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 85bea7a76230..2a08bf75d008 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -306,7 +306,8 @@ static void rds_tcp_conn_free(void *arg) rdsdebug("freeing tc %p\n", tc); spin_lock_irqsave(&rds_tcp_conn_lock, flags); - list_del(&tc->t_tcp_node); + if (!tc->t_tcp_node_detached) + list_del(&tc->t_tcp_node); spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); @@ -531,8 +532,12 @@ static void rds_tcp_kill_sock(struct net *net) if (net != c_net || !tc->t_sock) continue; - if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); + } else { + list_del(&tc->t_tcp_node); + tc->t_tcp_node_detached = true; + } } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 1aafbf7c3011..e7858ee8ed8b 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,7 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; + bool t_tcp_node_detached; struct rds_conn_path *t_cpath; /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_path_connect From 701ba0df95205187f1afc4f3f520bff57ba4001d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 12 Dec 2017 11:39:04 -0500 Subject: [PATCH 653/770] net: avoid skb_warn_bad_offload on IS_ERR commit 8d74e9f88d65af8bb2e095aff506aa6eac755ada upstream. skb_warn_bad_offload warns when packets enter the GSO stack that require skb_checksum_help or vice versa. Do not warn on arbitrary bad packets. Packet sockets can craft many. Syzkaller was able to demonstrate another one with eth_type games. In particular, suppress the warning when segmentation returns an error, which is for reasons other than checksum offload. See also commit 36c92474498a ("net: WARN if skb_checksum_help() is called on skb requiring segmentation") for context on this warning. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index ffee085f0357..d33bbed640b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2792,7 +2792,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path))) + if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; From 14b2ad0bb2fa7bbc5b5b90ec3b3b3e7a68aa10c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Jan 2018 10:58:43 -0800 Subject: [PATCH 654/770] net_sched: gen_estimator: fix lockdep splat commit 40ca54e3a686f13117f3de0c443f8026dadf7c44 upstream. syzbot reported a lockdep splat in gen_new_estimator() / est_fetch_counters() when attempting to lock est->stats_lock. Since est_fetch_counters() is called from BH context from timer interrupt, we need to block BH as well when calling it from process context. Most qdiscs use per cpu counters and are immune to the problem, but net/sched/act_api.c and net/netfilter/xt_RATEEST.c are using a spinlock to protect their data. They both call gen_new_estimator() while object is created and not yet alive, so this bug could not trigger a deadlock, only a lockdep splat. Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/gen_estimator.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 7c1ffd6f9501..00ecec4891f3 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -159,7 +159,11 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->intvl_log = intvl_log; est->cpu_bstats = cpu_bstats; + if (stats_lock) + local_bh_disable(); est_fetch_counters(est, &b); + if (stats_lock) + local_bh_enable(); est->last_bytes = b.bytes; est->last_packets = b.packets; old = rcu_dereference_protected(*rate_est, 1); From 1f4f5777e5fa10f8d4c50aeece7d3eb2e9865d09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:34:45 +0100 Subject: [PATCH 655/770] ASoC: ux500: add MODULE_LICENSE tag commit 1783c9d7cb7bc3181b9271665959b87280d98d8e upstream. This adds MODULE_LICENSE/AUTHOR/DESCRIPTION tags to the ux500 platform drivers, to avoid these build warnings: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-plat-dma.o WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-mach-mop500.o The company no longer exists, so the email addresses of the authors don't work any more, but I've added them anyway for consistency. Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/ux500/mop500.c | 4 ++++ sound/soc/ux500/ux500_pcm.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/sound/soc/ux500/mop500.c b/sound/soc/ux500/mop500.c index 070a6880980e..c60a57797640 100644 --- a/sound/soc/ux500/mop500.c +++ b/sound/soc/ux500/mop500.c @@ -163,3 +163,7 @@ static struct platform_driver snd_soc_mop500_driver = { }; module_platform_driver(snd_soc_mop500_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ASoC MOP500 board driver"); +MODULE_AUTHOR("Ola Lilja"); diff --git a/sound/soc/ux500/ux500_pcm.c b/sound/soc/ux500/ux500_pcm.c index f12c01dddc8d..d35ba7700f46 100644 --- a/sound/soc/ux500/ux500_pcm.c +++ b/sound/soc/ux500/ux500_pcm.c @@ -165,3 +165,8 @@ int ux500_pcm_unregister_platform(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(ux500_pcm_unregister_platform); + +MODULE_AUTHOR("Ola Lilja"); +MODULE_AUTHOR("Roger Nilsson"); +MODULE_DESCRIPTION("ASoC UX500 driver"); +MODULE_LICENSE("GPL v2"); From 52d4c1978cb49e7228c4d8c8bcddfdb734a51a3f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:04:22 +0100 Subject: [PATCH 656/770] video: fbdev/mmp: add MODULE_LICENSE commit c1530ac5a3ce93a1f02adabc4508b5fbf862dfe2 upstream. Kbuild complains about the lack of a license tag in this driver: WARNING: modpost: missing MODULE_LICENSE() in drivers/video/fbdev/mmp/mmp_disp.o This adds the license, author and description tags. Signed-off-by: Arnd Bergmann Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/mmp/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/video/fbdev/mmp/core.c b/drivers/video/fbdev/mmp/core.c index a0f496049db7..3a6bb6561ba0 100644 --- a/drivers/video/fbdev/mmp/core.c +++ b/drivers/video/fbdev/mmp/core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include