From 2c5abe0f8e9c88339d138d7b6dcd7bdf1bf4352e Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 9 Feb 2021 16:31:04 +0300 Subject: [PATCH 01/61] gpio: ep93xx: fix BUG_ON port F usage commit 8b81a7ab8055d01d827ef66374b126eeac3bd108 upstream. Two index spaces and ep93xx_gpio_port are confusing. Instead add a separate struct to store necessary data and remove ep93xx_gpio_port. - add struct to store IRQ related data for each IRQ capable chip - replace offset array with defined offsets - add IRQ registers offset for each IRQ capable chip into ep93xx_gpio_banks ------------[ cut here ]------------ kernel BUG at drivers/gpio/gpio-ep93xx.c:64! ---[ end trace 3f6544e133e9f5ae ]--- Fixes: fd935fc421e74 ("gpio: ep93xx: Do not pingpong irq numbers") Cc: Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ep93xx.c | 186 ++++++++++++++++++++----------------- 1 file changed, 99 insertions(+), 87 deletions(-) diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 226da8df6f10..64d6c2b4282e 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -25,6 +25,9 @@ /* Maximum value for gpio line identifiers */ #define EP93XX_GPIO_LINE_MAX 63 +/* Number of GPIO chips in EP93XX */ +#define EP93XX_GPIO_CHIP_NUM 8 + /* Maximum value for irq capable line identifiers */ #define EP93XX_GPIO_LINE_MAX_IRQ 23 @@ -34,74 +37,74 @@ */ #define EP93XX_GPIO_F_IRQ_BASE 80 +struct ep93xx_gpio_irq_chip { + u8 irq_offset; + u8 int_unmasked; + u8 int_enabled; + u8 int_type1; + u8 int_type2; + u8 int_debounce; +}; + +struct ep93xx_gpio_chip { + struct gpio_chip gc; + struct ep93xx_gpio_irq_chip *eic; +}; + struct ep93xx_gpio { void __iomem *base; - struct gpio_chip gc[8]; + struct ep93xx_gpio_chip gc[EP93XX_GPIO_CHIP_NUM]; }; +#define to_ep93xx_gpio_chip(x) container_of(x, struct ep93xx_gpio_chip, gc) + +static struct ep93xx_gpio_irq_chip *to_ep93xx_gpio_irq_chip(struct gpio_chip *gc) +{ + struct ep93xx_gpio_chip *egc = to_ep93xx_gpio_chip(gc); + + return egc->eic; +} + /************************************************************************* * Interrupt handling for EP93xx on-chip GPIOs *************************************************************************/ -static unsigned char gpio_int_unmasked[3]; -static unsigned char gpio_int_enabled[3]; -static unsigned char gpio_int_type1[3]; -static unsigned char gpio_int_type2[3]; -static unsigned char gpio_int_debounce[3]; +#define EP93XX_INT_TYPE1_OFFSET 0x00 +#define EP93XX_INT_TYPE2_OFFSET 0x04 +#define EP93XX_INT_EOI_OFFSET 0x08 +#define EP93XX_INT_EN_OFFSET 0x0c +#define EP93XX_INT_STATUS_OFFSET 0x10 +#define EP93XX_INT_RAW_STATUS_OFFSET 0x14 +#define EP93XX_INT_DEBOUNCE_OFFSET 0x18 -/* Port ordering is: A B F */ -static const u8 int_type1_register_offset[3] = { 0x90, 0xac, 0x4c }; -static const u8 int_type2_register_offset[3] = { 0x94, 0xb0, 0x50 }; -static const u8 eoi_register_offset[3] = { 0x98, 0xb4, 0x54 }; -static const u8 int_en_register_offset[3] = { 0x9c, 0xb8, 0x58 }; -static const u8 int_debounce_register_offset[3] = { 0xa8, 0xc4, 0x64 }; - -static void ep93xx_gpio_update_int_params(struct ep93xx_gpio *epg, unsigned port) +static void ep93xx_gpio_update_int_params(struct ep93xx_gpio *epg, + struct ep93xx_gpio_irq_chip *eic) { - BUG_ON(port > 2); + writeb_relaxed(0, epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); - writeb_relaxed(0, epg->base + int_en_register_offset[port]); + writeb_relaxed(eic->int_type2, + epg->base + eic->irq_offset + EP93XX_INT_TYPE2_OFFSET); - writeb_relaxed(gpio_int_type2[port], - epg->base + int_type2_register_offset[port]); + writeb_relaxed(eic->int_type1, + epg->base + eic->irq_offset + EP93XX_INT_TYPE1_OFFSET); - writeb_relaxed(gpio_int_type1[port], - epg->base + int_type1_register_offset[port]); - - writeb(gpio_int_unmasked[port] & gpio_int_enabled[port], - epg->base + int_en_register_offset[port]); -} - -static int ep93xx_gpio_port(struct gpio_chip *gc) -{ - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = 0; - - while (port < ARRAY_SIZE(epg->gc) && gc != &epg->gc[port]) - port++; - - /* This should not happen but is there as a last safeguard */ - if (port == ARRAY_SIZE(epg->gc)) { - pr_crit("can't find the GPIO port\n"); - return 0; - } - - return port; + writeb_relaxed(eic->int_unmasked & eic->int_enabled, + epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); } static void ep93xx_gpio_int_debounce(struct gpio_chip *gc, unsigned int offset, bool enable) { struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); int port_mask = BIT(offset); if (enable) - gpio_int_debounce[port] |= port_mask; + eic->int_debounce |= port_mask; else - gpio_int_debounce[port] &= ~port_mask; + eic->int_debounce &= ~port_mask; - writeb(gpio_int_debounce[port], - epg->base + int_debounce_register_offset[port]); + writeb(eic->int_debounce, + epg->base + eic->irq_offset + EP93XX_INT_DEBOUNCE_OFFSET); } static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) @@ -122,12 +125,12 @@ static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) */ stat = readb(epg->base + EP93XX_GPIO_A_INT_STATUS); for_each_set_bit(offset, &stat, 8) - generic_handle_irq(irq_find_mapping(epg->gc[0].irq.domain, + generic_handle_irq(irq_find_mapping(epg->gc[0].gc.irq.domain, offset)); stat = readb(epg->base + EP93XX_GPIO_B_INT_STATUS); for_each_set_bit(offset, &stat, 8) - generic_handle_irq(irq_find_mapping(epg->gc[1].irq.domain, + generic_handle_irq(irq_find_mapping(epg->gc[1].gc.irq.domain, offset)); chained_irq_exit(irqchip, desc); @@ -153,52 +156,52 @@ static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc) static void ep93xx_gpio_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int port_mask = BIT(d->irq & 7); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) { - gpio_int_type2[port] ^= port_mask; /* switch edge direction */ - ep93xx_gpio_update_int_params(epg, port); + eic->int_type2 ^= port_mask; /* switch edge direction */ + ep93xx_gpio_update_int_params(epg, eic); } - writeb(port_mask, epg->base + eoi_register_offset[port]); + writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); } static void ep93xx_gpio_irq_mask_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int port_mask = BIT(d->irq & 7); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) - gpio_int_type2[port] ^= port_mask; /* switch edge direction */ + eic->int_type2 ^= port_mask; /* switch edge direction */ - gpio_int_unmasked[port] &= ~port_mask; - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked &= ~port_mask; + ep93xx_gpio_update_int_params(epg, eic); - writeb(port_mask, epg->base + eoi_register_offset[port]); + writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); } static void ep93xx_gpio_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); - gpio_int_unmasked[port] &= ~BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked &= ~BIT(d->irq & 7); + ep93xx_gpio_update_int_params(epg, eic); } static void ep93xx_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); - gpio_int_unmasked[port] |= BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked |= BIT(d->irq & 7); + ep93xx_gpio_update_int_params(epg, eic); } /* @@ -209,8 +212,8 @@ static void ep93xx_gpio_irq_unmask(struct irq_data *d) static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int offset = d->irq & 7; int port_mask = BIT(offset); irq_flow_handler_t handler; @@ -219,32 +222,32 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) switch (type) { case IRQ_TYPE_EDGE_RISING: - gpio_int_type1[port] |= port_mask; - gpio_int_type2[port] |= port_mask; + eic->int_type1 |= port_mask; + eic->int_type2 |= port_mask; handler = handle_edge_irq; break; case IRQ_TYPE_EDGE_FALLING: - gpio_int_type1[port] |= port_mask; - gpio_int_type2[port] &= ~port_mask; + eic->int_type1 |= port_mask; + eic->int_type2 &= ~port_mask; handler = handle_edge_irq; break; case IRQ_TYPE_LEVEL_HIGH: - gpio_int_type1[port] &= ~port_mask; - gpio_int_type2[port] |= port_mask; + eic->int_type1 &= ~port_mask; + eic->int_type2 |= port_mask; handler = handle_level_irq; break; case IRQ_TYPE_LEVEL_LOW: - gpio_int_type1[port] &= ~port_mask; - gpio_int_type2[port] &= ~port_mask; + eic->int_type1 &= ~port_mask; + eic->int_type2 &= ~port_mask; handler = handle_level_irq; break; case IRQ_TYPE_EDGE_BOTH: - gpio_int_type1[port] |= port_mask; + eic->int_type1 |= port_mask; /* set initial polarity based on current input level */ if (gc->get(gc, offset)) - gpio_int_type2[port] &= ~port_mask; /* falling */ + eic->int_type2 &= ~port_mask; /* falling */ else - gpio_int_type2[port] |= port_mask; /* rising */ + eic->int_type2 |= port_mask; /* rising */ handler = handle_edge_irq; break; default: @@ -253,9 +256,9 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) irq_set_handler_locked(d, handler); - gpio_int_enabled[port] |= port_mask; + eic->int_enabled |= port_mask; - ep93xx_gpio_update_int_params(epg, port); + ep93xx_gpio_update_int_params(epg, eic); return 0; } @@ -276,17 +279,19 @@ struct ep93xx_gpio_bank { const char *label; int data; int dir; + int irq; int base; bool has_irq; bool has_hierarchical_irq; unsigned int irq_base; }; -#define EP93XX_GPIO_BANK(_label, _data, _dir, _base, _has_irq, _has_hier, _irq_base) \ +#define EP93XX_GPIO_BANK(_label, _data, _dir, _irq, _base, _has_irq, _has_hier, _irq_base) \ { \ .label = _label, \ .data = _data, \ .dir = _dir, \ + .irq = _irq, \ .base = _base, \ .has_irq = _has_irq, \ .has_hierarchical_irq = _has_hier, \ @@ -295,16 +300,16 @@ struct ep93xx_gpio_bank { static struct ep93xx_gpio_bank ep93xx_gpio_banks[] = { /* Bank A has 8 IRQs */ - EP93XX_GPIO_BANK("A", 0x00, 0x10, 0, true, false, 64), + EP93XX_GPIO_BANK("A", 0x00, 0x10, 0x90, 0, true, false, 64), /* Bank B has 8 IRQs */ - EP93XX_GPIO_BANK("B", 0x04, 0x14, 8, true, false, 72), - EP93XX_GPIO_BANK("C", 0x08, 0x18, 40, false, false, 0), - EP93XX_GPIO_BANK("D", 0x0c, 0x1c, 24, false, false, 0), - EP93XX_GPIO_BANK("E", 0x20, 0x24, 32, false, false, 0), + EP93XX_GPIO_BANK("B", 0x04, 0x14, 0xac, 8, true, false, 72), + EP93XX_GPIO_BANK("C", 0x08, 0x18, 0x00, 40, false, false, 0), + EP93XX_GPIO_BANK("D", 0x0c, 0x1c, 0x00, 24, false, false, 0), + EP93XX_GPIO_BANK("E", 0x20, 0x24, 0x00, 32, false, false, 0), /* Bank F has 8 IRQs */ - EP93XX_GPIO_BANK("F", 0x30, 0x34, 16, false, true, 0), - EP93XX_GPIO_BANK("G", 0x38, 0x3c, 48, false, false, 0), - EP93XX_GPIO_BANK("H", 0x40, 0x44, 56, false, false, 0), + EP93XX_GPIO_BANK("F", 0x30, 0x34, 0x4c, 16, false, true, 0), + EP93XX_GPIO_BANK("G", 0x38, 0x3c, 0x00, 48, false, false, 0), + EP93XX_GPIO_BANK("H", 0x40, 0x44, 0x00, 56, false, false, 0), }; static int ep93xx_gpio_set_config(struct gpio_chip *gc, unsigned offset, @@ -326,13 +331,14 @@ static int ep93xx_gpio_f_to_irq(struct gpio_chip *gc, unsigned offset) return EP93XX_GPIO_F_IRQ_BASE + offset; } -static int ep93xx_gpio_add_bank(struct gpio_chip *gc, +static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, struct platform_device *pdev, struct ep93xx_gpio *epg, struct ep93xx_gpio_bank *bank) { void __iomem *data = epg->base + bank->data; void __iomem *dir = epg->base + bank->dir; + struct gpio_chip *gc = &egc->gc; struct device *dev = &pdev->dev; struct gpio_irq_chip *girq; int err; @@ -347,6 +353,12 @@ static int ep93xx_gpio_add_bank(struct gpio_chip *gc, girq = &gc->irq; if (bank->has_irq || bank->has_hierarchical_irq) { gc->set_config = ep93xx_gpio_set_config; + egc->eic = devm_kcalloc(dev, 1, + sizeof(*egc->eic), + GFP_KERNEL); + if (!egc->eic) + return -ENOMEM; + egc->eic->irq_offset = bank->irq; girq->chip = &ep93xx_gpio_irq_chip; } @@ -415,7 +427,7 @@ static int ep93xx_gpio_probe(struct platform_device *pdev) return PTR_ERR(epg->base); for (i = 0; i < ARRAY_SIZE(ep93xx_gpio_banks); i++) { - struct gpio_chip *gc = &epg->gc[i]; + struct ep93xx_gpio_chip *gc = &epg->gc[i]; struct ep93xx_gpio_bank *bank = &ep93xx_gpio_banks[i]; if (ep93xx_gpio_add_bank(gc, pdev, epg, bank)) From 4851d7b340e90953cd778681d7b7f3d650d6a882 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 9 Feb 2021 16:31:05 +0300 Subject: [PATCH 02/61] gpio: ep93xx: Fix single irqchip with multi gpiochips commit 28dc10eb77a2db7681b08e3b109764bbe469e347 upstream. Fixes the following warnings which results in interrupts disabled on port B/F: gpio gpiochip1: (B): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip5: (F): detected irqchip that is shared with multiple gpiochips: please fix the driver. - added separate irqchip for each interrupt capable gpiochip - provided unique names for each irqchip Fixes: d2b091961510 ("gpio: ep93xx: Pass irqchip when adding gpiochip") Cc: Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ep93xx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 64d6c2b4282e..94d9fa0d6aa7 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -38,6 +38,7 @@ #define EP93XX_GPIO_F_IRQ_BASE 80 struct ep93xx_gpio_irq_chip { + struct irq_chip ic; u8 irq_offset; u8 int_unmasked; u8 int_enabled; @@ -263,15 +264,6 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip ep93xx_gpio_irq_chip = { - .name = "GPIO", - .irq_ack = ep93xx_gpio_irq_ack, - .irq_mask_ack = ep93xx_gpio_irq_mask_ack, - .irq_mask = ep93xx_gpio_irq_mask, - .irq_unmask = ep93xx_gpio_irq_unmask, - .irq_set_type = ep93xx_gpio_irq_type, -}; - /************************************************************************* * gpiolib interface for EP93xx on-chip GPIOs *************************************************************************/ @@ -331,6 +323,15 @@ static int ep93xx_gpio_f_to_irq(struct gpio_chip *gc, unsigned offset) return EP93XX_GPIO_F_IRQ_BASE + offset; } +static void ep93xx_init_irq_chip(struct device *dev, struct irq_chip *ic) +{ + ic->irq_ack = ep93xx_gpio_irq_ack; + ic->irq_mask_ack = ep93xx_gpio_irq_mask_ack; + ic->irq_mask = ep93xx_gpio_irq_mask; + ic->irq_unmask = ep93xx_gpio_irq_unmask; + ic->irq_set_type = ep93xx_gpio_irq_type; +} + static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, struct platform_device *pdev, struct ep93xx_gpio *epg, @@ -352,6 +353,8 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, girq = &gc->irq; if (bank->has_irq || bank->has_hierarchical_irq) { + struct irq_chip *ic; + gc->set_config = ep93xx_gpio_set_config; egc->eic = devm_kcalloc(dev, 1, sizeof(*egc->eic), @@ -359,7 +362,12 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, if (!egc->eic) return -ENOMEM; egc->eic->irq_offset = bank->irq; - girq->chip = &ep93xx_gpio_irq_chip; + ic = &egc->eic->ic; + ic->name = devm_kasprintf(dev, GFP_KERNEL, "gpio-irq-%s", bank->label); + if (!ic->name) + return -ENOMEM; + ep93xx_init_irq_chip(dev, ic); + girq->chip = ic; } if (bank->has_irq) { @@ -401,7 +409,7 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, gpio_irq = EP93XX_GPIO_F_IRQ_BASE + i; irq_set_chip_data(gpio_irq, &epg->gc[5]); irq_set_chip_and_handler(gpio_irq, - &ep93xx_gpio_irq_chip, + girq->chip, handle_level_irq); irq_clear_status_flags(gpio_irq, IRQ_NOREQUEST); } From 74c7bafdd303c1b6efc0d53800654b3b89a2d34c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 5 Feb 2021 15:40:04 -0500 Subject: [PATCH 03/61] tracing: Do not count ftrace events in top level enable output commit 256cfdd6fdf70c6fcf0f7c8ddb0ebd73ce8f3bc9 upstream. The file /sys/kernel/tracing/events/enable is used to enable all events by echoing in "1", or disabling all events when echoing in "0". To know if all events are enabled, disabled, or some are enabled but not all of them, cating the file should show either "1" (all enabled), "0" (all disabled), or "X" (some enabled but not all of them). This works the same as the "enable" files in the individule system directories (like tracing/events/sched/enable). But when all events are enabled, the top level "enable" file shows "X". The reason is that its checking the "ftrace" events, which are special events that only exist for their format files. These include the format for the function tracer events, that are enabled when the function tracer is enabled, but not by the "enable" file. The check includes these events, which will always be disabled, and even though all true events are enabled, the top level "enable" file will show "X" instead of "1". To fix this, have the check test the event's flags to see if it has the "IGNORE_ENABLE" flag set, and if so, not test it. Cc: stable@vger.kernel.org Fixes: 553552ce1796c ("tracing: Combine event filter_active and enable into single flags field") Reported-by: "Yordan Karadzhov (VMware)" Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 309b2b3c5349..e31ee325dad1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1107,7 +1107,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!trace_event_name(call) || !call->class || !call->class->reg) + if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + !trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) From a0997a86f5c0085e183ddee5fb72091d584d3d16 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Feb 2021 11:53:22 -0500 Subject: [PATCH 04/61] tracing: Check length before giving out the filter buffer commit b220c049d5196dd94d992dd2dc8cba1a5e6123bf upstream. When filters are used by trace events, a page is allocated on each CPU and used to copy the trace event fields to this page before writing to the ring buffer. The reason to use the filter and not write directly into the ring buffer is because a filter may discard the event and there's more overhead on discarding from the ring buffer than the extra copy. The problem here is that there is no check against the size being allocated when using this page. If an event asks for more than a page size while being filtered, it will get only a page, leading to the caller writing more that what was allocated. Check the length of the request, and if it is more than PAGE_SIZE minus the header default back to allocating from the ring buffer directly. The ring buffer may reject the event if its too big anyway, but it wont overflow. Link: https://lore.kernel.org/ath10k/1612839593-2308-1-git-send-email-wgong@codeaurora.org/ Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff4 ("tracing: Use temp buffer when filtering events") Reported-by: Wen Gong Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 67af28f03cf4..1a75610f5f57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2498,7 +2498,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, (entry = this_cpu_read(trace_buffered_event))) { /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); - if (val == 1) { + if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { trace_event_setup(entry, type, flags, pc); entry->array[0] = len; return entry; From 18d691d837b340c941ad778bc8dfa77446e48bb4 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 10 Feb 2021 17:06:54 +0000 Subject: [PATCH 05/61] arm/xen: Don't probe xenbus as part of an early initcall commit c4295ab0b485b8bc50d2264bcae2acd06f25caaf upstream. After Commit 3499ba8198cad ("xen: Fix event channel callback via INTX/GSI"), xenbus_probe() will be called too early on Arm. This will recent to a guest hang during boot. If the hang wasn't there, we would have ended up to call xenbus_probe() twice (the second time is in xenbus_probe_initcall()). We don't need to initialize xenbus_probe() early for Arm guest. Therefore, the call in xen_guest_init() is now removed. After this change, there is no more external caller for xenbus_probe(). So the function is turned to a static one. Interestingly there were two prototypes for it. Cc: stable@vger.kernel.org Fixes: 3499ba8198cad ("xen: Fix event channel callback via INTX/GSI") Reported-by: Ian Jackson Signed-off-by: Julien Grall Reviewed-by: David Woodhouse Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20210210170654.5377-1-julien@xen.org Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 2 -- drivers/xen/xenbus/xenbus.h | 1 - drivers/xen/xenbus/xenbus_probe.c | 2 +- include/xen/xenbus.h | 2 -- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index f45bff158fc2..57dfc13b2752 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -370,8 +370,6 @@ static int __init xen_guest_init(void) return -ENOMEM; } gnttab_init(); - if (!xen_initial_domain()) - xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index a9bb5f91082d..88516a8a9f93 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -115,7 +115,6 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); -void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 786494bb7f20..652894d61967 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -683,7 +683,7 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(void) +static void xenbus_probe(void) { xenstored_ready = 1; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index fe9a9fa2ebc4..14d47ed4114f 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -187,8 +187,6 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(void); - #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ kfree(str); \ From 2407794f2298ee83b38be3aae71867d6d90e87ae Mon Sep 17 00:00:00 2001 From: Odin Ugedal Date: Sat, 16 Jan 2021 18:36:33 +0100 Subject: [PATCH 06/61] cgroup: fix psi monitor for root cgroup commit 385aac1519417b89cb91b77c22e4ca21db563cd0 upstream. Fix NULL pointer dereference when adding new psi monitor to the root cgroup. PSI files for root cgroup was introduced in df5ba5be742 by using system wide psi struct when reading, but file write/monitor was not properly fixed. Since the PSI config for the root cgroup isn't initialized, the current implementation tries to lock a NULL ptr, resulting in a crash. Can be triggered by running this as root: $ tee /sys/fs/cgroup/cpu.pressure <<< "some 10000 1000000" Signed-off-by: Odin Ugedal Reviewed-by: Suren Baghdasaryan Acked-by: Dan Schatzberg Fixes: df5ba5be7425 ("kernel/sched/psi.c: expose pressure metrics on root cgroup") Acked-by: Johannes Weiner Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 35faf082a709..37db8eba149a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3627,6 +3627,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, { struct psi_trigger *new; struct cgroup *cgrp; + struct psi_group *psi; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) @@ -3635,7 +3636,8 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); - new = psi_trigger_create(&cgrp->psi, buf, nbytes, res); + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; + new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); From 92c40ed0abcb75f9323d1cd512cf8599674893b6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 15 Aug 2020 13:51:12 +0100 Subject: [PATCH 07/61] arm64: dts: rockchip: Fix PCIe DT properties on rk3399 [ Upstream commit 43f20b1c6140896916f4e91aacc166830a7ba849 ] It recently became apparent that the lack of a 'device_type = "pci"' in the PCIe root complex node for rk3399 is a violation of the PCI binding, as documented in IEEE Std 1275-1994. Changes to the kernel's parsing of the DT made such violation fatal, as drivers cannot probe the controller anymore. Add the missing property makes the PCIe node compliant. While we are at it, drop the pointless linux,pci-domain property, which only makes sense when there are multiple host bridges. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200815125112.462652-3-maz@kernel.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index bb7d0aac6b9d..9d6ed8cda2c8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -232,6 +232,7 @@ reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>; reg-names = "axi-base", "apb-base"; + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; @@ -250,7 +251,6 @@ <0 0 0 2 &pcie0_intc 1>, <0 0 0 3 &pcie0_intc 2>, <0 0 0 4 &pcie0_intc 3>; - linux,pci-domain = <0>; max-link-speed = <1>; msi-map = <0x0 &its 0x0 0x1000>; phys = <&pcie_phy 0>, <&pcie_phy 1>, From 2cb208ba0fd1c3afb8484e42d2aa79bdd844592a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 21 Dec 2020 16:11:03 -0800 Subject: [PATCH 08/61] arm64: dts: qcom: sdm845: Reserve LPASS clocks in gcc [ Upstream commit 93f2a11580a9732c1d90f9e01a7e9facc825658f ] The GCC_LPASS_Q6_AXI_CLK and GCC_LPASS_SWAY_CLK clocks may not be touched on a typical UEFI based SDM845 device, but when the kernel is built with CONFIG_SDM_LPASSCC_845 this happens, unless they are marked as protected-clocks in the DT. This was done for the MTP and the Pocophone, but not for DB845c and the Lenovo Yoga C630 - causing these to fail to boot if the LPASS clock controller is enabled (which it typically isn't). Tested-by: Vinod Koul #on db845c Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201222001103.3112306-1-bjorn.andersson@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 4 +++- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 751651a6cd81..bf4fde88011c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -337,7 +337,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &pm8998_gpio { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index e638f216dbfb..840d6b9bbb59 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -232,7 +232,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &i2c1 { From 2bb22cd4dae4665f18ebd6df4e652cb03957bbdc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 10 Jan 2021 21:09:11 +0200 Subject: [PATCH 09/61] ARM: OMAP2+: Fix suspcious RCU usage splats for omap_enter_idle_coupled [ Upstream commit 06862d789ddde8a99c1e579e934ca17c15a84755 ] We get suspcious RCU usage splats with cpuidle in several places in omap_enter_idle_coupled() with the kernel debug options enabled: RCU used illegally from extended quiescent state! ... (_raw_spin_lock_irqsave) (omap_enter_idle_coupled+0x17c/0x2d8) (omap_enter_idle_coupled) (cpuidle_enter_state) (cpuidle_enter_state_coupled) (cpuidle_enter) Let's use RCU_NONIDLE to suppress these splats. Things got changed around with commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the idle path") that started triggering these warnings. For the tick_broadcast related calls, ideally we'd just switch over to using CPUIDLE_FLAG_TIMER_STOP for omap_enter_idle_coupled() to have the generic cpuidle code handle the tick_broadcast related calls for us and then just drop the tick_broadcast calls here. But we're currently missing the call in the common cpuidle code for tick_broadcast_enable() that CPU1 hotplug needs as described in earlier commit 50d6b3cf9403 ("ARM: OMAP2+: fix lack of timer interrupts on CPU1 after hotplug"). Cc: Daniel Lezcano Cc: Paul E. McKenney Cc: Russell King Acked-by: Paul E. McKenney Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/cpuidle44xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c8d317fafe2e..de37027ad758 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -151,10 +151,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, (cx->mpu_logic_state == PWRDM_POWER_OFF); /* Enter broadcast mode for periodic timers */ - tick_broadcast_enable(); + RCU_NONIDLE(tick_broadcast_enable()); /* Enter broadcast mode for one-shot timers */ - tick_broadcast_enter(); + RCU_NONIDLE(tick_broadcast_enter()); /* * Call idle CPU PM enter notifier chain so that @@ -166,7 +166,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0) { pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); /* * Call idle CPU cluster PM enter notifier chain @@ -178,7 +178,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, index = 0; cx = state_ptr + index; pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); mpuss_can_lose_context = 0; } } @@ -194,9 +194,9 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context) gic_dist_disable(); - clkdm_deny_idle(cpu_clkdm[1]); - omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON); - clkdm_allow_idle(cpu_clkdm[1]); + RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1])); + RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON)); + RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1])); if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && mpuss_can_lose_context) { @@ -222,7 +222,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, cpu_pm_exit(); cpu_pm_out: - tick_broadcast_exit(); + RCU_NONIDLE(tick_broadcast_exit()); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); From cc512646b2e73b38ae8fdb6103524f3f73e4ca1a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 Jan 2021 13:49:41 +0100 Subject: [PATCH 10/61] platform/x86: hp-wmi: Disable tablet-mode reporting by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 67fbe02a5cebc3c653610f12e3c0424e58450153 ] Recently userspace has started making more use of SW_TABLET_MODE (when an input-dev reports this). Specifically recent GNOME3 versions will: 1. When SW_TABLET_MODE is reported and is reporting 0: 1.1 Disable accelerometer-based screen auto-rotation 1.2 Disable automatically showing the on-screen keyboard when a text-input field is focussed 2. When SW_TABLET_MODE is reported and is reporting 1: 2.1 Ignore input-events from the builtin keyboard and touchpad (this is for 360° hinges style 2-in-1s where the keyboard and touchpads are accessible on the back of the tablet when folded into tablet-mode) This means that claiming to support SW_TABLET_MODE when it does not actually work / reports correct values has bad side-effects. The check in the hp-wmi code which is used to decide if the input-dev should claim SW_TABLET_MODE support, only checks if the HPWMI_HARDWARE_QUERY is supported. It does *not* check if the hardware actually is capable of reporting SW_TABLET_MODE. This leads to the hp-wmi input-dev claiming SW_TABLET_MODE support, while in reality it will always report 0 as SW_TABLET_MODE value. This has been seen on a "HP ENVY x360 Convertible 15-cp0xxx" and this likely is the case on a whole lot of other HP models. This problem causes both auto-rotation and on-screen keyboard support to not work on affected x360 models. There is no easy fix for this, but since userspace expects SW_TABLET_MODE reporting to be reliable when advertised it is better to not claim/report SW_TABLET_MODE support at all, then to claim to support it while it does not work. To avoid the mentioned problems, add a new enable_tablet_mode_sw module-parameter which defaults to false. Note I've made this an int using the standard -1=auto, 0=off, 1=on triplett, with the hope that in the future we can come up with a better way to detect SW_TABLET_MODE support. ATM the default auto option just does the same as off. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1918255 Cc: Stefan Brüns Signed-off-by: Hans de Goede Acked-by: Mark Gross Link: https://lore.kernel.org/r/20210120124941.73409-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/hp-wmi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index a44a2ec33287..63a530a3d9fe 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -32,6 +32,10 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C"); MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); +static int enable_tablet_mode_sw = -1; +module_param(enable_tablet_mode_sw, int, 0444); +MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)"); + #define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" #define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" @@ -654,10 +658,12 @@ static int __init hp_wmi_input_setup(void) } /* Tablet mode */ - val = hp_wmi_hw_state(HPWMI_TABLET_MASK); - if (!(val < 0)) { - __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); - input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + if (enable_tablet_mode_sw > 0) { + val = hp_wmi_hw_state(HPWMI_TABLET_MASK); + if (val >= 0) { + __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); + input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + } } err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL); From df094aa0aab02bfd1baa130ee06114efff669807 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 28 Jan 2021 10:22:48 +0100 Subject: [PATCH 11/61] ovl: perform vfs_getxattr() with mounter creds [ Upstream commit 554677b97257b0b69378bd74e521edb7e94769ff ] The vfs_getxattr() in ovl_xattr_set() is used to check whether an xattr exist on a lower layer file that is to be removed. If the xattr does not exist, then no need to copy up the file. This call of vfs_getxattr() wasn't wrapped in credential override, and this is probably okay. But for consitency wrap this instance as well. Reported-by: "Eric W. Biederman" Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index bb980721502d..56b55397a7a0 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -337,7 +337,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, goto out; if (!value && !upperdentry) { + old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getxattr(realdentry, name, NULL, 0); + revert_creds(old_cred); if (err < 0) goto out_drop_write; } From 43e3cf46afb132f85df65dcfa1d08336055712b7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 28 Jan 2021 10:22:48 +0100 Subject: [PATCH 12/61] cap: fix conversions on getxattr [ Upstream commit f2b00be488730522d0fb7a8a5de663febdcefe0a ] If a capability is stored on disk in v2 format cap_inode_getsecurity() will currently return in v2 format unconditionally. This is wrong: v2 cap should be equivalent to a v3 cap with zero rootid, and so the same conversions performed on it. If the rootid cannot be mapped, v3 is returned unconverted. Fix this so that both v2 and v3 return -EOVERFLOW if the rootid (or the owner of the fs user namespace in case of v2) cannot be mapped into the current user namespace. Signed-off-by: Miklos Szeredi Acked-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- security/commoncap.c | 67 ++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 0ca31c8bc0b1..28a6939bcc4e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, { int size, ret; kuid_t kroot; + u32 nsmagic, magic; uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; - struct vfs_ns_cap_data *nscap; + struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns; @@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; if (is_v2header((size_t) ret, cap)) { - /* If this is sizeof(vfs_cap_data) then we're ok with the - * on-disk value, so return that. */ - if (alloc) - *buffer = tmpbuf; - else - kfree(tmpbuf); - return ret; - } else if (!is_v3header((size_t) ret, cap)) { - kfree(tmpbuf); - return -EINVAL; + root = 0; + } else if (is_v3header((size_t) ret, cap)) { + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + } else { + size = -EINVAL; + goto out_free; } - nscap = (struct vfs_ns_cap_data *) tmpbuf; - root = le32_to_cpu(nscap->rootid); kroot = make_kuid(fs_ns, root); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), kroot); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + size = sizeof(struct vfs_ns_cap_data); if (alloc) { - *buffer = tmpbuf; + if (!nscap) { + /* v2 -> v3 conversion */ + nscap = kzalloc(size, GFP_ATOMIC); + if (!nscap) { + size = -ENOMEM; + goto out_free; + } + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + nscap->magic_etc = cpu_to_le32(nsmagic); + } else { + /* use allocated v3 buffer */ + tmpbuf = NULL; + } nscap->rootid = cpu_to_le32(mappedroot); - } else - kfree(tmpbuf); - return size; + *buffer = nscap; + } + goto out_free; } if (!rootid_owns_currentns(kroot)) { - kfree(tmpbuf); - return -EOPNOTSUPP; + size = -EOVERFLOW; + goto out_free; } /* This comes from a parent namespace. Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { - *buffer = kmalloc(size, GFP_ATOMIC); - if (*buffer) { - struct vfs_cap_data *cap = *buffer; - __le32 nsmagic, magic; + if (nscap) { + /* v3 -> v2 conversion */ + cap = kzalloc(size, GFP_ATOMIC); + if (!cap) { + size = -ENOMEM; + goto out_free; + } magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) @@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); } else { - size = -ENOMEM; + /* use unconverted v2 */ + tmpbuf = NULL; } + *buffer = cap; } +out_free: kfree(tmpbuf); return size; } From 335a285aa0f0e1df6ac5c498c5dfc21b0880ea60 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Dec 2020 12:16:08 +0200 Subject: [PATCH 13/61] ovl: skip getxattr of security labels [ Upstream commit 03fedf93593c82538b18476d8c4f0e8f8435ea70 ] When inode has no listxattr op of its own (e.g. squashfs) vfs_listxattr calls the LSM inode_listsecurity hooks to list the xattrs that LSMs will intercept in inode_getxattr hooks. When selinux LSM is installed but not initialized, it will list the security.selinux xattr in inode_listsecurity, but will not intercept it in inode_getxattr. This results in -ENODATA for a getxattr call for an xattr returned by listxattr. This situation was manifested as overlayfs failure to copy up lower files from squashfs when selinux is built-in but not initialized, because ovl_copy_xattr() iterates the lower inode xattrs by vfs_listxattr() and vfs_getxattr(). ovl_copy_xattr() skips copy up of security labels that are indentified by inode_copy_up_xattr LSM hooks, but it does that after vfs_getxattr(). Since we are not going to copy them, skip vfs_getxattr() of the security labels. Reported-by: Michael Labriola Tested-by: Michael Labriola Link: https://lore.kernel.org/linux-unionfs/2nv9d47zt7.fsf@aldarion.sourceruckus.org/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/copy_up.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ec5eca5a96f4..7b758d623b5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -76,6 +76,14 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) if (ovl_is_private_xattr(name)) continue; + + error = security_inode_copy_up_xattr(name); + if (error < 0 && error != -EOPNOTSUPP) + break; + if (error == 1) { + error = 0; + continue; /* Discard */ + } retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) @@ -99,13 +107,6 @@ retry: goto retry; } - error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { - error = 0; - continue; /* Discard */ - } error = vfs_setxattr(new, name, value, size, 0); if (error) break; From 4516a0a2c087f437f2b00bac27d97c29a2756a46 Mon Sep 17 00:00:00 2001 From: Claus Stovgaard Date: Mon, 1 Feb 2021 22:08:22 +0100 Subject: [PATCH 14/61] nvme-pci: ignore the subsysem NQN on Phison E16 [ Upstream commit c9e95c39280530200cdd0bbd2670e6334a81970b ] Tested both with Corsairs firmware 11.3 and 13.0 for the Corsairs MP600 and both have the issue as reported by the kernel. nvme nvme0: missing or invalid SUBNQN field. Signed-off-by: Claus Stovgaard Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 434d3f21f0e1..19e375b59f40 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3147,6 +3147,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ From 76979956a8afdca5cb85d39608d20b7fa6b7608a Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Fri, 15 Jan 2021 13:53:15 -0500 Subject: [PATCH 15/61] drm/amd/display: Add more Clock Sources to DCN2.1 [ Upstream commit 1622711beebe887e4f0f8237fea1f09bb48e9a51 ] [WHY] When enabling HDMI on ComboPHY, there are not enough clock sources to complete display detection. [HOW] Initialize more clock sources. Signed-off-by: Sung Lee Reviewed-by: Tony Cheng Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index a6d5beada663..f63cbbee7b33 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -826,6 +826,8 @@ enum dcn20_clk_src_array_id { DCN20_CLK_SRC_PLL0, DCN20_CLK_SRC_PLL1, DCN20_CLK_SRC_PLL2, + DCN20_CLK_SRC_PLL3, + DCN20_CLK_SRC_PLL4, DCN20_CLK_SRC_TOTAL_DCN21 }; @@ -1498,6 +1500,14 @@ static bool construct( dcn21_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; From f35da70b516f50e35dfa1d7d06f9e70f7d8cee83 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 14 Jan 2021 22:24:14 -0500 Subject: [PATCH 16/61] drm/amd/display: Fix dc_sink kref count in emulated_link_detect [ Upstream commit 3ddc818d9bb877c64f5c649beab97af86c403702 ] [why] prev_sink is not used anywhere else in the function and the reference to it from dc_link is replaced with a new dc_sink. [how] Change dc_sink_retain(prev_sink) to dc_sink_release(prev_sink). Signed-off-by: Victor Lu Reviewed-by: Nicholas Kazlauskas Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index de06ee7d2ad4..3671b3e8f69d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1093,8 +1093,8 @@ static void emulated_link_detect(struct dc_link *link) link->type = dc_connection_none; prev_sink = link->local_sink; - if (prev_sink != NULL) - dc_sink_retain(prev_sink); + if (prev_sink) + dc_sink_release(prev_sink); switch (link->connector_signal) { case SIGNAL_TYPE_HDMI_TYPE_A: { From 5c8f63266255f8713541dce981c8279c2d738fa9 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 14 Jan 2021 16:27:07 -0500 Subject: [PATCH 17/61] drm/amd/display: Free atomic state after drm_atomic_commit [ Upstream commit 2abaa323d744011982b20b8f3886184d56d23946 ] [why] drm_atomic_commit was changed so that the caller must free their drm_atomic_state reference on successes. [how] Add drm_atomic_commit_put after drm_atomic_commit call in dm_force_atomic_commit. Signed-off-by: Victor Lu Reviewed-by: Roman Li Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3671b3e8f69d..b4da8d1e4fb8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6463,14 +6463,14 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(conn_state); if (ret) - goto err; + goto out; /* Attach crtc to drm_atomic_state*/ crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base); ret = PTR_ERR_OR_ZERO(crtc_state); if (ret) - goto err; + goto out; /* force a restore */ crtc_state->mode_changed = true; @@ -6480,17 +6480,15 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(plane_state); if (ret) - goto err; - + goto out; /* Call commit internally with the state we just constructed */ ret = drm_atomic_commit(state); - if (!ret) - return 0; -err: - DRM_ERROR("Restoring old state failed with %i\n", ret); +out: drm_atomic_state_put(state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); return ret; } From 0db8d192ee57ec206bd0410db7634c5cca38a45c Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Fri, 15 Jan 2021 11:02:48 -0500 Subject: [PATCH 18/61] drm/amd/display: Decrement refcount of dc_sink before reassignment [ Upstream commit 8e92bb0fa75bca9a57e4aba2e36f67d8016a3053 ] [why] An old dc_sink state is causing a memory leak because it is missing a dc_sink_release before a new dc_sink is assigned back to aconnector->dc_sink. [how] Decrement the dc_sink refcount before reassigning it to a new dc_sink. Signed-off-by: Victor Lu Reviewed-by: Rodrigo Siqueira Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b4da8d1e4fb8..fbbe611d4873 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1417,8 +1417,10 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) * TODO: check if we still need the S3 mode update workaround. * If yes, put it here. */ - if (aconnector->dc_sink) + if (aconnector->dc_sink) { amdgpu_dm_update_freesync_caps(connector, NULL); + dc_sink_release(aconnector->dc_sink); + } aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); From 91d604ab2a9910c1c585abe5f491cc016a77e1ac Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Jan 2021 12:31:05 -0500 Subject: [PATCH 19/61] riscv: virt_addr_valid must check the address belongs to linear mapping [ Upstream commit 2ab543823322b564f205cb15d0f0302803c87d11 ] virt_addr_valid macro checks that a virtual address is valid, ie that the address belongs to the linear mapping and that the corresponding physical page exists. Add the missing check that ensures the virtual address belongs to the linear mapping, otherwise __virt_to_phys, when compiled with CONFIG_DEBUG_VIRTUAL enabled, raises a WARN that is interpreted as a kernel bug by syzbot. Signed-off-by: Alexandre Ghiti Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/page.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 3db261c4810f..6a30794aa1ee 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -119,7 +119,10 @@ extern unsigned long min_low_pfn; #endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + (unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \ +}) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) From e60577e29deb5f7dacbb3c8a7b22cf88fdc716fe Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Tue, 2 Feb 2021 07:18:23 -0700 Subject: [PATCH 20/61] bfq-iosched: Revert "bfq: Fix computation of shallow depth" [ Upstream commit 388c705b95f23f317fa43e6abf9ff07b583b721a ] This reverts commit 6d4d273588378c65915acaf7b2ee74e9dd9c130a. bfq.limit_depth passes word_depths[] as shallow_depth down to sbitmap core sbitmap_get_shallow, which uses just the number to limit the scan depth of each bitmap word, formula: scan_percentage_for_each_word = shallow_depth / (1 << sbimap->shift) * 100% That means the comments's percentiles 50%, 75%, 18%, 37% of bfq are correct. But after commit patch 'bfq: Fix computation of shallow depth', we use sbitmap.depth instead, as a example in following case: sbitmap.depth = 256, map_nr = 4, shift = 6; sbitmap_word.depth = 64. The resulsts of computed bfqd->word_depths[] are {128, 192, 48, 96}, and three of the numbers exceed core dirver's 'sbitmap_word.depth=64' limit nothing. Signed-off-by: Lin Feng Reviewed-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7d19aae015ae..ba32adaeefdd 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6320,13 +6320,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); + bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6336,9 +6336,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) From 06350c7f7923b7613564b77b44edb135e918c89d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 3 Feb 2021 10:03:20 +0100 Subject: [PATCH 21/61] ARM: dts: lpc32xx: Revert set default clock rate of HCLK PLL [ Upstream commit 5638159f6d93b99ec9743ac7f65563fca3cf413d ] This reverts commit c17e9377aa81664d94b4f2102559fcf2a01ec8e7. The lpc32xx clock driver is not able to actually change the PLL rate as this would require reparenting ARM_CLK, DDRAM_CLK, PERIPH_CLK to SYSCLK, then stop the PLL, update the register, restart the PLL and wait for the PLL to lock and finally reparent ARM_CLK, DDRAM_CLK, PERIPH_CLK to HCLK PLL. Currently, the HCLK driver simply updates the registers but this has no real effect and all the clock rate calculation end up being wrong. This is especially annoying for the peripheral (e.g. UARTs, I2C, SPI). Signed-off-by: Alexandre Belloni Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/r/20210203090320.GA3760268@piout.net' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 7b7ec7b1217b..824393e1bcfb 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -329,9 +329,6 @@ clocks = <&xtal_32k>, <&xtal>; clock-names = "xtal_32k", "xtal"; - - assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>; - assigned-clock-rates = <208000000>; }; }; From f49bff85b6dbb60a410c7f7dc53b52ee1dc22470 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 29 Jan 2021 10:19:07 +0000 Subject: [PATCH 22/61] ARM: ensure the signal page contains defined contents [ Upstream commit 9c698bff66ab4914bb3d71da7dc6112519bde23e ] Ensure that the signal page contains our poison instruction to increase the protection against ROP attacks and also contains well defined contents. Acked-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/signal.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb..c01f76cd0242 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -694,18 +694,20 @@ struct page *get_signal_page(void) addr = page_address(page); + /* Poison the entire page */ + memset32(addr, __opcode_to_mem_arm(0xe7fddef1), + PAGE_SIZE / sizeof(u32)); + /* Give the signal return code some randomness */ offset = 0x200 + (get_random_int() & 0x7fc); signal_return_offset = offset; - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. - */ + /* Copy signal return handlers into the page */ memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); - ptr = (unsigned long)addr + offset; - flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + /* Flush out all instructions in this page */ + ptr = (unsigned long)addr; + flush_icache_range(ptr, ptr + PAGE_SIZE); return page; } From b7f41a7ad44426c5b73a5be7f66e8b8528b03bb0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 1 Feb 2021 19:40:01 +0000 Subject: [PATCH 23/61] ARM: kexec: fix oops after TLB are invalidated [ Upstream commit 4d62e81b60d4025e2dfcd5ea531cc1394ce9226f ] Giancarlo Ferrari reports the following oops while trying to use kexec: Unable to handle kernel paging request at virtual address 80112f38 pgd = fd7ef03e [80112f38] *pgd=0001141e(bad) Internal error: Oops: 80d [#1] PREEMPT SMP ARM ... This is caused by machine_kexec() trying to set the kernel text to be read/write, so it can poke values into the relocation code before copying it - and an interrupt occuring which changes the page tables. The subsequent writes then hit read-only sections that trigger a data abort resulting in the above oops. Fix this by copying the relocation code, and then writing the variables into the destination, thereby avoiding the need to make the kernel text read/write. Reported-by: Giancarlo Ferrari Tested-by: Giancarlo Ferrari Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/include/asm/kexec-internal.h | 12 +++++++++ arch/arm/kernel/asm-offsets.c | 5 ++++ arch/arm/kernel/machine_kexec.c | 20 ++++++-------- arch/arm/kernel/relocate_kernel.S | 38 ++++++++------------------- 4 files changed, 36 insertions(+), 39 deletions(-) create mode 100644 arch/arm/include/asm/kexec-internal.h diff --git a/arch/arm/include/asm/kexec-internal.h b/arch/arm/include/asm/kexec-internal.h new file mode 100644 index 000000000000..ecc2322db7aa --- /dev/null +++ b/arch/arm/include/asm/kexec-internal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM_KEXEC_INTERNAL_H +#define _ARM_KEXEC_INTERNAL_H + +struct kexec_relocate_data { + unsigned long kexec_start_address; + unsigned long kexec_indirection_page; + unsigned long kexec_mach_type; + unsigned long kexec_r2; +}; + +#endif diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c773b829ee8e..bfb05c93494d 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -15,6 +15,7 @@ #include #endif #include +#include #include #include #include @@ -190,5 +191,9 @@ int main(void) DEFINE(MPU_RGN_PRBAR, offsetof(struct mpu_rgn, prbar)); DEFINE(MPU_RGN_PRLAR, offsetof(struct mpu_rgn, prlar)); #endif + DEFINE(KEXEC_START_ADDR, offsetof(struct kexec_relocate_data, kexec_start_address)); + DEFINE(KEXEC_INDIR_PAGE, offsetof(struct kexec_relocate_data, kexec_indirection_page)); + DEFINE(KEXEC_MACH_TYPE, offsetof(struct kexec_relocate_data, kexec_mach_type)); + DEFINE(KEXEC_R2, offsetof(struct kexec_relocate_data, kexec_r2)); return 0; } diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 76300f3813e8..734adeb42df8 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,11 +25,6 @@ extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; -extern unsigned long kexec_start_address; -extern unsigned long kexec_indirection_page; -extern unsigned long kexec_mach_type; -extern unsigned long kexec_boot_atags; - static atomic_t waiting_for_crash_ipi; /* @@ -161,6 +157,7 @@ void (*kexec_reinit)(void); void machine_kexec(struct kimage *image) { unsigned long page_list, reboot_entry_phys; + struct kexec_relocate_data *data; void (*reboot_entry)(void); void *reboot_code_buffer; @@ -176,18 +173,17 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); - /* Prepare parameters for reboot_code_buffer*/ - set_kernel_text_rw(); - kexec_start_address = image->start; - kexec_indirection_page = page_list; - kexec_mach_type = machine_arch_type; - kexec_boot_atags = image->arch.kernel_r2; - /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, &relocate_new_kernel, relocate_new_kernel_size); + data = reboot_code_buffer + relocate_new_kernel_size; + data->kexec_start_address = image->start; + data->kexec_indirection_page = page_list; + data->kexec_mach_type = machine_arch_type; + data->kexec_r2 = image->arch.kernel_r2; + /* get the identity mapping physical address for the reboot code */ reboot_entry_phys = virt_to_idmap(reboot_entry); diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 7eaa2ae7aff5..5e15b5912cb0 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -5,14 +5,16 @@ #include #include +#include #include .align 3 /* not needed for this code, but keeps fncpy() happy */ ENTRY(relocate_new_kernel) - ldr r0,kexec_indirection_page - ldr r1,kexec_start_address + adr r7, relocate_new_kernel_end + ldr r0, [r7, #KEXEC_INDIR_PAGE] + ldr r1, [r7, #KEXEC_START_ADDR] /* * If there is no indirection page (we are doing crashdumps) @@ -57,34 +59,16 @@ ENTRY(relocate_new_kernel) 2: /* Jump to relocated kernel */ - mov lr,r1 - mov r0,#0 - ldr r1,kexec_mach_type - ldr r2,kexec_boot_atags - ARM( ret lr ) - THUMB( bx lr ) - - .align - - .globl kexec_start_address -kexec_start_address: - .long 0x0 - - .globl kexec_indirection_page -kexec_indirection_page: - .long 0x0 - - .globl kexec_mach_type -kexec_mach_type: - .long 0x0 - - /* phy addr of the atags for the new kernel */ - .globl kexec_boot_atags -kexec_boot_atags: - .long 0x0 + mov lr, r1 + mov r0, #0 + ldr r1, [r7, #KEXEC_MACH_TYPE] + ldr r2, [r7, #KEXEC_R2] + ARM( ret lr ) + THUMB( bx lr ) ENDPROC(relocate_new_kernel) + .align 3 relocate_new_kernel_end: .globl relocate_new_kernel_size From 4f5416710e13eb4e1587f6c38e92e9134cf5f480 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Mar 2020 22:47:17 +0100 Subject: [PATCH 24/61] vmlinux.lds.h: Create section for protection against instrumentation [ Upstream commit 6553896666433e7efec589838b400a2a652b3ffa ] Some code pathes, especially the low level entry code, must be protected against instrumentation for various reasons: - Low level entry code can be a fragile beast, especially on x86. - With NO_HZ_FULL RCU state needs to be established before using it. Having a dedicated section for such code allows to validate with tooling that no unsafe functions are invoked. Add the .noinstr.text section and the noinstr attribute to mark functions. noinstr implies notrace. Kprobes will gain a section check later. Provide also a set of markers: instrumentation_begin()/end() These are used to mark code inside a noinstr function which calls into regular instrumentable text section as safe. The instrumentation markers are only active when CONFIG_DEBUG_ENTRY is enabled as the end marker emits a NOP to prevent the compiler from merging the annotation points. This means the objtool verification requires a kernel compiled with this option. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.075416272@linutronix.de Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vmlinux.lds.S | 1 + include/asm-generic/sections.h | 3 ++ include/asm-generic/vmlinux.lds.h | 10 ++++++ include/linux/compiler.h | 53 +++++++++++++++++++++++++++++++ include/linux/compiler_types.h | 4 +++ scripts/mod/modpost.c | 2 +- 6 files changed, 72 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a4e576019d79..3ea360cad337 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -102,6 +102,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 130f16cc0b86..9a4a5a43e886 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -510,6 +510,15 @@ #define RODATA RO_DATA_SECTION(4096) #define RO_DATA(align) RO_DATA_SECTION(align) +/* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -524,6 +533,7 @@ *(TEXT_MAIN .text.fixup) \ *(.text.unlikely .text.unlikely.*) \ *(.text.unknown .text.unknown.*) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f164a9b12813..9446e8fbe55c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -134,12 +134,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 77433633572e..b94d08d055ff 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 52f1152c9838..13cda6aa2688 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -960,7 +960,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" From 8a5991c8214b8af3ee190431892983c334110baa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Dec 2020 17:05:33 +0000 Subject: [PATCH 25/61] lkdtm: don't move ctors to .rodata [ Upstream commit 3f618ab3323407ee4c6a6734a37eb6e9663ebfb9 ] When building with KASAN and LKDTM, clang may implictly generate an asan.module_ctor function in the LKDTM rodata object. The Makefile moves the lkdtm_rodata_do_nothing() function into .rodata by renaming the file's .text section to .rodata, and consequently also moves the ctor function into .rodata, leading to a boot time crash (splat below) when the ctor is invoked by do_ctors(). Let's prevent this by marking the function as noinstr rather than notrace, and renaming the file's .noinstr.text to .rodata. Marking the function as noinstr will prevent tracing and kprobes, and will inhibit any undesireable compiler instrumentation. The ctor function (if any) will be placed in .text and will work correctly. Example splat before this patch is applied: [ 0.916359] Unable to handle kernel execute from non-executable memory at virtual address ffffa0006b60f5ac [ 0.922088] Mem abort info: [ 0.922828] ESR = 0x8600000e [ 0.923635] EC = 0x21: IABT (current EL), IL = 32 bits [ 0.925036] SET = 0, FnV = 0 [ 0.925838] EA = 0, S1PTW = 0 [ 0.926714] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000427b3000 [ 0.928489] [ffffa0006b60f5ac] pgd=000000023ffff003, p4d=000000023ffff003, pud=000000023fffe003, pmd=0068000042000f01 [ 0.931330] Internal error: Oops: 8600000e [#1] PREEMPT SMP [ 0.932806] Modules linked in: [ 0.933617] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-rc7 #2 [ 0.935620] Hardware name: linux,dummy-virt (DT) [ 0.936924] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 0.938609] pc : asan.module_ctor+0x0/0x14 [ 0.939759] lr : do_basic_setup+0x4c/0x70 [ 0.940889] sp : ffff27b600177e30 [ 0.941815] x29: ffff27b600177e30 x28: 0000000000000000 [ 0.943306] x27: 0000000000000000 x26: 0000000000000000 [ 0.944803] x25: 0000000000000000 x24: 0000000000000000 [ 0.946289] x23: 0000000000000001 x22: 0000000000000000 [ 0.947777] x21: ffffa0006bf4a890 x20: ffffa0006befb6c0 [ 0.949271] x19: ffffa0006bef9358 x18: 0000000000000068 [ 0.950756] x17: fffffffffffffff8 x16: 0000000000000000 [ 0.952246] x15: 0000000000000000 x14: 0000000000000000 [ 0.953734] x13: 00000000838a16d5 x12: 0000000000000001 [ 0.955223] x11: ffff94000da74041 x10: dfffa00000000000 [ 0.956715] x9 : 0000000000000000 x8 : ffffa0006b60f5ac [ 0.958199] x7 : f9f9f9f9f9f9f9f9 x6 : 000000000000003f [ 0.959683] x5 : 0000000000000040 x4 : 0000000000000000 [ 0.961178] x3 : ffffa0006bdc15a0 x2 : 0000000000000005 [ 0.962662] x1 : 00000000000000f9 x0 : ffffa0006bef9350 [ 0.964155] Call trace: [ 0.964844] asan.module_ctor+0x0/0x14 [ 0.965895] kernel_init_freeable+0x158/0x198 [ 0.967115] kernel_init+0x14/0x19c [ 0.968104] ret_from_fork+0x10/0x30 [ 0.969110] Code: 00000003 00000000 00000000 00000000 (00000000) [ 0.970815] ---[ end trace b5339784e20d015c ]--- Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Kees Cook Acked-by: Kees Cook Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20201207170533.10738-1-mark.rutland@arm.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/lkdtm/Makefile | 2 +- drivers/misc/lkdtm/rodata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index c70b3822013f..30c8ac24635d 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -16,7 +16,7 @@ KCOV_INSTRUMENT_rodata.o := n OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ - --rename-section .text=.rodata,alloc,readonly,load + --rename-section .noinstr.text=.rodata,alloc,readonly,load targets += rodata.o rodata_objcopy.o $(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/lkdtm/rodata.c b/drivers/misc/lkdtm/rodata.c index 58d180af72cf..baacb876d1d9 100644 --- a/drivers/misc/lkdtm/rodata.c +++ b/drivers/misc/lkdtm/rodata.c @@ -5,7 +5,7 @@ */ #include "lkdtm.h" -void notrace lkdtm_rodata_do_nothing(void) +void noinstr lkdtm_rodata_do_nothing(void) { /* Does nothing. We just want an architecture agnostic "return". */ } From 69beec4a8b367fa37cfa82123d26f74a72ce7b4a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 26 Jan 2021 12:02:13 +0100 Subject: [PATCH 26/61] mt76: dma: fix a possible memory leak in mt76_add_fragment() [ Upstream commit 93a1d4791c10d443bc67044def7efee2991d48b7 ] Fix a memory leak in mt76_add_fragment routine returning the buffer to the page_frag_cache when we receive a new fragment and the skb_shared_info frag array is full. Fixes: b102f0c522cf6 ("mt76: fix array overflow on receiving too many fragments for a packet") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/4f9dd73407da88b2a552517ce8db242d86bf4d5c.1611616130.git.lorenzo@kernel.org Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/dma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 026d996612fb..781952b686ed 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -452,15 +452,17 @@ static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, int len, bool more) { - struct page *page = virt_to_head_page(data); - int offset = data - page_address(page); struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { - offset += q->buf_offset; + struct page *page = virt_to_head_page(data); + int offset = data - page_address(page) + q->buf_offset; + skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, q->buf_size); + } else { + skb_free_frag(data); } if (more) From e29126678f652249eca4a6576d3b7a0522acd52f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 29 Jan 2021 17:06:47 +0100 Subject: [PATCH 27/61] drm/vc4: hvs: Fix buffer overflow with the dlist handling [ Upstream commit facd93f4285c405f9a91b05166147cb39e860666 ] Commit 0a038c1c29a7 ("drm/vc4: Move LBM creation out of vc4_plane_mode_set()") changed the LBM allocation logic from first allocating the LBM memory for the plane to running mode_set, adding a gap in the LBM, and then running the dlist allocation filling that gap. The gap was introduced by incrementing the dlist array index, but was never checking whether or not we were over the array length, leading eventually to memory corruptions if we ever crossed this limit. vc4_dlist_write had that logic though, and was reallocating a larger dlist array when reaching the end of the buffer. Let's share the logic between both functions. Cc: Boris Brezillon Cc: Eric Anholt Fixes: 0a038c1c29a7 ("drm/vc4: Move LBM creation out of vc4_plane_mode_set()") Signed-off-by: Maxime Ripard Acked-by: Thomas Zimmermann Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20210129160647.128373-1-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_plane.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 5e5f90810aca..363f456ea713 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -205,7 +205,7 @@ static void vc4_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_reset(plane, &vc4_state->base); } -static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) +static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) { if (vc4_state->dlist_count == vc4_state->dlist_size) { u32 new_size = max(4u, vc4_state->dlist_count * 2); @@ -220,7 +220,15 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist_size = new_size; } - vc4_state->dlist[vc4_state->dlist_count++] = val; + vc4_state->dlist_count++; +} + +static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) +{ + unsigned int idx = vc4_state->dlist_count; + + vc4_dlist_counter_increment(vc4_state); + vc4_state->dlist[idx] = val; } /* Returns the scl0/scl1 field based on whether the dimensions need to @@ -871,8 +879,10 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * be set when calling vc4_plane_allocate_lbm(). */ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || - vc4_state->y_scaling[1] != VC4_SCALING_NONE) - vc4_state->lbm_offset = vc4_state->dlist_count++; + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { + vc4_state->lbm_offset = vc4_state->dlist_count; + vc4_dlist_counter_increment(vc4_state); + } if (num_planes > 1) { /* Emit Cb/Cr as channel 0 and Y as channel From 7e3a6b820535eb395784060ae26c5af579528fa0 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 27 Jan 2021 06:36:53 +0000 Subject: [PATCH 28/61] bpf: Check for integer overflow when using roundup_pow_of_two() [ Upstream commit 6183f4d3a0a2ad230511987c6c362ca43ec0055f ] On 32-bit architecture, roundup_pow_of_two() can return 0 when the argument has upper most bit set due to resulting 1UL << 32. Add a check for this case. Fixes: d5a3b1f69186 ("bpf: introduce BPF_MAP_TYPE_STACK_TRACE") Signed-off-by: Bui Quang Minh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127063653.3576-1-minhquangbui99@gmail.com Signed-off-by: Sasha Levin --- kernel/bpf/stackmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 173e983619d7..fba2ade28fb3 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -112,6 +112,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); + if (!n_buckets) + return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); From 35c279e02b7ee4e93fe48754f09fffae6be2747c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 29 Jan 2021 20:57:43 +0100 Subject: [PATCH 29/61] netfilter: xt_recent: Fix attempt to update deleted entry [ Upstream commit b1bdde33b72366da20d10770ab7a49fe87b5e190 ] When both --reap and --update flag are specified, there's a code path at which the entry to be updated is reaped beforehand, which then leads to kernel crash. Reap only entries which won't be updated. Fixes kernel bugzilla #207773. Link: https://bugzilla.kernel.org/show_bug.cgi?id=207773 Reported-by: Reindl Harald Fixes: 0079c5aee348 ("netfilter: xt_recent: add an entry reaper") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_recent.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 6c2582a19766..3469b6073610 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -152,7 +152,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) /* * Drop entries with timestamps older then 'time'. */ -static void recent_entry_reap(struct recent_table *t, unsigned long time) +static void recent_entry_reap(struct recent_table *t, unsigned long time, + struct recent_entry *working, bool update) { struct recent_entry *e; @@ -161,6 +162,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time) */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list); + /* + * Do not reap the entry which are going to be updated. + */ + if (e == working && update) + return; + /* * The last time stamp is the most recent. */ @@ -303,7 +310,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) - recent_entry_reap(t, time); + recent_entry_reap(t, time, e, + info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || From b110391d1e806167254d3c7ae5d637191d913175 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Feb 2021 16:07:37 +0100 Subject: [PATCH 30/61] netfilter: nftables: fix possible UAF over chains from packet path in netns [ Upstream commit 767d1216bff82507c945e92fe719dff2083bb2f4 ] Although hooks are released via call_rcu(), chain and rule objects are immediately released while packets are still walking over these bits. This patch adds the .pre_exit callback which is invoked before synchronize_rcu() in the netns framework to stay safe. Remove a comment which is not valid anymore since the core does not use synchronize_net() anymore since 8c873e219970 ("netfilter: core: free hooks with call_rcu"). Suggested-by: Florian Westphal Fixes: df05ef874b28 ("netfilter: nf_tables: release objects on netns destruction") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 40216c2a7dd7..373ea0e49f12 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7696,6 +7696,17 @@ int __nft_release_basechain(struct nft_ctx *ctx) } EXPORT_SYMBOL_GPL(__nft_release_basechain); +static void __nft_release_hooks(struct net *net) +{ + struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + } +} + static void __nft_release_tables(struct net *net) { struct nft_flowtable *flowtable, *nf; @@ -7711,10 +7722,6 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(table, nt, &net->nft.tables, list) { ctx.family = table->family; - - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { ctx.chain = chain; @@ -7762,6 +7769,11 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +static void __net_exit nf_tables_pre_exit_net(struct net *net) +{ + __nft_release_hooks(net); +} + static void __net_exit nf_tables_exit_net(struct net *net) { mutex_lock(&net->nft.commit_mutex); @@ -7774,8 +7786,9 @@ static void __net_exit nf_tables_exit_net(struct net *net) } static struct pernet_operations nf_tables_net_ops = { - .init = nf_tables_init_net, - .exit = nf_tables_exit_net, + .init = nf_tables_init_net, + .pre_exit = nf_tables_pre_exit_net, + .exit = nf_tables_exit_net, }; static int __init nf_tables_module_init(void) From ff758e8a77f3efa5b0e7805d9dab459c65e1d49d Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Tue, 2 Feb 2021 18:01:16 +0100 Subject: [PATCH 31/61] netfilter: flowtable: fix tcp and udp header checksum update [ Upstream commit 8d6bca156e47d68551750a384b3ff49384c67be3 ] When updating the tcp or udp header checksum on port nat the function inet_proto_csum_replace2 with the last parameter pseudohdr as true. This leads to an error in the case that GRO is used and packets are split up in GSO. The tcp or udp checksum of all packets is incorrect. The error is probably masked due to the fact the most network driver implement tcp/udp checksum offloading. It also only happens when GRO is applied and not on single packets. The error is most visible when using a pppoe connection which is not triggering the tcp/udp checksum offload. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Sven Auhagen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 128245efe84a..e05e5df803d6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -354,7 +354,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, return -1; tcph = (void *)(skb_network_header(skb) + thoff); - inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); + inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); return 0; } @@ -371,7 +371,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, - new_port, true); + new_port, false); if (!udph->check) udph->check = CSUM_MANGLED_0; } From 71d2bd7921db2e87cfea2dac51e14b185a28ac2d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 2 Feb 2021 08:09:38 +0100 Subject: [PATCH 32/61] xen/netback: avoid race in xenvif_rx_ring_slots_available() [ Upstream commit ec7d8e7dd3a59528e305a18e93f1cb98f7faf83b ] Since commit 23025393dbeb3b8b3 ("xen/netback: use lateeoi irq binding") xenvif_rx_ring_slots_available() is no longer called only from the rx queue kernel thread, so it needs to access the rx queue with the associated queue held. Reported-by: Igor Druzhinin Fixes: 23025393dbeb3b8b3 ("xen/netback: use lateeoi irq binding") Signed-off-by: Juergen Gross Acked-by: Wei Liu Link: https://lore.kernel.org/r/20210202070938.7863-1-jgross@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/xen-netback/rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 9b62f65b630e..48e2006f96ce 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -38,10 +38,15 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) RING_IDX prod, cons; struct sk_buff *skb; int needed; + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); skb = skb_peek(&queue->rx_queue); - if (!skb) + if (!skb) { + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); return false; + } needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); if (skb_is_gso(skb)) @@ -49,6 +54,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) if (skb->sw_hash) needed++; + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + do { prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; From 6107338c2fa2ee6264fc26e50462061377e4d4f3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Feb 2021 15:45:11 +0200 Subject: [PATCH 33/61] net: enetc: initialize the RFS and RSS memories [ Upstream commit 07bf34a50e327975b21a9dee64d220c3dcb72ee9 ] Michael tried to enable Advanced Error Reporting through the ENETC's Root Complex Event Collector, and the system started spitting out single bit correctable ECC errors coming from the ENETC interfaces: pcieport 0000:00:1f.0: AER: Multiple Corrected error received: 0000:00:00.0 fsl_enetc 0000:00:00.0: PCIe Bus Error: severity=Corrected, type=Transaction Layer, (Receiver ID) fsl_enetc 0000:00:00.0: device [1957:e100] error status/mask=00004000/00000000 fsl_enetc 0000:00:00.0: [14] CorrIntErr fsl_enetc 0000:00:00.1: PCIe Bus Error: severity=Corrected, type=Transaction Layer, (Receiver ID) fsl_enetc 0000:00:00.1: device [1957:e100] error status/mask=00004000/00000000 fsl_enetc 0000:00:00.1: [14] CorrIntErr Further investigating the port correctable memory error detect register (PCMEDR) shows that these AER errors have an associated SOURCE_ID of 6 (RFS/RSS): $ devmem 0x1f8010e10 32 0xC0000006 $ devmem 0x1f8050e10 32 0xC0000006 Discussion with the hardware design engineers reveals that on LS1028A, the hardware does not do initialization of that RFS/RSS memory, and that software should clear/initialize the entire table before starting to operate. That comes as a bit of a surprise, since the driver does not do initialization of the RFS memory. Also, the initialization of the Receive Side Scaling is done only partially. Even though the entire ENETC IP has a single shared flow steering memory, the flow steering service should returns matches only for TCAM entries that are within the range of the Station Interface that is doing the search. Therefore, it should be sufficient for a Station Interface to initialize all of its own entries in order to avoid any ECC errors, and only the Station Interfaces in use should need initialization. There are Physical Station Interfaces associated with PCIe PFs and Virtual Station Interfaces associated with PCIe VFs. We let the PF driver initialize the entire port's memory, which includes the RFS entries which are going to be used by the VF. Reported-by: Michael Walle Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Vladimir Oltean Tested-by: Michael Walle Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210204134511.2640309-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/freescale/enetc/enetc_hw.h | 2 + .../net/ethernet/freescale/enetc/enetc_pf.c | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 7428f62408a2..fac80831d532 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -181,6 +181,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PTCCBSR0(n) (0x1110 + (n) * 8) /* n = 0 to 7*/ #define ENETC_PTCCBSR1(n) (0x1114 + (n) * 8) /* n = 0 to 7*/ #define ENETC_RSSHASH_KEY_SIZE 40 +#define ENETC_PRSSCAPR 0x1404 +#define ENETC_PRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) #define ENETC_PRSSK(n) (0x1410 + (n) * 4) /* n = [0..9] */ #define ENETC_PSIVLANFMR 0x1700 #define ENETC_PSIVLANFMR_VS BIT(0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 74847aa644f1..22f70638a405 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -809,6 +809,51 @@ static void enetc_of_put_phy(struct enetc_ndev_priv *priv) of_node_put(priv->phy_node); } +/* Initialize the entire shared memory for the flow steering entries + * of this port (PF + VFs) + */ +static int enetc_init_port_rfs_memory(struct enetc_si *si) +{ + struct enetc_cmd_rfse rfse = {0}; + struct enetc_hw *hw = &si->hw; + int num_rfs, i, err = 0; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRFSCAPR); + num_rfs = ENETC_PRFSCAPR_GET_NUM_RFS(val); + + for (i = 0; i < num_rfs; i++) { + err = enetc_set_fs_entry(si, &rfse, i); + if (err) + break; + } + + return err; +} + +static int enetc_init_port_rss_memory(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + int num_rss, err; + int *rss_table; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRSSCAPR); + num_rss = ENETC_PRSSCAPR_GET_NUM_RSS(val); + if (!num_rss) + return 0; + + rss_table = kcalloc(num_rss, sizeof(*rss_table), GFP_KERNEL); + if (!rss_table) + return -ENOMEM; + + err = enetc_set_rss_table(si, rss_table, num_rss); + + kfree(rss_table); + + return err; +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -863,6 +908,18 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } + err = enetc_init_port_rfs_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); + goto err_init_port_rfs; + } + + err = enetc_init_port_rss_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); + goto err_init_port_rss; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -888,6 +945,8 @@ err_reg_netdev: enetc_mdio_remove(pf); enetc_of_put_phy(priv); enetc_free_msix(priv); +err_init_port_rss: +err_init_port_rfs: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: From c0daa74b8ed74c70a76b9bca33c0b7c6796876c0 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 4 Feb 2021 20:50:34 +0300 Subject: [PATCH 34/61] selftests: txtimestamp: fix compilation issue [ Upstream commit 647b8dd5184665432cc8a2b5bca46a201f690c37 ] PACKET_TX_TIMESTAMP is defined in if_packet.h but it is not included in test. Include it instead of otherwise the error of redefinition arrives. Also fix the compiler warning about ambiguous control flow by adding explicit braces. Fixes: 8fe2f761cae9 ("net-timestamp: expand documentation") Suggested-by: Willem de Bruijn Signed-off-by: Vadim Fedorenko Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/1612461034-24524-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../testing/selftests/networking/timestamping/txtimestamp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 7e386be47120..2fce2e8f47f5 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -396,12 +396,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * also see ANK comment in net/ipv4/raw.c From 3f8954cc624c7516455395c2e842d563f280515d Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Thu, 4 Feb 2021 22:03:16 +0800 Subject: [PATCH 35/61] net: stmmac: set TxQ mode back to DCB after disabling CBS [ Upstream commit f317e2ea8c88737aa36228167b2292baef3f0430 ] When disable CBS, mode_to_use parameter is not updated even the operation mode of Tx Queue is changed to Data Centre Bridging (DCB). Therefore, when tc_setup_cbs() function is called to re-enable CBS, the operation mode of Tx Queue remains at DCB, which causing CBS fails to work. This patch updates the value of mode_to_use parameter to MTL_QUEUE_DCB after operation mode of Tx Queue is changed to DCB in stmmac_dma_qmode() callback function. Fixes: 1f705bc61aee ("net: stmmac: Add support for CBS QDISC") Suggested-by: Vinicius Costa Gomes Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: Song, Yoong Siang Reviewed-by: Jesse Brandeburg Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/1612447396-20351-1-git-send-email-yoong.siang.song@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1d135b02ea02..52b453b60597 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -332,7 +332,12 @@ static int tc_setup_cbs(struct stmmac_priv *priv, priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB; } else if (!qopt->enable) { - return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB); + ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, + MTL_QUEUE_DCB); + if (ret) + return ret; + + priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; } /* Port Transmit Rate and Speed Divider */ From 0142b957866f893da6480cf0b5764c188504e72b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 2 Feb 2021 21:08:02 -0800 Subject: [PATCH 36/61] ibmvnic: Clear failover_pending if unable to schedule [ Upstream commit ef66a1eace968ff22a35f45e6e8ec36b668b6116 ] Normally we clear the failover_pending flag when processing the reset. But if we are unable to schedule a failover reset we must clear the flag ourselves. We could fail to schedule the reset if we are in PROBING state (eg: when booting via kexec) or because we could not allocate memory. Thanks to Cris Forno for helping isolate the problem and for testing. Fixes: 1d8504937478 ("powerpc/vnic: Extend "failover pending" window") Signed-off-by: Sukadev Bhattiprolu Tested-by: Cristobal Forno Link: https://lore.kernel.org/r/20210203050802.680772-1-sukadev@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c3079f436f6d..0f35eec967ae 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4595,7 +4595,22 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, complete(&adapter->init_done); adapter->init_done_rc = -EIO; } - ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); From da1a5442ad2c7cdcc36814b7d39626bde618df99 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Feb 2021 12:56:43 +0100 Subject: [PATCH 37/61] netfilter: conntrack: skip identical origin tuple in same zone only [ Upstream commit 07998281c268592963e1cd623fe6ab0270b65ae4 ] The origin skip check needs to re-test the zone. Else, we might skip a colliding tuple in the reply direction. This only occurs when using 'directional zones' where origin tuples reside in different zones but the reply tuples share the same zone. This causes the new conntrack entry to be dropped at confirmation time because NAT clash resolution was elided. Fixes: 4e35c1cb9460240 ("netfilter: nf_nat: skip nat clash resolution for same-origin entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 200cdad3ff3a..9a40312b1f16 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1091,7 +1091,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * Let nf_ct_resolve_clash() deal with this later. */ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) continue; NF_CT_STAT_INC_ATOMIC(net, found); From a045956a3b41ae27814f3855259ed2dcc5b3348f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 8 Feb 2021 16:43:30 +0100 Subject: [PATCH 38/61] x86/build: Disable CET instrumentation in the kernel for 32-bit too [ Upstream commit 256b92af784d5043eeb7d559b6d5963dcc2ecb10 ] Commit 20bf2b378729 ("x86/build: Disable CET instrumentation in the kernel") disabled CET instrumentation which gets added by default by the Ubuntu gcc9 and 10 by default, but did that only for 64-bit builds. It would still fail when building a 32-bit target. So disable CET for all x86 builds. Fixes: 20bf2b378729 ("x86/build: Disable CET instrumentation in the kernel") Reported-by: AC Signed-off-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: AC Link: https://lkml.kernel.org/r/YCCIgMHkzh/xT4ex@arch-chirva.localdomain Signed-off-by: Sasha Levin --- arch/x86/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b5e3bfd4face..8ca3cf7c5ec9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -61,6 +61,9 @@ endif KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,) +# Intel CET isn't enabled in the kernel +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 @@ -131,9 +134,6 @@ else KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel - - # Intel CET isn't enabled in the kernel - KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif ifdef CONFIG_X86_X32 From beb85f4927e85d4babe24d492aa4327b283f74ae Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 9 Feb 2021 17:03:05 +0800 Subject: [PATCH 39/61] net: hns3: add a check for queue_id in hclge_reset_vf_queue() [ Upstream commit 67a69f84cab60484f02eb8cbc7a76edffbb28a25 ] The queue_id is received from vf, if use it directly, an out-of-bound issue may be caused, so add a check for this queue_id before using it in hclge_reset_vf_queue(). Fixes: 1a426f8b40fc ("net: hns3: fix the VF queue reset flow error") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6887b7fda6e0..08040cafc06b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8563,12 +8563,19 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) { + struct hnae3_handle *handle = &vport->nic; struct hclge_dev *hdev = vport->back; int reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + if (queue_id >= handle->kinfo.num_tqps) { + dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", + queue_id); + return; + } + queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); From 107cf5eede74c0b927a3154799bf71dd51862575 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Feb 2021 13:42:07 -0800 Subject: [PATCH 40/61] firmware_loader: align .builtin_fw to 8 [ Upstream commit 793f49a87aae24e5bcf92ad98d764153fc936570 ] arm64 references the start address of .builtin_fw (__start_builtin_fw) with a pair of R_AARCH64_ADR_PREL_PG_HI21/R_AARCH64_LDST64_ABS_LO12_NC relocations. The compiler is allowed to emit the R_AARCH64_LDST64_ABS_LO12_NC relocation because struct builtin_fw in include/linux/firmware.h is 8-byte aligned. The R_AARCH64_LDST64_ABS_LO12_NC relocation requires the address to be a multiple of 8, which may not be the case if .builtin_fw is empty. Unconditionally align .builtin_fw to fix the linker error. 32-bit architectures could use ALIGN(4) but that would add unnecessary complexity, so just use ALIGN(8). Link: https://lkml.kernel.org/r/20201208054646.2913063-1-maskray@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/1204 Fixes: 5658c76 ("firmware: allow firmware files to be built into kernel image") Signed-off-by: Fangrui Song Reported-by: kernel test robot Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Tested-by: Douglas Anderson Acked-by: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9a4a5a43e886..2267b7c763c6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -396,7 +396,7 @@ } \ \ /* Built-in firmware blobs */ \ - .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) { \ + .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ __start_builtin_fw = .; \ KEEP(*(.builtin_fw)) \ __end_builtin_fw = .; \ From 3063b80b4bdf1b2443ec86e889007bd416a1f044 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:57 +0100 Subject: [PATCH 41/61] drm/sun4i: tcon: set sync polarity for tcon1 channel [ Upstream commit 50791f5d7b6a14b388f46c8885f71d1b98216d1d ] Channel 1 has polarity bits for vsync and hsync signals but driver never sets them. It turns out that with pre-HDMI2 controllers seemingly there is no issue if polarity is not set. However, with HDMI2 controllers (H6) there often comes to de-synchronization due to phase shift. This causes flickering screen. It's safe to assume that similar issues might happen also with pre-HDMI2 controllers. Solve issue with setting vsync and hsync polarity. Note that display stacks with tcon top have polarity bits actually in tcon0 polarity register. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-3-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/sun4i/sun4i_tcon.h | 6 ++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index ae7ae432aa4a..6bf1425e8b0c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -665,6 +665,30 @@ static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, SUN4I_TCON1_BASIC5_V_SYNC(vsync) | SUN4I_TCON1_BASIC5_H_SYNC(hsync)); + /* Setup the polarity of multiple signals */ + if (tcon->quirks->polarity_in_ch0) { + val = 0; + + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + val |= SUN4I_TCON0_IO_POL_HSYNC_POSITIVE; + + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE; + + regmap_write(tcon->regs, SUN4I_TCON0_IO_POL_REG, val); + } else { + /* according to vendor driver, this bit must be always set */ + val = SUN4I_TCON1_IO_POL_UNKNOWN; + + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + val |= SUN4I_TCON1_IO_POL_HSYNC_POSITIVE; + + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + val |= SUN4I_TCON1_IO_POL_VSYNC_POSITIVE; + + regmap_write(tcon->regs, SUN4I_TCON1_IO_POL_REG, val); + } + /* Map output pins to channel 1 */ regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, SUN4I_TCON_GCTL_IOMAP_MASK, @@ -1482,6 +1506,7 @@ static const struct sun4i_tcon_quirks sun8i_a83t_tv_quirks = { static const struct sun4i_tcon_quirks sun8i_r40_tv_quirks = { .has_channel_1 = true, + .polarity_in_ch0 = true, .set_mux = sun8i_r40_tcon_tv_set_mux, }; diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index a62ec826ae71..5bdbaf084782 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -153,6 +153,11 @@ #define SUN4I_TCON1_BASIC5_V_SYNC(height) (((height) - 1) & 0x3ff) #define SUN4I_TCON1_IO_POL_REG 0xf0 +/* there is no documentation about this bit */ +#define SUN4I_TCON1_IO_POL_UNKNOWN BIT(26) +#define SUN4I_TCON1_IO_POL_HSYNC_POSITIVE BIT(25) +#define SUN4I_TCON1_IO_POL_VSYNC_POSITIVE BIT(24) + #define SUN4I_TCON1_IO_TRI_REG 0xf4 #define SUN4I_TCON_ECC_FIFO_REG 0xf8 @@ -224,6 +229,7 @@ struct sun4i_tcon_quirks { bool needs_de_be_mux; /* sun6i needs mux to select backend */ bool needs_edp_reset; /* a80 edp reset needed for tcon0 access */ bool supports_lvds; /* Does the TCON support an LVDS output? */ + bool polarity_in_ch0; /* some tcon1 channels have polarity bits in tcon0 pol register */ u8 dclk_min_div; /* minimum divider for TCON0 DCLK */ /* callback to handle tcon muxing options */ From 40db7dba50feb9f6c0ab7debe4c769d20cd62673 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:59 +0100 Subject: [PATCH 42/61] drm/sun4i: Fix H6 HDMI PHY configuration [ Upstream commit 6a155216c48f2f65c8dcb02c4c27549c170d24a9 ] As it turns out, vendor HDMI PHY driver for H6 has a pretty big table of predefined values for various pixel clocks. However, most of them are not useful/tested because they come from reference driver code. Vendor PHY driver is concerned with only few of those, namely 27 MHz, 74.25 MHz, 148.5 MHz, 297 MHz and 594 MHz. These are all frequencies for standard CEA modes. Fix sun50i_h6_cur_ctr and sun50i_h6_phy_config with the values only for aforementioned frequencies. Table sun50i_h6_mpll_cfg doesn't need to be changed because values are actually frequency dependent and not so much SoC dependent. See i.MX6 documentation for explanation of those values for similar PHY. Fixes: c71c9b2fee17 ("drm/sun4i: Add support for Synopsys HDMI PHY") Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-5-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index 43643ad31730..a4012ec13d4b 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -104,29 +104,21 @@ static const struct dw_hdmi_mpll_config sun50i_h6_mpll_cfg[] = { static const struct dw_hdmi_curr_ctrl sun50i_h6_cur_ctr[] = { /* pixelclk bpp8 bpp10 bpp12 */ - { 25175000, { 0x0000, 0x0000, 0x0000 }, }, { 27000000, { 0x0012, 0x0000, 0x0000 }, }, - { 59400000, { 0x0008, 0x0008, 0x0008 }, }, - { 72000000, { 0x0008, 0x0008, 0x001b }, }, - { 74250000, { 0x0013, 0x0013, 0x0013 }, }, - { 90000000, { 0x0008, 0x001a, 0x001b }, }, - { 118800000, { 0x001b, 0x001a, 0x001b }, }, - { 144000000, { 0x001b, 0x001a, 0x0034 }, }, - { 180000000, { 0x001b, 0x0033, 0x0034 }, }, - { 216000000, { 0x0036, 0x0033, 0x0034 }, }, - { 237600000, { 0x0036, 0x0033, 0x001b }, }, - { 288000000, { 0x0036, 0x001b, 0x001b }, }, - { 297000000, { 0x0019, 0x001b, 0x0019 }, }, - { 330000000, { 0x0036, 0x001b, 0x001b }, }, - { 594000000, { 0x003f, 0x001b, 0x001b }, }, + { 74250000, { 0x0013, 0x001a, 0x001b }, }, + { 148500000, { 0x0019, 0x0033, 0x0034 }, }, + { 297000000, { 0x0019, 0x001b, 0x001b }, }, + { 594000000, { 0x0010, 0x001b, 0x001b }, }, { ~0UL, { 0x0000, 0x0000, 0x0000 }, } }; static const struct dw_hdmi_phy_config sun50i_h6_phy_config[] = { /*pixelclk symbol term vlev*/ - { 74250000, 0x8009, 0x0004, 0x0232}, - { 148500000, 0x8029, 0x0004, 0x0273}, - { 594000000, 0x8039, 0x0004, 0x014a}, + { 27000000, 0x8009, 0x0007, 0x02b0 }, + { 74250000, 0x8009, 0x0006, 0x022d }, + { 148500000, 0x8029, 0x0006, 0x0270 }, + { 297000000, 0x8039, 0x0005, 0x01ab }, + { 594000000, 0x8029, 0x0000, 0x008a }, { ~0UL, 0x0000, 0x0000, 0x0000} }; From b02db23d2647c7c610eee74aaaaa33eae90c72fa Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:59:00 +0100 Subject: [PATCH 43/61] drm/sun4i: dw-hdmi: Fix max. frequency for H6 [ Upstream commit 1926a0508d8947cf081280d85ff035300dc71da7 ] It turns out that reasoning for lowering max. supported frequency is wrong. Scrambling works just fine. Several now fixed bugs prevented proper functioning, even with rates lower than 340 MHz. Issues were just more pronounced with higher frequencies. Fix that by allowing max. supported frequency in HW and fix the comment. Fixes: cd9063757a22 ("drm/sun4i: DW HDMI: Lower max. supported rate for H6") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-6-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 12fe24195621..8f721be26477 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -49,11 +49,9 @@ sun8i_dw_hdmi_mode_valid_h6(struct drm_connector *connector, { /* * Controller support maximum of 594 MHz, which correlates to - * 4K@60Hz 4:4:4 or RGB. However, for frequencies greater than - * 340 MHz scrambling has to be enabled. Because scrambling is - * not yet implemented, just limit to 340 MHz for now. + * 4K@60Hz 4:4:4 or RGB. */ - if (mode->clock > 340000) + if (mode->clock > 594000) return MODE_CLOCK_HIGH; return MODE_OK; From 546d92a43c94e1f166e151f9d84142d91c8bc6e2 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:56 +0100 Subject: [PATCH 44/61] clk: sunxi-ng: mp: fix parent rate change flag check [ Upstream commit 245090ab2636c0869527ce563afbfb8aff29e825 ] CLK_SET_RATE_PARENT flag is checked on parent clock instead of current one. Fix that. Fixes: 3f790433c3cb ("clk: sunxi-ng: Adjust MP clock parent rate when allowed") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20210209175900.7092-2-jernej.skrabec@siol.net Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu_mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c index fa4ecb915590..9d3a76604d94 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.c +++ b/drivers/clk/sunxi-ng/ccu_mp.c @@ -108,7 +108,7 @@ static unsigned long ccu_mp_round_rate(struct ccu_mux_internal *mux, max_m = cmp->m.max ?: 1 << cmp->m.width; max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1); - if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) { + if (!clk_hw_can_set_rate_parent(&cmp->common.hw)) { ccu_mp_find_best(*parent_rate, rate, max_m, max_p, &m, &p); rate = *parent_rate / p / m; } else { From 1019015a5dca64317325588164fb4cb9813e849d Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Fri, 5 Feb 2021 09:51:40 +0100 Subject: [PATCH 45/61] i2c: stm32f7: fix configuration of the digital filter [ Upstream commit 3d6a3d3a2a7a3a60a824e7c04e95fd50dec57812 ] The digital filter related computation are present in the driver however the programming of the filter within the IP is missing. The maximum value for the DNF is wrong and should be 15 instead of 16. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-stm32f7.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index b2634afe066d..a7977eef2ead 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -53,6 +53,8 @@ #define STM32F7_I2C_CR1_RXDMAEN BIT(15) #define STM32F7_I2C_CR1_TXDMAEN BIT(14) #define STM32F7_I2C_CR1_ANFOFF BIT(12) +#define STM32F7_I2C_CR1_DNF_MASK GENMASK(11, 8) +#define STM32F7_I2C_CR1_DNF(n) (((n) & 0xf) << 8) #define STM32F7_I2C_CR1_ERRIE BIT(7) #define STM32F7_I2C_CR1_TCIE BIT(6) #define STM32F7_I2C_CR1_STOPIE BIT(5) @@ -151,7 +153,7 @@ #define STM32F7_I2C_MAX_SLAVE 0x2 #define STM32F7_I2C_DNF_DEFAULT 0 -#define STM32F7_I2C_DNF_MAX 16 +#define STM32F7_I2C_DNF_MAX 15 #define STM32F7_I2C_ANALOG_FILTER_ENABLE 1 #define STM32F7_I2C_ANALOG_FILTER_DELAY_MIN 50 /* ns */ @@ -657,6 +659,13 @@ static void stm32f7_i2c_hw_config(struct stm32f7_i2c_dev *i2c_dev) else stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_ANFOFF); + + /* Program the Digital Filter */ + stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_DNF_MASK); + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_DNF(i2c_dev->setup.dnf)); + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_PE); } From 0b49b82eeea2f2ff0ed0f7b34aaea31dc0073724 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 12 Feb 2021 20:52:54 -0800 Subject: [PATCH 46/61] h8300: fix PREEMPTION build, TI_PRE_COUNT undefined [ Upstream commit ade9679c159d5bbe14fb7e59e97daf6062872e2b ] Fix a build error for undefined 'TI_PRE_COUNT' by adding it to asm-offsets.c. h8300-linux-ld: arch/h8300/kernel/entry.o: in function `resume_kernel': (.text+0x29a): undefined reference to `TI_PRE_COUNT' Link: https://lkml.kernel.org/r/20210212021650.22740-1-rdunlap@infradead.org Fixes: df2078b8daa7 ("h8300: Low level entry") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Yoshinori Sato Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/h8300/kernel/asm-offsets.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index 85e60509f0a8..d4b53af657c8 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c @@ -63,6 +63,9 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE, thread_info, preempt_count); +#ifdef CONFIG_PREEMPTION + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#endif return 0; } From 35834bf0b210d49ad78b1408efed03091d1a9f39 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 13 Aug 2020 08:30:38 +0300 Subject: [PATCH 47/61] usb: dwc3: ulpi: fix checkpatch warning commit 2a499b45295206e7f3dc76edadde891c06cc4447 upstream no functional changes. Signed-off-by: Felipe Balbi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index bb8271531da7..a462ef54678a 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -19,7 +19,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc) { - unsigned count = 1000; + unsigned int count = 1000; u32 reg; while (count--) { From b90e8d5d28d1b6811b0fb8543a342cefcdc2d46b Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:07 +0300 Subject: [PATCH 48/61] usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one commit fca3f138105727c3a22edda32d02f91ce1bf11c9 upstream Originally the procedure of the ULPI transaction finish detection has been developed as a simple busy-loop with just decrementing counter and no delays. It's wrong since on different systems the loop will take a different time to complete. So if the system bus and CPU are fast enough to overtake the ULPI bus and the companion PHY reaction, then we'll get to take a false timeout error. Fix this by converting the busy-loop procedure to take the standard bus speed, address value and the registers access mode into account for the busy-loop delay calculation. Here is the way the fix works. It's known that the ULPI bus is clocked with 60MHz signal. In accordance with [1] the ULPI bus protocol is created so to spend 5 and 6 clock periods for immediate register write and read operations respectively, and 6 and 7 clock periods - for the extended register writes and reads. Based on that we can easily pre-calculate the time which will be needed for the controller to perform a requested IO operation. Note we'll still preserve the attempts counter in case if the DWC USB3 controller has got some internals delays. [1] UTMI+ Low Pin Interface (ULPI) Specification, Revision 1.1, October 20, 2004, pp. 30 - 36. Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-3-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index a462ef54678a..ffe3440abb74 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -7,6 +7,8 @@ * Author: Heikki Krogerus */ +#include +#include #include #include "core.h" @@ -17,12 +19,22 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a)) -static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; unsigned int count = 1000; u32 reg; + if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); if (reg & DWC3_GUSB2PHYACC_DONE) return 0; @@ -47,7 +59,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret; @@ -71,7 +83,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); } static const struct ulpi_ops dwc3_ulpi_ops = { From 73bc75fda669b37605b3f8a6d0ff4f1dfda41eb5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 3 Feb 2021 08:47:56 +0000 Subject: [PATCH 49/61] rxrpc: Fix clearance of Tx/Rx ring when releasing a call commit 7b5eab57cac45e270a0ad624ba157c5b30b3d44d upstream. At the end of rxrpc_release_call(), rxrpc_cleanup_ring() is called to clear the Rx/Tx skbuff ring, but this doesn't lock the ring whilst it's accessing it. Unfortunately, rxrpc_resend() might be trying to retransmit a packet concurrently with this - and whilst it does lock the ring, this isn't protection against rxrpc_cleanup_call(). Fix this by removing the call to rxrpc_cleanup_ring() from rxrpc_release_call(). rxrpc_cleanup_ring() will be called again anyway from rxrpc_cleanup_call(). The earlier call is just an optimisation to recycle skbuffs more quickly. Alternative solutions include rxrpc_release_call() could try to cancel the work item or wait for it to complete or rxrpc_cleanup_ring() could lock when accessing the ring (which would require a bh lock). This can produce a report like the following: BUG: KASAN: use-after-free in rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 Read of size 4 at addr ffff888011606e04 by task kworker/0:0/5 ... Workqueue: krxrpcd rxrpc_process_call Call Trace: ... kasan_report.cold+0x79/0xd5 mm/kasan/report.c:413 rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 rxrpc_resend net/rxrpc/call_event.c:266 [inline] rxrpc_process_call+0x1634/0x1f60 net/rxrpc/call_event.c:412 process_one_work+0x98d/0x15f0 kernel/workqueue.c:2275 ... Allocated by task 2318: ... sock_alloc_send_pskb+0x793/0x920 net/core/sock.c:2348 rxrpc_send_data+0xb51/0x2bf0 net/rxrpc/sendmsg.c:358 rxrpc_do_sendmsg+0xc03/0x1350 net/rxrpc/sendmsg.c:744 rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:560 ... Freed by task 2318: ... kfree_skb+0x140/0x3f0 net/core/skbuff.c:704 rxrpc_free_skb+0x11d/0x150 net/rxrpc/skbuff.c:78 rxrpc_cleanup_ring net/rxrpc/call_object.c:485 [inline] rxrpc_release_call+0x5dd/0x860 net/rxrpc/call_object.c:552 rxrpc_release_calls_on_socket+0x21c/0x300 net/rxrpc/call_object.c:579 rxrpc_release_sock net/rxrpc/af_rxrpc.c:885 [inline] rxrpc_release+0x263/0x5a0 net/rxrpc/af_rxrpc.c:916 __sock_release+0xcd/0x280 net/socket.c:597 ... The buggy address belongs to the object at ffff888011606dc0 which belongs to the cache skbuff_head_cache of size 232 Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: syzbot+174de899852504e4a74a@syzkaller.appspotmail.com Reported-by: syzbot+3d1c772efafd3c38d007@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Hillf Danton Link: https://lore.kernel.org/r/161234207610.653119.5287360098400436976.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_object.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 38a46167523f..f8233bc76c0e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -507,8 +507,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); - - rxrpc_cleanup_ring(call); _leave(""); } From 0b42ab078369318a99c925508f3532a7d5288b4f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2021 14:29:52 -0500 Subject: [PATCH 50/61] udp: fix skb_copy_and_csum_datagram with odd segment sizes commit 52cbd23a119c6ebf40a527e53f3402d2ea38eccb upstream. When iteratively computing a checksum with csum_block_add, track the offset "pos" to correctly rotate in csum_block_add when offset is odd. The open coded implementation of skb_copy_and_csum_datagram did this. With the switch to __skb_datagram_iter calling csum_and_copy_to_iter, pos was reinitialized to 0 on each call. Bring back the pos by passing it along with the csum to the callback. Changes v1->v2 - pass csum value, instead of csump pointer (Alexander Duyck) Link: https://lore.kernel.org/netdev/20210128152353.GB27281@optiplex/ Fixes: 950fcaecd5cc ("datagram: consolidate datagram copy to iter helpers") Reported-by: Oliver Graute Signed-off-by: Willem de Bruijn Reviewed-by: Alexander Duyck Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210203192952.1849843-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 8 +++++++- lib/iov_iter.c | 24 ++++++++++++++---------- net/core/datagram.c | 12 ++++++++++-- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index ab5f523bc0df..b74d292b9960 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -261,7 +261,13 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 639d5e7014c1..9ea6f7bb8309 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -570,12 +570,13 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, } static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) + struct csum_state *csstate, + struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; + __wsum sum = csstate->csum; + size_t off = csstate->off; size_t n, r; - size_t off = 0; - __wsum sum = *csum; int idx; if (!sanity(i)) @@ -596,7 +597,8 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, addr += chunk; } i->count -= bytes; - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } @@ -1484,18 +1486,19 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, } EXPORT_SYMBOL(csum_and_copy_from_iter_full); -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { + struct csum_state *csstate = _csstate; const char *from = addr; - __wsum *csum = csump; __wsum sum, next; - size_t off = 0; + size_t off; if (unlikely(iov_iter_is_pipe(i))) - return csum_and_copy_to_pipe_iter(addr, bytes, csum, i); + return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); - sum = *csum; + sum = csstate->csum; + off = csstate->off; if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; @@ -1524,7 +1527,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, off += v.iov_len; }) ) - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } EXPORT_SYMBOL(csum_and_copy_to_iter); diff --git a/net/core/datagram.c b/net/core/datagram.c index 189ad4c73a3f..b0488f30f2c4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -700,8 +700,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, __wsum *csump) { - return __skb_datagram_iter(skb, offset, to, len, true, - csum_and_copy_to_iter, csump); + struct csum_state csdata = { .csum = *csump }; + int ret; + + ret = __skb_datagram_iter(skb, offset, to, len, true, + csum_and_copy_to_iter, &csdata); + if (ret) + return ret; + + *csump = csdata.csum; + return 0; } /** From 5c53956a8da0cd09e17f1e256d38f1198602bc0f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Feb 2021 18:33:51 +0200 Subject: [PATCH 51/61] net: dsa: call teardown method on probe failure commit 8fd54a73b7cda11548154451bdb4bde6d8ff74c7 upstream. Since teardown is supposed to undo the effects of the setup method, it should be called in the error path for dsa_switch_setup, not just in dsa_switch_teardown. Fixes: 5e3f847a02aa ("net: dsa: Add teardown callback for drivers") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210204163351.2929670-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/dsa/dsa2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 0f7f38c29579..70e6fc2edd30 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -399,18 +399,21 @@ static int dsa_switch_setup(struct dsa_switch *ds) ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) { err = -ENOMEM; - goto unregister_notifier; + goto teardown; } dsa_slave_mii_bus_init(ds); err = mdiobus_register(ds->slave_mii_bus); if (err < 0) - goto unregister_notifier; + goto teardown; } return 0; +teardown: + if (ds->ops->teardown) + ds->ops->teardown(ds); unregister_notifier: dsa_switch_unregister_notifier(ds); unregister_devlink: From 2085d886630a5efab216e677d03b51916cd77ff1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Feb 2021 13:31:46 -0800 Subject: [PATCH 52/61] net: gro: do not keep too many GRO packets in napi->rx_list commit 8dc1c444df193701910f5e80b5d4caaf705a8fb0 upstream. Commit c80794323e82 ("net: Fix packet reordering caused by GRO and listified RX cooperation") had the unfortunate effect of adding latencies in common workloads. Before the patch, GRO packets were immediately passed to upper stacks. After the patch, we can accumulate quite a lot of GRO packets (depdending on NAPI budget). My fix is counting in napi->rx_count number of segments instead of number of logical packets. Fixes: c80794323e82 ("net: Fix packet reordering caused by GRO and listified RX cooperation") Signed-off-by: Eric Dumazet Bisected-by: John Sperbeck Tested-by: Jian Yang Cc: Maxim Mikityanskiy Reviewed-by: Saeed Mahameed Reviewed-by: Edward Cree Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20210204213146.4192368-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index ec66d13d2bda..d90e8bd87df8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5275,10 +5275,11 @@ static void gro_normal_list(struct napi_struct *napi) /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) { list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) gro_normal_list(napi); } @@ -5317,7 +5318,7 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) } out: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); return NET_RX_SUCCESS; } @@ -5608,7 +5609,7 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, { switch (ret) { case GRO_NORMAL: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_DROP: @@ -5696,7 +5697,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_NORMAL) - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_DROP: From 611d93fbea54ad1d85163ca258b764b70ae0cd89 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 5 Feb 2021 11:36:30 +1100 Subject: [PATCH 53/61] net: fix iteration for sctp transport seq_files commit af8085f3a4712c57d0dd415ad543bac85780375c upstream. The sctp transport seq_file iterators take a reference to the transport in the ->start and ->next functions and releases the reference in the ->show function. The preferred handling for such resources is to release them in the subsequent ->next or ->stop function call. Since Commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") there is no guarantee that ->show will be called after ->next, so this function can now leak references. So move the sctp_transport_put() call to ->next and ->stop. Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") Reported-by: Xin Long Signed-off-by: NeilBrown Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/proc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index f7da88ae20a5..982a87b3e11f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -215,6 +215,12 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + sctp_transport_walk_stop(&iter->hti); } @@ -222,6 +228,12 @@ static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + ++*pos; return sctp_transport_get_next(seq_file_net(seq), &iter->hti); @@ -277,8 +289,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sk->sk_rcvbuf); seq_printf(seq, "\n"); - sctp_transport_put(transport); - return 0; } @@ -354,8 +364,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } - sctp_transport_put(transport); - return 0; } From a5f0b6f7b1c0ae835c1bd2fe487adf620b6f73e9 Mon Sep 17 00:00:00 2001 From: Norbert Slusarek Date: Fri, 5 Feb 2021 13:14:05 +0100 Subject: [PATCH 54/61] net/vmw_vsock: improve locking in vsock_connect_timeout() commit 3d0bc44d39bca615b72637e340317b7899b7f911 upstream. A possible locking issue in vsock_connect_timeout() was recognized by Eric Dumazet which might cause a null pointer dereference in vsock_transport_cancel_pkt(). This patch assures that vsock_transport_cancel_pkt() will be called within the lock, so a race condition won't occur which could result in vsk->transport to be set to NULL. Fixes: 380feae0def7 ("vsock: cancel packets when failing to connect") Reported-by: Eric Dumazet Signed-off-by: Norbert Slusarek Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/trinity-f8e0937a-cf0e-4d80-a76e-d9a958ba3ef1-1612535522360@3c-app-gmx-bap12 Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3a074a03d382..89a57ed99abb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1099,7 +1099,6 @@ static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; - int cancel = 0; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1110,11 +1109,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); - cancel = 1; + vsock_transport_cancel_pkt(vsk); } release_sock(sk); - if (cancel) - vsock_transport_cancel_pkt(vsk); sock_put(sk); } From 2545c5bd8316c7e3ba29312eb3dce431530ecce8 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 5 Feb 2021 17:37:32 -0800 Subject: [PATCH 55/61] net: watchdog: hold device global xmit lock during tx disable commit 3aa6bce9af0e25b735c9c1263739a5639a336ae8 upstream. Prevent netif_tx_disable() running concurrently with dev_watchdog() by taking the device global xmit lock. Otherwise, the recommended: netif_carrier_off(dev); netif_tx_disable(dev); driver shutdown sequence can happen after the watchdog has already checked carrier, resulting in possible false alarms. This is because netif_tx_lock() only sets the frozen bit without maintaining the locks on the individual queues. Fixes: c3f26a269c24 ("netdev: Fix lockdep warnings in multiqueue configurations.") Signed-off-by: Edwin Peer Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec3081ab04c0..3e0692fd6282 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4044,6 +4044,7 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -4051,6 +4052,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); } From c025081b57df63f5a0b92ab57855904108c059d7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 8 Feb 2021 15:44:54 +0100 Subject: [PATCH 56/61] vsock/virtio: update credit only if socket is not closed commit ce7536bc7398e2ae552d2fabb7e0e371a9f1fe46 upstream. If the socket is closed or is being released, some resources used by virtio_transport_space_update() such as 'vsk->trans' may be released. To avoid a use after free bug we should only update the available credit when we are sure the socket is still open and we have the lock held. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210208144454.84438-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index efbb521bff13..dde16a033a09 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1100,10 +1100,10 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, vsk = vsock_sk(sk); - space_available = virtio_transport_space_update(sk, pkt); - lock_sock(sk); + space_available = virtio_transport_space_update(sk, pkt); + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid; From ac79b1d94a88cb52cfd7624d5f32c833cf2f9f4d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 9 Feb 2021 09:52:19 +0100 Subject: [PATCH 57/61] vsock: fix locking in vsock_shutdown() commit 1c5fae9c9a092574398a17facc31c533791ef232 upstream. In vsock_shutdown() we touched some socket fields without holding the socket lock, such as 'state' and 'sk_flags'. Also, after the introduction of multi-transport, we are accessing 'vsk->transport' in vsock_send_shutdown() without holding the lock and this call can be made while the connection is in progress, so the transport can change in the meantime. To avoid issues, we hold the socket lock when we enter in vsock_shutdown() and release it when we leave. Among the transports that implement the 'shutdown' callback, only hyperv_transport acquired the lock. Since the caller now holds it, we no longer take it. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 8 +++++--- net/vmw_vsock/hyperv_transport.c | 4 ---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 89a57ed99abb..5d323574d04f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -808,10 +808,12 @@ static int vsock_shutdown(struct socket *sock, int mode) */ sk = sock->sk; + + lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; if (sk->sk_type == SOCK_STREAM) - return err; + goto out; } else { sock->state = SS_DISCONNECTING; err = 0; @@ -820,10 +822,8 @@ static int vsock_shutdown(struct socket *sock, int mode) /* Receive and send shutdowns are treated alike. */ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { - lock_sock(sk); sk->sk_shutdown |= mode; sk->sk_state_change(sk); - release_sock(sk); if (sk->sk_type == SOCK_STREAM) { sock_reset_flag(sk, SOCK_DONE); @@ -831,6 +831,8 @@ static int vsock_shutdown(struct socket *sock, int mode) } } +out: + release_sock(sk); return err; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 463cefc1e5ae..a3c57c048cbd 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -464,14 +464,10 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) static int hvs_shutdown(struct vsock_sock *vsk, int mode) { - struct sock *sk = sk_vsock(vsk); - if (!(mode & SEND_SHUTDOWN)) return 0; - lock_sock(sk); hvs_shutdown_lock_held(vsk->trans, mode); - release_sock(sk); return 0; } From 99996cd558ba1183b062205e824eabc49c93343a Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 02:32:33 +0600 Subject: [PATCH 58/61] net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS commit a11148e6fcce2ae53f47f0a442d098d860b4f7db upstream. syzbot found WARNING in rds_rdma_extra_size [1] when RDS_CMSG_RDMA_ARGS control message is passed with user-controlled 0x40001 bytes of args->nr_local, causing order >= MAX_ORDER condition. The exact value 0x40001 can be checked with UIO_MAXIOV which is 0x400. So for kcalloc() 0x400 iovecs with sizeof(struct rds_iovec) = 0x10 is the closest limit, with 0x10 leftover. Same condition is currently done in rds_cmsg_rdma_args(). [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc_array include/linux/slab.h:592 [inline] kcalloc include/linux/slab.h:621 [inline] rds_rdma_extra_size+0xb2/0x3b0 net/rds/rdma.c:568 rds_rm_size net/rds/send.c:928 [inline] Reported-by: syzbot+1bd2b07f93745fa38425@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20210201203233.1324704-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8e10f954a22f..1c42a600fe7f 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -532,6 +532,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args, if (args->nr_local == 0) return -EINVAL; + if (args->nr_local > UIO_MAXIOV) + return -EMSGSIZE; + iov->iov = kcalloc(args->nr_local, sizeof(struct rds_iovec), GFP_KERNEL); From b55528a18e4066fa24fe44ede37db84af2c8015d Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 15:20:59 +0600 Subject: [PATCH 59/61] net/qrtr: restrict user-controlled length in qrtr_tun_write_iter() commit 2a80c15812372e554474b1dba0b1d8e467af295d upstream. syzbot found WARNING in qrtr_tun_write_iter [1] when write_iter length exceeds KMALLOC_MAX_SIZE causing order >= MAX_ORDER condition. Additionally, there is no check for 0 length write. [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc include/linux/slab.h:557 [inline] kzalloc include/linux/slab.h:682 [inline] qrtr_tun_write_iter+0x8a/0x180 net/qrtr/tun.c:83 call_write_iter include/linux/fs.h:1901 [inline] Reported-by: syzbot+c2a7e5c5211605a90865@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Link: https://lore.kernel.org/r/20210202092059.1361381-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/qrtr/tun.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index e35869e81766..997af345ce37 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -80,6 +80,12 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; void *kbuf; + if (!len) + return -EINVAL; + + if (len > KMALLOC_MAX_SIZE) + return -ENOMEM; + kbuf = kzalloc(len, GFP_KERNEL); if (!kbuf) return -ENOMEM; From e8ffaca0fe27d489cb8385073cb13e1da1fda3ac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 12 Nov 2020 11:31:55 +0100 Subject: [PATCH 60/61] ovl: expand warning in ovl_d_real() commit cef4cbff06fbc3be54d6d79ee139edecc2ee8598 upstream. There was a syzbot report with this warning but insufficient information... Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d6b724beb304..87cf9b1d2eca 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -79,7 +79,7 @@ static void ovl_dentry_release(struct dentry *dentry) static struct dentry *ovl_d_real(struct dentry *dentry, const struct inode *inode) { - struct dentry *real; + struct dentry *real = NULL, *lower; /* It's an overlay file */ if (inode && d_inode(dentry) == inode) @@ -98,9 +98,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (real && !inode && ovl_has_upperdata(d_inode(dentry))) return real; - real = ovl_dentry_lowerdata(dentry); - if (!real) + lower = ovl_dentry_lowerdata(dentry); + if (!lower) goto bug; + real = lower; /* Handle recursion */ real = d_real(real, inode); @@ -108,8 +109,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (!inode || inode == d_inode(real)) return real; bug: - WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, - inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); + WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", + __func__, dentry, inode ? inode->i_sb->s_id : "NULL", + inode ? inode->i_ino : 0, real, + real && d_inode(real) ? d_inode(real)->i_ino : 0); return dentry; } From 850e6a95deb5a9e6e922ace64bf2dd0ed290ecb7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Feb 2021 10:35:20 +0100 Subject: [PATCH 61/61] Linux 5.4.99 Tested-by: Jason Self Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Shuah Khan Tested-by: Ross Schmidt Link: https://lore.kernel.org/r/20210215152715.401453874@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4f6bfcf434e8..a0491ba1d759 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 98 +SUBLEVEL = 99 EXTRAVERSION = NAME = Kleptomaniac Octopus