Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:

 - Fix a DMA regression from the 4.13 merge window: only certain chips
   can do 64-bit DMA. From Dave Dushar.

 - Fix the cpu cross-call algorithm so that it correctly detects stalled
   or stuck remote cpus, from Jane Chu.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Measure receiver forward progress to avoid send mondo timeout
  SPARC64: Fix sun4v DMA panic
This commit is contained in:
Linus Torvalds 2017-07-17 15:08:29 -07:00
commit cb8c65ccff
5 changed files with 142 additions and 78 deletions

View File

@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS];
void init_cur_cpu_trap(struct thread_info *); void init_cur_cpu_trap(struct thread_info *);
void setup_tba(void); void setup_tba(void);
extern int ncpus_probed; extern int ncpus_probed;
extern u64 cpu_mondo_counter[NR_CPUS];
unsigned long real_hard_smp_processor_id(void); unsigned long real_hard_smp_processor_id(void);

View File

@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
static int dma_4v_supported(struct device *dev, u64 device_mask) static int dma_4v_supported(struct device *dev, u64 device_mask)
{ {
struct iommu *iommu = dev->archdata.iommu; struct iommu *iommu = dev->archdata.iommu;
u64 dma_addr_mask; u64 dma_addr_mask = iommu->dma_addr_mask;
if (device_mask > DMA_BIT_MASK(32) && iommu->atu) if (device_mask > DMA_BIT_MASK(32)) {
dma_addr_mask = iommu->atu->dma_addr_mask; if (iommu->atu)
else dma_addr_mask = iommu->atu->dma_addr_mask;
dma_addr_mask = iommu->dma_addr_mask; else
return 0;
}
if ((device_mask & dma_addr_mask) == dma_addr_mask) if ((device_mask & dma_addr_mask) == dma_addr_mask)
return 1; return 1;

View File

@ -622,22 +622,48 @@ static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
} }
} }
/* Multi-cpu list version. */ #define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
#define MONDO_USEC_WAIT_MIN 2
#define MONDO_USEC_WAIT_MAX 100
#define MONDO_RETRY_LIMIT 500000
/* Multi-cpu list version.
*
* Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
* Sometimes not all cpus receive the mondo, requiring us to re-send
* the mondo until all cpus have received, or cpus are truly stuck
* unable to receive mondo, and we timeout.
* Occasionally a target cpu strand is borrowed briefly by the hypervisor
* to perform guest service, such as PCIe error handling. Considering
* that service time, a 1 second overall wait is reasonable for 1 cpu.
* Two wait times between mondo checks are defined here: 2 usec for a
* quick single-cpu turnaround and up to 100 usec for a large cpu count.
* Delivering a mondo to a large number of cpus can take longer, so we
* adjust the retry count as long as target cpus are making forward progress.
*/
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{ {
int retries, this_cpu, prev_sent, i, saw_cpu_error; int this_cpu, tot_cpus, prev_sent, i, rem;
int usec_wait, retries, tot_retries;
u16 first_cpu = 0xffff;
unsigned long xc_rcvd = 0;
unsigned long status; unsigned long status;
int ecpuerror_id = 0;
int enocpu_id = 0;
u16 *cpu_list; u16 *cpu_list;
u16 cpu;
this_cpu = smp_processor_id(); this_cpu = smp_processor_id();
cpu_list = __va(tb->cpu_list_pa); cpu_list = __va(tb->cpu_list_pa);
usec_wait = cnt * MONDO_USEC_WAIT_MIN;
saw_cpu_error = 0; if (usec_wait > MONDO_USEC_WAIT_MAX)
retries = 0; usec_wait = MONDO_USEC_WAIT_MAX;
retries = tot_retries = 0;
tot_cpus = cnt;
prev_sent = 0; prev_sent = 0;
do { do {
int forward_progress, n_sent; int n_sent, mondo_delivered, target_cpu_busy;
status = sun4v_cpu_mondo_send(cnt, status = sun4v_cpu_mondo_send(cnt,
tb->cpu_list_pa, tb->cpu_list_pa,
@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
/* HV_EOK means all cpus received the xcall, we're done. */ /* HV_EOK means all cpus received the xcall, we're done. */
if (likely(status == HV_EOK)) if (likely(status == HV_EOK))
break; goto xcall_done;
/* If not these non-fatal errors, panic */
if (unlikely((status != HV_EWOULDBLOCK) &&
(status != HV_ECPUERROR) &&
(status != HV_ENOCPU)))
goto fatal_errors;
/* First, see if we made any forward progress. /* First, see if we made any forward progress.
*
* Go through the cpu_list, count the target cpus that have
* received our mondo (n_sent), and those that did not (rem).
* Re-pack cpu_list with the cpus that remain to be retried at
* the front - this simplifies tracking the truly stalled cpus.
* *
* The hypervisor indicates successful sends by setting * The hypervisor indicates successful sends by setting
* cpu list entries to the value 0xffff. * cpu list entries to the value 0xffff.
*
* EWOULDBLOCK means some target cpus did not receive the
* mondo and retry usually helps.
*
* ECPUERROR means at least one target cpu is in error state,
* it's usually safe to skip the faulty cpu and retry.
*
* ENOCPU means one of the target cpus doesn't belong to the
* domain, perhaps because it was offlined; this is unexpected
* but not fatal, and it's okay to skip the offlined cpu.
*/ */
rem = 0;
n_sent = 0; n_sent = 0;
for (i = 0; i < cnt; i++) { for (i = 0; i < cnt; i++) {
if (likely(cpu_list[i] == 0xffff)) cpu = cpu_list[i];
if (likely(cpu == 0xffff)) {
n_sent++; n_sent++;
} else if ((status == HV_ECPUERROR) &&
(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
ecpuerror_id = cpu + 1;
} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
enocpu_id = cpu + 1;
} else {
cpu_list[rem++] = cpu;
}
} }
forward_progress = 0; /* No cpu remained, we're done. */
if (n_sent > prev_sent) if (rem == 0)
forward_progress = 1; break;
/* Otherwise, update the cpu count for retry. */
cnt = rem;
/* Record the overall number of mondos received by the
* first of the remaining cpus.
*/
if (first_cpu != cpu_list[0]) {
first_cpu = cpu_list[0];
xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
}
/* Was any mondo delivered successfully? */
mondo_delivered = (n_sent > prev_sent);
prev_sent = n_sent; prev_sent = n_sent;
/* If we get a HV_ECPUERROR, then one or more of the cpus /* or, was any target cpu busy processing other mondos? */
* in the list are in error state. Use the cpu_state() target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
* hypervisor call to find out which cpus are in error state. xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
/* Retry count is for no progress. If we're making progress,
* reset the retry count.
*/ */
if (unlikely(status == HV_ECPUERROR)) { if (likely(mondo_delivered || target_cpu_busy)) {
for (i = 0; i < cnt; i++) { tot_retries += retries;
long err; retries = 0;
u16 cpu; } else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
goto fatal_mondo_timeout;
cpu = cpu_list[i];
if (cpu == 0xffff)
continue;
err = sun4v_cpu_state(cpu);
if (err == HV_CPU_STATE_ERROR) {
saw_cpu_error = (cpu + 1);
cpu_list[i] = 0xffff;
}
}
} else if (unlikely(status != HV_EWOULDBLOCK))
goto fatal_mondo_error;
/* Don't bother rewriting the CPU list, just leave the
* 0xffff and non-0xffff entries in there and the
* hypervisor will do the right thing.
*
* Only advance timeout state if we didn't make any
* forward progress.
*/
if (unlikely(!forward_progress)) {
if (unlikely(++retries > 10000))
goto fatal_mondo_timeout;
/* Delay a little bit to let other cpus catch up
* on their cpu mondo queue work.
*/
udelay(2 * cnt);
} }
/* Delay a little bit to let other cpus catch up on
* their cpu mondo queue work.
*/
if (!mondo_delivered)
udelay(usec_wait);
retries++;
} while (1); } while (1);
if (unlikely(saw_cpu_error)) xcall_done:
goto fatal_mondo_cpu_error; if (unlikely(ecpuerror_id > 0)) {
pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
this_cpu, ecpuerror_id - 1);
} else if (unlikely(enocpu_id > 0)) {
pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
this_cpu, enocpu_id - 1);
}
return; return;
fatal_mondo_cpu_error: fatal_errors:
printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " /* fatal errors include bad alignment, etc */
"(including %d) were in error state\n", pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
this_cpu, saw_cpu_error - 1); this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
return; panic("Unexpected SUN4V mondo error %lu\n", status);
fatal_mondo_timeout: fatal_mondo_timeout:
printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " /* some cpus being non-responsive to the cpu mondo */
" progress after %d retries.\n", pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
this_cpu, retries); this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
goto dump_cpu_list_and_out; panic("SUN4V mondo timeout panic\n");
fatal_mondo_error:
printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
this_cpu, status);
printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
"mondo_block_pa(%lx)\n",
this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
dump_cpu_list_and_out:
printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
for (i = 0; i < cnt; i++)
printk("%u ", cpu_list[i]);
printk("]\n");
} }
static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

View File

@ -26,6 +26,21 @@ sun4v_cpu_mondo:
ldxa [%g0] ASI_SCRATCHPAD, %g4 ldxa [%g0] ASI_SCRATCHPAD, %g4
sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4
/* Get smp_processor_id() into %g3 */
sethi %hi(trap_block), %g5
or %g5, %lo(trap_block), %g5
sub %g4, %g5, %g3
srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3
/* Increment cpu_mondo_counter[smp_processor_id()] */
sethi %hi(cpu_mondo_counter), %g5
or %g5, %lo(cpu_mondo_counter), %g5
sllx %g3, 3, %g3
add %g5, %g3, %g5
ldx [%g5], %g3
add %g3, 1, %g3
stx %g3, [%g5]
/* Get CPU mondo queue base phys address into %g7. */ /* Get CPU mondo queue base phys address into %g7. */
ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7

View File

@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
} }
} }
u64 cpu_mondo_counter[NR_CPUS] = {0};
struct trap_per_cpu trap_block[NR_CPUS]; struct trap_per_cpu trap_block[NR_CPUS];
EXPORT_SYMBOL(trap_block); EXPORT_SYMBOL(trap_block);