s390 update with improvements and bug fixes for 5.1-rc2

- Fix early free of the channel program in vfio
 
  - On AP device removal make sure that all messages are flushed
    with the driver still attached that queued the message
 
  - Limit brk randomization to 32MB to reduce the chance that the
    heap of ld.so is placed after the main stack
 
  - Add a rolling average for the steal time of a CPU, this will be
    needed for KVM to decide when to do busy waiting
 
  - Fix a warning in the CPU-MF code
 
  - Add a notification handler for AP configuration change to react
    faster to new AP devices
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJcnIq7AAoJEDjwexyKj9rgddUH/3VQP6BMvq2fwAsLqx8JeYgT
 082xzP2nHli3tO6m8fFHmtqrSg5KTEDfuQVafqp92LeEMKUNWQI6kRu7rXeAVBct
 M6hx21mqkm9VNjAlAjSq8IAUXP2K6/K0BMD5mYInYYYVRvJm3on4sHnkEj0kvXbm
 OGxwnNBd9UnH5g6ti2vW4cyDvs0aqj1eDbSudy5KedumQz5J2XdFPn4f4Ej6p2+t
 nuvlZFDnZ2Z4rliE3RFCuKExZR+YFZgS1urm6pcklncfvbJRsqFJ+nvhurskDUI3
 4gOp1Yv1tvGNv/cNVEtnz8g/Kg8/sI7evjQBtxhtEsV/W0sbZPnjCt+28Cf1DN4=
 =4nL7
 -----END PGP SIGNATURE-----

Merge tag 's390-5.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 fixes from Martin Schwidefsky:
 "Improvements and bug fixes for 5.1-rc2:

   - Fix early free of the channel program in vfio

   - On AP device removal make sure that all messages are flushed with
     the driver still attached that queued the message

   - Limit brk randomization to 32MB to reduce the chance that the heap
     of ld.so is placed after the main stack

   - Add a rolling average for the steal time of a CPU, this will be
     needed for KVM to decide when to do busy waiting

   - Fix a warning in the CPU-MF code

   - Add a notification handler for AP configuration change to react
     faster to new AP devices"

* tag 's390-5.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/cpumf: Fix warning from check_processor_id
  zcrypt: handle AP Info notification from CHSC SEI command
  vfio: ccw: only free cp on final interrupt
  s390/vtime: steal time exponential moving average
  s390/zcrypt: revisit ap device remove procedure
  s390: limit brk randomization to 32MB
This commit is contained in:
Linus Torvalds 2019-03-28 08:35:32 -07:00
commit bfed6d0ffc
12 changed files with 155 additions and 69 deletions

View File

@ -360,4 +360,15 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
return reg1;
}
/*
* Interface to tell the AP bus code that a configuration
* change has happened. The bus code should at least do
* an ap bus resource rescan.
*/
#if IS_ENABLED(CONFIG_ZCRYPT)
void ap_bus_cfg_chg(void);
#else
static inline void ap_bus_cfg_chg(void){};
#endif
#endif /* _ASM_S390_AP_H_ */

View File

@ -252,11 +252,14 @@ do { \
/*
* Cache aliasing on the latest machines calls for a mapping granularity
* of 512KB. For 64-bit processes use a 512KB alignment and a randomization
* of up to 1GB. For 31-bit processes the virtual address space is limited,
* use no alignment and limit the randomization to 8MB.
* of 512KB for the anonymous mapping base. For 64-bit processes use a
* 512KB alignment and a randomization of up to 1GB. For 31-bit processes
* the virtual address space is limited, use no alignment and limit the
* randomization to 8MB.
* For the additional randomization of the program break use 32MB for
* 64-bit and 8MB for 31-bit.
*/
#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL)
#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL)
#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL)
#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL)
#define STACK_RND_MASK MMAP_RND_MASK

View File

@ -91,52 +91,53 @@ struct lowcore {
__u64 hardirq_timer; /* 0x02e8 */
__u64 softirq_timer; /* 0x02f0 */
__u64 steal_timer; /* 0x02f8 */
__u64 last_update_timer; /* 0x0300 */
__u64 last_update_clock; /* 0x0308 */
__u64 int_clock; /* 0x0310 */
__u64 mcck_clock; /* 0x0318 */
__u64 clock_comparator; /* 0x0320 */
__u64 boot_clock[2]; /* 0x0328 */
__u64 avg_steal_timer; /* 0x0300 */
__u64 last_update_timer; /* 0x0308 */
__u64 last_update_clock; /* 0x0310 */
__u64 int_clock; /* 0x0318*/
__u64 mcck_clock; /* 0x0320 */
__u64 clock_comparator; /* 0x0328 */
__u64 boot_clock[2]; /* 0x0330 */
/* Current process. */
__u64 current_task; /* 0x0338 */
__u64 kernel_stack; /* 0x0340 */
__u64 current_task; /* 0x0340 */
__u64 kernel_stack; /* 0x0348 */
/* Interrupt, DAT-off and restartstack. */
__u64 async_stack; /* 0x0348 */
__u64 nodat_stack; /* 0x0350 */
__u64 restart_stack; /* 0x0358 */
__u64 async_stack; /* 0x0350 */
__u64 nodat_stack; /* 0x0358 */
__u64 restart_stack; /* 0x0360 */
/* Restart function and parameter. */
__u64 restart_fn; /* 0x0360 */
__u64 restart_data; /* 0x0368 */
__u64 restart_source; /* 0x0370 */
__u64 restart_fn; /* 0x0368 */
__u64 restart_data; /* 0x0370 */
__u64 restart_source; /* 0x0378 */
/* Address space pointer. */
__u64 kernel_asce; /* 0x0378 */
__u64 user_asce; /* 0x0380 */
__u64 vdso_asce; /* 0x0388 */
__u64 kernel_asce; /* 0x0380 */
__u64 user_asce; /* 0x0388 */
__u64 vdso_asce; /* 0x0390 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
__u32 lpp; /* 0x0390 */
__u32 current_pid; /* 0x0394 */
__u32 lpp; /* 0x0398 */
__u32 current_pid; /* 0x039c */
/* SMP info area */
__u32 cpu_nr; /* 0x0398 */
__u32 softirq_pending; /* 0x039c */
__u32 preempt_count; /* 0x03a0 */
__u32 spinlock_lockval; /* 0x03a4 */
__u32 spinlock_index; /* 0x03a8 */
__u32 fpu_flags; /* 0x03ac */
__u64 percpu_offset; /* 0x03b0 */
__u64 vdso_per_cpu_data; /* 0x03b8 */
__u64 machine_flags; /* 0x03c0 */
__u64 gmap; /* 0x03c8 */
__u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
__u32 cpu_nr; /* 0x03a0 */
__u32 softirq_pending; /* 0x03a4 */
__u32 preempt_count; /* 0x03a8 */
__u32 spinlock_lockval; /* 0x03ac */
__u32 spinlock_index; /* 0x03b0 */
__u32 fpu_flags; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
__u64 vdso_per_cpu_data; /* 0x03c0 */
__u64 machine_flags; /* 0x03c8 */
__u64 gmap; /* 0x03d0 */
__u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */
/* br %r1 trampoline */
__u16 br_r1_trampoline; /* 0x0400 */

View File

@ -196,23 +196,30 @@ static void cf_diag_perf_event_destroy(struct perf_event *event)
*/
static int __hw_perf_event_init(struct perf_event *event)
{
struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
struct perf_event_attr *attr = &event->attr;
struct cpu_cf_events *cpuhw;
enum cpumf_ctr_set i;
int err = 0;
debug_sprintf_event(cf_diag_dbg, 5,
"%s event %p cpu %d authorized %#x\n", __func__,
event, event->cpu, cpuhw->info.auth_ctl);
debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
event, event->cpu);
event->hw.config = attr->config;
event->hw.config_base = 0;
local64_set(&event->count, 0);
/* Add all authorized counter sets to config_base */
/* Add all authorized counter sets to config_base. The
* the hardware init function is either called per-cpu or just once
* for all CPUS (event->cpu == -1). This depends on the whether
* counting is started for all CPUs or on a per workload base where
* the perf event moves from one CPU to another CPU.
* Checking the authorization on any CPU is fine as the hardware
* applies the same authorization settings to all CPUs.
*/
cpuhw = &get_cpu_var(cpu_cf_events);
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
event->hw.config_base |= cpumf_ctr_ctl[i];
put_cpu_var(cpu_cf_events);
/* No authorized counter sets, nothing to count/sample */
if (!event->hw.config_base) {

View File

@ -266,7 +266,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
lc->user_timer = lc->system_timer = lc->steal_timer = 0;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,

View File

@ -124,7 +124,7 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
*/
static int do_account_vtime(struct task_struct *tsk)
{
u64 timer, clock, user, guest, system, hardirq, softirq, steal;
u64 timer, clock, user, guest, system, hardirq, softirq;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@ -182,12 +182,6 @@ static int do_account_vtime(struct task_struct *tsk)
if (softirq)
account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
S390_lowcore.steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
}
return virt_timer_forward(user + guest + system + hardirq + softirq);
}
@ -213,8 +207,19 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
u64 steal, avg_steal;
if (do_account_vtime(tsk))
virt_timer_expire();
steal = S390_lowcore.steal_timer;
avg_steal = S390_lowcore.avg_steal_timer / 2;
if ((s64) steal > 0) {
S390_lowcore.steal_timer = 0;
account_steal_time(steal);
avg_steal += steal;
}
S390_lowcore.avg_steal_timer = avg_steal;
}
/*

View File

@ -24,6 +24,7 @@
#include <asm/crw.h>
#include <asm/isc.h>
#include <asm/ebcdic.h>
#include <asm/ap.h>
#include "css.h"
#include "cio.h"
@ -586,6 +587,15 @@ static void chsc_process_sei_scm_avail(struct chsc_sei_nt0_area *sei_area)
" failed (rc=%d).\n", ret);
}
static void chsc_process_sei_ap_cfg_chg(struct chsc_sei_nt0_area *sei_area)
{
CIO_CRW_EVENT(3, "chsc: ap config changed\n");
if (sei_area->rs != 5)
return;
ap_bus_cfg_chg();
}
static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area)
{
switch (sei_area->cc) {
@ -612,6 +622,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area)
case 2: /* i/o resource accessibility */
chsc_process_sei_res_acc(sei_area);
break;
case 3: /* ap config changed */
chsc_process_sei_ap_cfg_chg(sei_area);
break;
case 7: /* channel-path-availability information */
chsc_process_sei_chp_avail(sei_area);
break;

View File

@ -72,20 +72,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
{
struct vfio_ccw_private *private;
struct irb *irb;
bool is_final;
private = container_of(work, struct vfio_ccw_private, io_work);
irb = &private->irb;
is_final = !(scsw_actl(&irb->scsw) &
(SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
if (scsw_is_solicited(&irb->scsw)) {
cp_update_scsw(&private->cp, &irb->scsw);
cp_free(&private->cp);
if (is_final)
cp_free(&private->cp);
}
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
if (private->mdev)
if (private->mdev && is_final)
private->state = VFIO_CCW_STATE_IDLE;
}

View File

@ -810,11 +810,18 @@ static int ap_device_remove(struct device *dev)
struct ap_device *ap_dev = to_ap_dev(dev);
struct ap_driver *ap_drv = ap_dev->drv;
/* prepare ap queue device removal */
if (is_queue_dev(dev))
ap_queue_remove(to_ap_queue(dev));
ap_queue_prepare_remove(to_ap_queue(dev));
/* driver's chance to clean up gracefully */
if (ap_drv->remove)
ap_drv->remove(ap_dev);
/* now do the ap queue device remove */
if (is_queue_dev(dev))
ap_queue_remove(to_ap_queue(dev));
/* Remove queue/card from list of active queues/cards */
spin_lock_bh(&ap_list_lock);
if (is_card_dev(dev))
@ -860,6 +867,16 @@ void ap_bus_force_rescan(void)
}
EXPORT_SYMBOL(ap_bus_force_rescan);
/*
* A config change has happened, force an ap bus rescan.
*/
void ap_bus_cfg_chg(void)
{
AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__);
ap_bus_force_rescan();
}
/*
* hex2bitmap() - parse hex mask string and set bitmap.
* Valid strings are "0x012345678" with at least one valid hex number.

View File

@ -91,6 +91,7 @@ enum ap_state {
AP_STATE_WORKING,
AP_STATE_QUEUE_FULL,
AP_STATE_SUSPEND_WAIT,
AP_STATE_REMOVE, /* about to be removed from driver */
AP_STATE_UNBOUND, /* momentary not bound to a driver */
AP_STATE_BORKED, /* broken */
NR_AP_STATES
@ -252,6 +253,7 @@ void ap_bus_force_rescan(void);
void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type);
void ap_queue_prepare_remove(struct ap_queue *aq);
void ap_queue_remove(struct ap_queue *aq);
void ap_queue_suspend(struct ap_device *ap_dev);
void ap_queue_resume(struct ap_device *ap_dev);

View File

@ -420,6 +420,10 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
[AP_EVENT_POLL] = ap_sm_suspend_read,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
},
[AP_STATE_REMOVE] = {
[AP_EVENT_POLL] = ap_sm_nop,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
},
[AP_STATE_UNBOUND] = {
[AP_EVENT_POLL] = ap_sm_nop,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
@ -740,18 +744,31 @@ void ap_flush_queue(struct ap_queue *aq)
}
EXPORT_SYMBOL(ap_flush_queue);
void ap_queue_prepare_remove(struct ap_queue *aq)
{
spin_lock_bh(&aq->lock);
/* flush queue */
__ap_flush_queue(aq);
/* set REMOVE state to prevent new messages are queued in */
aq->state = AP_STATE_REMOVE;
del_timer_sync(&aq->timeout);
spin_unlock_bh(&aq->lock);
}
void ap_queue_remove(struct ap_queue *aq)
{
ap_flush_queue(aq);
del_timer_sync(&aq->timeout);
/* reset with zero, also clears irq registration */
/*
* all messages have been flushed and the state is
* AP_STATE_REMOVE. Now reset with zero which also
* clears the irq registration and move the state
* to AP_STATE_UNBOUND to signal that this queue
* is not used by any driver currently.
*/
spin_lock_bh(&aq->lock);
ap_zapq(aq->qid);
aq->state = AP_STATE_UNBOUND;
spin_unlock_bh(&aq->lock);
}
EXPORT_SYMBOL(ap_queue_remove);
void ap_queue_reinit_state(struct ap_queue *aq)
{
@ -760,4 +777,3 @@ void ap_queue_reinit_state(struct ap_queue *aq)
ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
spin_unlock_bh(&aq->lock);
}
EXPORT_SYMBOL(ap_queue_reinit_state);

View File

@ -586,6 +586,7 @@ static inline bool zcrypt_check_queue(struct ap_perms *perms, int queue)
static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
struct module **pmod,
unsigned int weight)
{
if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner))
@ -595,15 +596,15 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
atomic_add(weight, &zc->load);
atomic_add(weight, &zq->load);
zq->request_count++;
*pmod = zq->queue->ap_dev.drv->driver.owner;
return zq;
}
static inline void zcrypt_drop_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
struct module *mod,
unsigned int weight)
{
struct module *mod = zq->queue->ap_dev.drv->driver.owner;
zq->request_count--;
atomic_sub(weight, &zc->load);
atomic_sub(weight, &zq->load);
@ -653,6 +654,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
@ -706,7 +708,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -718,7 +720,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo(pref_zq, mex);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -735,6 +737,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(crt, TP_ICARSACRT);
@ -788,7 +791,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -800,7 +803,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -819,6 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
unsigned int func_code;
unsigned short *domain;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
@ -865,7 +869,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -881,7 +885,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -932,6 +936,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
unsigned int func_code;
struct ap_message ap_msg;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
@ -1000,7 +1005,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -1012,7 +1017,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out_free:
@ -1033,6 +1038,7 @@ static long zcrypt_rng(char *buffer)
struct ap_message ap_msg;
unsigned int domain;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB);
@ -1064,7 +1070,7 @@ static long zcrypt_rng(char *buffer)
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -1076,7 +1082,7 @@ static long zcrypt_rng(char *buffer)
rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out: