Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "Main changes:

   - scheduler side full-dynticks (user-space execution is undisturbed
     and receives no timer IRQs) preparation changes that convert the
     cputime accounting code to be full-dynticks ready, from Frederic
     Weisbecker.

   - Initial sched.h split-up changes, by Clark Williams

   - select_idle_sibling() performance improvement by Mike Galbraith:

        " 1 tbench pair (worst case) in a 10 core + SMT package:

          pre   15.22 MB/sec 1 procs
          post 252.01 MB/sec 1 procs "

  - sched_rr_get_interval() ABI fix/change.  We think this detail is not
    used by apps (so it's not an ABI in practice), but lets keep it
    under observation.

  - misc RT scheduling cleanups, optimizations"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>
  cputime: Remove irqsave from seqlock readers
  sched, powerpc: Fix sched.h split-up build failure
  cputime: Restore CPU_ACCOUNTING config defaults for PPC64
  sched/rt: Move rt specific bits into new header file
  sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice
  sched: Move sched.h sysctl bits into separate header
  sched: Fix signedness bug in yield_to()
  sched: Fix select_idle_sibling() bouncing cow syndrome
  sched/rt: Further simplify pick_rt_task()
  sched/rt: Do not account zero delta_exec in update_curr_rt()
  cputime: Safely read cputime of full dynticks CPUs
  kvm: Prepare to add generic guest entry/exit callbacks
  cputime: Use accessors to read task cputime stats
  cputime: Allow dynamic switch between tick/virtual based cputime accounting
  cputime: Generic on-demand virtual cputime accounting
  cputime: Move default nsecs_to_cputime() to jiffies based cputime file
  cputime: Librarize per nsecs resolution cputime definitions
  cputime: Avoid multiplication overflow on utime scaling
  context_tracking: Export context state for generic vtime
  ...

Fix up conflict in kernel/context_tracking.c due to comment additions.
This commit is contained in:
Linus Torvalds 2013-02-19 18:19:48 -08:00
commit d652e1eb8e
83 changed files with 1106 additions and 508 deletions

View File

@ -1139,6 +1139,7 @@ struct rusage32 {
SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
{
struct rusage32 r;
cputime_t utime, stime;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
return -EINVAL;
@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
memset(&r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
jiffies_to_timeval32(current->utime, &r.ru_utime);
jiffies_to_timeval32(current->stime, &r.ru_stime);
task_cputime(current, &utime, &stime);
jiffies_to_timeval32(utime, &r.ru_utime);
jiffies_to_timeval32(stime, &r.ru_stime);
r.ru_minflt = current->min_flt;
r.ru_majflt = current->maj_flt;
break;

View File

@ -11,99 +11,19 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
* If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
* Otherwise we measure cpu time in jiffies using the generic definitions.
*/
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#include <asm-generic/cputime.h>
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
#else
#include <linux/time.h>
#include <linux/jiffies.h>
#include <asm/processor.h>
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
/*
* Convert cputime <-> jiffies (HZ)
*/
#define cputime_to_jiffies(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define jiffies_to_cputime(__jif) \
(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) \
(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> microseconds
*/
#define cputime_to_usecs(__ct) \
((__force u64)(__ct) / NSEC_PER_USEC)
#define usecs_to_cputime(__usecs) \
(__force cputime_t)((__usecs) * NSEC_PER_USEC)
#define usecs_to_cputime64(__usecs) \
(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
/*
* Convert cputime <-> seconds
*/
#define cputime_to_secs(__ct) \
((__force u64)(__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) \
(__force cputime_t)((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
return (__force cputime_t) ret;
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
}
/*
* Convert cputime <-> timeval (msec)
*/
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
return (__force cputime_t) ret;
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
#define cputime_to_clock_t(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) \
(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)
# include <asm/processor.h>
# include <asm-generic/cputime_nsecs.h>
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */

View File

@ -31,7 +31,7 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 ac_stamp;
__u64 ac_leave;
__u64 ac_stime;
@ -69,7 +69,7 @@ struct thread_info {
#define task_stack_page(tsk) ((void *)(tsk))
#define __HAVE_THREAD_FUNCTIONS
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define setup_thread_stack(p, org) \
*task_thread_info(p) = *task_thread_info(org); \
task_thread_info(p)->ac_stime = 0; \

View File

@ -1,5 +1,5 @@
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define XEN_ACCOUNT_GET_STAMP \
MOV_FROM_ITC(pUStk, p6, r20, r2);

View File

@ -41,7 +41,7 @@ void foo(void)
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));

View File

@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
#endif
.global __paravirt_work_processed_syscall;
__paravirt_work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
adds r2=PT(LOADRS)+16,r12
MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) st1 [r15]=r17 // M2|3
#else
(pUStk) st1 [r14]=r17 // M2|3
@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
COVER // B add current frame into dirty partition & set cr.ifs
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
mov r19=ar.bsp // M2 get new backing store pointer
st8 [r14]=r22 // M save time at leave
mov f10=f0 // F clear f10
@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
.pred.rel.mutex pUStk,pKStk
MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave
@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
#endif
;;
@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
// mib : mov add br -> mib : ld8 add br
// bbb_ : br nop cover;; mbb_ : mov br cover;;

View File

@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
#else
nop.m 0
@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2

View File

@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
sched_clock = ia64_native_sched_clock
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
GLOBAL_ENTRY(cycle_to_cputime)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
END(cycle_to_cputime)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU

View File

@ -784,7 +784,7 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
;;
mov b6=r30 // I0 setup syscall handler branch reg early
#else
@ -801,7 +801,7 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting
#else
mov b6=r30 // I0 setup syscall handler branch reg early
@ -817,7 +817,7 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
// mov.m r30=ar.itc is called in advance, and r13 is current
add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
DBG_FAULT(16)
FAULT(16)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
/*
* There is no particular reason for this code to be here, other than
* that there happens to be space here that would go unused otherwise.

View File

@ -4,7 +4,7 @@
#include "entry.h"
#include "paravirt_inst.h"
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/* read ar.itc in advance, and use it before leaving bank 0 */
#define ACCOUNT_GET_STAMP \
(pUStk) mov.m r20=ar.itc;

View File

@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/kernel_stat.h>
@ -136,13 +136,14 @@ void vtime_account_system(struct task_struct *tsk)
account_system_time(tsk, 0, delta, delta);
}
EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
account_idle_time(vtime_delta(tsk));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static irqreturn_t
timer_interrupt (int irq, void *dev_id)

View File

@ -1,6 +1,6 @@
CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=256
CONFIG_EXPERIMENTAL=y

View File

@ -1,6 +1,6 @@
CONFIG_PPC64=y
CONFIG_PPC_BOOK3E_64=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y

View File

@ -1,6 +1,6 @@
CONFIG_PPC64=y
CONFIG_ALTIVEC=y
# CONFIG_VIRT_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_EXPERIMENTAL=y

View File

@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
* If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
* the same units as the timebase. Otherwise we measure cpu time
* in jiffies using the generic definitions.
*/
@ -16,7 +16,7 @@
#ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm-generic/cputime.h>
#ifdef __KERNEL__
static inline void setup_cputime_one_jiffy(void) { }
@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */

View File

@ -145,7 +145,7 @@ struct dtl_entry {
extern struct kmem_cache *dtl_cache;
/*
* When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
* When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
* reading from the dispatch trace log. If other code wants to consume
* DTL entries, it can set this pointer to a function that will get
* called once for each DTL entry that gets processed.

View File

@ -24,7 +24,7 @@
* user_time and system_time fields in the paca.
*/
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ra, rb)
#define ACCOUNT_STOLEN_TIME
@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_PPC_SPLPAR */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Macros for storing registers into and loading registers from

View File

@ -94,7 +94,7 @@ system_call_common:
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
/* if from user, see if there are any DTL entries to process */
@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD
33:
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
/*
* A syscall should always be called with interrupts enabled

View File

@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factors for converting from cputime_t (timebase ticks) to
* jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk)
if (stolen)
account_steal_time(stolen);
}
EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
@ -377,7 +378,7 @@ void vtime_account_user(struct task_struct *tsk)
account_user_time(tsk, utime, utimescaled);
}
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#define calc_cputime_factors()
#endif

View File

@ -24,6 +24,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>

View File

@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
*/
static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct dtl_ring {
u64 write_index;
struct dtl_entry *write_ptr;
@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
return per_cpu(dtl_rings, dtl->cpu).write_index;
}
#else /* CONFIG_VIRT_CPU_ACCOUNTING */
#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_start(struct dtl *dtl)
{
@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
{
return lppaca_of(dtl->cpu).dtl_idx;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int dtl_enable(struct dtl *dtl)
{

View File

@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
struct kmem_cache *dtl_cache;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Allocate space for the dispatch trace log for all possible cpus
* and register the buffers with the hypervisor. This is used for
@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
return 0;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static inline int alloc_dispatch_logs(void)
{
return 0;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
static int alloc_dispatch_log_kmem_cache(void)
{

View File

@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
void vtime_account(struct task_struct *tsk)
void vtime_account_irq_enter(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
virt_timer_forward(system);
}
EXPORT_SYMBOL_GPL(vtime_account);
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account")));
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);
void __kprobes vtime_stop_cpu(void)

View File

@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
cputime_t stime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
task_cputime(current, NULL, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
last_jiffies = jiffies;
last_stime = current->stime;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
idle_percentage = current->stime - last_stime;
idle_percentage = stime - last_stime;
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
if (apm_info.forbid_idle)
use_apm_idle = 0;
last_jiffies = jiffies;
last_stime = current->stime;
}
last_jiffies = jiffies;
last_stime = stime;
bucket = IDLE_LEAKY_MAX;
while (!need_resched()) {

View File

@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/sysctl.h>
#include "blk.h"

View File

@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/mISDNif.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include "core.h"
static u_int *debug;
@ -202,6 +203,9 @@ static int
mISDNStackd(void *data)
{
struct mISDNstack *st = data;
#ifdef MISDN_MSG_STATS
cputime_t utime, stime;
#endif
int err = 0;
sigfillset(&current->blocked);
@ -303,9 +307,10 @@ mISDNStackd(void *data)
"msg %d sleep %d stopped\n",
dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
st->stopped_cnt);
task_cputime(st->thread, &utime, &stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
dev_name(&st->dev->dev), utime, stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);

View File

@ -33,7 +33,7 @@
#include <linux/of_gpio.h>
#include <linux/pm_runtime.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/ioport.h>

View File

@ -15,7 +15,7 @@
*/
#include "csr_wifi_hip_unifi.h"
#include "unifi_priv.h"
#include <linux/sched/rt.h>
/*
* ---------------------------------------------------------------------------

View File

@ -15,7 +15,7 @@
#include "unifi_priv.h"
#include "csr_wifi_hip_unifi.h"
#include "csr_wifi_hip_conversions.h"
#include <linux/sched/rt.h>

View File

@ -15,6 +15,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/fs.h>

View File

@ -33,6 +33,7 @@
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_to_timeval(p->utime, &prstatus->pr_utime);
cputime_to_timeval(p->stime, &prstatus->pr_stime);
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);

View File

@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_to_timeval(p->utime, &prstatus->pr_utime);
cputime_to_timeval(p->stime, &prstatus->pr_stime);
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);

View File

@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += t->gtime;
gtime += task_gtime(t);
t = next_thread(t);
} while (t != task);
@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt = task->min_flt;
maj_flt = task->maj_flt;
task_cputime_adjusted(task, &utime, &stime);
gtime = task->gtime;
gtime = task_gtime(task);
}
/* scale priority and nice values from timeslices to -20..20 */

View File

@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
#include <asm/uaccess.h>

View File

@ -4,66 +4,12 @@
#include <linux/time.h>
#include <linux/jiffies.h>
typedef unsigned long __nocast cputime_t;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
# include <asm-generic/cputime_jiffies.h>
#endif
#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
typedef u64 __nocast cputime64_t;
#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
#define nsecs_to_cputime64(__ct) \
jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
/*
* Convert cputime to microseconds and back.
*/
#define cputime_to_usecs(__ct) \
jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
/*
* Convert cputime to seconds and back.
*/
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
/*
* Convert cputime to timespec and back.
*/
#define timespec_to_cputime(__val) \
jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to timeval and back.
*/
#define timeval_to_cputime(__val) \
jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to clock and back.
*/
#define cputime_to_clock_t(__ct) \
jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
jiffies_to_cputime(clock_t_to_jiffies(__x))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# include <asm-generic/cputime_nsecs.h>
#endif
#endif

View File

@ -0,0 +1,72 @@
#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
#define _ASM_GENERIC_CPUTIME_JIFFIES_H
typedef unsigned long __nocast cputime_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
typedef u64 __nocast cputime64_t;
#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
/*
* Convert nanoseconds to cputime
*/
#define nsecs_to_cputime64(__nsec) \
jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
#define nsecs_to_cputime(__nsec) \
jiffies_to_cputime(nsecs_to_jiffies(__nsec))
/*
* Convert cputime to microseconds and back.
*/
#define cputime_to_usecs(__ct) \
jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
/*
* Convert cputime to seconds and back.
*/
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
/*
* Convert cputime to timespec and back.
*/
#define timespec_to_cputime(__val) \
jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to timeval and back.
*/
#define timeval_to_cputime(__val) \
jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
/*
* Convert cputime to clock and back.
*/
#define cputime_to_clock_t(__ct) \
jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
jiffies_to_cputime(clock_t_to_jiffies(__x))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
#endif

View File

@ -0,0 +1,104 @@
/*
* Definitions for measuring cputime in nsecs resolution.
*
* Based on <arch/ia64/include/asm/cputime.h>
*
* Copyright (C) 2007 FUJITSU LIMITED
* Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
#define _ASM_GENERIC_CPUTIME_NSECS_H
typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
/*
* Convert cputime <-> jiffies (HZ)
*/
#define cputime_to_jiffies(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__jif) \
(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) \
(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
/*
* Convert cputime <-> nanoseconds
*/
#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
/*
* Convert cputime <-> microseconds
*/
#define cputime_to_usecs(__ct) \
((__force u64)(__ct) / NSEC_PER_USEC)
#define usecs_to_cputime(__usecs) \
(__force cputime_t)((__usecs) * NSEC_PER_USEC)
#define usecs_to_cputime64(__usecs) \
(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
/*
* Convert cputime <-> seconds
*/
#define cputime_to_secs(__ct) \
((__force u64)(__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) \
(__force cputime_t)((__secs) * NSEC_PER_SEC)
/*
* Convert cputime <-> timespec (nsec)
*/
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
return (__force cputime_t) ret;
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
}
/*
* Convert cputime <-> timeval (msec)
*/
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
return (__force cputime_t) ret;
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}
/*
* Convert cputime <-> clock (USER_HZ)
*/
#define cputime_to_clock_t(__ct) \
((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) \
(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
/*
* Convert cputime64 to clock.
*/
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)
#endif

View File

@ -3,12 +3,40 @@
#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>
#include <linux/percpu.h>
struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum {
IN_KERNEL = 0,
IN_USER,
} state;
};
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_in_user(void)
{
return __this_cpu_read(context_tracking.state) == IN_USER;
}
static inline bool context_tracking_active(void)
{
return __this_cpu_read(context_tracking.active);
}
extern void user_enter(void);
extern void user_exit(void);
extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void context_tracking_task_switch(struct task_struct *prev,

View File

@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
vtime_account_irq_enter(current); \
account_irq_enter_time(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
@ -169,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
vtime_account_irq_exit(current); \
account_irq_exit_time(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)

View File

@ -10,7 +10,9 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <net/net_namespace.h>
#include <linux/sched/rt.h>
#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
@ -141,6 +143,15 @@ extern struct task_group root_task_group;
# define INIT_PERF_EVENTS(tsk)
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
.vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS,
#else
# define INIT_VTIME(tsk)
#endif
#define INIT_TASK_COMM "swapper"
/*
@ -210,6 +221,7 @@ extern struct task_group root_task_group;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ \
INIT_VTIME(tsk) \
}

View File

@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);

View File

@ -22,6 +22,7 @@
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <linux/err.h>
#include <linux/irqflags.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
}
#endif /* CONFIG_IOMMU_API */
static inline void kvm_guest_enter(void)
static inline void __guest_enter(void)
{
BUG_ON(preemptible());
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
vtime_account_system(current);
current->flags |= PF_VCPU;
}
static inline void __guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system(current);
current->flags &= ~PF_VCPU;
}
#ifdef CONFIG_CONTEXT_TRACKING
extern void guest_enter(void);
extern void guest_exit(void);
#else /* !CONFIG_CONTEXT_TRACKING */
static inline void guest_enter(void)
{
__guest_enter();
}
static inline void guest_exit(void)
{
__guest_exit();
}
#endif /* !CONFIG_CONTEXT_TRACKING */
static inline void kvm_guest_enter(void)
{
unsigned long flags;
BUG_ON(preemptible());
local_irq_save(flags);
guest_enter();
local_irq_restore(flags);
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspase from rcu point of view. In
@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
static inline void kvm_guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
vtime_account_system_irqsafe(current);
current->flags &= ~PF_VCPU;
unsigned long flags;
local_irq_save(flags);
guest_exit();
local_irq_restore(flags);
}
/*

View File

@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
}
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
#endif
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
struct nsproxy;
struct user_namespace;
/*
* Default maximum number of active map areas, this limits the number of vmas
* per mm struct. Users can overwrite this number by sysctl but there is a
* problem.
*
* When a program's coredump is generated as ELF format, a section is created
* per a vma. In ELF, the number of sections is represented in unsigned short.
* This means the number of sections should be smaller than 65535 at coredump.
* Because the kernel adds some informative sections to a image of program at
* generating coredump, we need some margin. The number of extra sections is
* 1-3 now and depends on arch. We use "5" as safe margin, here.
*/
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
extern int sysctl_max_map_count;
#include <linux/aio.h>
#ifdef CONFIG_MMU
@ -1194,6 +1164,7 @@ struct sched_entity {
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
#endif
/*
* Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
* removed when useful for applications beyond shares distribution (e.g.
@ -1208,6 +1179,7 @@ struct sched_entity {
struct sched_rt_entity {
struct list_head run_list;
unsigned long timeout;
unsigned long watchdog_stamp;
unsigned int time_slice;
struct sched_rt_entity *back;
@ -1220,11 +1192,6 @@ struct sched_rt_entity {
#endif
};
/*
* default timeslice is 100 msecs (used only for SCHED_RR tasks).
* Timeslices get refilled after they expire.
*/
#define RR_TIMESLICE (100 * HZ / 1000)
struct rcu_node;
@ -1367,6 +1334,15 @@ struct task_struct {
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock;
unsigned long long vtime_snap;
enum {
VTIME_SLEEPING = 0,
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
}
#endif
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
* tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
* values are inverted: lower p->prio value means higher priority.
*
* The MAX_USER_RT_PRIO value allows the actual maximum
* RT priority to be separate from the value exported to
* user-space. This allows kernel threads to set their
* priority to a value higher than any user task. Note:
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
*/
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static inline int rt_prio(int prio)
{
if (unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}
static inline int rt_task(struct task_struct *p)
{
return rt_prio(p->prio);
}
static inline struct pid *task_pid(struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
extern void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled);
extern cputime_t task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
{
if (utime)
*utime = t->utime;
if (stime)
*stime = t->stime;
}
static inline void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled,
cputime_t *stimescaled)
{
if (utimescaled)
*utimescaled = t->utimescaled;
if (stimescaled)
*stimescaled = t->stimescaled;
}
static inline cputime_t task_gtime(struct task_struct *t)
{
return t->gtime;
}
#endif
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
static inline void wake_up_idle_cpu(int cpu) { }
#endif
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END,
};
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
static inline unsigned int get_sysctl_timer_migration(void)
{
return sysctl_timer_migration;
}
#else
static inline unsigned int get_sysctl_timer_migration(void)
{
return 1;
}
#endif
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif
#ifdef CONFIG_CFS_BANDWIDTH
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
return tsk->pi_blocked_on != NULL;
}
#else
static inline int rt_mutex_getprio(struct task_struct *p)
{
return p->normal_prio;
}
# define rt_mutex_adjust_pi(p) do { } while (0)
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
return false;
}
#endif
extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
extern void normalize_rt_tasks(void);
#ifdef CONFIG_CGROUP_SCHED
extern struct task_group root_task_group;

58
include/linux/sched/rt.h Normal file
View File

@ -0,0 +1,58 @@
#ifndef _SCHED_RT_H
#define _SCHED_RT_H
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
* tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
* values are inverted: lower p->prio value means higher priority.
*
* The MAX_USER_RT_PRIO value allows the actual maximum
* RT priority to be separate from the value exported to
* user-space. This allows kernel threads to set their
* priority to a value higher than any user task. Note:
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
*/
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
static inline int rt_prio(int prio)
{
if (unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}
static inline int rt_task(struct task_struct *p)
{
return rt_prio(p->prio);
}
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
return tsk->pi_blocked_on != NULL;
}
#else
static inline int rt_mutex_getprio(struct task_struct *p)
{
return p->normal_prio;
}
# define rt_mutex_adjust_pi(p) do { } while (0)
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
return false;
}
#endif
extern void normalize_rt_tasks(void);
#endif /* _SCHED_RT_H */

View File

@ -0,0 +1,110 @@
#ifndef _SCHED_SYSCTL_H
#define _SCHED_SYSCTL_H
#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
#endif
/*
* Default maximum number of active map areas, this limits the number of vmas
* per mm struct. Users can overwrite this number by sysctl but there is a
* problem.
*
* When a program's coredump is generated as ELF format, a section is created
* per a vma. In ELF, the number of sections is represented in unsigned short.
* This means the number of sections should be smaller than 65535 at coredump.
* Because the kernel adds some informative sections to a image of program at
* generating coredump, we need some margin. The number of extra sections is
* 1-3 now and depends on arch. We use "5" as safe margin, here.
*/
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
extern int sysctl_max_map_count;
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END,
};
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
static inline unsigned int get_sysctl_timer_migration(void)
{
return sysctl_timer_migration;
}
#else
static inline unsigned int get_sysctl_timer_migration(void)
{
return 1;
}
#endif
/*
* control realtime throttling:
*
* /proc/sys/kernel/sched_rt_period_us
* /proc/sys/kernel/sched_rt_runtime_us
*/
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
#ifdef CONFIG_CFS_BANDWIDTH
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
#endif
/*
* default timeslice is 100 msecs (used only for SCHED_RR tasks).
* Timeslices get refilled after they expire.
*/
#define RR_TIMESLICE (100 * HZ / 1000)
extern int sched_rr_timeslice;
extern int sched_rr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif /* _SCHED_SYSCTL_H */

View File

@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
#ifdef CONFIG_TASK_XACCT
extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
extern void acct_update_integrals(struct task_struct *tsk);
extern void acct_account_cputime(struct task_struct *tsk);
extern void acct_clear_integrals(struct task_struct *tsk);
#else
static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{}
static inline void acct_update_integrals(struct task_struct *tsk)
{}
static inline void acct_account_cputime(struct task_struct *tsk)
{}
static inline void acct_clear_integrals(struct task_struct *tsk)
{}
#endif /* CONFIG_TASK_XACCT */

View File

@ -6,15 +6,46 @@ struct task_struct;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_system_irqsafe(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
extern void vtime_account(struct task_struct *tsk);
#else
extern void vtime_account_irq_enter(struct task_struct *tsk);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline bool vtime_accounting_enabled(void) { return true; }
#endif
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { }
static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
static inline bool vtime_accounting_enabled(void) { return false; }
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void arch_vtime_task_switch(struct task_struct *tsk);
extern void vtime_account_irq_exit(struct task_struct *tsk);
extern bool vtime_accounting_enabled(void);
extern void vtime_user_enter(struct task_struct *tsk);
static inline void vtime_user_exit(struct task_struct *tsk)
{
vtime_account_user(tsk);
}
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk);
#else
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
vtime_account_system(tsk);
}
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
static inline void vtime_guest_enter(struct task_struct *tsk) { }
static inline void vtime_guest_exit(struct task_struct *tsk) { }
static inline void vtime_init_idle(struct task_struct *tsk) { }
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
static inline void irqtime_account_irq(struct task_struct *tsk) { }
#endif
static inline void vtime_account_irq_enter(struct task_struct *tsk)
static inline void account_irq_enter_time(struct task_struct *tsk)
{
/*
* Hardirq can interrupt idle task anytime. So we need vtime_account()
* that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
* Softirq can also interrupt idle task directly if it calls
* local_bh_enable(). Such case probably don't exist but we never know.
* Ksoftirqd is not concerned because idle time is flushed on context
* switch. Softirqs in the end of hardirqs are also not a problem because
* the idle time is flushed on hardirq time already.
*/
vtime_account(tsk);
vtime_account_irq_enter(tsk);
irqtime_account_irq(tsk);
}
static inline void vtime_account_irq_exit(struct task_struct *tsk)
static inline void account_irq_exit_time(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
vtime_account_system(tsk);
vtime_account_irq_exit(tsk);
irqtime_account_irq(tsk);
}

View File

@ -322,10 +322,13 @@ source "kernel/time/Kconfig"
menu "CPU/Task time and stats accounting"
config VIRT_CPU_ACCOUNTING
bool
choice
prompt "Cputime accounting"
default TICK_CPU_ACCOUNTING if !PPC64
default VIRT_CPU_ACCOUNTING if PPC64
default VIRT_CPU_ACCOUNTING_NATIVE if PPC64
# Kind of a stub config for the pure tick based cputime accounting
config TICK_CPU_ACCOUNTING
@ -338,9 +341,10 @@ config TICK_CPU_ACCOUNTING
If unsure, say Y.
config VIRT_CPU_ACCOUNTING
config VIRT_CPU_ACCOUNTING_NATIVE
bool "Deterministic task and CPU time accounting"
depends on HAVE_VIRT_CPU_ACCOUNTING
select VIRT_CPU_ACCOUNTING
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
@ -350,6 +354,23 @@ config VIRT_CPU_ACCOUNTING
this also enables accounting of stolen time on logically-partitioned
systems.
config VIRT_CPU_ACCOUNTING_GEN
bool "Full dynticks CPU time accounting"
depends on HAVE_CONTEXT_TRACKING && 64BIT
select VIRT_CPU_ACCOUNTING
select CONTEXT_TRACKING
help
Select this option to enable task and CPU time accounting on full
dynticks systems. This accounting is implemented by watching every
kernel-user boundaries using the context tracking subsystem.
The accounting is thus performed at the expense of some significant
overhead.
For now this is only useful if you are working on the full
dynticks subsystem development.
If unsure, say N.
config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
depends on HAVE_IRQ_TIME_ACCOUNTING

View File

@ -2,6 +2,8 @@
#include <linux/export.h>
#include <linux/mqueue.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>

View File

@ -566,6 +566,7 @@ out:
void acct_collect(long exitcode, int group_dead)
{
struct pacct_struct *pacct = &current->signal->pacct;
cputime_t utime, stime;
unsigned long vsize = 0;
if (group_dead && current->mm) {
@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
pacct->ac_utime += current->utime;
pacct->ac_stime += current->stime;
task_cputime(current, &utime, &stime);
pacct->ac_utime += utime;
pacct->ac_stime += stime;
pacct->ac_minflt += current->min_flt;
pacct->ac_majflt += current->maj_flt;
spin_unlock_irq(&current->sighand->siglock);

View File

@ -15,26 +15,13 @@
*/
#include <linux/context_tracking.h>
#include <linux/kvm_host.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/export.h>
struct context_tracking {
/*
* When active is false, probes are unset in order
* to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
bool active;
enum {
IN_KERNEL = 0,
IN_USER,
} state;
};
static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
.active = true,
#endif
@ -70,7 +57,6 @@ void user_enter(void)
local_irq_save(flags);
if (__this_cpu_read(context_tracking.active) &&
__this_cpu_read(context_tracking.state) != IN_USER) {
__this_cpu_write(context_tracking.state, IN_USER);
/*
* At this stage, only low level arch entry code remains and
* then we'll run in userspace. We can assume there won't be
@ -78,7 +64,9 @@ void user_enter(void)
* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
* on the tick.
*/
vtime_user_enter(current);
rcu_user_enter();
__this_cpu_write(context_tracking.state, IN_USER);
}
local_irq_restore(flags);
}
@ -104,16 +92,35 @@ void user_exit(void)
local_irq_save(flags);
if (__this_cpu_read(context_tracking.state) == IN_USER) {
__this_cpu_write(context_tracking.state, IN_KERNEL);
/*
* We are going to run code that may use RCU. Inform
* RCU core about that (ie: we may need the tick again).
*/
rcu_user_exit();
vtime_user_exit(current);
__this_cpu_write(context_tracking.state, IN_KERNEL);
}
local_irq_restore(flags);
}
void guest_enter(void)
{
if (vtime_accounting_enabled())
vtime_guest_enter(current);
else
__guest_enter();
}
EXPORT_SYMBOL_GPL(guest_enter);
void guest_exit(void)
{
if (vtime_accounting_enabled())
vtime_guest_exit(current);
else
__guest_exit();
}
EXPORT_SYMBOL_GPL(guest_exit);
/**
* context_tracking_task_switch - context switch the syscall callbacks

View File

@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
static inline void check_for_tasks(int cpu)
{
struct task_struct *p;
cputime_t utime, stime;
write_lock_irq(&tasklist_lock);
for_each_process(p) {
task_cputime(p, &utime, &stime);
if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
(p->utime || p->stime))
(utime || stime))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
"(state = %ld, flags = %x)\n",
p->comm, task_pid_nr(p), cpu,

View File

@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
unsigned long long t2, t3;
unsigned long flags;
struct timespec ts;
cputime_t utime, stime, stimescaled, utimescaled;
/* Though tsk->delays accessed later, early exit avoids
* unnecessary returning of other data
@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
goto done;
tmp = (s64)d->cpu_run_real_total;
cputime_to_timespec(tsk->utime + tsk->stime, &ts);
task_cputime(tsk, &utime, &stime);
cputime_to_timespec(utime + stime, &ts);
tmp += timespec_to_ns(&ts);
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
tmp = (s64)d->cpu_scaled_run_real_total;
cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
task_cputime_scaled(tsk, &utimescaled, &stimescaled);
cputime_to_timespec(utimescaled + stimescaled, &ts);
tmp += timespec_to_ns(&ts);
d->cpu_scaled_run_real_total =
(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

View File

@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
cputime_t utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
sig->utime += tsk->utime;
sig->stime += tsk->stime;
sig->gtime += tsk->gtime;
task_cputime(tsk, &utime, &stime);
sig->utime += utime;
sig->stime += stime;
sig->gtime += task_gtime(tsk);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
sig = p->signal;
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=

View File

@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_init(&p->vtime_seqlock);
p->vtime_snap = 0;
p->vtime_snap_whence = VTIME_SLEEPING;
#endif
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

View File

@ -60,6 +60,7 @@
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <asm/futex.h>

View File

@ -44,6 +44,8 @@
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
#include <asm/uaccess.h>

View File

@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/task_work.h>
#include "internals.h"

View File

@ -19,6 +19,7 @@
*/
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>

View File

@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,
static inline cputime_t prof_ticks(struct task_struct *p)
{
return p->utime + p->stime;
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
return utime + stime;
}
static inline cputime_t virt_ticks(struct task_struct *p)
{
return p->utime;
cputime_t utime;
task_cputime(p, &utime, NULL);
return utime;
}
static int
@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
*/
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cputime_t utime, stime;
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
sizeof(unsigned long long));
task_cputime(tsk, &utime, &stime);
cleanup_timers(tsk->cpu_timers,
tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
utime, stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct signal_struct *const sig = tsk->signal;
cputime_t utime, stime;
task_cputime(tsk, &utime, &stime);
cleanup_timers(tsk->signal->cpu_timers,
tsk->utime + sig->utime, tsk->stime + sig->stime,
utime + sig->utime, stime + sig->stime,
tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
static inline int fastpath_timer_check(struct task_struct *tsk)
{
struct signal_struct *sig;
cputime_t utime, stime;
task_cputime(tsk, &utime, &stime);
if (!task_cputime_zero(&tsk->cputime_expires)) {
struct task_cputime task_sample = {
.utime = tsk->utime,
.stime = tsk->stime,
.utime = utime,
.stime = stime,
.sum_exec_runtime = tsk->se.sum_exec_runtime
};

View File

@ -17,6 +17,7 @@
* See rt.c in preempt-rt for proper credits and further information
*/
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/spinlock.h>

View File

@ -10,6 +10,7 @@
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/freezer.h>

View File

@ -13,6 +13,7 @@
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
#include "rtmutex_common.h"

View File

@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
struct task_struct *curr = current;
struct rq *rq, *p_rq;
unsigned long flags;
bool yielded = 0;
int yielded = 0;
local_irq_save(flags);
rq = this_rq();
@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
idle->sched_class = &idle_sched_class;
ftrace_graph_init_idle_task(idle, cpu);
vtime_init_idle(idle);
#if defined(CONFIG_SMP)
sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif
@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void)
}
#endif /* CONFIG_RT_GROUP_SCHED */
int sched_rr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
static DEFINE_MUTEX(mutex);
mutex_lock(&mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
/* make sure that internally we keep jiffies */
/* also, writing zero resets timeslice to default */
if (!ret && write) {
sched_rr_timeslice = sched_rr_timeslice <= 0 ?
RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
}
mutex_unlock(&mutex);
return ret;
}
int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)

View File

@ -28,6 +28,8 @@
*/
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include "cpupri.h"
/* Convert between a 140 based task->prio, and our 102 based cpupri */

View File

@ -3,6 +3,7 @@
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
task_group_account_field(p, index, (__force u64) cputime);
/* Account for user time used */
acct_update_integrals(p);
acct_account_cputime(p);
}
/*
@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
task_group_account_field(p, index, (__force u64) cputime);
/* Account for system time used */
acct_update_integrals(p);
acct_account_cputime(p);
}
/*
@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
struct signal_struct *sig = tsk->signal;
cputime_t utime, stime;
struct task_struct *t;
times->utime = sig->utime;
@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
t = tsk;
do {
times->utime += t->utime;
times->stime += t->stime;
task_cputime(tsk, &utime, &stime);
times->utime += utime;
times->stime += stime;
times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
rcu_read_unlock();
}
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static void irqtime_account_idle_ticks(int ticks) {}
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
if (vtime_accounting_enabled())
return;
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)
account_idle_time(jiffies_to_cputime(ticks));
}
#endif
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
* Use precise platform statistics if available:
@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
*st = cputime.stime;
}
void vtime_account_system_irqsafe(struct task_struct *tsk)
{
unsigned long flags;
local_irq_save(flags);
vtime_account_system(tsk);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
if (!vtime_accounting_enabled())
return;
if (is_idle_task(prev))
vtime_account_idle(prev);
else
vtime_account_system(prev);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
vtime_account_user(prev);
#endif
arch_vtime_task_switch(prev);
}
#endif
@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
* vtime_account().
*/
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
void vtime_account_irq_enter(struct task_struct *tsk)
{
if (in_interrupt() || !is_idle_task(tsk))
vtime_account_system(tsk);
else
vtime_account_idle(tsk);
if (!vtime_accounting_enabled())
return;
if (!in_interrupt()) {
/*
* If we interrupted user, context_tracking_in_user()
* is 1 because the context tracking don't hook
* on irq entry/exit. This way we know if
* we need to flush user time on kernel entry.
*/
if (context_tracking_in_user()) {
vtime_account_user(tsk);
return;
}
if (is_idle_task(tsk)) {
vtime_account_idle(tsk);
return;
}
}
vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account);
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#else
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
{
u64 temp = (__force u64) rtime;
temp *= (__force u64) utime;
temp *= (__force u64) stime;
if (sizeof(cputime_t) == 4)
temp = div_u64(temp, (__force u32) total);
@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
struct cputime *prev,
cputime_t *ut, cputime_t *st)
{
cputime_t rtime, utime, total;
cputime_t rtime, stime, total;
utime = curr->utime;
total = utime + curr->stime;
stime = curr->stime;
total = stime + curr->utime;
/*
* Tick based cputime accounting depend on random scheduling
@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
rtime = nsecs_to_cputime(curr->sum_exec_runtime);
if (total)
utime = scale_utime(utime, rtime, total);
stime = scale_stime(stime, rtime, total);
else
utime = rtime;
stime = rtime;
/*
* If the tick based count grows faster than the scheduler one,
* the result of the scaling may go backward.
* Let's enforce monotonicity.
*/
prev->utime = max(prev->utime, utime);
prev->stime = max(prev->stime, rtime - prev->utime);
prev->stime = max(prev->stime, stime);
prev->utime = max(prev->utime, rtime - prev->stime);
*ut = prev->utime;
*st = prev->stime;
@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime = {
.utime = p->utime,
.stime = p->stime,
.sum_exec_runtime = p->se.sum_exec_runtime,
};
task_cputime(p, &cputime.utime, &cputime.stime);
cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
thread_group_cputime(p, &cputime);
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static unsigned long long vtime_delta(struct task_struct *tsk)
{
unsigned long long clock;
clock = sched_clock();
if (clock < tsk->vtime_snap)
return 0;
return clock - tsk->vtime_snap;
}
static cputime_t get_vtime_delta(struct task_struct *tsk)
{
unsigned long long delta = vtime_delta(tsk);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
tsk->vtime_snap += delta;
/* CHECKME: always safe to convert nsecs to cputime? */
return nsecs_to_cputime(delta);
}
static void __vtime_account_system(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}
void vtime_account_system(struct task_struct *tsk)
{
if (!vtime_accounting_enabled())
return;
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_irq_exit(struct task_struct *tsk)
{
if (!vtime_accounting_enabled())
return;
write_seqlock(&tsk->vtime_seqlock);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
__vtime_account_system(tsk);
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
if (!vtime_accounting_enabled())
return;
delta_cpu = get_vtime_delta(tsk);
write_seqlock(&tsk->vtime_seqlock);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_user_enter(struct task_struct *tsk)
{
if (!vtime_accounting_enabled())
return;
write_seqlock(&tsk->vtime_seqlock);
tsk->vtime_snap_whence = VTIME_USER;
__vtime_account_system(tsk);
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_guest_enter(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_guest_exit(struct task_struct *tsk)
{
write_seqlock(&tsk->vtime_seqlock);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock);
}
void vtime_account_idle(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
account_idle_time(delta_cpu);
}
bool vtime_accounting_enabled(void)
{
return context_tracking_active();
}
void arch_vtime_task_switch(struct task_struct *prev)
{
write_seqlock(&prev->vtime_seqlock);
prev->vtime_snap_whence = VTIME_SLEEPING;
write_sequnlock(&prev->vtime_seqlock);
write_seqlock(&current->vtime_seqlock);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock();
write_sequnlock(&current->vtime_seqlock);
}
void vtime_init_idle(struct task_struct *t)
{
unsigned long flags;
write_seqlock_irqsave(&t->vtime_seqlock, flags);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock();
write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}
cputime_t task_gtime(struct task_struct *t)
{
unsigned int seq;
cputime_t gtime;
do {
seq = read_seqbegin(&t->vtime_seqlock);
gtime = t->gtime;
if (t->flags & PF_VCPU)
gtime += vtime_delta(t);
} while (read_seqretry(&t->vtime_seqlock, seq));
return gtime;
}
/*
* Fetch cputime raw values from fields of task_struct and
* add up the pending nohz execution time since the last
* cputime snapshot.
*/
static void
fetch_task_cputime(struct task_struct *t,
cputime_t *u_dst, cputime_t *s_dst,
cputime_t *u_src, cputime_t *s_src,
cputime_t *udelta, cputime_t *sdelta)
{
unsigned int seq;
unsigned long long delta;
do {
*udelta = 0;
*sdelta = 0;
seq = read_seqbegin(&t->vtime_seqlock);
if (u_dst)
*u_dst = *u_src;
if (s_dst)
*s_dst = *s_src;
/* Task is sleeping, nothing to add */
if (t->vtime_snap_whence == VTIME_SLEEPING ||
is_idle_task(t))
continue;
delta = vtime_delta(t);
/*
* Task runs either in user or kernel space, add pending nohz time to
* the right place.
*/
if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
*udelta = delta;
} else {
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
} while (read_seqretry(&t->vtime_seqlock, seq));
}
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t udelta, sdelta;
fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta);
if (utime)
*utime += udelta;
if (stime)
*stime += sdelta;
}
void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled)
{
cputime_t udelta, sdelta;
fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled)
*utimescaled += cputime_to_scaled(udelta);
if (stimescaled)
*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

View File

@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
}
/* ensure we never gain time by being placed backwards. */
vruntime = max_vruntime(se->vruntime, vruntime);
se->vruntime = vruntime;
se->vruntime = max_vruntime(se->vruntime, vruntime);
}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
*/
static int select_idle_sibling(struct task_struct *p, int target)
{
int cpu = smp_processor_id();
int prev_cpu = task_cpu(p);
struct sched_domain *sd;
struct sched_group *sg;
int i;
int i = task_cpu(p);
if (idle_cpu(target))
return target;
/*
* If the task is going to be woken-up on this cpu and if it is
* already idle, then it is the right target.
* If the prevous cpu is cache affine and idle, don't be stupid.
*/
if (target == cpu && idle_cpu(cpu))
return cpu;
/*
* If the task is going to be woken-up on the cpu where it previously
* ran and if it is currently idle, then it the right target.
*/
if (target == prev_cpu && idle_cpu(prev_cpu))
return prev_cpu;
if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
return i;
/*
* Otherwise, iterate the domains and find an elegible idle cpu.
@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
goto next;
for_each_cpu(i, sched_group_cpus(sg)) {
if (!idle_cpu(i))
if (i == target || !idle_cpu(i))
goto next;
}
@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
* idle runqueue:
*/
if (rq->cfs.load.weight)
rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
return rr_interval;
}

View File

@ -7,6 +7,8 @@
#include <linux/slab.h>
int sched_rr_timeslice = RR_TIMESLICE;
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
struct rt_bandwidth def_rt_bandwidth;
@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq)
return;
delta_exec = rq->clock_task - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
if (unlikely((s64)delta_exec <= 0))
return;
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
(cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
(p->nr_cpus_allowed > 1))
cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
return 1;
return 0;
}
@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
if (p->on_rq && !rq->rt.rt_nr_running)
pull_rt_task(rq);
if (!p->on_rq || rq->rt.rt_nr_running)
return;
if (pull_rt_task(rq))
resched_task(rq->curr);
}
void init_sched_rt_class(void)
@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
if (soft != RLIM_INFINITY) {
unsigned long next;
p->rt.timeout++;
if (p->rt.watchdog_stamp != jiffies) {
p->rt.timeout++;
p->rt.watchdog_stamp = jiffies;
}
next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
if (p->rt.timeout > next)
p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
if (--p->rt.time_slice)
return;
p->rt.time_slice = RR_TIMESLICE;
p->rt.time_slice = sched_rr_timeslice;
/*
* Requeue to the end of queue if we (and all of our ancestors) are the
@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
* Time slice is 0 for SCHED_FIFO tasks
*/
if (task->policy == SCHED_RR)
return RR_TIMESLICE;
return sched_rr_timeslice;
else
return 0;
}

View File

@ -1,5 +1,7 @@
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/stop_machine.h>

View File

@ -1632,6 +1632,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
unsigned long flags;
struct sighand_struct *psig;
bool autoreap = false;
cputime_t utime, stime;
BUG_ON(sig == -1);
@ -1669,8 +1670,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
task_uid(tsk));
rcu_read_unlock();
info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
task_cputime(tsk, &utime, &stime);
info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
@ -1734,6 +1736,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;
cputime_t utime, stime;
if (for_ptracer) {
parent = tsk->parent;
@ -1752,8 +1755,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
rcu_read_unlock();
info.si_utime = cputime_to_clock_t(tsk->utime);
info.si_stime = cputime_to_clock_t(tsk->stime);
task_cputime(tsk, &utime, &stime);
info.si_utime = cputime_to_clock_t(utime);
info.si_stime = cputime_to_clock_t(stime);
info.si_code = why;
switch (why) {

View File

@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
current->flags &= ~PF_MEMALLOC;
pending = local_softirq_pending();
vtime_account_irq_enter(current);
account_irq_enter_time(current);
__local_bh_disable((unsigned long)__builtin_return_address(0),
SOFTIRQ_OFFSET);
@ -272,7 +272,7 @@ restart:
lockdep_softirq_exit();
vtime_account_irq_exit(current);
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET);
tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
*/
void irq_exit(void)
{
vtime_account_irq_exit(current);
account_irq_exit_time(current);
trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())

View File

@ -61,6 +61,7 @@
#include <linux/kmod.h>
#include <linux/capability.h>
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@ -403,6 +404,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rt_handler,
},
{
.procname = "sched_rr_timeslice_ms",
.data = &sched_rr_timeslice,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sched_rr_handler,
},
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",

View File

@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
if (vtime_accounting_enabled())
return;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick

View File

@ -39,6 +39,7 @@
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/slab.h>
#include <asm/uaccess.h>

View File

@ -39,6 +39,7 @@
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/sched/rt.h>
#include "trace.h"
#include "trace_output.h"

View File

@ -15,8 +15,8 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/sched/rt.h>
#include <trace/events/sched.h>
#include "trace.h"
static struct trace_array *wakeup_trace;

View File

@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
{
const struct cred *tcred;
struct timespec uptime, ts;
cputime_t utime, stime, utimescaled, stimescaled;
u64 ac_etime;
BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
stats->ac_ppid = pid_alive(tsk) ?
task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
rcu_read_unlock();
stats->ac_utime = cputime_to_usecs(tsk->utime);
stats->ac_stime = cputime_to_usecs(tsk->stime);
stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
task_cputime(tsk, &utime, &stime);
stats->ac_utime = cputime_to_usecs(utime);
stats->ac_stime = cputime_to_usecs(stime);
task_cputime_scaled(tsk, &utimescaled, &stimescaled);
stats->ac_utimescaled = cputime_to_usecs(utimescaled);
stats->ac_stimescaled = cputime_to_usecs(stimescaled);
stats->ac_minflt = tsk->min_flt;
stats->ac_majflt = tsk->maj_flt;
@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
#undef KB
#undef MB
/**
* acct_update_integrals - update mm integral fields in task_struct
* @tsk: task_struct for accounting
*/
void acct_update_integrals(struct task_struct *tsk)
static void __acct_update_integrals(struct task_struct *tsk,
cputime_t utime, cputime_t stime)
{
if (likely(tsk->mm)) {
cputime_t time, dtime;
@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
u64 delta;
local_irq_save(flags);
time = tsk->stime + tsk->utime;
time = stime + utime;
dtime = time - tsk->acct_timexpd;
jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
delta = value.tv_sec;
@ -144,6 +147,27 @@ void acct_update_integrals(struct task_struct *tsk)
}
}
/**
* acct_update_integrals - update mm integral fields in task_struct
* @tsk: task_struct for accounting
*/
void acct_update_integrals(struct task_struct *tsk)
{
cputime_t utime, stime;
task_cputime(tsk, &utime, &stime);
__acct_update_integrals(tsk, utime, stime);
}
/**
* acct_account_cputime - update mm integral after cputime update
* @tsk: task_struct for accounting
*/
void acct_account_cputime(struct task_struct *tsk)
{
__acct_update_integrals(tsk, tsk->utime, tsk->stime);
}
/**
* acct_clear_integrals - clear the mm integral fields in task_struct
* @tsk: task_struct whose accounting fields are cleared

View File

@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>

View File

@ -32,6 +32,7 @@
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched/sysctl.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>

View File

@ -19,6 +19,7 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/sched/sysctl.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>

View File

@ -29,6 +29,7 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/sched/sysctl.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>

View File

@ -35,6 +35,7 @@
#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <trace/events/writeback.h>
/*

View File

@ -58,6 +58,7 @@
#include <linux/prefetch.h>
#include <linux/migrate.h>
#include <linux/page-debug-flags.h>
#include <linux/sched/rt.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>