Added ck2 patchset with BFS357
/kernel/sched_bfs.c
blob:6319f828298861b37d9c74097fe26e9aa7bb3947 -> blob:168e84a745778904e5a69cafe0c13396ba938602
--- kernel/sched_bfs.c
+++ kernel/sched_bfs.c
@@ -85,7 +85,7 @@
#define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO)
#define iso_task(p) unlikely((p)->policy == SCHED_ISO)
#define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO)
-#define ISO_PERIOD ((5 * HZ * num_online_cpus()) + 1)
+#define ISO_PERIOD ((5 * HZ * grq.noc) + 1)
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
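The ISO_PERIOD change above reads grq.noc, a count of online CPUs that this patch caches in the global runqueue and refreshes only from the hotplug notifier (see the migration_call() and sched_init() hunks further down), instead of calling num_online_cpus() on every evaluation. A minimal userspace sketch of that cache-and-derive idea, with hypothetical names (noc, iso_period, cpu_hotplug_event) standing in for the kernel symbols:

#include <stdio.h>

#define HZ 1000

static int noc = 1;        /* mirrors grq.noc: only touched on hotplug events */

/* mirrors ISO_PERIOD: derived from the cached count, not a fresh query */
static inline long iso_period(void)
{
        return (5L * HZ * noc) + 1;
}

/* mirrors the migration_call() update: grq.noc = num_online_cpus() */
static void cpu_hotplug_event(int online_cpus)
{
        noc = online_cpus;
}

int main(void)
{
        cpu_hotplug_event(4);
        printf("ISO period with 4 CPUs: %ld ticks\n", iso_period());
        return 0;
}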
@@ -120,7 +120,7 @@
#define NS_TO_MS(TIME) ((TIME) >> 20)
#define NS_TO_US(TIME) ((TIME) >> 10)
-#define RESCHED_US (100) /* Reschedule if less than this many us left */
+#define RESCHED_US (100) /* Reschedule if less than this many μs left */
/*
* This is the time all tasks within the same priority round robin.
@@ -145,7 +145,7 @@ static int prio_ratios[PRIO_RANGE] __rea
* The quota handed out to tasks of all priority levels when refilling their
* time_slice.
*/
-static inline unsigned long timeslice(void)
+static inline int timeslice(void)
{
return MS_TO_US(rr_interval);
}
@@ -167,6 +167,7 @@ struct global_rq {
cpumask_t cpu_idle_map;
int idle_cpus;
#endif
+ int noc; /* num_online_cpus stored and updated when it changes */
u64 niffies; /* Nanosecond jiffies */
unsigned long last_jiffy; /* Last jiffy we updated niffies */
@@ -187,7 +188,6 @@ struct rq {
#ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
- struct task_struct *last_task;
#endif
struct task_struct *curr, *idle;
@@ -210,6 +210,8 @@ struct rq {
#ifdef CONFIG_SMP
int cpu; /* cpu of this runqueue */
int online;
+ int scaling; /* This CPU is managed by a scaling CPU freq governor */
+ struct task_struct *sticky_task;
struct root_domain *rd;
struct sched_domain *sd;
@@ -226,7 +228,11 @@ struct rq {
#endif
u64 last_niffy; /* Last time this RQ updated grq.niffies */
#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
+#endif
u64 clock, old_clock, last_tick;
+ u64 clock_task;
int dither;
#ifdef CONFIG_SCHEDSTATS
@@ -398,9 +404,17 @@ static inline void update_clocks(struct
* when we're not updating niffies.
* Looking up task_rq must be done under grq.lock to be safe.
*/
+static u64 irq_time_cpu(int cpu);
+
static inline void update_rq_clock(struct rq *rq)
{
- rq->clock = sched_clock_cpu(cpu_of(rq));
+ int cpu = cpu_of(rq);
+ u64 irq_time;
+
+ rq->clock = sched_clock_cpu(cpu);
+ irq_time = irq_time_cpu(cpu);
+ if (rq->clock - irq_time > rq->clock_task)
+ rq->clock_task = rq->clock - irq_time;
}
static inline int task_running(struct task_struct *p)
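The update_rq_clock() change above introduces rq->clock_task: the per-CPU clock minus the accumulated IRQ time reported by irq_time_cpu(), updated only when it would move forward, so task runtime accounting cannot go backwards when IRQ time grows faster than the clock between two updates. A small userspace sketch of that monotonic update, using a stub struct rather than the real struct rq:

#include <stdint.h>
#include <stdio.h>

/* Model of the rq clock fields touched in update_rq_clock(). */
struct rq_model {
        uint64_t clock;       /* raw per-CPU scheduler clock */
        uint64_t clock_task;  /* clock with IRQ time subtracted, monotonic */
};

/* mirrors: if (rq->clock - irq_time > rq->clock_task) rq->clock_task = ... */
static void update_clock_task(struct rq_model *rq, uint64_t now, uint64_t irq_time)
{
        rq->clock = now;
        if (rq->clock - irq_time > rq->clock_task)
                rq->clock_task = rq->clock - irq_time;
}

int main(void)
{
        struct rq_model rq = { 0, 0 };

        update_clock_task(&rq, 1000, 100);   /* 900 ns of task time */
        update_clock_task(&rq, 1500, 700);   /* irq time grew faster: clock_task holds */
        printf("clock=%llu clock_task=%llu\n",
               (unsigned long long)rq.clock, (unsigned long long)rq.clock_task);
        return 0;
}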
@@ -743,26 +757,17 @@ static int suitable_idle_cpus(struct tas
static void resched_task(struct task_struct *p);
-/*
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
- */
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
- rq->last_task = p;
-}
-
-#define CPUIDLE_CACHE_BUSY (1)
-#define CPUIDLE_DIFF_CPU (2)
-#define CPUIDLE_THREAD_BUSY (4)
-#define CPUIDLE_DIFF_NODE (8)
+#define CPUIDLE_DIFF_THREAD (1)
+#define CPUIDLE_DIFF_CORE (2)
+#define CPUIDLE_CACHE_BUSY (4)
+#define CPUIDLE_DIFF_CPU (8)
+#define CPUIDLE_THREAD_BUSY (16)
+#define CPUIDLE_DIFF_NODE (32)
/*
* The best idle CPU is chosen according to the CPUIDLE ranking above where the
- * lowest value would give the most suitable CPU to schedule p onto next. We
- * iterate from the last CPU upwards instead of using for_each_cpu_mask so as
- * to be able to break out immediately if the last CPU is idle. The order works
- * out to be the following:
+ * lowest value would give the most suitable CPU to schedule p onto next. The
+ * order works out to be the following:
*
* Same core, idle or busy cache, idle threads
* Other core, same cache, idle or busy cache, idle threads.
@@ -774,96 +779,82 @@ static inline void set_last_task(struct
* Other node, other CPU, idle cache, idle threads.
* Other node, other CPU, busy cache, idle threads.
* Other node, other CPU, busy threads.
- *
- * If p was the last task running on this rq, then regardless of where
- * it has been running since then, it is cache warm on this rq.
*/
-static void resched_best_idle(struct task_struct *p)
+static void
+resched_best_mask(unsigned long best_cpu, struct rq *rq, cpumask_t *tmpmask)
{
- unsigned long cpu_tmp, best_cpu, best_ranking;
- cpumask_t tmpmask;
- struct rq *rq;
- int iterate;
+ unsigned long cpu_tmp, best_ranking;
- cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
- iterate = cpus_weight(tmpmask);
- best_cpu = task_cpu(p);
- /*
- * Start below the last CPU and work up with next_cpu as the last
- * CPU might not be idle or affinity might not allow it.
- */
- cpu_tmp = best_cpu - 1;
- rq = cpu_rq(best_cpu);
best_ranking = ~0UL;
- do {
+ for_each_cpu_mask(cpu_tmp, *tmpmask) {
unsigned long ranking;
struct rq *tmp_rq;
ranking = 0;
- cpu_tmp = next_cpu(cpu_tmp, tmpmask);
- if (cpu_tmp >= nr_cpu_ids) {
- cpu_tmp = -1;
- cpu_tmp = next_cpu(cpu_tmp, tmpmask);
- }
tmp_rq = cpu_rq(cpu_tmp);
- if (rq->cpu_locality[cpu_tmp]) {
- /* Check rq->last_task hasn't been dereferenced */
- if (rq->last_task && p != rq->last_task) {
#ifdef CONFIG_NUMA
- if (rq->cpu_locality[cpu_tmp] > 1)
- ranking |= CPUIDLE_DIFF_NODE;
+ if (rq->cpu_locality[cpu_tmp] > 3)
+ ranking |= CPUIDLE_DIFF_NODE;
+ else
#endif
- ranking |= CPUIDLE_DIFF_CPU;
- }
- }
+ if (rq->cpu_locality[cpu_tmp] > 2)
+ ranking |= CPUIDLE_DIFF_CPU;
#ifdef CONFIG_SCHED_MC
+ if (rq->cpu_locality[cpu_tmp] == 2)
+ ranking |= CPUIDLE_DIFF_CORE;
if (!(tmp_rq->cache_idle(cpu_tmp)))
ranking |= CPUIDLE_CACHE_BUSY;
#endif
#ifdef CONFIG_SCHED_SMT
+ if (rq->cpu_locality[cpu_tmp] == 1)
+ ranking |= CPUIDLE_DIFF_THREAD;
if (!(tmp_rq->siblings_idle(cpu_tmp)))
ranking |= CPUIDLE_THREAD_BUSY;
#endif
if (ranking < best_ranking) {
best_cpu = cpu_tmp;
- if (ranking <= 1)
+ if (ranking == 0)
break;
best_ranking = ranking;
}
- } while (--iterate > 0);
+ }
resched_task(cpu_rq(best_cpu)->curr);
}
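The rewritten resched_best_mask() replaces the old next_cpu() wraparound walk with a plain for_each_cpu_mask() loop and ranks each candidate with the ordered CPUIDLE_* flags above, so a numerically lower ranking always means a topologically closer idle CPU, and a ranking of zero (same core) ends the search immediately. A userspace sketch of the same bit ranking, assuming the locality values this patch introduces (1 = SMT sibling, 2 = same-cache core, 3 = other CPU, 4 = other node) and ignoring the CONFIG_SCHED_MC/SMT guards:

#include <stdio.h>

#define CPUIDLE_DIFF_THREAD   (1)
#define CPUIDLE_DIFF_CORE     (2)
#define CPUIDLE_CACHE_BUSY    (4)
#define CPUIDLE_DIFF_CPU      (8)
#define CPUIDLE_THREAD_BUSY  (16)
#define CPUIDLE_DIFF_NODE    (32)

/* Rank one candidate CPU purely from its locality relative to this rq. */
static unsigned long rank_cpu(int locality, int cache_idle, int threads_idle)
{
        unsigned long ranking = 0;

        if (locality > 3)
                ranking |= CPUIDLE_DIFF_NODE;
        else if (locality > 2)
                ranking |= CPUIDLE_DIFF_CPU;
        if (locality == 2)
                ranking |= CPUIDLE_DIFF_CORE;
        if (!cache_idle)
                ranking |= CPUIDLE_CACHE_BUSY;
        if (locality == 1)
                ranking |= CPUIDLE_DIFF_THREAD;
        if (!threads_idle)
                ranking |= CPUIDLE_THREAD_BUSY;
        return ranking;
}

int main(void)
{
        /* An idle SMT sibling with a busy cache (ranking 5)... */
        printf("sibling, busy cache:  %lu\n", rank_cpu(1, 0, 1));
        /* ...still beats a fully idle CPU on another node (ranking 32). */
        printf("other node, all idle: %lu\n", rank_cpu(4, 1, 1));
        return 0;
}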
+static void resched_best_idle(struct task_struct *p)
+{
+ cpumask_t tmpmask;
+
+ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
+ resched_best_mask(task_cpu(p), task_rq(p), &tmpmask);
+}
+
static inline void resched_suitable_idle(struct task_struct *p)
{
if (suitable_idle_cpus(p))
resched_best_idle(p);
}
-
/*
- * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
- * timeslice difference is allowed longer for the cpu local tasks. This is
- * enough in the common case when tasks are up to 2* number of CPUs to keep
- * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
- * deadlines before being taken onto another cpu, allowing for 2* the double
- * seen by separate CPUs above.
- * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
- * on separate CPUs and quadruple in separate NUMA nodes.
+ * Flags to tell us whether this CPU is running a CPU frequency governor that
+ * has slowed its speed or not. No locking required as the very rare wrongly
+ * read value would be harmless.
*/
-static inline int
-cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+void cpu_scaling(int cpu)
{
- /* Check rq->last_task hasn't been dereferenced */
- if (likely(rq->last_task)) {
- if (rq->last_task == p)
- return 0;
- }
- return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+ cpu_rq(cpu)->scaling = 1;
+}
+
+void cpu_nonscaling(int cpu)
+{
+ cpu_rq(cpu)->scaling = 0;
+}
+
+static inline int scaling_rq(struct rq *rq)
+{
+ return rq->scaling;
}
#else /* CONFIG_SMP */
static inline void inc_qnr(void)
@@ -896,16 +887,25 @@ static inline void resched_suitable_idle
{
}
-static inline int
-cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+void cpu_scaling(int __unused)
{
- return 0;
}
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
+void cpu_nonscaling(int __unused)
{
}
+
+/*
+ * Although CPUs can scale in UP, there is nowhere else for tasks to go so this
+ * always returns 0.
+ */
+static inline int scaling_rq(struct rq *rq)
+{
+ return 0;
+}
#endif /* CONFIG_SMP */
+EXPORT_SYMBOL_GPL(cpu_scaling);
+EXPORT_SYMBOL_GPL(cpu_nonscaling);
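cpu_scaling() and cpu_nonscaling() are new exported hooks, presumably called from the cpufreq governors patched elsewhere in the ck2 patchset, that flag a runqueue whose CPU is currently running below full speed; scaling_rq() is then consulted in earliest_deadline_task() to avoid pulling a sticky task onto a slowed CPU. A tiny userspace model of the flag, with a hypothetical per-CPU array standing in for the runqueues:

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical stand-in for the per-CPU rq->scaling flag. */
static int cpu_is_scaling[NR_CPUS];

void cpu_scaling(int cpu)       { cpu_is_scaling[cpu] = 1; }
void cpu_nonscaling(int cpu)    { cpu_is_scaling[cpu] = 0; }

/* mirrors scaling_rq(): no locking, a stale read is harmless here */
static int scaling_cpu(int cpu) { return cpu_is_scaling[cpu]; }

int main(void)
{
        cpu_scaling(2);         /* e.g. a governor dropped cpu2's frequency */
        printf("cpu2 scaling: %d, cpu0 scaling: %d\n", scaling_cpu(2), scaling_cpu(0));
        cpu_nonscaling(2);      /* back to full speed */
        printf("cpu2 scaling: %d\n", scaling_cpu(2));
        return 0;
}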
/*
* activate_idle_task - move idle task to the _front_ of runqueue.
@@ -1001,6 +1001,82 @@ void set_task_cpu(struct task_struct *p,
smp_wmb();
task_thread_info(p)->cpu = cpu;
}
+
+static inline void clear_sticky(struct task_struct *p)
+{
+ p->sticky = 0;
+}
+
+static inline int task_sticky(struct task_struct *p)
+{
+ return p->sticky;
+}
+
+/* Reschedule the best idle CPU that is not this one. */
+static void
+resched_closest_idle(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+ cpumask_t tmpmask;
+
+ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
+ cpu_clear(cpu, tmpmask);
+ if (cpus_empty(tmpmask))
+ return;
+ resched_best_mask(cpu, rq, &tmpmask);
+}
+
+/*
+ * We set the sticky flag on a task that is descheduled involuntarily meaning
+ * it is awaiting further CPU time. If the last sticky task is still sticky
+ * but unlucky enough to not be the next task scheduled, we unstick it and try
+ * to find it an idle CPU. Realtime tasks do not stick to minimise their
+ * latency at all times.
+ */
+static inline void
+swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+ if (rq->sticky_task) {
+ if (rq->sticky_task == p) {
+ p->sticky = 1;
+ return;
+ }
+ if (rq->sticky_task->sticky) {
+ rq->sticky_task->sticky = 0;
+ resched_closest_idle(rq, cpu, rq->sticky_task);
+ }
+ }
+ if (!rt_task(p)) {
+ p->sticky = 1;
+ rq->sticky_task = p;
+ } else {
+ resched_closest_idle(rq, cpu, p);
+ rq->sticky_task = NULL;
+ }
+}
+
+static inline void unstick_task(struct rq *rq, struct task_struct *p)
+{
+ rq->sticky_task = NULL;
+ clear_sticky(p);
+}
+#else
+static inline void clear_sticky(struct task_struct *p)
+{
+}
+
+static inline int task_sticky(struct task_struct *p)
+{
+ return 0;
+}
+
+static inline void
+swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+}
+
+static inline void unstick_task(struct rq *rq, struct task_struct *p)
+{
+}
#endif
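The sticky mechanism summarised: a task descheduled involuntarily is marked sticky on its last runqueue; if that runqueue later picks somebody else while the sticky task is still waiting, swap_sticky() unsticks it and nudges it toward the closest idle CPU, and realtime tasks never stick at all. A compact userspace model of that decision flow, with stub task/runqueue structs (hypothetical, not the kernel types) and a print in place of resched_closest_idle():

#include <stddef.h>
#include <stdio.h>

struct task { const char *name; int sticky; int rt; };
struct runq { struct task *sticky_task; };

/* Stand-in for resched_closest_idle(): just report what would happen. */
static void push_to_idle(struct task *p)
{
        printf("%s would be pushed to the closest idle CPU\n", p->name);
}

/* Mirrors the swap_sticky() decision flow from the patch. */
static void swap_sticky(struct runq *rq, struct task *p)
{
        if (rq->sticky_task) {
                if (rq->sticky_task == p) {
                        p->sticky = 1;
                        return;
                }
                if (rq->sticky_task->sticky) {
                        rq->sticky_task->sticky = 0;
                        push_to_idle(rq->sticky_task);
                }
        }
        if (!p->rt) {
                p->sticky = 1;          /* normal tasks stick to this rq */
                rq->sticky_task = p;
        } else {
                push_to_idle(p);        /* RT tasks never stick */
                rq->sticky_task = NULL;
        }
}

int main(void)
{
        struct task a = { "taskA", 0, 0 }, b = { "taskB", 0, 0 };
        struct runq rq = { NULL };

        swap_sticky(&rq, &a);   /* A becomes the sticky task */
        swap_sticky(&rq, &b);   /* A gets unstuck and pushed; B sticks */
        printf("sticky task is now %s\n",
               rq.sticky_task ? rq.sticky_task->name : "none");
        return 0;
}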
/*
@@ -1011,6 +1087,7 @@ static inline void take_task(struct rq *
{
set_task_cpu(p, cpu_of(rq));
dequeue_task(p);
+ clear_sticky(p);
dec_qnr();
}
@@ -1348,7 +1425,7 @@ static inline int online_cpus(struct tas
*/
static inline int needs_other_cpu(struct task_struct *p, int cpu)
{
- if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
+ if (unlikely(!cpu_isset(cpu, p->cpus_allowed)))
return 1;
return 0;
}
@@ -1365,25 +1442,31 @@ static void try_preempt(struct task_stru
int highest_prio;
cpumask_t tmp;
- /* IDLEPRIO tasks never preempt anything */
- if (p->policy == SCHED_IDLEPRIO)
- return;
+ /*
+ * We clear the sticky flag here because for a task to have called
+ * try_preempt with the sticky flag enabled means some complicated
+ * re-scheduling has occurred and we should ignore the sticky flag.
+ */
+ clear_sticky(p);
if (suitable_idle_cpus(p)) {
resched_best_idle(p);
return;
}
- if (online_cpus(p))
+ /* IDLEPRIO tasks never preempt anything */
+ if (p->policy == SCHED_IDLEPRIO)
+ return;
+
+ if (likely(online_cpus(p)))
cpus_and(tmp, cpu_online_map, p->cpus_allowed);
else
- (cpumask_copy(&tmp, &cpu_online_map));
+ return;
latest_deadline = 0;
highest_prio = -1;
for_each_cpu_mask(cpu, tmp) {
- u64 offset_deadline;
struct rq *rq;
int rq_prio;
@@ -1392,12 +1475,9 @@ static void try_preempt(struct task_stru
if (rq_prio < highest_prio)
continue;
- offset_deadline = rq->rq_deadline -
- cache_distance(this_rq, rq, p);
-
- if (rq_prio > highest_prio || (rq_prio == highest_prio &&
- deadline_after(offset_deadline, latest_deadline))) {
- latest_deadline = offset_deadline;
+ if (rq_prio > highest_prio ||
+ deadline_after(rq->rq_deadline, latest_deadline)) {
+ latest_deadline = rq->rq_deadline;
highest_prio = rq_prio;
highest_prio_rq = rq;
}
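With cache_distance() gone, try_preempt() picks its victim runqueue purely by priority and raw deadline: the CPU running the least important task wins, and among equals the one whose virtual deadline is furthest away (it was going to run longest anyway). A sketch of that selection over a snapshot of runqueues, using a plain 64-bit comparison in place of the wrap-safe deadline_after():

#include <stdint.h>
#include <stdio.h>

struct rq_snap {
        int cpu;
        int prio;            /* higher number = less important running task */
        uint64_t deadline;   /* virtual deadline of the task running there */
};

/* Choose which CPU to try to preempt: least important task first,
 * and among equals the one whose deadline is furthest away. */
static int pick_victim(const struct rq_snap *rqs, int n)
{
        int best = -1, highest_prio = -1, i;
        uint64_t latest_deadline = 0;

        for (i = 0; i < n; i++) {
                if (rqs[i].prio < highest_prio)
                        continue;
                if (rqs[i].prio > highest_prio ||
                    rqs[i].deadline > latest_deadline) {
                        latest_deadline = rqs[i].deadline;
                        highest_prio = rqs[i].prio;
                        best = rqs[i].cpu;
                }
        }
        return best;
}

int main(void)
{
        const struct rq_snap rqs[] = {
                { 0, 120, 5000 },    /* nice-0 task */
                { 1, 139, 9000 },    /* lowest priority, latest deadline */
                { 2, 139, 7000 },
        };

        printf("preempt cpu %d\n", pick_victim(rqs, 3));   /* cpu 1 */
        return 0;
}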
@@ -1591,6 +1671,7 @@ void sched_fork(struct task_struct *p, i
#endif
p->oncpu = 0;
+ clear_sticky(p);
#ifdef CONFIG_PREEMPT
/* Want to start with kernel preemption disabled. */
@@ -1836,14 +1917,14 @@ context_switch(struct rq *rq, struct tas
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (!mm) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (!prev->mm) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
@@ -1931,8 +2012,7 @@ unsigned long nr_active(void)
unsigned long this_cpu_load(void)
{
return this_rq()->rq_running +
- (queued_notrunning() + nr_uninterruptible()) /
- (1 + num_online_cpus());
+ ((queued_notrunning() + nr_uninterruptible()) / grq.noc);
}
/* Variables and functions for calc_load */
@@ -1985,6 +2065,81 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
EXPORT_PER_CPU_SYMBOL(kstat);
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
+static DEFINE_PER_CPU(u64, cpu_hardirq_time);
+static DEFINE_PER_CPU(u64, cpu_softirq_time);
+
+static DEFINE_PER_CPU(u64, irq_start_time);
+static int sched_clock_irqtime;
+
+void enable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 1;
+}
+
+void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0;
+}
+
+static u64 irq_time_cpu(int cpu)
+{
+ if (!sched_clock_irqtime)
+ return 0;
+
+ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
+void account_system_vtime(struct task_struct *curr)
+{
+ unsigned long flags;
+ int cpu;
+ u64 now, delta;
+
+ if (!sched_clock_irqtime)
+ return;
+
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+ now = sched_clock_cpu(cpu);
+ delta = now - per_cpu(irq_start_time, cpu);
+ per_cpu(irq_start_time, cpu) = now;
+ /*
+ * We do not account for softirq time from ksoftirqd here.
+ * We want to continue accounting softirq time to ksoftirqd thread
+ * in that case, so as not to confuse the scheduler with a special task
+ * that does not consume any time but still wants to run.
+ */
+ if (hardirq_count())
+ per_cpu(cpu_hardirq_time, cpu) += delta;
+ else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ per_cpu(cpu_softirq_time, cpu) += delta;
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(account_system_vtime);
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+ return 0;
+}
+#endif
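The new IRQ time accounting: account_system_vtime() is called from the IRQ entry/exit paths, samples sched_clock_cpu(), and charges the delta since the previous sample to a per-CPU hardirq or softirq counter; irq_time_cpu() sums the two so update_rq_clock() can keep them out of rq->clock_task. A userspace sketch of the delta accounting, assuming hypothetical names and an ordinary CLOCK_MONOTONIC in place of sched_clock_cpu():

#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t hardirq_time, softirq_time, irq_start_time;

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Mirrors account_system_vtime(): charge the time since the last sample
 * to whichever IRQ context we are "in" (passed in here as a flag). */
static void account_irq_delta(int in_hardirq)
{
        uint64_t now = now_ns();
        uint64_t delta = now - irq_start_time;

        irq_start_time = now;
        if (in_hardirq)
                hardirq_time += delta;
        else
                softirq_time += delta;
}

/* Mirrors irq_time_cpu(): the total the scheduler subtracts from the clock. */
static uint64_t irq_time_total(void)
{
        return hardirq_time + softirq_time;
}

int main(void)
{
        irq_start_time = now_ns();
        account_irq_delta(1);   /* pretend a hard IRQ just finished */
        account_irq_delta(0);   /* then a softirq */
        printf("accounted irq time: %llu ns\n",
               (unsigned long long)irq_time_total());
        return 0;
}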
+
/*
* On each tick, see what percentage of that tick was attributed to each
* component and add the percentage to the _pc values. Once a _pc value has
@@ -2029,9 +2184,13 @@ pc_system_time(struct rq *rq, struct tas
}
p->sched_time += ns;
- if (hardirq_count() - hardirq_offset)
+ if (hardirq_count() - hardirq_offset) {
rq->irq_pc += pc;
- else if (softirq_count()) {
+ if (rq->irq_pc >= 100) {
+ rq->irq_pc %= 100;
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ }
+ } else if (softirq_count()) {
rq->softirq_pc += pc;
if (rq->softirq_pc >= 100) {
rq->softirq_pc %= 100;
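The irq_pc change above makes hard-IRQ percentage accounting symmetrical with the existing softirq branch just below it: fractions of a tick accumulate in rq->irq_pc and, whenever the running total reaches 100, one whole tick's worth is charged to cpustat->irq and only the remainder is kept. A minimal sketch of that roll-over accumulator, with a hypothetical whole-tick counter standing in for the cpustat field:

#include <stdio.h>

/* Accumulate per-tick percentages; emit a whole tick each time we cross 100. */
static void account_pc(unsigned long *pc_accum, unsigned long *whole_ticks,
                       unsigned long pc)
{
        *pc_accum += pc;
        if (*pc_accum >= 100) {
                *pc_accum %= 100;       /* keep only the fractional remainder */
                *whole_ticks += 1;      /* one full tick charged (cpustat->irq) */
        }
}

int main(void)
{
        unsigned long irq_pc = 0, irq_ticks = 0;
        int i;

        /* Three ticks that were each 40% IRQ time. */
        for (i = 0; i < 3; i++)
                account_pc(&irq_pc, &irq_ticks, 40);
        printf("irq_pc=%lu irq_ticks=%lu\n", irq_pc, irq_ticks);        /* 20 and 1 */
        return 0;
}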
@@ -2416,7 +2575,7 @@ static void task_running_tick(struct rq
* Tasks that were scheduled in the first half of a tick are not
* allowed to run into the 2nd half of the next tick if they will
* run out of time slice in the interim. Otherwise, if they have
- * less than 100us of time slice left they will be rescheduled.
+ * less than RESCHED_US μs of time slice left they will be rescheduled.
*/
if (rq->dither) {
if (rq->rq_time_slice > HALF_JIFFY_US)
@@ -2539,9 +2698,14 @@ static inline u64 static_deadline_diff(i
return prio_deadline_diff(USER_PRIO(static_prio));
}
+static inline int longest_deadline_diff(void)
+{
+ return prio_deadline_diff(39);
+}
+
static inline int ms_longest_deadline_diff(void)
{
- return NS_TO_MS(prio_deadline_diff(39));
+ return NS_TO_MS(longest_deadline_diff());
}
/*
@@ -2611,7 +2775,19 @@ retry:
goto out_take;
}
- dl = p->deadline + cache_distance(task_rq(p), rq, p);
+ /*
+ * Soft affinity happens here: when this CPU is frequency scaling we
+ * skip a sticky task that last ran on a different CPU entirely, and
+ * when it is not we bias heavily against that task's deadline so it
+ * strongly prefers its previous CPU.
+ */
+ if (task_rq(p) != rq && task_sticky(p)) {
+ if (scaling_rq(rq))
+ continue;
+ else
+ dl = p->deadline + longest_deadline_diff();
+ } else
+ dl = p->deadline;
/*
* No rt tasks. Find the earliest deadline task. Now we're in
@@ -2689,7 +2865,7 @@ static inline void set_rq_task(struct rq
{
rq->rq_time_slice = p->time_slice;
rq->rq_deadline = p->deadline;
- rq->rq_last_ran = p->last_ran;
+ rq->rq_last_ran = p->last_ran = rq->clock;
rq->rq_policy = p->policy;
rq->rq_prio = p->prio;
if (p != rq->idle)
@@ -2768,14 +2944,8 @@ need_resched_nonpreemptible:
*/
grq_unlock_irq();
goto rerun_prev_unlocked;
- } else {
- /*
- * If prev got kicked off by a task that has to
- * run on this CPU for affinity reasons then
- * there may be an idle CPU it can go to.
- */
- resched_suitable_idle(prev);
- }
+ } else
+ swap_sticky(rq, cpu, prev);
}
return_task(prev, deactivate);
}
@@ -2790,17 +2960,24 @@ need_resched_nonpreemptible:
set_cpuidle_map(cpu);
} else {
next = earliest_deadline_task(rq, idle);
- prefetch(next);
- prefetch_stack(next);
- clear_cpuidle_map(cpu);
+ if (likely(next->prio != PRIO_LIMIT)) {
+ prefetch(next);
+ prefetch_stack(next);
+ clear_cpuidle_map(cpu);
+ } else
+ set_cpuidle_map(cpu);
}
if (likely(prev != next)) {
+ /*
+ * Don't stick tasks when a real time task is going to run as
+ * they may literally get stuck.
+ */
+ if (rt_task(next))
+ unstick_task(rq, prev);
sched_info_switch(prev, next);
perf_event_task_sched_out(prev, next, cpu);
- if (prev != idle)
- set_last_task(rq, prev);
set_rq_task(rq, next);
grq.nr_switches++;
prev->oncpu = 0;
@@ -3627,8 +3804,8 @@ recheck:
* SCHED_BATCH is 0.
*/
if (param->sched_priority < 0 ||
- (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
- (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+ (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) ||
+ (!p->mm && param->sched_priority > MAX_RT_PRIO - 1))
return -EINVAL;
if (is_rt_policy(policy) != (param->sched_priority != 0))
return -EINVAL;
@@ -4349,10 +4526,12 @@ void init_idle(struct task_struct *idle,
idle->prio = PRIO_LIMIT;
set_rq_task(rq, idle);
idle->cpus_allowed = cpumask_of_cpu(cpu);
+ /* Silence PROVE_RCU */
+ rcu_read_lock();
set_task_cpu(idle, cpu);
+ rcu_read_unlock();
rq->curr = rq->idle = idle;
idle->oncpu = 1;
- set_cpuidle_map(cpu);
grq_unlock_irqrestore(&flags);
/* Set the preempt count _outside_ the spinlocks! */
@@ -4579,6 +4758,30 @@ void move_task_off_dead_cpu(int dead_cpu
}
+/* Run through task list and find tasks affined to just the dead cpu, then
+ * allocate a new affinity */
+static void break_sole_affinity(int src_cpu)
+{
+ struct task_struct *p, *t;
+
+ do_each_thread(t, p) {
+ if (!online_cpus(p)) {
+ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu %d\n",
+ task_pid_nr(p), p->comm, src_cpu);
+ }
+ }
+ clear_sticky(p);
+ } while_each_thread(t, p);
+}
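break_sole_affinity() walks every thread when a CPU is being taken down and, for any task whose allowed mask no longer intersects the online CPUs, falls back to the full possible mask, logging the change for user tasks only. A toy userspace model of the mask check, with plain bitmaps in place of cpumask_t:

#include <stdio.h>

/* CPU masks modelled as plain bitmaps: bit n set means cpu n is allowed. */
static unsigned long online_mask = 0xful;       /* cpus 0-3 online */

static void cpu_going_down(int cpu, unsigned long *allowed, const char *comm)
{
        online_mask &= ~(1ul << cpu);
        if (!(*allowed & online_mask)) {        /* no online CPU left for it */
                *allowed = ~0ul;                /* fall back to all possible CPUs */
                printf("process (%s) no longer affine to cpu %d\n", comm, cpu);
        }
}

int main(void)
{
        unsigned long pinned_to_cpu3 = 1ul << 3;

        cpu_going_down(3, &pinned_to_cpu3, "worker");
        printf("new mask: 0x%lx\n", pinned_to_cpu3);
        return 0;
}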
+
/*
* Schedules idle task to be the next runnable task on current CPU.
* It does so by boosting its priority to highest possible.
@@ -4599,6 +4802,7 @@ void sched_idle_next(void)
* and interrupts disabled on the current cpu.
*/
grq_lock_irqsave(&flags);
+ break_sole_affinity(this_cpu);
__setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1);
@@ -4836,6 +5040,7 @@ migration_call(struct notifier_block *nf
set_rq_online(rq);
}
+ grq.noc = num_online_cpus();
grq_unlock_irqrestore(&flags);
break;
@@ -4866,6 +5071,7 @@ migration_call(struct notifier_block *nf
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
+ grq.noc = num_online_cpus();
grq_unlock_irqrestore(&flags);
break;
#endif
@@ -6389,7 +6595,7 @@ static int cache_cpu_idle(unsigned long
void __init sched_init_smp(void)
{
struct sched_domain *sd;
- int cpu, cpus;
+ int cpu;
cpumask_var_t non_isolated_cpus;
@@ -6423,14 +6629,6 @@ void __init sched_init_smp(void)
BUG();
free_cpumask_var(non_isolated_cpus);
- /*
- * Assume that every added cpu gives us slightly less overall latency
- * allowing us to increase the base rr_interval, non-linearly and with
- * an upper bound.
- */
- cpus = num_online_cpus();
- rr_interval = rr_interval * (4 * cpus + 4) / (cpus + 6);
-
grq_lock_irq();
/*
* Set up the relative cache distance of each online cpu from each
@@ -6459,10 +6657,12 @@ void __init sched_init_smp(void)
cpumask_set_cpu(other_cpu, &rq->cache_siblings);
}
#endif
- if (sd->level <= SD_LV_MC)
- locality = 0;
- else if (sd->level <= SD_LV_NODE)
+ if (sd->level <= SD_LV_SIBLING)
locality = 1;
+ else if (sd->level <= SD_LV_MC)
+ locality = 2;
+ else if (sd->level <= SD_LV_NODE)
+ locality = 3;
else
continue;
@@ -6517,6 +6717,7 @@ void __init sched_init(void)
grq.last_jiffy = jiffies;
spin_lock_init(&grq.iso_lock);
grq.iso_ticks = grq.iso_refractory = 0;
+ grq.noc = 1;
#ifdef CONFIG_SMP
init_defrootdomain();
grq.qnr = grq.idle_cpus = 0;
@@ -6530,6 +6731,7 @@ void __init sched_init(void)
rq->iowait_pc = rq->idle_pc = 0;
rq->dither = 0;
#ifdef CONFIG_SMP
+ rq->sticky_task = NULL;
rq->last_niffy = 0;
rq->sd = NULL;
rq->rd = NULL;
@@ -6568,7 +6770,7 @@ void __init sched_init(void)
if (i == j)
rq->cpu_locality[j] = 0;
else
- rq->cpu_locality[j] = 3;
+ rq->cpu_locality[j] = 4;
}
}
#endif