Added ck2 patchset with BFS357
/kernel/sched_bfs.c
blob:6319f828298861b37d9c74097fe26e9aa7bb3947 -> blob:168e84a745778904e5a69cafe0c13396ba938602
--- kernel/sched_bfs.c
+++ kernel/sched_bfs.c
@@ -85,7 +85,7 @@
#define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO)
#define iso_task(p) unlikely((p)->policy == SCHED_ISO)
#define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO)
-#define ISO_PERIOD ((5 * HZ * num_online_cpus()) + 1)
+#define ISO_PERIOD ((5 * HZ * grq.noc) + 1)
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
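The ISO_PERIOD change above reads grq.noc, a count of online CPUs that this patch caches in the global runqueue and refreshes only from the hotplug notifier (see the migration_call() and sched_init() hunks further down), instead of calling num_online_cpus() on every evaluation. A minimal userspace sketch of that cache-and-derive idea, with hypothetical names (noc, iso_period, cpu_hotplug_event) standing in for the kernel symbols:

#include <stdio.h>

#define HZ 1000

static int noc = 1;        /* mirrors grq.noc: only touched on hotplug events */

/* mirrors ISO_PERIOD: derived from the cached count, not a fresh query */
static inline long iso_period(void)
{
        return (5L * HZ * noc) + 1;
}

/* mirrors the migration_call() update: grq.noc = num_online_cpus() */
static void cpu_hotplug_event(int online_cpus)
{
        noc = online_cpus;
}

int main(void)
{
        cpu_hotplug_event(4);
        printf("ISO period with 4 CPUs: %ld ticks\n", iso_period());
        return 0;
}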
@@ -120,7 +120,7 @@
#define NS_TO_MS(TIME) ((TIME) >> 20)
#define NS_TO_US(TIME) ((TIME) >> 10)
-#define RESCHED_US (100) /* Reschedule if less than this many us left */
+#define RESCHED_US (100) /* Reschedule if less than this many μs left */
/*
* This is the time all tasks within the same priority round robin.
@@ -145,7 +145,7 @@ static int prio_ratios[PRIO_RANGE] __rea
* The quota handed out to tasks of all priority levels when refilling their
* time_slice.
*/
-static inline unsigned long timeslice(void)
+static inline int timeslice(void)
{
return MS_TO_US(rr_interval);
}
@@ -167,6 +167,7 @@ struct global_rq {
cpumask_t cpu_idle_map;
int idle_cpus;
#endif
+ int noc; /* num_online_cpus stored and updated when it changes */
u64 niffies; /* Nanosecond jiffies */
unsigned long last_jiffy; /* Last jiffy we updated niffies */
@@ -187,7 +188,6 @@ struct rq {
#ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
- struct task_struct *last_task;
#endif
struct task_struct *curr, *idle;
@@ -210,6 +210,8 @@ struct rq {
#ifdef CONFIG_SMP
int cpu; /* cpu of this runqueue */
int online;
+ int scaling; /* This CPU is managed by a scaling CPU freq governor */
+ struct task_struct *sticky_task;
struct root_domain *rd;
struct sched_domain *sd;
@@ -226,7 +228,11 @@ struct rq {
#endif
u64 last_niffy; /* Last time this RQ updated grq.niffies */
#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
+#endif
u64 clock, old_clock, last_tick;
+ u64 clock_task;
int dither;
#ifdef CONFIG_SCHEDSTATS
@@ -398,9 +404,17 @@ static inline void update_clocks(struct
* when we're not updating niffies.
* Looking up task_rq must be done under grq.lock to be safe.
*/
+static u64 irq_time_cpu(int cpu);
+
static inline void update_rq_clock(struct rq *rq)
{
- rq->clock = sched_clock_cpu(cpu_of(rq));
+ int cpu = cpu_of(rq);
+ u64 irq_time;
+
+ rq->clock = sched_clock_cpu(cpu);
+ irq_time = irq_time_cpu(cpu);
+ if (rq->clock - irq_time > rq->clock_task)
+ rq->clock_task = rq->clock - irq_time;
}
static inline int task_running(struct task_struct *p)
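The update_rq_clock() change above introduces rq->clock_task: the per-CPU clock minus the accumulated IRQ time reported by irq_time_cpu(), updated only when it would move forward, so task runtime accounting cannot go backwards when IRQ time grows faster than the clock between two updates. A small userspace sketch of that monotonic update, using a stub struct rather than the real struct rq:

#include <stdint.h>
#include <stdio.h>

/* Model of the rq clock fields touched in update_rq_clock(). */
struct rq_model {
        uint64_t clock;       /* raw per-CPU scheduler clock */
        uint64_t clock_task;  /* clock with IRQ time subtracted, monotonic */
};

/* mirrors: if (rq->clock - irq_time > rq->clock_task) rq->clock_task = ... */
static void update_clock_task(struct rq_model *rq, uint64_t now, uint64_t irq_time)
{
        rq->clock = now;
        if (rq->clock - irq_time > rq->clock_task)
                rq->clock_task = rq->clock - irq_time;
}

int main(void)
{
        struct rq_model rq = { 0, 0 };

        update_clock_task(&rq, 1000, 100);   /* 900 ns of task time */
        update_clock_task(&rq, 1500, 700);   /* irq time grew faster: clock_task holds */
        printf("clock=%llu clock_task=%llu\n",
               (unsigned long long)rq.clock, (unsigned long long)rq.clock_task);
        return 0;
}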
@@ -743,26 +757,17 @@ static int suitable_idle_cpus(struct tas
static void resched_task(struct task_struct *p);
-/*
- * last_task stores the last non-idle task scheduled on the local rq for
- * cache warmth testing.
- */
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
-{
- rq->last_task = p;
-}
-
-#define CPUIDLE_CACHE_BUSY (1)
-#define CPUIDLE_DIFF_CPU (2)
-#define CPUIDLE_THREAD_BUSY (4)
-#define CPUIDLE_DIFF_NODE (8)
+#define CPUIDLE_DIFF_THREAD (1)
+#define CPUIDLE_DIFF_CORE (2)
+#define CPUIDLE_CACHE_BUSY (4)
+#define CPUIDLE_DIFF_CPU (8)
+#define CPUIDLE_THREAD_BUSY (16)
+#define CPUIDLE_DIFF_NODE (32)
/*
* The best idle CPU is chosen according to the CPUIDLE ranking above where the
- * lowest value would give the most suitable CPU to schedule p onto next. We
- * iterate from the last CPU upwards instead of using for_each_cpu_mask so as
- * to be able to break out immediately if the last CPU is idle. The order works
- * out to be the following:
+ * lowest value would give the most suitable CPU to schedule p onto next. The
+ * order works out to be the following:
*
* Same core, idle or busy cache, idle threads
* Other core, same cache, idle or busy cache, idle threads.
@@ -774,96 +779,82 @@ static inline void set_last_task(struct
* Other node, other CPU, idle cache, idle threads.
* Other node, other CPU, busy cache, idle threads.
* Other node, other CPU, busy threads.
- *
- * If p was the last task running on this rq, then regardless of where
- * it has been running since then, it is cache warm on this rq.
*/
-static void resched_best_idle(struct task_struct *p)
+static void
+resched_best_mask(unsigned long best_cpu, struct rq *rq, cpumask_t *tmpmask)
{
- unsigned long cpu_tmp, best_cpu, best_ranking;
- cpumask_t tmpmask;
- struct rq *rq;
- int iterate;
+ unsigned long cpu_tmp, best_ranking;
- cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
- iterate = cpus_weight(tmpmask);
- best_cpu = task_cpu(p);
- /*
- * Start below the last CPU and work up with next_cpu as the last
- * CPU might not be idle or affinity might not allow it.
- */
- cpu_tmp = best_cpu - 1;
- rq = cpu_rq(best_cpu);
best_ranking = ~0UL;
- do {
+ for_each_cpu_mask(cpu_tmp, *tmpmask) {
unsigned long ranking;
struct rq *tmp_rq;
ranking = 0;
- cpu_tmp = next_cpu(cpu_tmp, tmpmask);
- if (cpu_tmp >= nr_cpu_ids) {
- cpu_tmp = -1;
- cpu_tmp = next_cpu(cpu_tmp, tmpmask);
- }
tmp_rq = cpu_rq(cpu_tmp);
- if (rq->cpu_locality[cpu_tmp]) {
- /* Check rq->last_task hasn't been dereferenced */
- if (rq->last_task && p != rq->last_task) {
#ifdef CONFIG_NUMA
- if (rq->cpu_locality[cpu_tmp] > 1)
- ranking |= CPUIDLE_DIFF_NODE;
+ if (rq->cpu_locality[cpu_tmp] > 3)
+ ranking |= CPUIDLE_DIFF_NODE;
+ else
#endif
- ranking |= CPUIDLE_DIFF_CPU;
- }
- }
+ if (rq->cpu_locality[cpu_tmp] > 2)
+ ranking |= CPUIDLE_DIFF_CPU;
#ifdef CONFIG_SCHED_MC
+ if (rq->cpu_locality[cpu_tmp] == 2)
+ ranking |= CPUIDLE_DIFF_CORE;
if (!(tmp_rq->cache_idle(cpu_tmp)))
ranking |= CPUIDLE_CACHE_BUSY;
#endif
#ifdef CONFIG_SCHED_SMT
+ if (rq->cpu_locality[cpu_tmp] == 1)
+ ranking |= CPUIDLE_DIFF_THREAD;
if (!(tmp_rq->siblings_idle(cpu_tmp)))
ranking |= CPUIDLE_THREAD_BUSY;
#endif
if (ranking < best_ranking) {
best_cpu = cpu_tmp;
- if (ranking <= 1)
+ if (ranking == 0)
break;
best_ranking = ranking;
}
- } while (--iterate > 0);
+ }
resched_task(cpu_rq(best_cpu)->curr);
}
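The rewritten resched_best_mask() replaces the old next_cpu() wraparound walk with a plain for_each_cpu_mask() loop and ranks each candidate with the ordered CPUIDLE_* flags above, so a numerically lower ranking always means a topologically closer idle CPU, and a ranking of zero (same core) ends the search immediately. A userspace sketch of the same bit ranking, assuming the locality values this patch introduces (1 = SMT sibling, 2 = same-cache core, 3 = other CPU, 4 = other node) and ignoring the CONFIG_SCHED_MC/SMT guards:

#include <stdio.h>

#define CPUIDLE_DIFF_THREAD   (1)
#define CPUIDLE_DIFF_CORE     (2)
#define CPUIDLE_CACHE_BUSY    (4)
#define CPUIDLE_DIFF_CPU      (8)
#define CPUIDLE_THREAD_BUSY  (16)
#define CPUIDLE_DIFF_NODE    (32)

/* Rank one candidate CPU purely from its locality relative to this rq. */
static unsigned long rank_cpu(int locality, int cache_idle, int threads_idle)
{
        unsigned long ranking = 0;

        if (locality > 3)
                ranking |= CPUIDLE_DIFF_NODE;
        else if (locality > 2)
                ranking |= CPUIDLE_DIFF_CPU;
        if (locality == 2)
                ranking |= CPUIDLE_DIFF_CORE;
        if (!cache_idle)
                ranking |= CPUIDLE_CACHE_BUSY;
        if (locality == 1)
                ranking |= CPUIDLE_DIFF_THREAD;
        if (!threads_idle)
                ranking |= CPUIDLE_THREAD_BUSY;
        return ranking;
}

int main(void)
{
        /* An idle SMT sibling with a busy cache (ranking 5)... */
        printf("sibling, busy cache:  %lu\n", rank_cpu(1, 0, 1));
        /* ...still beats a fully idle CPU on another node (ranking 32). */
        printf("other node, all idle: %lu\n", rank_cpu(4, 1, 1));
        return 0;
}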
+static void resched_best_idle(struct task_struct *p)
+{
+ cpumask_t tmpmask;
+
+ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
+ resched_best_mask(task_cpu(p), task_rq(p), &tmpmask);
+}
+
static inline void resched_suitable_idle(struct task_struct *p)
{
if (suitable_idle_cpus(p))
resched_best_idle(p);
}
-
/*
- * The cpu cache locality difference between CPUs is used to determine how far
- * to offset the virtual deadline. "One" difference in locality means that one
- * timeslice difference is allowed longer for the cpu local tasks. This is
- * enough in the common case when tasks are up to 2* number of CPUs to keep
- * tasks within their shared cache CPUs only. CPUs on different nodes or not
- * even in this domain (NUMA) have "3" difference, allowing 4 times longer
- * deadlines before being taken onto another cpu, allowing for 2* the double
- * seen by separate CPUs above.
- * Simple summary: Virtual deadlines are equal on shared cache CPUs, double
- * on separate CPUs and quadruple in separate NUMA nodes.
+ * Flags to tell us whether this CPU is running a CPU frequency governor that
+ * has slowed its speed or not. No locking required as the very rare wrongly
+ * read value would be harmless.
*/
-static inline int
-cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+void cpu_scaling(int cpu)
{
- /* Check rq->last_task hasn't been dereferenced */
- if (likely(rq->last_task)) {
- if (rq->last_task == p)
- return 0;
- }
- return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
+ cpu_rq(cpu)->scaling = 1;
+}
+
+void cpu_nonscaling(int cpu)
+{
+ cpu_rq(cpu)->scaling = 0;
+}
+
+static inline int scaling_rq(struct rq *rq)
+{
+ return rq->scaling;
}
#else /* CONFIG_SMP */
static inline void inc_qnr(void)
@@ -896,16 +887,25 @@ static inline void resched_suitable_idle
{
}
-static inline int
-cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
+void cpu_scaling(int __unused)
{
- return 0;
}
-static inline void set_last_task(struct rq *rq, struct task_struct *p)
+void cpu_nonscaling(int __unused)
{
}
+
+/*
+ * Although CPUs can scale in UP, there is nowhere else for tasks to go so this
+ * always returns 0.
+ */
+static inline int scaling_rq(struct rq *rq)
+{
+ return 0;
+}
#endif /* CONFIG_SMP */
+EXPORT_SYMBOL_GPL(cpu_scaling);
+EXPORT_SYMBOL_GPL(cpu_nonscaling);
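cpu_scaling() and cpu_nonscaling() are new exported hooks, presumably called from the cpufreq governors patched elsewhere in the ck2 patchset, that flag a runqueue whose CPU is currently running below full speed; scaling_rq() is then consulted in earliest_deadline_task() to avoid pulling a sticky task onto a slowed CPU. A tiny userspace model of the flag, with a hypothetical per-CPU array standing in for the runqueues:

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical stand-in for the per-CPU rq->scaling flag. */
static int cpu_is_scaling[NR_CPUS];

void cpu_scaling(int cpu)       { cpu_is_scaling[cpu] = 1; }
void cpu_nonscaling(int cpu)    { cpu_is_scaling[cpu] = 0; }

/* mirrors scaling_rq(): no locking, a stale read is harmless here */
static int scaling_cpu(int cpu) { return cpu_is_scaling[cpu]; }

int main(void)
{
        cpu_scaling(2);         /* e.g. a governor dropped cpu2's frequency */
        printf("cpu2 scaling: %d, cpu0 scaling: %d\n", scaling_cpu(2), scaling_cpu(0));
        cpu_nonscaling(2);      /* back to full speed */
        printf("cpu2 scaling: %d\n", scaling_cpu(2));
        return 0;
}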
/*
* activate_idle_task - move idle task to the _front_ of runqueue.
@@ -1001,6 +1001,82 @@ void set_task_cpu(struct task_struct *p,
smp_wmb();
task_thread_info(p)->cpu = cpu;
}
+
+static inline void clear_sticky(struct task_struct *p)
+{
+ p->sticky = 0;
+}
+
+static inline int task_sticky(struct task_struct *p)
+{
+ return p->sticky;
+}
+
+/* Reschedule the best idle CPU that is not this one. */
+static void
+resched_closest_idle(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+ cpumask_t tmpmask;
+
+ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map);
+ cpu_clear(cpu, tmpmask);
+ if (cpus_empty(tmpmask))
+ return;
+ resched_best_mask(cpu, rq, &tmpmask);
+}
+
+/*
+ * We set the sticky flag on a task that is descheduled involuntarily meaning
+ * it is awaiting further CPU time. If the last sticky task is still sticky
+ * but unlucky enough to not be the next task scheduled, we unstick it and try
+ * to find it an idle CPU. Realtime tasks do not stick to minimise their
+ * latency at all times.
+ */
+static inline void
+swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+ if (rq->sticky_task) {
+ if (rq->sticky_task == p) {
+ p->sticky = 1;
+ return;
+ }
+ if (rq->sticky_task->sticky) {
+ rq->sticky_task->sticky = 0;
+ resched_closest_idle(rq, cpu, rq->sticky_task);
+ }
+ }
+ if (!rt_task(p)) {
+ p->sticky = 1;
+ rq->sticky_task = p;
+ } else {
+ resched_closest_idle(rq, cpu, p);
+ rq->sticky_task = NULL;
+ }
+}
+
+static inline void unstick_task(struct rq *rq, struct task_struct *p)
+{
+ rq->sticky_task = NULL;
+ clear_sticky(p);
+}
+#else
+static inline void clear_sticky(struct task_struct *p)
+{
+}
+
+static inline int task_sticky(struct task_struct *p)
+{
+ return 0;
+}
+
+static inline void
+swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p)
+{
+}
+
+static inline void unstick_task(struct rq *rq, struct task_struct *p)
+{
+}
#endif
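The sticky mechanism summarised: a task descheduled involuntarily is marked sticky on its last runqueue; if that runqueue later picks somebody else while the sticky task is still waiting, swap_sticky() unsticks it and nudges it toward the closest idle CPU, and realtime tasks never stick at all. A compact userspace model of that decision flow, with stub task/runqueue structs (hypothetical, not the kernel types) and a print in place of resched_closest_idle():

#include <stddef.h>
#include <stdio.h>

struct task { const char *name; int sticky; int rt; };
struct runq { struct task *sticky_task; };

/* Stand-in for resched_closest_idle(): just report what would happen. */
static void push_to_idle(struct task *p)
{
        printf("%s would be pushed to the closest idle CPU\n", p->name);
}

/* Mirrors the swap_sticky() decision flow from the patch. */
static void swap_sticky(struct runq *rq, struct task *p)
{
        if (rq->sticky_task) {
                if (rq->sticky_task == p) {
                        p->sticky = 1;
                        return;
                }
                if (rq->sticky_task->sticky) {
                        rq->sticky_task->sticky = 0;
                        push_to_idle(rq->sticky_task);
                }
        }
        if (!p->rt) {
                p->sticky = 1;          /* normal tasks stick to this rq */
                rq->sticky_task = p;
        } else {
                push_to_idle(p);        /* RT tasks never stick */
                rq->sticky_task = NULL;
        }
}

int main(void)
{
        struct task a = { "taskA", 0, 0 }, b = { "taskB", 0, 0 };
        struct runq rq = { NULL };

        swap_sticky(&rq, &a);   /* A becomes the sticky task */
        swap_sticky(&rq, &b);   /* A gets unstuck and pushed; B sticks */
        printf("sticky task is now %s\n",
               rq.sticky_task ? rq.sticky_task->name : "none");
        return 0;
}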
/*
@@ -1011,6 +1087,7 @@ static inline void take_task(struct rq *
{
set_task_cpu(p, cpu_of(rq));
dequeue_task(p);
+ clear_sticky(p);
dec_qnr();
}
@@ -1348,7 +1425,7 @@ static inline int online_cpus(struct tas
*/
static inline int needs_other_cpu(struct task_struct *p, int cpu)
{
- if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
+ if (unlikely(!cpu_isset(cpu, p->cpus_allowed)))
return 1;
return 0;
}
@@ -1365,25 +1442,31 @@ static void try_preempt(struct task_stru
int highest_prio;
cpumask_t tmp;
- /* IDLEPRIO tasks never preempt anything */
- if (p->policy == SCHED_IDLEPRIO)
- return;
+ /*
+ * We clear the sticky flag here because for a task to have called
+ * try_preempt with the sticky flag enabled means some complicated
+ * re-scheduling has occurred and we should ignore the sticky flag.
+ */
+ clear_sticky(p);
if (suitable_idle_cpus(p)) {
resched_best_idle(p);
return;
}
- if (online_cpus(p))
+ /* IDLEPRIO tasks never preempt anything */
+ if (p->policy == SCHED_IDLEPRIO)
+ return;
+
+ if (likely(online_cpus(p)))
cpus_and(tmp, cpu_online_map, p->cpus_allowed);
else
- (cpumask_copy(&tmp, &cpu_online_map));
+ return;
latest_deadline = 0;
highest_prio = -1;
for_each_cpu_mask(cpu, tmp) {
- u64 offset_deadline;
struct rq *rq;
int rq_prio;
@@ -1392,12 +1475,9 @@ static void try_preempt(struct task_stru
if (rq_prio < highest_prio)
continue;
- offset_deadline = rq->rq_deadline -
- cache_distance(this_rq, rq, p);
-
- if (rq_prio > highest_prio || (rq_prio == highest_prio &&
- deadline_after(offset_deadline, latest_deadline))) {
- latest_deadline = offset_deadline;
+ if (rq_prio > highest_prio ||
+ deadline_after(rq->rq_deadline, latest_deadline)) {
+ latest_deadline = rq->rq_deadline;
highest_prio = rq_prio;
highest_prio_rq = rq;
}
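With cache_distance() gone, try_preempt() picks its victim runqueue purely by priority and raw deadline: the CPU running the least important task wins, and among equals the one whose virtual deadline is furthest away (it was going to run longest anyway). A sketch of that selection over a snapshot of runqueues, using a plain 64-bit comparison in place of the wrap-safe deadline_after():

#include <stdint.h>
#include <stdio.h>

struct rq_snap {
        int cpu;
        int prio;            /* higher number = less important running task */
        uint64_t deadline;   /* virtual deadline of the task running there */
};

/* Choose which CPU to try to preempt: least important task first,
 * and among equals the one whose deadline is furthest away. */
static int pick_victim(const struct rq_snap *rqs, int n)
{
        int best = -1, highest_prio = -1, i;
        uint64_t latest_deadline = 0;

        for (i = 0; i < n; i++) {
                if (rqs[i].prio < highest_prio)
                        continue;
                if (rqs[i].prio > highest_prio ||
                    rqs[i].deadline > latest_deadline) {
                        latest_deadline = rqs[i].deadline;
                        highest_prio = rqs[i].prio;
                        best = rqs[i].cpu;
                }
        }
        return best;
}

int main(void)
{
        const struct rq_snap rqs[] = {
                { 0, 120, 5000 },    /* nice-0 task */
                { 1, 139, 9000 },    /* lowest priority, latest deadline */
                { 2, 139, 7000 },
        };

        printf("preempt cpu %d\n", pick_victim(rqs, 3));   /* cpu 1 */
        return 0;
}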
@@ -1591,6 +1671,7 @@ void sched_fork(struct task_struct *p, i
#endif
p->oncpu = 0;
+ clear_sticky(p);
#ifdef CONFIG_PREEMPT
/* Want to start with kernel preemption disabled. */
@@ -1836,14 +1917,14 @@ context_switch(struct rq *rq, struct tas
*/
arch_start_context_switch(prev);
- if (unlikely(!mm)) {
+ if (!mm) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (unlikely(!prev->mm)) {
+ if (!prev->mm) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
@@ -1931,8 +2012,7 @@ unsigned long nr_active(void)
unsigned long this_cpu_load(void)
{
return this_rq()->rq_running +
- (queued_notrunning() + nr_uninterruptible()) /
- (1 + num_online_cpus());
+ ((queued_notrunning() + nr_uninterruptible()) / grq.noc);
}
/* Variables and functions for calc_load */
@@ -1985,6 +2065,81 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
EXPORT_PER_CPU_SYMBOL(kstat);
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+
+/*
+ * There are no locks covering percpu hardirq/softirq time.
+ * They are only modified in account_system_vtime, on corresponding CPU
+ * with interrupts disabled. So, writes are safe.
+ * They are read and saved off onto struct rq in update_rq_clock().
+ * This may result in other CPU reading this CPU's irq time and can
+ * race with irq/account_system_vtime on this CPU. We would either get old
+ * or new value (or semi updated value on 32 bit) with a side effect of
+ * accounting a slice of irq time to wrong task when irq is in progress
+ * while we read rq->clock. That is a worthy compromise in place of having
+ * locks on each irq in account_system_time.
+ */
+static DEFINE_PER_CPU(u64, cpu_hardirq_time);
+static DEFINE_PER_CPU(u64, cpu_softirq_time);
+
+static DEFINE_PER_CPU(u64, irq_start_time);
+static int sched_clock_irqtime;
+
+void enable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 1;
+}
+
+void disable_sched_clock_irqtime(void)
+{
+ sched_clock_irqtime = 0;
+}
+
+static u64 irq_time_cpu(int cpu)
+{
+ if (!sched_clock_irqtime)
+ return 0;
+
+ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
+}
+
+void account_system_vtime(struct task_struct *curr)
+{
+ unsigned long flags;
+ int cpu;
+ u64 now, delta;
+
+ if (!sched_clock_irqtime)
+ return;
+
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+ now = sched_clock_cpu(cpu);
+ delta = now - per_cpu(irq_start_time, cpu);
+ per_cpu(irq_start_time, cpu) = now;
+ /*
+ * We do not account for softirq time from ksoftirqd here.
+ * We want to continue accounting softirq time to ksoftirqd thread
+ * in that case, so as not to confuse the scheduler with a special task
+ * that does not consume any time but still wants to run.
+ */
+ if (hardirq_count())
+ per_cpu(cpu_hardirq_time, cpu) += delta;
+ else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ per_cpu(cpu_softirq_time, cpu) += delta;
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(account_system_vtime);
+#else
+
+static u64 irq_time_cpu(int cpu)
+{
+ return 0;
+}
+#endif
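The new IRQ time accounting: account_system_vtime() is called from the IRQ entry/exit paths, samples sched_clock_cpu(), and charges the delta since the previous sample to a per-CPU hardirq or softirq counter; irq_time_cpu() sums the two so update_rq_clock() can keep them out of rq->clock_task. A userspace sketch of the delta accounting, assuming hypothetical names and an ordinary CLOCK_MONOTONIC in place of sched_clock_cpu():

#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t hardirq_time, softirq_time, irq_start_time;

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Mirrors account_system_vtime(): charge the time since the last sample
 * to whichever IRQ context we are "in" (passed in here as a flag). */
static void account_irq_delta(int in_hardirq)
{
        uint64_t now = now_ns();
        uint64_t delta = now - irq_start_time;

        irq_start_time = now;
        if (in_hardirq)
                hardirq_time += delta;
        else
                softirq_time += delta;
}

/* Mirrors irq_time_cpu(): the total the scheduler subtracts from the clock. */
static uint64_t irq_time_total(void)
{
        return hardirq_time + softirq_time;
}

int main(void)
{
        irq_start_time = now_ns();
        account_irq_delta(1);   /* pretend a hard IRQ just finished */
        account_irq_delta(0);   /* then a softirq */
        printf("accounted irq time: %llu ns\n",
               (unsigned long long)irq_time_total());
        return 0;
}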
+
/*
* On each tick, see what percentage of that tick was attributed to each
* component and add the percentage to the _pc values. Once a _pc value has
@@ -2029,9 +2184,13 @@ pc_system_time(struct rq *rq, struct tas
}
p->sched_time += ns;
- if (hardirq_count() - hardirq_offset)
+ if (hardirq_count() - hardirq_offset) {
rq->irq_pc += pc;
- else if (softirq_count()) {
+ if (rq->irq_pc >= 100) {
+ rq->irq_pc %= 100;
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ }
+ } else if (softirq_count()) {
rq->softirq_pc += pc;
if (rq->softirq_pc >= 100) {
rq->softirq_pc %= 100;
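The irq_pc change above makes hard-IRQ percentage accounting symmetrical with the existing softirq branch just below it: fractions of a tick accumulate in rq->irq_pc and, whenever the running total reaches 100, one whole tick's worth is charged to cpustat->irq and only the remainder is kept. A minimal sketch of that roll-over accumulator, with a hypothetical whole-tick counter standing in for the cpustat field:

#include <stdio.h>

/* Accumulate per-tick percentages; emit a whole tick each time we cross 100. */
static void account_pc(unsigned long *pc_accum, unsigned long *whole_ticks,
                       unsigned long pc)
{
        *pc_accum += pc;
        if (*pc_accum >= 100) {
                *pc_accum %= 100;       /* keep only the fractional remainder */
                *whole_ticks += 1;      /* one full tick charged (cpustat->irq) */
        }
}

int main(void)
{
        unsigned long irq_pc = 0, irq_ticks = 0;
        int i;

        /* Three ticks that were each 40% IRQ time. */
        for (i = 0; i < 3; i++)
                account_pc(&irq_pc, &irq_ticks, 40);
        printf("irq_pc=%lu irq_ticks=%lu\n", irq_pc, irq_ticks);        /* 20 and 1 */
        return 0;
}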
@@ -2416,7 +2575,7 @@ static void task_running_tick(struct rq
* Tasks that were scheduled in the first half of a tick are not
* allowed to run into the 2nd half of the next tick if they will
* run out of time slice in the interim. Otherwise, if they have
- * less than 100us of time slice left they will be rescheduled.
+ * less than RESCHED_US μs of time slice left they will be rescheduled.
*/
if (rq->dither) {
if (rq->rq_time_slice > HALF_JIFFY_US)
@@ -2539,9 +2698,14 @@ static inline u64 static_deadline_diff(i
return prio_deadline_diff(USER_PRIO(static_prio));
}
+static inline int longest_deadline_diff(void)
+{
+ return prio_deadline_diff(39);
+}
+
static inline int ms_longest_deadline_diff(void)
{
- return NS_TO_MS(prio_deadline_diff(39));
+ return NS_TO_MS(longest_deadline_diff());
}
/*
@@ -2611,7 +2775,19 @@ retry:
goto out_take;
}
- dl = p->deadline + cache_distance(task_rq(p), rq, p);
+ /*
+ * Soft affinity happens here: when this CPU is frequency scaling we
+ * skip a sticky task that last ran on a different CPU entirely, and
+ * when it is not we bias heavily against that task's deadline so it
+ * strongly prefers its previous CPU.
+ */
+ if (task_rq(p) != rq && task_sticky(p)) {
+ if (scaling_rq(rq))
+ continue;
+ else
+ dl = p->deadline + longest_deadline_diff();
+ } else
+ dl = p->deadline;
/*
* No rt tasks. Find the earliest deadline task. Now we're in
@@ -2689,7 +2865,7 @@ static inline void set_rq_task(struct rq
{
rq->rq_time_slice = p->time_slice;
rq->rq_deadline = p->deadline;
- rq->rq_last_ran = p->last_ran;
+ rq->rq_last_ran = p->last_ran = rq->clock;
rq->rq_policy = p->policy;
rq->rq_prio = p->prio;
if (p != rq->idle)
@@ -2768,14 +2944,8 @@ need_resched_nonpreemptible:
*/
grq_unlock_irq();
goto rerun_prev_unlocked;
- } else {
- /*
- * If prev got kicked off by a task that has to
- * run on this CPU for affinity reasons then
- * there may be an idle CPU it can go to.
- */
- resched_suitable_idle(prev);
- }
+ } else
+ swap_sticky(rq, cpu, prev);
}
return_task(prev, deactivate);
}
@@ -2790,17 +2960,24 @@ need_resched_nonpreemptible:
set_cpuidle_map(cpu);
} else {
next = earliest_deadline_task(rq, idle);
- prefetch(next);
- prefetch_stack(next);
- clear_cpuidle_map(cpu);
+ if (likely(next->prio != PRIO_LIMIT)) {
+ prefetch(next);
+ prefetch_stack(next);
+ clear_cpuidle_map(cpu);
+ } else
+ set_cpuidle_map(cpu);
}
if (likely(prev != next)) {
+ /*
+ * Don't stick tasks when a real time task is going to run as
+ * they may literally get stuck.
+ */
+ if (rt_task(next))
+ unstick_task(rq, prev);
sched_info_switch(prev, next);
perf_event_task_sched_out(prev, next, cpu);
- if (prev != idle)
- set_last_task(rq, prev);
set_rq_task(rq, next);
grq.nr_switches++;
prev->oncpu = 0;
@@ -3627,8 +3804,8 @@ recheck:
* SCHED_BATCH is 0.
*/
if (param->sched_priority < 0 ||
- (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
- (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+ (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) ||
+ (!p->mm && param->sched_priority > MAX_RT_PRIO - 1))
return -EINVAL;
if (is_rt_policy(policy) != (param->sched_priority != 0))
return -EINVAL;
@@ -4349,10 +4526,12 @@ void init_idle(struct task_struct *idle,
idle->prio = PRIO_LIMIT;
set_rq_task(rq, idle);
idle->cpus_allowed = cpumask_of_cpu(cpu);
+ /* Silence PROVE_RCU */
+ rcu_read_lock();
set_task_cpu(idle, cpu);
+ rcu_read_unlock();
rq->curr = rq->idle = idle;
idle->oncpu = 1;
- set_cpuidle_map(cpu);
grq_unlock_irqrestore(&flags);
/* Set the preempt count _outside_ the spinlocks! */
@@ -4579,6 +4758,30 @@ void move_task_off_dead_cpu(int dead_cpu
}
+/* Run through task list and find tasks affined to just the dead cpu, then
+ * allocate a new affinity */
+static void break_sole_affinity(int src_cpu)
+{
+ struct task_struct *p, *t;
+
+ do_each_thread(t, p) {
+ if (!online_cpus(p)) {
+ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu %d\n",
+ task_pid_nr(p), p->comm, src_cpu);
+ }
+ }
+ clear_sticky(p);
+ } while_each_thread(t, p);
+}
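break_sole_affinity() walks every thread when a CPU is being taken down and, for any task whose allowed mask no longer intersects the online CPUs, falls back to the full possible mask, logging the change for user tasks only. A toy userspace model of the mask check, with plain bitmaps in place of cpumask_t:

#include <stdio.h>

/* CPU masks modelled as plain bitmaps: bit n set means cpu n is allowed. */
static unsigned long online_mask = 0xful;       /* cpus 0-3 online */

static void cpu_going_down(int cpu, unsigned long *allowed, const char *comm)
{
        online_mask &= ~(1ul << cpu);
        if (!(*allowed & online_mask)) {        /* no online CPU left for it */
                *allowed = ~0ul;                /* fall back to all possible CPUs */
                printf("process (%s) no longer affine to cpu %d\n", comm, cpu);
        }
}

int main(void)
{
        unsigned long pinned_to_cpu3 = 1ul << 3;

        cpu_going_down(3, &pinned_to_cpu3, "worker");
        printf("new mask: 0x%lx\n", pinned_to_cpu3);
        return 0;
}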
+
/*
* Schedules idle task to be the next runnable task on current CPU.
* It does so by boosting its priority to highest possible.
@@ -4599,6 +4802,7 @@ void sched_idle_next(void)
* and interrupts disabled on the current cpu.
*/
grq_lock_irqsave(&flags);
+ break_sole_affinity(this_cpu);
__setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1);
@@ -4836,6 +5040,7 @@ migration_call(struct notifier_block *nf
set_rq_online(rq);
}
+ grq.noc = num_online_cpus();
grq_unlock_irqrestore(&flags);
break;
@@ -4866,6 +5071,7 @@ migration_call(struct notifier_block *nf
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
+ grq.noc = num_online_cpus();
grq_unlock_irqrestore(&flags);
break;
#endif
@@ -6389,7 +6595,7 @@ static int cache_cpu_idle(unsigned long
void __init sched_init_smp(void)
{
struct sched_domain *sd;
- int cpu, cpus;
+ int cpu;
cpumask_var_t non_isolated_cpus;
@@ -6423,14 +6629,6 @@ void __init sched_init_smp(void)
BUG();
free_cpumask_var(non_isolated_cpus);
- /*
- * Assume that every added cpu gives us slightly less overall latency
- * allowing us to increase the base rr_interval, non-linearly and with
- * an upper bound.
- */
- cpus = num_online_cpus();
- rr_interval = rr_interval * (4 * cpus + 4) / (cpus + 6);
-
grq_lock_irq();
/*
* Set up the relative cache distance of each online cpu from each
@@ -6459,10 +6657,12 @@ void __init sched_init_smp(void)
cpumask_set_cpu(other_cpu, &rq->cache_siblings);
}
#endif
- if (sd->level <= SD_LV_MC)
- locality = 0;
- else if (sd->level <= SD_LV_NODE)
+ if (sd->level <= SD_LV_SIBLING)
locality = 1;
+ else if (sd->level <= SD_LV_MC)
+ locality = 2;
+ else if (sd->level <= SD_LV_NODE)
+ locality = 3;
else
continue;
@@ -6517,6 +6717,7 @@ void __init sched_init(void)
grq.last_jiffy = jiffies;
spin_lock_init(&grq.iso_lock);
grq.iso_ticks = grq.iso_refractory = 0;
+ grq.noc = 1;
#ifdef CONFIG_SMP
init_defrootdomain();
grq.qnr = grq.idle_cpus = 0;
@@ -6530,6 +6731,7 @@ void __init sched_init(void)
rq->iowait_pc = rq->idle_pc = 0;
rq->dither = 0;
#ifdef CONFIG_SMP
+ rq->sticky_task = NULL;
rq->last_niffy = 0;
rq->sd = NULL;
rq->rd = NULL;
@@ -6568,7 +6770,7 @@ void __init sched_init(void)
if (i == j)
rq->cpu_locality[j] = 0;
else
- rq->cpu_locality[j] = 3;
+ rq->cpu_locality[j] = 4;
}
}
#endif