--- 6319f828298861b37d9c74097fe26e9aa7bb3947 +++ 168e84a745778904e5a69cafe0c13396ba938602 @@ -85,7 +85,7 @@ #define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO) #define iso_task(p) unlikely((p)->policy == SCHED_ISO) #define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO) -#define ISO_PERIOD ((5 * HZ * num_online_cpus()) + 1) +#define ISO_PERIOD ((5 * HZ * grq.noc) + 1) /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -120,7 +120,7 @@ #define NS_TO_MS(TIME) ((TIME) >> 20) #define NS_TO_US(TIME) ((TIME) >> 10) -#define RESCHED_US (100) /* Reschedule if less than this many us left */ +#define RESCHED_US (100) /* Reschedule if less than this many μs left */ /* * This is the time all tasks within the same priority round robin. @@ -145,7 +145,7 @@ static int prio_ratios[PRIO_RANGE] __rea * The quota handed out to tasks of all priority levels when refilling their * time_slice. */ -static inline unsigned long timeslice(void) +static inline int timeslice(void) { return MS_TO_US(rr_interval); } @@ -167,6 +167,7 @@ struct global_rq { cpumask_t cpu_idle_map; int idle_cpus; #endif + int noc; /* num_online_cpus stored and updated when it changes */ u64 niffies; /* Nanosecond jiffies */ unsigned long last_jiffy; /* Last jiffy we updated niffies */ @@ -187,7 +188,6 @@ struct rq { #ifdef CONFIG_NO_HZ unsigned char in_nohz_recently; #endif - struct task_struct *last_task; #endif struct task_struct *curr, *idle; @@ -210,6 +210,8 @@ struct rq { #ifdef CONFIG_SMP int cpu; /* cpu of this runqueue */ int online; + int scaling; /* This CPU is managed by a scaling CPU freq governor */ + struct task_struct *sticky_task; struct root_domain *rd; struct sched_domain *sd; @@ -226,7 +228,11 @@ struct rq { #endif u64 last_niffy; /* Last time this RQ updated grq.niffies */ #endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +#endif u64 clock, old_clock, last_tick; + u64 clock_task; int dither; #ifdef CONFIG_SCHEDSTATS @@ -398,9 +404,17 @@ static inline void update_clocks(struct * when we're not updating niffies. * Looking up task_rq must be done under grq.lock to be safe. */ +static u64 irq_time_cpu(int cpu); + static inline void update_rq_clock(struct rq *rq) { - rq->clock = sched_clock_cpu(cpu_of(rq)); + int cpu = cpu_of(rq); + u64 irq_time; + + rq->clock = sched_clock_cpu(cpu); + irq_time = irq_time_cpu(cpu); + if (rq->clock - irq_time > rq->clock_task) + rq->clock_task = rq->clock - irq_time; } static inline int task_running(struct task_struct *p) @@ -743,26 +757,17 @@ static int suitable_idle_cpus(struct tas static void resched_task(struct task_struct *p); -/* - * last_task stores the last non-idle task scheduled on the local rq for - * cache warmth testing. - */ -static inline void set_last_task(struct rq *rq, struct task_struct *p) -{ - rq->last_task = p; -} - -#define CPUIDLE_CACHE_BUSY (1) -#define CPUIDLE_DIFF_CPU (2) -#define CPUIDLE_THREAD_BUSY (4) -#define CPUIDLE_DIFF_NODE (8) +#define CPUIDLE_DIFF_THREAD (1) +#define CPUIDLE_DIFF_CORE (2) +#define CPUIDLE_CACHE_BUSY (4) +#define CPUIDLE_DIFF_CPU (8) +#define CPUIDLE_THREAD_BUSY (16) +#define CPUIDLE_DIFF_NODE (32) /* * The best idle CPU is chosen according to the CPUIDLE ranking above where the - * lowest value would give the most suitable CPU to schedule p onto next. We - * iterate from the last CPU upwards instead of using for_each_cpu_mask so as - * to be able to break out immediately if the last CPU is idle. 
The order works - * out to be the following: + * lowest value would give the most suitable CPU to schedule p onto next. The + * order works out to be the following: * * Same core, idle or busy cache, idle threads * Other core, same cache, idle or busy cache, idle threads. @@ -774,96 +779,82 @@ static inline void set_last_task(struct * Other node, other CPU, idle cache, idle threads. * Other node, other CPU, busy cache, idle threads. * Other node, other CPU, busy threads. - * - * If p was the last task running on this rq, then regardless of where - * it has been running since then, it is cache warm on this rq. */ -static void resched_best_idle(struct task_struct *p) +static void +resched_best_mask(unsigned long best_cpu, struct rq *rq, cpumask_t *tmpmask) { - unsigned long cpu_tmp, best_cpu, best_ranking; - cpumask_t tmpmask; - struct rq *rq; - int iterate; + unsigned long cpu_tmp, best_ranking; - cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); - iterate = cpus_weight(tmpmask); - best_cpu = task_cpu(p); - /* - * Start below the last CPU and work up with next_cpu as the last - * CPU might not be idle or affinity might not allow it. - */ - cpu_tmp = best_cpu - 1; - rq = cpu_rq(best_cpu); best_ranking = ~0UL; - do { + for_each_cpu_mask(cpu_tmp, *tmpmask) { unsigned long ranking; struct rq *tmp_rq; ranking = 0; - cpu_tmp = next_cpu(cpu_tmp, tmpmask); - if (cpu_tmp >= nr_cpu_ids) { - cpu_tmp = -1; - cpu_tmp = next_cpu(cpu_tmp, tmpmask); - } tmp_rq = cpu_rq(cpu_tmp); - if (rq->cpu_locality[cpu_tmp]) { - /* Check rq->last_task hasn't been dereferenced */ - if (rq->last_task && p != rq->last_task) { #ifdef CONFIG_NUMA - if (rq->cpu_locality[cpu_tmp] > 1) - ranking |= CPUIDLE_DIFF_NODE; + if (rq->cpu_locality[cpu_tmp] > 3) + ranking |= CPUIDLE_DIFF_NODE; + else #endif - ranking |= CPUIDLE_DIFF_CPU; - } - } + if (rq->cpu_locality[cpu_tmp] > 2) + ranking |= CPUIDLE_DIFF_CPU; #ifdef CONFIG_SCHED_MC + if (rq->cpu_locality[cpu_tmp] == 2) + ranking |= CPUIDLE_DIFF_CORE; if (!(tmp_rq->cache_idle(cpu_tmp))) ranking |= CPUIDLE_CACHE_BUSY; #endif #ifdef CONFIG_SCHED_SMT + if (rq->cpu_locality[cpu_tmp] == 1) + ranking |= CPUIDLE_DIFF_THREAD; if (!(tmp_rq->siblings_idle(cpu_tmp))) ranking |= CPUIDLE_THREAD_BUSY; #endif if (ranking < best_ranking) { best_cpu = cpu_tmp; - if (ranking <= 1) + if (ranking == 0) break; best_ranking = ranking; } - } while (--iterate > 0); + } resched_task(cpu_rq(best_cpu)->curr); } +static void resched_best_idle(struct task_struct *p) +{ + cpumask_t tmpmask; + + cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); + resched_best_mask(task_cpu(p), task_rq(p), &tmpmask); +} + static inline void resched_suitable_idle(struct task_struct *p) { if (suitable_idle_cpus(p)) resched_best_idle(p); } - /* - * The cpu cache locality difference between CPUs is used to determine how far - * to offset the virtual deadline. "One" difference in locality means that one - * timeslice difference is allowed longer for the cpu local tasks. This is - * enough in the common case when tasks are up to 2* number of CPUs to keep - * tasks within their shared cache CPUs only. CPUs on different nodes or not - * even in this domain (NUMA) have "3" difference, allowing 4 times longer - * deadlines before being taken onto another cpu, allowing for 2* the double - * seen by separate CPUs above. - * Simple summary: Virtual deadlines are equal on shared cache CPUs, double - * on separate CPUs and quadruple in separate NUMA nodes. 
+ * Flags to tell us whether this CPU is running a CPU frequency governor that + * has slowed its speed or not. No locking required as the very rare wrongly + * read value would be harmless. */ -static inline int -cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p) +void cpu_scaling(int cpu) { - /* Check rq->last_task hasn't been dereferenced */ - if (likely(rq->last_task)) { - if (rq->last_task == p) - return 0; - } - return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p); + cpu_rq(cpu)->scaling = 1; +} + +void cpu_nonscaling(int cpu) +{ + cpu_rq(cpu)->scaling = 0; +} + +static inline int scaling_rq(struct rq *rq) +{ + return rq->scaling; } #else /* CONFIG_SMP */ static inline void inc_qnr(void) @@ -896,16 +887,25 @@ static inline void resched_suitable_idle { } -static inline int -cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p) +void cpu_scaling(int __unused) { - return 0; } -static inline void set_last_task(struct rq *rq, struct task_struct *p) +void cpu_nonscaling(int __unused) { } + +/* + * Although CPUs can scale in UP, there is nowhere else for tasks to go so this + * always returns 0. + */ +static inline int scaling_rq(struct rq *rq) +{ + return 0; +} #endif /* CONFIG_SMP */ +EXPORT_SYMBOL_GPL(cpu_scaling); +EXPORT_SYMBOL_GPL(cpu_nonscaling); /* * activate_idle_task - move idle task to the _front_ of runqueue. @@ -1001,6 +1001,82 @@ void set_task_cpu(struct task_struct *p, smp_wmb(); task_thread_info(p)->cpu = cpu; } + +static inline void clear_sticky(struct task_struct *p) +{ + p->sticky = 0; +} + +static inline int task_sticky(struct task_struct *p) +{ + return p->sticky; +} + +/* Reschedule the best idle CPU that is not this one. */ +static void +resched_closest_idle(struct rq *rq, unsigned long cpu, struct task_struct *p) +{ + cpumask_t tmpmask; + + cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); + cpu_clear(cpu, tmpmask); + if (cpus_empty(tmpmask)) + return; + resched_best_mask(cpu, rq, &tmpmask); +} + +/* + * We set the sticky flag on a task that is descheduled involuntarily meaning + * it is awaiting further CPU time. If the last sticky task is still sticky + * but unlucky enough to not be the next task scheduled, we unstick it and try + * to find it an idle CPU. Realtime tasks do not stick to minimise their + * latency at all times. 
+ */ +static inline void +swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p) +{ + if (rq->sticky_task) { + if (rq->sticky_task == p) { + p->sticky = 1; + return; + } + if (rq->sticky_task->sticky) { + rq->sticky_task->sticky = 0; + resched_closest_idle(rq, cpu, rq->sticky_task); + } + } + if (!rt_task(p)) { + p->sticky = 1; + rq->sticky_task = p; + } else { + resched_closest_idle(rq, cpu, p); + rq->sticky_task = NULL; + } +} + +static inline void unstick_task(struct rq *rq, struct task_struct *p) +{ + rq->sticky_task = NULL; + clear_sticky(p); +} +#else +static inline void clear_sticky(struct task_struct *p) +{ +} + +static inline int task_sticky(struct task_struct *p) +{ + return 0; +} + +static inline void +swap_sticky(struct rq *rq, unsigned long cpu, struct task_struct *p) +{ +} + +static inline void unstick_task(struct rq *rq, struct task_struct *p) +{ +} #endif /* @@ -1011,6 +1087,7 @@ static inline void take_task(struct rq * { set_task_cpu(p, cpu_of(rq)); dequeue_task(p); + clear_sticky(p); dec_qnr(); } @@ -1348,7 +1425,7 @@ static inline int online_cpus(struct tas */ static inline int needs_other_cpu(struct task_struct *p, int cpu) { - if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p))) + if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) return 1; return 0; } @@ -1365,25 +1442,31 @@ static void try_preempt(struct task_stru int highest_prio; cpumask_t tmp; - /* IDLEPRIO tasks never preempt anything */ - if (p->policy == SCHED_IDLEPRIO) - return; + /* + * We clear the sticky flag here because for a task to have called + * try_preempt with the sticky flag enabled means some complicated + * re-scheduling has occurred and we should ignore the sticky flag. + */ + clear_sticky(p); if (suitable_idle_cpus(p)) { resched_best_idle(p); return; } - if (online_cpus(p)) + /* IDLEPRIO tasks never preempt anything */ + if (p->policy == SCHED_IDLEPRIO) + return; + + if (likely(online_cpus(p))) cpus_and(tmp, cpu_online_map, p->cpus_allowed); else - (cpumask_copy(&tmp, &cpu_online_map)); + return; latest_deadline = 0; highest_prio = -1; for_each_cpu_mask(cpu, tmp) { - u64 offset_deadline; struct rq *rq; int rq_prio; @@ -1392,12 +1475,9 @@ static void try_preempt(struct task_stru if (rq_prio < highest_prio) continue; - offset_deadline = rq->rq_deadline - - cache_distance(this_rq, rq, p); - - if (rq_prio > highest_prio || (rq_prio == highest_prio && - deadline_after(offset_deadline, latest_deadline))) { - latest_deadline = offset_deadline; + if (rq_prio > highest_prio || + deadline_after(rq->rq_deadline, latest_deadline)) { + latest_deadline = rq->rq_deadline; highest_prio = rq_prio; highest_prio_rq = rq; } @@ -1591,6 +1671,7 @@ void sched_fork(struct task_struct *p, i #endif p->oncpu = 0; + clear_sticky(p); #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. 
*/ @@ -1836,14 +1917,14 @@ context_switch(struct rq *rq, struct tas */ arch_start_context_switch(prev); - if (unlikely(!mm)) { + if (!mm) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (!prev->mm) { prev->active_mm = NULL; rq->prev_mm = oldmm; } @@ -1931,8 +2012,7 @@ unsigned long nr_active(void) unsigned long this_cpu_load(void) { return this_rq()->rq_running + - (queued_notrunning() + nr_uninterruptible()) / - (1 + num_online_cpus()); + ((queued_notrunning() + nr_uninterruptible()) / grq.noc); } /* Variables and functions for calc_load */ @@ -1985,6 +2065,81 @@ DEFINE_PER_CPU(struct kernel_stat, kstat EXPORT_PER_CPU_SYMBOL(kstat); +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +/* + * There are no locks covering percpu hardirq/softirq time. + * They are only modified in account_system_vtime, on corresponding CPU + * with interrupts disabled. So, writes are safe. + * They are read and saved off onto struct rq in update_rq_clock(). + * This may result in other CPU reading this CPU's irq time and can + * race with irq/account_system_vtime on this CPU. We would either get old + * or new value (or semi updated value on 32 bit) with a side effect of + * accounting a slice of irq time to wrong task when irq is in progress + * while we read rq->clock. That is a worthy compromise in place of having + * locks on each irq in account_system_time. + */ +static DEFINE_PER_CPU(u64, cpu_hardirq_time); +static DEFINE_PER_CPU(u64, cpu_softirq_time); + +static DEFINE_PER_CPU(u64, irq_start_time); +static int sched_clock_irqtime; + +void enable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 1; +} + +void disable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 0; +} + +static u64 irq_time_cpu(int cpu) +{ + if (!sched_clock_irqtime) + return 0; + + return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); +} + +void account_system_vtime(struct task_struct *curr) +{ + unsigned long flags; + int cpu; + u64 now, delta; + + if (!sched_clock_irqtime) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + now = sched_clock_cpu(cpu); + delta = now - per_cpu(irq_start_time, cpu); + per_cpu(irq_start_time, cpu) = now; + /* + * We do not account for softirq time from ksoftirqd here. + * We want to continue accounting softirq time to ksoftirqd thread + * in that case, so as not to confuse scheduler with a special task + * that do not consume any time, but still wants to run. + */ + if (hardirq_count()) + per_cpu(cpu_hardirq_time, cpu) += delta; + else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) + per_cpu(cpu_softirq_time, cpu) += delta; + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(account_system_vtime); +#else + +static u64 irq_time_cpu(int cpu) +{ + return 0; +} +#endif + /* * On each tick, see what percentage of that tick was attributed to each * component and add the percentage to the _pc values. 
Once a _pc value has @@ -2029,9 +2184,13 @@ pc_system_time(struct rq *rq, struct tas } p->sched_time += ns; - if (hardirq_count() - hardirq_offset) + if (hardirq_count() - hardirq_offset) { rq->irq_pc += pc; - else if (softirq_count()) { + if (rq->irq_pc >= 100) { + rq->irq_pc %= 100; + cpustat->irq = cputime64_add(cpustat->irq, tmp); + } + } else if (softirq_count()) { rq->softirq_pc += pc; if (rq->softirq_pc >= 100) { rq->softirq_pc %= 100; @@ -2416,7 +2575,7 @@ static void task_running_tick(struct rq * Tasks that were scheduled in the first half of a tick are not * allowed to run into the 2nd half of the next tick if they will * run out of time slice in the interim. Otherwise, if they have - * less than 100us of time slice left they will be rescheduled. + * less than RESCHED_US μs of time slice left they will be rescheduled. */ if (rq->dither) { if (rq->rq_time_slice > HALF_JIFFY_US) @@ -2539,9 +2698,14 @@ static inline u64 static_deadline_diff(i return prio_deadline_diff(USER_PRIO(static_prio)); } +static inline int longest_deadline_diff(void) +{ + return prio_deadline_diff(39); +} + static inline int ms_longest_deadline_diff(void) { - return NS_TO_MS(prio_deadline_diff(39)); + return NS_TO_MS(longest_deadline_diff()); } /* @@ -2611,7 +2775,19 @@ retry: goto out_take; } - dl = p->deadline + cache_distance(task_rq(p), rq, p); + /* + * Soft affinity happens here by not scheduling a task with + * its sticky flag set that ran on a different CPU last when + * the CPU is scaling, or by greatly biasing against its + * deadline when not. + */ + if (task_rq(p) != rq && task_sticky(p)) { + if (scaling_rq(rq)) + continue; + else + dl = p->deadline + longest_deadline_diff(); + } else + dl = p->deadline; /* * No rt tasks. Find the earliest deadline task. Now we're in @@ -2689,7 +2865,7 @@ static inline void set_rq_task(struct rq { rq->rq_time_slice = p->time_slice; rq->rq_deadline = p->deadline; - rq->rq_last_ran = p->last_ran; + rq->rq_last_ran = p->last_ran = rq->clock; rq->rq_policy = p->policy; rq->rq_prio = p->prio; if (p != rq->idle) @@ -2768,14 +2944,8 @@ need_resched_nonpreemptible: */ grq_unlock_irq(); goto rerun_prev_unlocked; - } else { - /* - * If prev got kicked off by a task that has to - * run on this CPU for affinity reasons then - * there may be an idle CPU it can go to. - */ - resched_suitable_idle(prev); - } + } else + swap_sticky(rq, cpu, prev); } return_task(prev, deactivate); } @@ -2790,17 +2960,24 @@ need_resched_nonpreemptible: set_cpuidle_map(cpu); } else { next = earliest_deadline_task(rq, idle); - prefetch(next); - prefetch_stack(next); - clear_cpuidle_map(cpu); + if (likely(next->prio != PRIO_LIMIT)) { + prefetch(next); + prefetch_stack(next); + clear_cpuidle_map(cpu); + } else + set_cpuidle_map(cpu); } if (likely(prev != next)) { + /* + * Don't stick tasks when a real time task is going to run as + * they may literally get stuck. + */ + if (rt_task(next)) + unstick_task(rq, prev); sched_info_switch(prev, next); perf_event_task_sched_out(prev, next, cpu); - if (prev != idle) - set_last_task(rq, prev); set_rq_task(rq, next); grq.nr_switches++; prev->oncpu = 0; @@ -3627,8 +3804,8 @@ recheck: * SCHED_BATCH is 0. 
*/ if (param->sched_priority < 0 || - (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || - (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) + (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) || + (!p->mm && param->sched_priority > MAX_RT_PRIO - 1)) return -EINVAL; if (is_rt_policy(policy) != (param->sched_priority != 0)) return -EINVAL; @@ -4349,10 +4526,12 @@ void init_idle(struct task_struct *idle, idle->prio = PRIO_LIMIT; set_rq_task(rq, idle); idle->cpus_allowed = cpumask_of_cpu(cpu); + /* Silence PROVE_RCU */ + rcu_read_lock(); set_task_cpu(idle, cpu); + rcu_read_unlock(); rq->curr = rq->idle = idle; idle->oncpu = 1; - set_cpuidle_map(cpu); grq_unlock_irqrestore(&flags); /* Set the preempt count _outside_ the spinlocks! */ @@ -4579,6 +4758,30 @@ void move_task_off_dead_cpu(int dead_cpu } +/* Run through task list and find tasks affined to just the dead cpu, then + * allocate a new affinity */ +static void break_sole_affinity(int src_cpu) +{ + struct task_struct *p, *t; + + do_each_thread(t, p) { + if (!online_cpus(p)) { + cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu %d\n", + task_pid_nr(p), p->comm, src_cpu); + } + } + clear_sticky(p); + } while_each_thread(t, p); +} + /* * Schedules idle task to be the next runnable task on current CPU. * It does so by boosting its priority to highest possible. @@ -4599,6 +4802,7 @@ void sched_idle_next(void) * and interrupts disabled on the current cpu. */ grq_lock_irqsave(&flags); + break_sole_affinity(this_cpu); __setscheduler(idle, rq, SCHED_FIFO, MAX_RT_PRIO - 1); @@ -4836,6 +5040,7 @@ migration_call(struct notifier_block *nf set_rq_online(rq); } + grq.noc = num_online_cpus(); grq_unlock_irqrestore(&flags); break; @@ -4866,6 +5071,7 @@ migration_call(struct notifier_block *nf BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } + grq.noc = num_online_cpus(); grq_unlock_irqrestore(&flags); break; #endif @@ -6389,7 +6595,7 @@ static int cache_cpu_idle(unsigned long void __init sched_init_smp(void) { struct sched_domain *sd; - int cpu, cpus; + int cpu; cpumask_var_t non_isolated_cpus; @@ -6423,14 +6629,6 @@ void __init sched_init_smp(void) BUG(); free_cpumask_var(non_isolated_cpus); - /* - * Assume that every added cpu gives us slightly less overall latency - * allowing us to increase the base rr_interval, non-linearly and with - * an upper bound. 
- */ - cpus = num_online_cpus(); - rr_interval = rr_interval * (4 * cpus + 4) / (cpus + 6); - grq_lock_irq(); /* * Set up the relative cache distance of each online cpu from each @@ -6459,10 +6657,12 @@ void __init sched_init_smp(void) cpumask_set_cpu(other_cpu, &rq->cache_siblings); } #endif - if (sd->level <= SD_LV_MC) - locality = 0; - else if (sd->level <= SD_LV_NODE) + if (sd->level <= SD_LV_SIBLING) locality = 1; + else if (sd->level <= SD_LV_MC) + locality = 2; + else if (sd->level <= SD_LV_NODE) + locality = 3; else continue; @@ -6517,6 +6717,7 @@ void __init sched_init(void) grq.last_jiffy = jiffies; spin_lock_init(&grq.iso_lock); grq.iso_ticks = grq.iso_refractory = 0; + grq.noc = 1; #ifdef CONFIG_SMP init_defrootdomain(); grq.qnr = grq.idle_cpus = 0; @@ -6530,6 +6731,7 @@ void __init sched_init(void) rq->iowait_pc = rq->idle_pc = 0; rq->dither = 0; #ifdef CONFIG_SMP + rq->sticky_task = NULL; rq->last_niffy = 0; rq->sd = NULL; rq->rd = NULL; @@ -6568,7 +6770,7 @@ void __init sched_init(void) if (i == j) rq->cpu_locality[j] = 0; else - rq->cpu_locality[j] = 3; + rq->cpu_locality[j] = 4; } } #endif