Patch: 2.6.32.33 -> 2.6.32.35
kernel/sched.c
blob:c10b6c0ed83bcd755a5640ca9caf33d10590d59b -> blob:95297f2c284ebeed72c63256397c783efbc289d4
--- kernel/sched.c
+++ kernel/sched.c
@@ -237,7 +237,7 @@ static void destroy_rt_bandwidth(struct
*/
static DEFINE_MUTEX(sched_domains_mutex);
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
#include <linux/cgroup.h>
@@ -247,7 +247,13 @@ static LIST_HEAD(task_groups);
/* task group related information */
struct task_group {
+#ifdef CONFIG_CGROUP_SCHED
struct cgroup_subsys_state css;
+#endif
+
+#ifdef CONFIG_USER_SCHED
+ uid_t uid;
+#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
@@ -272,7 +278,35 @@ struct task_group {
struct list_head children;
};
+#ifdef CONFIG_USER_SCHED
+
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+ user->tg->uid = user->uid;
+}
+
+/*
+ * Root task group.
+ * Every UID task group (including init_task_group aka UID-0) will
+ * be a child to this group.
+ */
+struct task_group root_task_group;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Default task group's sched entity on each cpu */
+static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
+/* Default task group's cfs_rq on each cpu */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+#endif /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
+#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
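With CONFIG_USER_SCHED restored, task groups are keyed by uid rather than by cgroup. A sketch of the resulting hierarchy (the UID-1000/1001 groups are illustrative, not from the patch):

/*
 *   root_task_group
 *   +- init_task_group            (UID 0, INIT_TASK_GROUP_LOAD share)
 *   +- task group for UID 1000
 *   +- task group for UID 1001
 *
 * Each user_struct carries a tg pointer; set_tg_uid() above stamps
 * the group with its owning uid when the per-user group is created.
 */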
@@ -288,7 +322,11 @@ static int root_task_group_empty(void)
}
#endif
+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
+#else /* !CONFIG_USER_SCHED */
# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
+#endif /* CONFIG_USER_SCHED */
/*
* A weight of 0 or 1 can cause arithmetics problems.
@@ -314,7 +352,13 @@ static inline struct task_group *task_gr
{
struct task_group *tg;
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_USER_SCHED
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
+
+ return tg;
+#elif defined(CONFIG_CGROUP_SCHED)
struct cgroup_subsys_state *css;
css = task_subsys_state(p, cpu_cgroup_subsys_id);
@@ -350,7 +394,7 @@ static inline struct task_group *task_gr
return NULL;
}
-#endif /* CONFIG_CGROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */
/* CFS-related fields in a runqueue */
struct cfs_rq {
@@ -534,7 +578,6 @@ struct rq {
struct mm_struct *prev_mm;
u64 clock;
- u64 clock_task;
atomic_t nr_iowait;
@@ -542,8 +585,6 @@ struct rq {
struct root_domain *rd;
struct sched_domain *sd;
- unsigned long cpu_power;
-
unsigned char idle_at_tick;
/* For active balancing */
int post_schedule;
@@ -564,10 +605,6 @@ struct rq {
u64 avg_idle;
#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
- u64 prev_irq_time;
-#endif
-
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
@@ -605,7 +642,11 @@ struct rq {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+static inline
+void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+ rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+}
static inline int cpu_of(struct rq *rq)
{
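The revert folds check_preempt_curr() back into a one-line inline that always consults the current task's scheduling class; the out-of-line version it replaces is removed in the @@ -2385 hunk further down. A note on the behavioral difference, read directly off the two hunks:

/*
 * Post-revert, every preemption check is:
 *
 *   rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 *
 * The removed version also walked for_each_class() so that a task
 * from a higher class (e.g. RT waking on a CFS-run CPU) forced a
 * resched_task() without asking the lower class at all.
 */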
@@ -632,20 +673,9 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
-
inline void update_rq_clock(struct rq *rq)
{
- int cpu = cpu_of(rq);
- u64 irq_time;
-
rq->clock = sched_clock_cpu(cpu_of(rq));
- irq_time = irq_time_cpu(cpu);
- if (rq->clock - irq_time > rq->clock_task)
- rq->clock_task = rq->clock - irq_time;
-
- sched_irq_time_avg_update(rq, irq_time);
}
/*
@@ -1278,10 +1308,6 @@ static void resched_task(struct task_str
static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
}
-
-static void sched_avg_update(struct rq *rq)
-{
-}
#endif /* CONFIG_SMP */
#if BITS_PER_LONG == 32
@@ -1531,9 +1557,24 @@ static unsigned long target_load(int cpu
return max(rq->cpu_load[type-1], total);
}
+static struct sched_group *group_of(int cpu)
+{
+ struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+ if (!sd)
+ return NULL;
+
+ return sd->groups;
+}
+
static unsigned long power_of(int cpu)
{
- return cpu_rq(cpu)->cpu_power;
+ struct sched_group *group = group_of(cpu);
+
+ if (!group)
+ return SCHED_LOAD_SCALE;
+
+ return group->cpu_power;
}
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
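power_of() now walks to the cpu's sched_domain instead of reading the cached rq->cpu_power field that this patch removes. A sketch of the lookup and its fallback; SCHED_LOAD_SCALE's value (1024) is the standard 2.6.32 constant:

/*
 *   cpu -> cpu_rq(cpu)->sd -> sd->groups -> group->cpu_power
 *
 * group_of() uses rcu_dereference(), so callers are assumed to be
 * on an RCU read side.  When no domain is attached (early boot,
 * isolated CPUs), power_of() falls back to SCHED_LOAD_SCALE, i.e.
 * the nominal power of a single CPU (1024).
 */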
@@ -1815,94 +1856,6 @@ static inline void __set_task_cpu(struct
#endif
}
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-
-/*
- * There are no locks covering percpu hardirq/softirq time.
- * They are only modified in account_system_vtime, on corresponding CPU
- * with interrupts disabled. So, writes are safe.
- * They are read and saved off onto struct rq in update_rq_clock().
- * This may result in other CPU reading this CPU's irq time and can
- * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
- */
-static DEFINE_PER_CPU(u64, cpu_hardirq_time);
-static DEFINE_PER_CPU(u64, cpu_softirq_time);
-
-static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
-
-void disable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 0;
-}
-
-static u64 irq_time_cpu(int cpu)
-{
- if (!sched_clock_irqtime)
- return 0;
-
- return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
-}
-
-void account_system_vtime(struct task_struct *curr)
-{
- unsigned long flags;
- int cpu;
- u64 now, delta;
-
- if (!sched_clock_irqtime)
- return;
-
- local_irq_save(flags);
-
- cpu = smp_processor_id();
- now = sched_clock_cpu(cpu);
- delta = now - per_cpu(irq_start_time, cpu);
- per_cpu(irq_start_time, cpu) = now;
- /*
- * We do not account for softirq time from ksoftirqd here.
- * We want to continue accounting softirq time to ksoftirqd thread
- * in that case, so as not to confuse scheduler with a special task
- * that do not consume any time, but still wants to run.
- */
- if (hardirq_count())
- per_cpu(cpu_hardirq_time, cpu) += delta;
- else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
- per_cpu(cpu_softirq_time, cpu) += delta;
-
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(account_system_vtime);
-
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
-{
- if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
- u64 delta_irq = curr_irq_time - rq->prev_irq_time;
- rq->prev_irq_time = curr_irq_time;
- sched_rt_avg_update(rq, delta_irq);
- }
-}
-
-#else
-
-static u64 irq_time_cpu(int cpu)
-{
- return 0;
-}
-
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
-
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -1929,8 +1882,8 @@ static void dec_nr_running(struct rq *rq
static void set_load_weight(struct task_struct *p)
{
if (task_has_rt_policy(p)) {
- p->se.load.weight = 0;
- p->se.load.inv_weight = WMULT_CONST;
+ p->se.load.weight = prio_to_weight[0] * 2;
+ p->se.load.inv_weight = prio_to_wmult[0] >> 1;
return;
}
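The restored RT weight is derived from the nice -20 entry of the static priority tables; a worked check, assuming the standard 2.6.32 table values prio_to_weight[0] = 88761 and prio_to_wmult[0] = 48388:

/*
 *   weight     = 88761 * 2  = 177522
 *   inv_weight = 48388 >> 1 = 24194
 *
 *   177522 * 24194 = 4294967268 ~= 2^32 = WMULT_CONST
 *
 * so weight * inv_weight still satisfies the fixed-point identity
 * calc_delta_mine() relies on, while giving RT tasks twice the
 * weight of a nice -20 task (instead of the .33 weight of 0).
 */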
@@ -2111,9 +2064,6 @@ task_hot(struct task_struct *p, u64 now,
if (p->sched_class != &fair_sched_class)
return 0;
- if (unlikely(p->policy == SCHED_IDLE))
- return 0;
-
/*
* Buddy candidates are cache hot:
*/
@@ -2385,24 +2335,6 @@ void task_oncpu_function_call(struct tas
preempt_enable();
}
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
- const struct sched_class *class;
-
- if (p->sched_class == rq->curr->sched_class) {
- rq->curr->sched_class->check_preempt_curr(rq, p, flags);
- } else {
- for_each_class(class) {
- if (class == rq->curr->sched_class)
- break;
- if (class == p->sched_class) {
- resched_task(rq->curr);
- break;
- }
- }
- }
-}
-
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -3224,8 +3156,6 @@ static void update_cpu_load(struct rq *t
this_rq->calc_load_update += LOAD_FREQ;
calc_load_account_active(this_rq);
}
-
- sched_avg_update(this_rq);
}
#ifdef CONFIG_SMP
@@ -3357,7 +3287,7 @@ int can_migrate_task(struct task_struct
* 2) too many balance attempts have failed.
*/
- tsk_cache_hot = task_hot(p, rq->clock_task, sd);
+ tsk_cache_hot = task_hot(p, rq->clock, sd);
if (!tsk_cache_hot ||
sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
@@ -3540,17 +3470,12 @@ struct sd_lb_stats {
unsigned long this_load;
unsigned long this_load_per_task;
unsigned long this_nr_running;
- unsigned long this_has_capacity;
- unsigned int this_idle_cpus;
/* Statistics of the busiest group */
- unsigned int busiest_idle_cpus;
unsigned long max_load;
unsigned long busiest_load_per_task;
unsigned long busiest_nr_running;
unsigned long busiest_group_capacity;
- unsigned long busiest_has_capacity;
- unsigned int busiest_group_weight;
int group_imb; /* Is there imbalance in this sd */
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -3572,10 +3497,7 @@ struct sg_lb_stats {
unsigned long sum_nr_running; /* Nr tasks running in the group */
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long group_capacity;
- unsigned long idle_cpus;
- unsigned long group_weight;
int group_imb; /* Is there an imbalance in the group ? */
- int group_has_capacity; /* Is there extra capacity in the group? */
};
/**
@@ -3785,14 +3707,10 @@ unsigned long scale_rt_power(int cpu)
struct rq *rq = cpu_rq(cpu);
u64 total, available;
- total = sched_avg_period() + (rq->clock - rq->age_stamp);
+ sched_avg_update(rq);
- if (unlikely(total < rq->rt_avg)) {
- /* Ensures that power won't end up being negative */
- available = 0;
- } else {
- available = total - rq->rt_avg;
- }
+ total = sched_avg_period() + (rq->clock - rq->age_stamp);
+ available = total - rq->rt_avg;
if (unlikely((s64)total < SCHED_LOAD_SCALE))
total = SCHED_LOAD_SCALE;
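Note what the guard being removed here protected against; the removed comment says it outright, and a worked case (illustrative numbers) makes it concrete:

/*
 * If rt_avg ever exceeds total, say total = 1000000 and
 * rt_avg = 1200000 (ns), the reverted code computes
 *
 *   available = total - rt_avg
 *
 * in u64, which underflows to a huge positive value.  The .33 hunk
 * clamped available to 0 in that case; vanilla 2.6.32, restored
 * here, relies on rt_avg staying below total in practice.
 */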
@@ -3830,7 +3748,6 @@ static void update_cpu_power(struct sche
if (!power)
power = 1;
- cpu_rq(cpu)->cpu_power = power;
sdg->cpu_power = power;
}
@@ -3875,7 +3792,7 @@ static inline void update_sg_lb_stats(st
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
{
- unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
+ unsigned long load, max_cpu_load, min_cpu_load;
int i;
unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long avg_load_per_task = 0;
@@ -3889,7 +3806,6 @@ static inline void update_sg_lb_stats(st
/* Tally up the load of all CPUs in the group */
max_cpu_load = 0;
min_cpu_load = ~0UL;
- max_nr_running = 0;
for_each_cpu_and(i, sched_group_cpus(group), cpus) {
struct rq *rq = cpu_rq(i);
@@ -3907,10 +3823,8 @@ static inline void update_sg_lb_stats(st
load = target_load(i, load_idx);
} else {
load = source_load(i, load_idx);
- if (load > max_cpu_load) {
+ if (load > max_cpu_load)
max_cpu_load = load;
- max_nr_running = rq->nr_running;
- }
if (min_cpu_load > load)
min_cpu_load = load;
}
@@ -3918,8 +3832,7 @@ static inline void update_sg_lb_stats(st
sgs->group_load += load;
sgs->sum_nr_running += rq->nr_running;
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
- sgs->idle_cpus++;
+
}
/*
@@ -3949,14 +3862,11 @@ static inline void update_sg_lb_stats(st
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+ if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
sgs->group_imb = 1;
- sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
- sgs->group_weight = group->group_weight;
-
- if (sgs->group_capacity > sgs->sum_nr_running)
- sgs->group_has_capacity = 1;
+ sgs->group_capacity =
+ DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
}
/**
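group_capacity rounds the group's accumulated cpu_power to whole CPUs. A small worked example, assuming SCHED_LOAD_SCALE is its 2.6.32 value of 1024:

/*
 *   two full-power CPUs  ->  group->cpu_power ~= 2048
 *   group_capacity = DIV_ROUND_CLOSEST(2048, 1024) = 2
 *
 * i.e. the group counts as overloaded once sum_nr_running exceeds
 * two, which is exactly the busiest-group test used later in
 * update_sd_lb_stats().
 */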
@@ -4003,14 +3913,9 @@ static inline void update_sd_lb_stats(st
/*
* In case the child domain prefers tasks go to siblings
* first, lower the group capacity to one so that we'll try
- * and move all the excess tasks away. We lower the capacity
- * of a group only if the local group has the capacity to fit
- * these excess tasks, i.e. nr_running < group_capacity. The
- * extra check prevents the case where you always pull from the
- * heaviest group when it is already under-utilized (possible
- * with a large weight task outweighs the tasks on the system).
+ * and move all the excess tasks away.
*/
- if (prefer_sibling && !local_group && sds->this_has_capacity)
+ if (prefer_sibling)
sgs.group_capacity = min(sgs.group_capacity, 1UL);
if (local_group) {
@@ -4018,19 +3923,14 @@ static inline void update_sd_lb_stats(st
sds->this = group;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
- sds->this_has_capacity = sgs.group_has_capacity;
- sds->this_idle_cpus = sgs.idle_cpus;
} else if (sgs.avg_load > sds->max_load &&
(sgs.sum_nr_running > sgs.group_capacity ||
sgs.group_imb)) {
sds->max_load = sgs.avg_load;
sds->busiest = group;
sds->busiest_nr_running = sgs.sum_nr_running;
- sds->busiest_idle_cpus = sgs.idle_cpus;
sds->busiest_group_capacity = sgs.group_capacity;
- sds->busiest_group_weight = sgs.group_weight;
sds->busiest_load_per_task = sgs.sum_weighted_load;
- sds->busiest_has_capacity = sgs.group_has_capacity;
sds->group_imb = sgs.group_imb;
}
@@ -4176,7 +4076,6 @@ static inline void calculate_imbalance(s
return fix_small_imbalance(sds, this_cpu, imbalance);
}
-
/******* find_busiest_group() helpers end here *********************/
/**
@@ -4228,11 +4127,6 @@ find_busiest_group(struct sched_domain *
* 4) This group is more busy than the avg busieness at this
* sched_domain.
* 5) The imbalance is within the specified limit.
- *
- * Note: when doing newidle balance, if the local group has excess
- * capacity (i.e. nr_running < group_capacity) and the busiest group
- * does not have any capacity, we force a load balance to pull tasks
- * to the local group. In this case, we skip past checks 3, 4 and 5.
*/
if (balance && !(*balance))
goto ret;
@@ -4240,11 +4134,6 @@ find_busiest_group(struct sched_domain *
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
- if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
- !sds.busiest_has_capacity)
- goto force_balance;
-
if (sds.this_load >= sds.max_load)
goto out_balanced;
@@ -4253,28 +4142,9 @@ find_busiest_group(struct sched_domain *
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- /*
- * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
- * And to check for busy balance use !idle_cpu instead of
- * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
- * even when they are idle.
- */
- if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
- } else {
- /*
- * This cpu is idle. If the busiest group load doesn't
- * have more tasks than the number of available cpu's and
- * there is no imbalance between this and busiest group
- * wrt to idle cpu's, it is balanced.
- */
- if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
- sds.busiest_nr_running <= sds.busiest_group_weight)
- goto out_balanced;
- }
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
-force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(&sds, this_cpu, imbalance);
return sds.busiest;
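With the force_balance path gone, the revert leaves a strictly linear decision sequence; summarizing it straight from the surviving code:

/*
 * Post-revert find_busiest_group() bails out, in order, when:
 *
 *   1. !sds.busiest || sds.busiest_nr_running == 0
 *   2. sds.this_load >= sds.max_load
 *   3. sds.this_load >= sds.avg_load
 *   4. 100 * sds.max_load <= sd->imbalance_pct * sds.this_load
 *
 * and only then computes an imbalance.  The removed newidle path
 * (idle CPU with spare capacity pulling from a full group) used to
 * jump past checks 2-4 via force_balance.
 */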
@@ -4430,14 +4300,7 @@ redo:
if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]);
- /*
- * Increment the failure counter only on periodic balance.
- * We do not want newidle balance, which can be very
- * frequent, pollute the failure counter causing
- * excessive cache_hot migrations and active balances.
- */
- if (idle != CPU_NEWLY_IDLE)
- sd->nr_balance_failed++;
+ sd->nr_balance_failed++;
if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
@@ -5182,7 +5045,7 @@ static u64 do_task_delta_exec(struct tas
if (task_current(rq, p)) {
update_rq_clock(rq);
- ns = rq->clock_task - p->se.exec_start;
+ ns = rq->clock - p->se.exec_start;
if ((s64)ns < 0)
ns = 0;
}
@@ -5326,7 +5189,7 @@ void account_system_time(struct task_str
tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
cpustat->irq = cputime64_add(cpustat->irq, tmp);
- else if (in_serving_softirq())
+ else if (softirq_count())
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
else
cpustat->system = cputime64_add(cpustat->system, tmp);
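Reverting in_serving_softirq() to softirq_count() widens what gets charged as softirq time; a hedged note, based on how vanilla 2.6.32 builds preempt_count:

/*
 * in_serving_softirq() is true only while a softirq handler is
 * actually executing.  softirq_count() is also non-zero inside
 * local_bh_disable() sections, which add SOFTIRQ_OFFSET to
 * preempt_count in vanilla 2.6.32, so ticks landing in bh-disabled
 * system time can again be accounted to cpustat->softirq.
 */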
@@ -7270,19 +7133,7 @@ void __cpuinit init_idle(struct task_str
idle->se.exec_start = sched_clock();
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
- /*
- * We're having a chicken and egg problem, even though we are
- * holding rq->lock, the cpu isn't yet set to this cpu so the
- * lockdep check in task_group() will fail.
- *
- * Similar case to sched_fork(). / Alternatively we could
- * use task_rq_lock() here and obtain the other rq->lock.
- *
- * Silence PROVE_RCU
- */
- rcu_read_lock();
__set_task_cpu(idle, cpu);
- rcu_read_unlock();
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
@@ -7300,7 +7151,7 @@ void __cpuinit init_idle(struct task_str
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
- ftrace_graph_init_idle_task(idle, cpu);
+ ftrace_graph_init_task(idle);
}
/*
@@ -8789,8 +8640,6 @@ static void init_sched_groups_power(int
if (cpu != group_first_cpu(sd->groups))
return;
- sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
-
child = sd->child;
sd->groups->cpu_power = 0;
@@ -9674,6 +9523,9 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
+#ifdef CONFIG_USER_SCHED
+ alloc_size *= 2;
+#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
alloc_size += num_possible_cpus() * cpumask_size();
#endif
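alloc_size is doubled because root_task_group needs its own per-cpu pointer arrays alongside init_task_group's; the carve-up happens in the next two hunks. A sketch of the layout with FAIR, RT, and USER all enabled:

/*
 *   init_task_group.se         nr_cpu_ids pointers
 *   init_task_group.cfs_rq     nr_cpu_ids pointers
 *   root_task_group.se         nr_cpu_ids pointers   (USER_SCHED)
 *   root_task_group.cfs_rq     nr_cpu_ids pointers   (USER_SCHED)
 *   init_task_group.rt_se      nr_cpu_ids pointers
 *   init_task_group.rt_rq      nr_cpu_ids pointers
 *   root_task_group.rt_se      nr_cpu_ids pointers   (USER_SCHED)
 *   root_task_group.rt_rq      nr_cpu_ids pointers   (USER_SCHED)
 */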
@@ -9691,6 +9543,13 @@ void __init sched_init(void)
init_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
+#ifdef CONFIG_USER_SCHED
+ root_task_group.se = (struct sched_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ root_task_group.cfs_rq = (struct cfs_rq **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
init_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -9699,6 +9558,13 @@ void __init sched_init(void)
init_task_group.rt_rq = (struct rt_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
+#ifdef CONFIG_USER_SCHED
+ root_task_group.rt_se = (struct sched_rt_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ root_task_group.rt_rq = (struct rt_rq **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CPUMASK_OFFSTACK
for_each_possible_cpu(i) {
@@ -9718,14 +9584,24 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
init_rt_bandwidth(&init_task_group.rt_bandwidth,
global_rt_period(), global_rt_runtime());
+#ifdef CONFIG_USER_SCHED
+ init_rt_bandwidth(&root_task_group.rt_bandwidth,
+ global_rt_period(), RUNTIME_INF);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
list_add(&init_task_group.list, &task_groups);
INIT_LIST_HEAD(&init_task_group.children);
- autogroup_init(&init_task);
-#endif /* CONFIG_CGROUP_SCHED */
+#ifdef CONFIG_USER_SCHED
+ INIT_LIST_HEAD(&root_task_group.children);
+ init_task_group.parent = &root_task_group;
+ list_add(&init_task_group.siblings, &root_task_group.children);
+#endif /* CONFIG_USER_SCHED */
+
+ autogroup_init(&init_task);
+#endif /* CONFIG_GROUP_SCHED */
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
@@ -9765,6 +9641,25 @@ void __init sched_init(void)
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
*/
init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
+#elif defined CONFIG_USER_SCHED
+ root_task_group.shares = NICE_0_LOAD;
+ init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL);
+ /*
+ * In case of task-groups formed thr' the user id of tasks,
+ * init_task_group represents tasks belonging to root user.
+ * Hence it forms a sibling of all subsequent groups formed.
+ * In this case, init_task_group gets only a fraction of overall
+ * system cpu resource, based on the weight assigned to root
+ * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
+ * by letting tasks of init_task_group sit in a separate cfs_rq
+ * (init_tg_cfs_rq) and having one entity represent this group of
+ * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
+ */
+ init_tg_cfs_entry(&init_task_group,
+ &per_cpu(init_tg_cfs_rq, i),
+ &per_cpu(init_sched_entity, i), i, 1,
+ root_task_group.se[i]);
+
#endif
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -9787,7 +9682,6 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
- rq->cpu_power = SCHED_LOAD_SCALE;
rq->post_schedule = 0;
rq->active_balance = 0;
rq->next_balance = jiffies;
@@ -10171,7 +10065,7 @@ static inline void unregister_rt_sched_g
}
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
@@ -10262,12 +10156,12 @@ void __sched_move_task(struct task_struc
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
+ set_task_rq(tsk, task_cpu(tsk));
+
#ifdef CONFIG_FAIR_GROUP_SCHED
- if (tsk->sched_class->task_move_group)
- tsk->sched_class->task_move_group(tsk, on_rq);
- else
+ if (tsk->sched_class->moved_group)
+ tsk->sched_class->moved_group(tsk, on_rq);
#endif
- set_task_rq(tsk, task_cpu(tsk));
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
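The group-move hook reverts from task_move_group() to moved_group(), and set_task_rq() moves back in front of the hook. The restored flow, read off the post-patch lines:

/*
 *   set_task_rq(tsk, task_cpu(tsk));            switch the task's
 *                                               cfs_rq/rt_rq first
 *   tsk->sched_class->moved_group(tsk, on_rq);  then let the class
 *                                               fix up the entity
 *
 * 2.6.32.33 instead preferred task_move_group() when the class
 * provided it and fell back to set_task_rq() only otherwise.
 */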
@@ -10285,7 +10179,7 @@ void sched_move_task(struct task_struct
task_rq_unlock(rq, &flags);
}
-#endif /* CONFIG_CGROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
static void __set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -10427,6 +10321,13 @@ static int tg_schedulable(struct task_gr
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/
@@ -11202,4 +11103,4 @@ void synchronize_sched_expedited(void)
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */
-#endif /* CONFIG_SCHED_BFS */
+#endif /* CONFIG_SCHED_BFS */