Patch: 2.6.32.33 -> 2.6.32.35
kernel/sched.c
blob:c10b6c0ed83bcd755a5640ca9caf33d10590d59b -> blob:95297f2c284ebeed72c63256397c783efbc289d4
--- kernel/sched.c
+++ kernel/sched.c
@@ -237,7 +237,7 @@ static void destroy_rt_bandwidth(struct
*/
static DEFINE_MUTEX(sched_domains_mutex);
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
#include <linux/cgroup.h>
@@ -247,7 +247,13 @@ static LIST_HEAD(task_groups);
/* task group related information */
struct task_group {
+#ifdef CONFIG_CGROUP_SCHED
struct cgroup_subsys_state css;
+#endif
+
+#ifdef CONFIG_USER_SCHED
+ uid_t uid;
+#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */
@@ -272,7 +278,35 @@ struct task_group {
struct list_head children;
};
+#ifdef CONFIG_USER_SCHED
+
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+ user->tg->uid = user->uid;
+}
+
+/*
+ * Root task group.
+ * Every UID task group (including init_task_group aka UID-0) will
+ * be a child to this group.
+ */
+struct task_group root_task_group;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Default task group's sched entity on each cpu */
+static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
+/* Default task group's cfs_rq on each cpu */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+#endif /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
+#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
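With CONFIG_USER_SCHED restored, task groups are keyed by uid rather than by cgroup. A sketch of the resulting hierarchy (the UID-1000/1001 groups are illustrative, not from the patch):

/*
 *   root_task_group
 *   +- init_task_group            (UID 0, INIT_TASK_GROUP_LOAD share)
 *   +- task group for UID 1000
 *   +- task group for UID 1001
 *
 * Each user_struct carries a tg pointer; set_tg_uid() above stamps
 * the group with its owning uid when the per-user group is created.
 */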
@@ -288,7 +322,11 @@ static int root_task_group_empty(void)
}
#endif
+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
+#else /* !CONFIG_USER_SCHED */
# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
+#endif /* CONFIG_USER_SCHED */
/*
* A weight of 0 or 1 can cause arithmetics problems.
@@ -314,7 +352,13 @@ static inline struct task_group *task_gr
{
struct task_group *tg;
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_USER_SCHED
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
+
+ return tg;
+#elif defined(CONFIG_CGROUP_SCHED)
struct cgroup_subsys_state *css;
css = task_subsys_state(p, cpu_cgroup_subsys_id);
@@ -350,7 +394,7 @@ static inline struct task_group *task_gr
return NULL;
}
-#endif /* CONFIG_CGROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */
/* CFS-related fields in a runqueue */
struct cfs_rq {
@@ -534,7 +578,6 @@ struct rq {
struct mm_struct *prev_mm;
u64 clock;
- u64 clock_task;
atomic_t nr_iowait;
@@ -542,8 +585,6 @@ struct rq {
struct root_domain *rd;
struct sched_domain *sd;
- unsigned long cpu_power;
-
unsigned char idle_at_tick;
/* For active balancing */
int post_schedule;
@@ -564,10 +605,6 @@ struct rq {
u64 avg_idle;
#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
- u64 prev_irq_time;
-#endif
-
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
@@ -605,7 +642,11 @@ struct rq {
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+static inline
+void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+ rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+}
static inline int cpu_of(struct rq *rq)
{
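The revert folds check_preempt_curr() back into a one-line inline that always consults the current task's scheduling class; the out-of-line version it replaces is removed in the @@ -2385 hunk further down. A note on the behavioral difference, read directly off the two hunks:

/*
 * Post-revert, every preemption check is:
 *
 *   rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 *
 * The removed version also walked for_each_class() so that a task
 * from a higher class (e.g. RT waking on a CFS-run CPU) forced a
 * resched_task() without asking the lower class at all.
 */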
@@ -632,20 +673,9 @@ static inline int cpu_of(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
-
inline void update_rq_clock(struct rq *rq)
{
- int cpu = cpu_of(rq);
- u64 irq_time;
-
rq->clock = sched_clock_cpu(cpu_of(rq));
- irq_time = irq_time_cpu(cpu);
- if (rq->clock - irq_time > rq->clock_task)
- rq->clock_task = rq->clock - irq_time;
-
- sched_irq_time_avg_update(rq, irq_time);
}
/*
@@ -1278,10 +1308,6 @@ static void resched_task(struct task_str
static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
}
-
-static void sched_avg_update(struct rq *rq)
-{
-}
#endif /* CONFIG_SMP */
#if BITS_PER_LONG == 32
@@ -1531,9 +1557,24 @@ static unsigned long target_load(int cpu
return max(rq->cpu_load[type-1], total);
}
+static struct sched_group *group_of(int cpu)
+{
+ struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+ if (!sd)
+ return NULL;
+
+ return sd->groups;
+}
+
static unsigned long power_of(int cpu)
{
- return cpu_rq(cpu)->cpu_power;
+ struct sched_group *group = group_of(cpu);
+
+ if (!group)
+ return SCHED_LOAD_SCALE;
+
+ return group->cpu_power;
}
static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
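power_of() now walks to the cpu's sched_domain instead of reading the cached rq->cpu_power field that this patch removes. A sketch of the lookup and its fallback; SCHED_LOAD_SCALE's value (1024) is the standard 2.6.32 constant:

/*
 *   cpu -> cpu_rq(cpu)->sd -> sd->groups -> group->cpu_power
 *
 * group_of() uses rcu_dereference(), so callers are assumed to be
 * on an RCU read side.  When no domain is attached (early boot,
 * isolated CPUs), power_of() falls back to SCHED_LOAD_SCALE, i.e.
 * the nominal power of a single CPU (1024).
 */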
@@ -1815,94 +1856,6 @@ static inline void __set_task_cpu(struct
#endif
}
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-
-/*
- * There are no locks covering percpu hardirq/softirq time.
- * They are only modified in account_system_vtime, on corresponding CPU
- * with interrupts disabled. So, writes are safe.
- * They are read and saved off onto struct rq in update_rq_clock().
- * This may result in other CPU reading this CPU's irq time and can
- * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
- */
-static DEFINE_PER_CPU(u64, cpu_hardirq_time);
-static DEFINE_PER_CPU(u64, cpu_softirq_time);
-
-static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
-
-void disable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 0;
-}
-
-static u64 irq_time_cpu(int cpu)
-{
- if (!sched_clock_irqtime)
- return 0;
-
- return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
-}
-
-void account_system_vtime(struct task_struct *curr)
-{
- unsigned long flags;
- int cpu;
- u64 now, delta;
-
- if (!sched_clock_irqtime)
- return;
-
- local_irq_save(flags);
-
- cpu = smp_processor_id();
- now = sched_clock_cpu(cpu);
- delta = now - per_cpu(irq_start_time, cpu);
- per_cpu(irq_start_time, cpu) = now;
- /*
- * We do not account for softirq time from ksoftirqd here.
- * We want to continue accounting softirq time to ksoftirqd thread
- * in that case, so as not to confuse scheduler with a special task
- * that do not consume any time, but still wants to run.
- */
- if (hardirq_count())
- per_cpu(cpu_hardirq_time, cpu) += delta;
- else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
- per_cpu(cpu_softirq_time, cpu) += delta;
-
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(account_system_vtime);
-
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
-{
- if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
- u64 delta_irq = curr_irq_time - rq->prev_irq_time;
- rq->prev_irq_time = curr_irq_time;
- sched_rt_avg_update(rq, delta_irq);
- }
-}
-
-#else
-
-static u64 irq_time_cpu(int cpu)
-{
- return 0;
-}
-
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
-
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -1929,8 +1882,8 @@ static void dec_nr_running(struct rq *rq
static void set_load_weight(struct task_struct *p)
{
if (task_has_rt_policy(p)) {
- p->se.load.weight = 0;
- p->se.load.inv_weight = WMULT_CONST;
+ p->se.load.weight = prio_to_weight[0] * 2;
+ p->se.load.inv_weight = prio_to_wmult[0] >> 1;
return;
}
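The restored RT weight is derived from the nice -20 entry of the static priority tables; a worked check, assuming the standard 2.6.32 table values prio_to_weight[0] = 88761 and prio_to_wmult[0] = 48388:

/*
 *   weight     = 88761 * 2  = 177522
 *   inv_weight = 48388 >> 1 = 24194
 *
 *   177522 * 24194 = 4294967268 ~= 2^32 = WMULT_CONST
 *
 * so weight * inv_weight still satisfies the fixed-point identity
 * calc_delta_mine() relies on, while giving RT tasks twice the
 * weight of a nice -20 task (instead of the .33 weight of 0).
 */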
@@ -2111,9 +2064,6 @@ task_hot(struct task_struct *p, u64 now,
if (p->sched_class != &fair_sched_class)
return 0;
- if (unlikely(p->policy == SCHED_IDLE))
- return 0;
-
/*
* Buddy candidates are cache hot:
*/
@@ -2385,24 +2335,6 @@ void task_oncpu_function_call(struct tas
preempt_enable();
}
-static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
- const struct sched_class *class;
-
- if (p->sched_class == rq->curr->sched_class) {
- rq->curr->sched_class->check_preempt_curr(rq, p, flags);
- } else {
- for_each_class(class) {
- if (class == rq->curr->sched_class)
- break;
- if (class == p->sched_class) {
- resched_task(rq->curr);
- break;
- }
- }
- }
-}
-
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -3224,8 +3156,6 @@ static void update_cpu_load(struct rq *t
this_rq->calc_load_update += LOAD_FREQ;
calc_load_account_active(this_rq);
}
-
- sched_avg_update(this_rq);
}
#ifdef CONFIG_SMP
@@ -3357,7 +3287,7 @@ int can_migrate_task(struct task_struct
* 2) too many balance attempts have failed.
*/
- tsk_cache_hot = task_hot(p, rq->clock_task, sd);
+ tsk_cache_hot = task_hot(p, rq->clock, sd);
if (!tsk_cache_hot ||
sd->nr_balance_failed > sd->cache_nice_tries) {
#ifdef CONFIG_SCHEDSTATS
@@ -3540,17 +3470,12 @@ struct sd_lb_stats {
unsigned long this_load;
unsigned long this_load_per_task;
unsigned long this_nr_running;
- unsigned long this_has_capacity;
- unsigned int this_idle_cpus;
/* Statistics of the busiest group */
- unsigned int busiest_idle_cpus;
unsigned long max_load;
unsigned long busiest_load_per_task;
unsigned long busiest_nr_running;
unsigned long busiest_group_capacity;
- unsigned long busiest_has_capacity;
- unsigned int busiest_group_weight;
int group_imb; /* Is there imbalance in this sd */
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -3572,10 +3497,7 @@ struct sg_lb_stats {
unsigned long sum_nr_running; /* Nr tasks running in the group */
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long group_capacity;
- unsigned long idle_cpus;
- unsigned long group_weight;
int group_imb; /* Is there an imbalance in the group ? */
- int group_has_capacity; /* Is there extra capacity in the group? */
};
/**
@@ -3785,14 +3707,10 @@ unsigned long scale_rt_power(int cpu)
struct rq *rq = cpu_rq(cpu);
u64 total, available;
- total = sched_avg_period() + (rq->clock - rq->age_stamp);
+ sched_avg_update(rq);
- if (unlikely(total < rq->rt_avg)) {
- /* Ensures that power won't end up being negative */
- available = 0;
- } else {
- available = total - rq->rt_avg;
- }
+ total = sched_avg_period() + (rq->clock - rq->age_stamp);
+ available = total - rq->rt_avg;
if (unlikely((s64)total < SCHED_LOAD_SCALE))
total = SCHED_LOAD_SCALE;
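Note what the guard being removed here protected against; the removed comment says it outright, and a worked case (illustrative numbers) makes it concrete:

/*
 * If rt_avg ever exceeds total, say total = 1000000 and
 * rt_avg = 1200000 (ns), the reverted code computes
 *
 *   available = total - rt_avg
 *
 * in u64, which underflows to a huge positive value.  The .33 hunk
 * clamped available to 0 in that case; vanilla 2.6.32, restored
 * here, relies on rt_avg staying below total in practice.
 */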
@@ -3830,7 +3748,6 @@ static void update_cpu_power(struct sche
if (!power)
power = 1;
- cpu_rq(cpu)->cpu_power = power;
sdg->cpu_power = power;
}
@@ -3875,7 +3792,7 @@ static inline void update_sg_lb_stats(st
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
{
- unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
+ unsigned long load, max_cpu_load, min_cpu_load;
int i;
unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long avg_load_per_task = 0;
@@ -3889,7 +3806,6 @@ static inline void update_sg_lb_stats(st
/* Tally up the load of all CPUs in the group */
max_cpu_load = 0;
min_cpu_load = ~0UL;
- max_nr_running = 0;
for_each_cpu_and(i, sched_group_cpus(group), cpus) {
struct rq *rq = cpu_rq(i);
@@ -3907,10 +3823,8 @@ static inline void update_sg_lb_stats(st
load = target_load(i, load_idx);
} else {
load = source_load(i, load_idx);
- if (load > max_cpu_load) {
+ if (load > max_cpu_load)
max_cpu_load = load;
- max_nr_running = rq->nr_running;
- }
if (min_cpu_load > load)
min_cpu_load = load;
}
@@ -3918,8 +3832,7 @@ static inline void update_sg_lb_stats(st
sgs->group_load += load;
sgs->sum_nr_running += rq->nr_running;
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
- sgs->idle_cpus++;
+
}
/*
@@ -3949,14 +3862,11 @@ static inline void update_sg_lb_stats(st
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+ if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
sgs->group_imb = 1;
- sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
- sgs->group_weight = group->group_weight;
-
- if (sgs->group_capacity > sgs->sum_nr_running)
- sgs->group_has_capacity = 1;
+ sgs->group_capacity =
+ DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
}
/**
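group_capacity rounds the group's accumulated cpu_power to whole CPUs. A small worked example, assuming SCHED_LOAD_SCALE is its 2.6.32 value of 1024:

/*
 *   two full-power CPUs  ->  group->cpu_power ~= 2048
 *   group_capacity = DIV_ROUND_CLOSEST(2048, 1024) = 2
 *
 * i.e. the group counts as overloaded once sum_nr_running exceeds
 * two, which is exactly the busiest-group test used later in
 * update_sd_lb_stats().
 */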
@@ -4003,14 +3913,9 @@ static inline void update_sd_lb_stats(st
/*
* In case the child domain prefers tasks go to siblings
* first, lower the group capacity to one so that we'll try
- * and move all the excess tasks away. We lower the capacity
- * of a group only if the local group has the capacity to fit
- * these excess tasks, i.e. nr_running < group_capacity. The
- * extra check prevents the case where you always pull from the
- * heaviest group when it is already under-utilized (possible
- * with a large weight task outweighs the tasks on the system).
+ * and move all the excess tasks away.
*/
- if (prefer_sibling && !local_group && sds->this_has_capacity)
+ if (prefer_sibling)
sgs.group_capacity = min(sgs.group_capacity, 1UL);
if (local_group) {
@@ -4018,19 +3923,14 @@ static inline void update_sd_lb_stats(st
sds->this = group;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
- sds->this_has_capacity = sgs.group_has_capacity;
- sds->this_idle_cpus = sgs.idle_cpus;
} else if (sgs.avg_load > sds->max_load &&
(sgs.sum_nr_running > sgs.group_capacity ||
sgs.group_imb)) {
sds->max_load = sgs.avg_load;
sds->busiest = group;
sds->busiest_nr_running = sgs.sum_nr_running;
- sds->busiest_idle_cpus = sgs.idle_cpus;
sds->busiest_group_capacity = sgs.group_capacity;
- sds->busiest_group_weight = sgs.group_weight;
sds->busiest_load_per_task = sgs.sum_weighted_load;
- sds->busiest_has_capacity = sgs.group_has_capacity;
sds->group_imb = sgs.group_imb;
}
@@ -4176,7 +4076,6 @@ static inline void calculate_imbalance(s
return fix_small_imbalance(sds, this_cpu, imbalance);
}
-
/******* find_busiest_group() helpers end here *********************/
/**
@@ -4228,11 +4127,6 @@ find_busiest_group(struct sched_domain *
* 4) This group is more busy than the avg busieness at this
* sched_domain.
* 5) The imbalance is within the specified limit.
- *
- * Note: when doing newidle balance, if the local group has excess
- * capacity (i.e. nr_running < group_capacity) and the busiest group
- * does not have any capacity, we force a load balance to pull tasks
- * to the local group. In this case, we skip past checks 3, 4 and 5.
*/
if (balance && !(*balance))
goto ret;
@@ -4240,11 +4134,6 @@ find_busiest_group(struct sched_domain *
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
- if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
- !sds.busiest_has_capacity)
- goto force_balance;
-
if (sds.this_load >= sds.max_load)
goto out_balanced;
@@ -4253,28 +4142,9 @@ find_busiest_group(struct sched_domain *
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- /*
- * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
- * And to check for busy balance use !idle_cpu instead of
- * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
- * even when they are idle.
- */
- if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
- } else {
- /*
- * This cpu is idle. If the busiest group load doesn't
- * have more tasks than the number of available cpu's and
- * there is no imbalance between this and busiest group
- * wrt to idle cpu's, it is balanced.
- */
- if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
- sds.busiest_nr_running <= sds.busiest_group_weight)
- goto out_balanced;
- }
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
-force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(&sds, this_cpu, imbalance);
return sds.busiest;
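With the force_balance path gone, the revert leaves a strictly linear decision sequence; summarizing it straight from the surviving code:

/*
 * Post-revert find_busiest_group() bails out, in order, when:
 *
 *   1. !sds.busiest || sds.busiest_nr_running == 0
 *   2. sds.this_load >= sds.max_load
 *   3. sds.this_load >= sds.avg_load
 *   4. 100 * sds.max_load <= sd->imbalance_pct * sds.this_load
 *
 * and only then computes an imbalance.  The removed newidle path
 * (idle CPU with spare capacity pulling from a full group) used to
 * jump past checks 2-4 via force_balance.
 */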
@@ -4430,14 +4300,7 @@ redo:
if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]);
- /*
- * Increment the failure counter only on periodic balance.
- * We do not want newidle balance, which can be very
- * frequent, pollute the failure counter causing
- * excessive cache_hot migrations and active balances.
- */
- if (idle != CPU_NEWLY_IDLE)
- sd->nr_balance_failed++;
+ sd->nr_balance_failed++;
if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
@@ -5182,7 +5045,7 @@ static u64 do_task_delta_exec(struct tas
if (task_current(rq, p)) {
update_rq_clock(rq);
- ns = rq->clock_task - p->se.exec_start;
+ ns = rq->clock - p->se.exec_start;
if ((s64)ns < 0)
ns = 0;
}
@@ -5326,7 +5189,7 @@ void account_system_time(struct task_str
tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
cpustat->irq = cputime64_add(cpustat->irq, tmp);
- else if (in_serving_softirq())
+ else if (softirq_count())
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
else
cpustat->system = cputime64_add(cpustat->system, tmp);
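Reverting in_serving_softirq() to softirq_count() widens what gets charged as softirq time; a hedged note, based on how vanilla 2.6.32 builds preempt_count:

/*
 * in_serving_softirq() is true only while a softirq handler is
 * actually executing.  softirq_count() is also non-zero inside
 * local_bh_disable() sections, which add SOFTIRQ_OFFSET to
 * preempt_count in vanilla 2.6.32, so ticks landing in bh-disabled
 * system time can again be accounted to cpustat->softirq.
 */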
@@ -7270,19 +7133,7 @@ void __cpuinit init_idle(struct task_str
idle->se.exec_start = sched_clock();
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
- /*
- * We're having a chicken and egg problem, even though we are
- * holding rq->lock, the cpu isn't yet set to this cpu so the
- * lockdep check in task_group() will fail.
- *
- * Similar case to sched_fork(). / Alternatively we could
- * use task_rq_lock() here and obtain the other rq->lock.
- *
- * Silence PROVE_RCU
- */
- rcu_read_lock();
__set_task_cpu(idle, cpu);
- rcu_read_unlock();
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
@@ -7300,7 +7151,7 @@ void __cpuinit init_idle(struct task_str
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
- ftrace_graph_init_idle_task(idle, cpu);
+ ftrace_graph_init_task(idle);
}
/*
@@ -8789,8 +8640,6 @@ static void init_sched_groups_power(int
if (cpu != group_first_cpu(sd->groups))
return;
- sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
-
child = sd->child;
sd->groups->cpu_power = 0;
@@ -9674,6 +9523,9 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
+#ifdef CONFIG_USER_SCHED
+ alloc_size *= 2;
+#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
alloc_size += num_possible_cpus() * cpumask_size();
#endif
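alloc_size is doubled because root_task_group needs its own per-cpu pointer arrays alongside init_task_group's; the carve-up happens in the next two hunks. A sketch of the layout with FAIR, RT, and USER all enabled:

/*
 *   init_task_group.se         nr_cpu_ids pointers
 *   init_task_group.cfs_rq     nr_cpu_ids pointers
 *   root_task_group.se         nr_cpu_ids pointers   (USER_SCHED)
 *   root_task_group.cfs_rq     nr_cpu_ids pointers   (USER_SCHED)
 *   init_task_group.rt_se      nr_cpu_ids pointers
 *   init_task_group.rt_rq      nr_cpu_ids pointers
 *   root_task_group.rt_se      nr_cpu_ids pointers   (USER_SCHED)
 *   root_task_group.rt_rq      nr_cpu_ids pointers   (USER_SCHED)
 */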
@@ -9691,6 +9543,13 @@ void __init sched_init(void)
init_task_group.cfs_rq = (struct cfs_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
+#ifdef CONFIG_USER_SCHED
+ root_task_group.se = (struct sched_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ root_task_group.cfs_rq = (struct cfs_rq **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
init_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -9699,6 +9558,13 @@ void __init sched_init(void)
init_task_group.rt_rq = (struct rt_rq **)ptr;
ptr += nr_cpu_ids * sizeof(void **);
+#ifdef CONFIG_USER_SCHED
+ root_task_group.rt_se = (struct sched_rt_entity **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+
+ root_task_group.rt_rq = (struct rt_rq **)ptr;
+ ptr += nr_cpu_ids * sizeof(void **);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CPUMASK_OFFSTACK
for_each_possible_cpu(i) {
@@ -9718,14 +9584,24 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
init_rt_bandwidth(&init_task_group.rt_bandwidth,
global_rt_period(), global_rt_runtime());
+#ifdef CONFIG_USER_SCHED
+ init_rt_bandwidth(&root_task_group.rt_bandwidth,
+ global_rt_period(), RUNTIME_INF);
+#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
list_add(&init_task_group.list, &task_groups);
INIT_LIST_HEAD(&init_task_group.children);
- autogroup_init(&init_task);
-#endif /* CONFIG_CGROUP_SCHED */
+#ifdef CONFIG_USER_SCHED
+ INIT_LIST_HEAD(&root_task_group.children);
+ init_task_group.parent = &root_task_group;
+ list_add(&init_task_group.siblings, &root_task_group.children);
+#endif /* CONFIG_USER_SCHED */
+
+ autogroup_init(&init_task);
+#endif /* CONFIG_GROUP_SCHED */
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
@@ -9765,6 +9641,25 @@ void __init sched_init(void)
* directly in rq->cfs (i.e init_task_group->se[] = NULL).
*/
init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
+#elif defined CONFIG_USER_SCHED
+ root_task_group.shares = NICE_0_LOAD;
+ init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL);
+ /*
+ * In case of task-groups formed thr' the user id of tasks,
+ * init_task_group represents tasks belonging to root user.
+ * Hence it forms a sibling of all subsequent groups formed.
+ * In this case, init_task_group gets only a fraction of overall
+ * system cpu resource, based on the weight assigned to root
+ * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
+ * by letting tasks of init_task_group sit in a separate cfs_rq
+ * (init_tg_cfs_rq) and having one entity represent this group of
+ * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
+ */
+ init_tg_cfs_entry(&init_task_group,
+ &per_cpu(init_tg_cfs_rq, i),
+ &per_cpu(init_sched_entity, i), i, 1,
+ root_task_group.se[i]);
+
#endif
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -9787,7 +9682,6 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
- rq->cpu_power = SCHED_LOAD_SCALE;
rq->post_schedule = 0;
rq->active_balance = 0;
rq->next_balance = jiffies;
@@ -10171,7 +10065,7 @@ static inline void unregister_rt_sched_g
}
#endif /* CONFIG_RT_GROUP_SCHED */
-#ifdef CONFIG_CGROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
static void free_sched_group(struct task_group *tg)
{
free_fair_sched_group(tg);
@@ -10262,12 +10156,12 @@ void __sched_move_task(struct task_struc
if (unlikely(running))
tsk->sched_class->put_prev_task(rq, tsk);
+ set_task_rq(tsk, task_cpu(tsk));
+
#ifdef CONFIG_FAIR_GROUP_SCHED
- if (tsk->sched_class->task_move_group)
- tsk->sched_class->task_move_group(tsk, on_rq);
- else
+ if (tsk->sched_class->moved_group)
+ tsk->sched_class->moved_group(tsk, on_rq);
#endif
- set_task_rq(tsk, task_cpu(tsk));
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
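The group-move hook reverts from task_move_group() to moved_group(), and set_task_rq() moves back in front of the hook. The restored flow, read off the post-patch lines:

/*
 *   set_task_rq(tsk, task_cpu(tsk));            switch the task's
 *                                               cfs_rq/rt_rq first
 *   tsk->sched_class->moved_group(tsk, on_rq);  then let the class
 *                                               fix up the entity
 *
 * 2.6.32.33 instead preferred task_move_group() when the class
 * provided it and fell back to set_task_rq() only otherwise.
 */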
@@ -10285,7 +10179,7 @@ void sched_move_task(struct task_struct
task_rq_unlock(rq, &flags);
}
-#endif /* CONFIG_CGROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */
#ifdef CONFIG_FAIR_GROUP_SCHED
static void __set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -10427,6 +10321,13 @@ static int tg_schedulable(struct task_gr
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/
@@ -11202,4 +11103,4 @@ void synchronize_sched_expedited(void)
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */
-#endif /* CONFIG_SCHED_BFS */
+#endif /* CONFIG_SCHED_BFS */