Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  320
1 files changed, 211 insertions, 109 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52e..a85678865c5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
+#include <linux/memory.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -83,7 +84,7 @@ struct cpuset {
struct cgroup_subsys_state css;
unsigned long flags; /* "unsigned long" so bitops work */
- cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
+ cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
struct cpuset *parent; /* my parent */
@@ -194,8 +195,6 @@ static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
- .cpus_allowed = CPU_MASK_ALL,
- .mems_allowed = NODE_MASK_ALL,
};
/*
@@ -239,6 +238,17 @@ static struct cpuset top_cpuset = {
static DEFINE_MUTEX(callback_mutex);
/*
+ * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
+ * buffers. They are statically allocated to prevent using excess stack
+ * when calling cpuset_print_task_mems_allowed().
+ */
+#define CPUSET_NAME_LEN (128)
+#define CPUSET_NODELIST_LEN (256)
+static char cpuset_name[CPUSET_NAME_LEN];
+static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+static DEFINE_SPINLOCK(cpuset_buffer_lock);
+
+/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead
@@ -266,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
};
/*
- * Return in *pmask the portion of a cpusets's cpus_allowed that
+ * Return in pmask the portion of a cpuset's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus. If we get
* all the way to the top and still haven't found any online cpus,
@@ -279,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
* Call with callback_mutex held.
*/
-static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
+static void guarantee_online_cpus(const struct cpuset *cs,
+ struct cpumask *pmask)
{
- while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
+ while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
cs = cs->parent;
if (cs)
- cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
+ cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
else
- *pmask = cpu_online_map;
- BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
+ cpumask_copy(pmask, cpu_online_mask);
+ BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
}
/*
@@ -363,14 +374,9 @@ void cpuset_update_task_memory_state(void)
struct task_struct *tsk = current;
struct cpuset *cs;
- if (task_cs(tsk) == &top_cpuset) {
- /* Don't need rcu for top_cpuset. It's never freed. */
- my_cpusets_mem_gen = top_cpuset.mems_generation;
- } else {
- rcu_read_lock();
- my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
- rcu_read_unlock();
- }
+ rcu_read_lock();
+ my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
+ rcu_read_unlock();
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
mutex_lock(&callback_mutex);
@@ -402,12 +408,43 @@ void cpuset_update_task_memory_state(void)
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
- return cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
+ return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
nodes_subset(p->mems_allowed, q->mems_allowed) &&
is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
is_mem_exclusive(p) <= is_mem_exclusive(q);
}
+/**
+ * alloc_trial_cpuset - allocate a trial cpuset
+ * @cs: the cpuset that the trial cpuset duplicates
+ */
+static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
+{
+ struct cpuset *trial;
+
+ trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
+ if (!trial)
+ return NULL;
+
+ if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
+ kfree(trial);
+ return NULL;
+ }
+ cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+
+ return trial;
+}
+
+/**
+ * free_trial_cpuset - free the trial cpuset
+ * @trial: the trial cpuset to be freed
+ */
+static void free_trial_cpuset(struct cpuset *trial)
+{
+ free_cpumask_var(trial->cpus_allowed);
+ kfree(trial);
+}
+
/*
* validate_change() - Used to validate that any proposed cpuset change
* follows the structural rules for cpusets.
@@ -457,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
c = cgroup_cs(cont);
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur &&
- cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+ cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
return -EINVAL;
if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
c != cur &&
@@ -467,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
if (cgroup_task_count(cur->css.cgroup)) {
- if (cpus_empty(trial->cpus_allowed) ||
+ if (cpumask_empty(trial->cpus_allowed) ||
nodes_empty(trial->mems_allowed)) {
return -ENOSPC;
}
@@ -482,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
*/
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
- return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+ return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
}
static void
@@ -507,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
- if (cpus_empty(cp->cpus_allowed))
+ if (cpumask_empty(cp->cpus_allowed))
continue;
if (is_sched_load_balance(cp))
@@ -531,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
@@ -574,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* element of the partition (one sched domain) to be passed to
* partition_sched_domains().
*/
-static int generate_sched_domains(cpumask_t **domains,
+/* FIXME: see the FIXME in partition_sched_domains() */
+static int generate_sched_domains(struct cpumask **domains,
struct sched_domain_attr **attributes)
{
LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -582,19 +620,18 @@ static int generate_sched_domains(cpumask_t **domains,
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
- cpumask_t *doms; /* resulting partition; i.e. sched domains */
+ struct cpumask *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
- int ndoms; /* number of sched domains in result */
- int nslot; /* next empty doms[] cpumask_t slot */
+ int ndoms = 0; /* number of sched domains in result */
+ int nslot; /* next empty doms[] struct cpumask slot */
- ndoms = 0;
doms = NULL;
dattr = NULL;
csa = NULL;
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
- doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ doms = kmalloc(cpumask_size(), GFP_KERNEL);
if (!doms)
goto done;
@@ -603,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
- *doms = top_cpuset.cpus_allowed;
+ cpumask_copy(doms, top_cpuset.cpus_allowed);
ndoms = 1;
goto done;
@@ -622,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
- if (cpus_empty(cp->cpus_allowed))
+ if (cpumask_empty(cp->cpus_allowed))
continue;
/*
@@ -673,11 +710,9 @@ restart:
* Now we know how many domains to create.
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
- doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
- if (!doms) {
- ndoms = 0;
+ doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
+ if (!doms)
goto done;
- }
/*
* The rest of the code, including the scheduler, can deal with
@@ -687,7 +722,7 @@ restart:
for (nslot = 0, i = 0; i < csn; i++) {
struct cpuset *a = csa[i];
- cpumask_t *dp;
+ struct cpumask *dp;
int apn = a->pn;
if (apn < 0) {
@@ -710,14 +745,14 @@ restart:
continue;
}
- cpus_clear(*dp);
+ cpumask_clear(dp);
if (dattr)
*(dattr + nslot) = SD_ATTR_INIT;
for (j = i; j < csn; j++) {
struct cpuset *b = csa[j];
if (apn == b->pn) {
- cpus_or(*dp, *dp, b->cpus_allowed);
+ cpumask_or(dp, dp, b->cpus_allowed);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -732,6 +767,13 @@ restart:
done:
kfree(csa);
+ /*
+ * Fallback to the default domain if kmalloc() failed.
+ * See comments in partition_sched_domains().
+ */
+ if (doms == NULL)
+ ndoms = 1;
+
*domains = doms;
*attributes = dattr;
return ndoms;
@@ -750,7 +792,7 @@ done:
static void do_rebuild_sched_domains(struct work_struct *unused)
{
struct sched_domain_attr *attr;
- cpumask_t *doms;
+ struct cpumask *doms;
int ndoms;
get_online_cpus();
@@ -819,7 +861,7 @@ void rebuild_sched_domains(void)
static int cpuset_test_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- return !cpus_equal(tsk->cpus_allowed,
+ return !cpumask_equal(&tsk->cpus_allowed,
(cgroup_cs(scan->cg))->cpus_allowed);
}
@@ -837,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
static void cpuset_change_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
+ set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
}
/**
@@ -869,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
-static int update_cpumask(struct cpuset *cs, const char *buf)
+static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+ const char *buf)
{
struct ptr_heap heap;
- struct cpuset trialcs;
int retval;
int is_load_balanced;
@@ -880,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
- trialcs = *cs;
-
/*
* An empty cpus_allowed is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
@@ -889,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
* with tasks have cpus.
*/
if (!*buf) {
- cpus_clear(trialcs.cpus_allowed);
+ cpumask_clear(trialcs->cpus_allowed);
} else {
- retval = cpulist_parse(buf, trialcs.cpus_allowed);
+ retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0)
return retval;
- if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
+ if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
return -EINVAL;
}
- retval = validate_change(cs, &trialcs);
+ retval = validate_change(cs, trialcs);
if (retval < 0)
return retval;
/* Nothing to do if the cpus didn't change */
- if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
return 0;
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
- is_load_balanced = is_sched_load_balance(&trialcs);
+ is_load_balanced = is_sched_load_balance(trialcs);
mutex_lock(&callback_mutex);
- cs->cpus_allowed = trialcs.cpus_allowed;
+ cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
mutex_unlock(&callback_mutex);
/*
@@ -1001,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
fudge = 10; /* spare mmarray[] slots */
- fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+ fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
retval = -ENOMEM;
/*
@@ -1088,9 +1128,9 @@ done:
* lock each such tasks mm->mmap_sem, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed.
*/
-static int update_nodemask(struct cpuset *cs, const char *buf)
+static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+ const char *buf)
{
- struct cpuset trialcs;
nodemask_t oldmem;
int retval;
@@ -1101,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
- trialcs = *cs;
-
/*
* An empty mems_allowed is ok iff there are no tasks in the cpuset.
* Since nodelist_parse() fails on an empty mask, we special case
@@ -1110,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
* with tasks have memory.
*/
if (!*buf) {
- nodes_clear(trialcs.mems_allowed);
+ nodes_clear(trialcs->mems_allowed);
} else {
- retval = nodelist_parse(buf, trialcs.mems_allowed);
+ retval = nodelist_parse(buf, trialcs->mems_allowed);
if (retval < 0)
goto done;
- if (!nodes_subset(trialcs.mems_allowed,
+ if (!nodes_subset(trialcs->mems_allowed,
node_states[N_HIGH_MEMORY]))
return -EINVAL;
}
oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ if (nodes_equal(oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
- retval = validate_change(cs, &trialcs);
+ retval = validate_change(cs, trialcs);
if (retval < 0)
goto done;
mutex_lock(&callback_mutex);
- cs->mems_allowed = trialcs.mems_allowed;
+ cs->mems_allowed = trialcs->mems_allowed;
cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
@@ -1151,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
- if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+ if (!cpumask_empty(cs->cpus_allowed) &&
+ is_sched_load_balance(cs))
async_rebuild_sched_domains();
}
@@ -1170,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on)
{
- struct cpuset trialcs;
+ struct cpuset *trialcs;
int err;
int balance_flag_changed;
- trialcs = *cs;
+ trialcs = alloc_trial_cpuset(cs);
+ if (!trialcs)
+ return -ENOMEM;
+
if (turning_on)
- set_bit(bit, &trialcs.flags);
+ set_bit(bit, &trialcs->flags);
else
- clear_bit(bit, &trialcs.flags);
+ clear_bit(bit, &trialcs->flags);
- err = validate_change(cs, &trialcs);
+ err = validate_change(cs, trialcs);
if (err < 0)
- return err;
+ goto out;
balance_flag_changed = (is_sched_load_balance(cs) !=
- is_sched_load_balance(&trialcs));
+ is_sched_load_balance(trialcs));
mutex_lock(&callback_mutex);
- cs->flags = trialcs.flags;
+ cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
- if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
+ if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
- return 0;
+out:
+ free_trial_cpuset(trialcs);
+ return err;
}
/*
@@ -1295,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
return val;
}
+/* Protected by cgroup_lock */
+static cpumask_var_t cpus_attach;
+
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct task_struct *tsk)
{
struct cpuset *cs = cgroup_cs(cont);
+ int ret = 0;
- if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+ if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
- if (tsk->flags & PF_THREAD_BOUND) {
- cpumask_t mask;
+ if (tsk->flags & PF_THREAD_BOUND) {
mutex_lock(&callback_mutex);
- mask = cs->cpus_allowed;
+ if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
+ ret = -EINVAL;
mutex_unlock(&callback_mutex);
- if (!cpus_equal(tsk->cpus_allowed, mask))
- return -EINVAL;
}
- return security_task_setscheduler(tsk, 0, NULL);
+ return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
}
static void cpuset_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct cgroup *oldcont,
struct task_struct *tsk)
{
- cpumask_t cpus;
nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
int err;
- mutex_lock(&callback_mutex);
- guarantee_online_cpus(cs, &cpus);
- err = set_cpus_allowed_ptr(tsk, &cpus);
- mutex_unlock(&callback_mutex);
+ if (cs == &top_cpuset) {
+ cpumask_copy(cpus_attach, cpu_possible_mask);
+ } else {
+ mutex_lock(&callback_mutex);
+ guarantee_online_cpus(cs, cpus_attach);
+ mutex_unlock(&callback_mutex);
+ }
+ err = set_cpus_allowed_ptr(tsk, cpus_attach);
if (err)
return;
@@ -1343,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
cpuset_migrate_mm(mm, &from, &to);
mmput(mm);
}
-
}
/* The various types of files and directories in a cpuset file system */
@@ -1438,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int retval = 0;
+ struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *trialcs;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
+ trialcs = alloc_trial_cpuset(cs);
+ if (!trialcs)
+ return -ENOMEM;
+
switch (cft->private) {
case FILE_CPULIST:
- retval = update_cpumask(cgroup_cs(cgrp), buf);
+ retval = update_cpumask(cs, trialcs, buf);
break;
case FILE_MEMLIST:
- retval = update_nodemask(cgroup_cs(cgrp), buf);
+ retval = update_nodemask(cs, trialcs, buf);
break;
default:
retval = -EINVAL;
break;
}
+
+ free_trial_cpuset(trialcs);
cgroup_unlock();
return retval;
}
@@ -1471,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
- cpumask_t mask;
+ int ret;
mutex_lock(&callback_mutex);
- mask = cs->cpus_allowed;
+ ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
- return cpulist_scnprintf(page, PAGE_SIZE, mask);
+ return ret;
}
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1713,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
parent_cs = cgroup_cs(parent);
cs->mems_allowed = parent_cs->mems_allowed;
- cs->cpus_allowed = parent_cs->cpus_allowed;
+ cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
return;
}
@@ -1739,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
cs = kmalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
+ if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
+ kfree(cs);
+ return ERR_PTR(-ENOMEM);
+ }
cpuset_update_task_memory_state();
cs->flags = 0;
@@ -1747,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
- cpus_clear(cs->cpus_allowed);
+ cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
cs->mems_generation = cpuset_mems_generation++;
fmeter_init(&cs->fmeter);
@@ -1774,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
number_of_cpusets--;
+ free_cpumask_var(cs->cpus_allowed);
kfree(cs);
}
@@ -1797,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
int __init cpuset_init_early(void)
{
+ alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+
top_cpuset.mems_generation = cpuset_mems_generation++;
return 0;
}
@@ -1812,7 +1875,7 @@ int __init cpuset_init(void)
{
int err = 0;
- cpus_setall(top_cpuset.cpus_allowed);
+ cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
fmeter_init(&top_cpuset.fmeter);
@@ -1824,6 +1887,9 @@ int __init cpuset_init(void)
if (err < 0)
return err;
+ if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
+ BUG();
+
number_of_cpusets = 1;
return 0;
}
@@ -1898,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
* has online cpus, so can't be empty).
*/
parent = cs->parent;
- while (cpus_empty(parent->cpus_allowed) ||
+ while (cpumask_empty(parent->cpus_allowed) ||
nodes_empty(parent->mems_allowed))
parent = parent->parent;
@@ -1939,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
}
/* Continue past cpusets with all cpus, mems online */
- if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
+ if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
@@ -1947,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
- cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
+ cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+ cpu_online_mask);
nodes_and(cp->mems_allowed, cp->mems_allowed,
node_states[N_HIGH_MEMORY]);
mutex_unlock(&callback_mutex);
/* Move tasks from the empty cpuset to a parent */
- if (cpus_empty(cp->cpus_allowed) ||
+ if (cpumask_empty(cp->cpus_allowed) ||
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
else {
@@ -1979,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
{
struct sched_domain_attr *attr;
- cpumask_t *doms;
+ struct cpumask *doms;
int ndoms;
switch (phase) {
@@ -1994,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
}
cgroup_lock();
- top_cpuset.cpus_allowed = cpu_online_map;
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
scan_for_empty_cpusets(&top_cpuset);
ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
@@ -2011,12 +2078,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
* See also the previous routine cpuset_track_online_cpus().
*/
-void cpuset_track_online_nodes(void)
+static int cpuset_track_online_nodes(struct notifier_block *self,
+ unsigned long action, void *arg)
{
cgroup_lock();
- top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
- scan_for_empty_cpusets(&top_cpuset);
+ switch (action) {
+ case MEM_ONLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ break;
+ case MEM_OFFLINE:
+ top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
+ scan_for_empty_cpusets(&top_cpuset);
+ break;
+ default:
+ break;
+ }
cgroup_unlock();
+ return NOTIFY_OK;
}
#endif
@@ -2028,24 +2106,25 @@ void cpuset_track_online_nodes(void)
void __init cpuset_init_smp(void)
{
- top_cpuset.cpus_allowed = cpu_online_map;
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
+ hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}
/**
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
*
- * Description: Returns the cpumask_t cpus_allowed of the cpuset
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
* subset of cpu_online_map, even if this means going outside the
* tasks cpuset.
**/
-void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
mutex_lock(&callback_mutex);
cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2056,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
* cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
* Must be called with callback_mutex held.
**/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
{
task_lock(tsk);
guarantee_online_cpus(task_cs(tsk), pmask);
@@ -2339,6 +2418,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
+/**
+ * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
+ * @task: pointer to task_struct of some task.
+ *
+ * Description: Prints @task's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log. Must hold task_lock(task) to allow
+ * dereferencing task_cs(task).
+ */
+void cpuset_print_task_mems_allowed(struct task_struct *tsk)
+{
+ struct dentry *dentry;
+
+ dentry = task_cs(tsk)->css.cgroup->dentry;
+ spin_lock(&cpuset_buffer_lock);
+ snprintf(cpuset_name, CPUSET_NAME_LEN,
+ dentry ? (const char *)dentry->d_name.name : "/");
+ nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
+ tsk->mems_allowed);
+ printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
+ tsk->comm, cpuset_name, cpuset_nodelist);
+ spin_unlock(&cpuset_buffer_lock);
+}
+
/*
* Collection of memory_pressure is suppressed unless
* this flag is enabled by writing "1" to the special