aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@csr.com>2009-01-22 19:12:32 +0000
committerDavid Vrabel <david.vrabel@csr.com>2009-01-22 19:12:32 +0000
commitdff243f7cb3a2ebbb09093066c1bc4a90ff5b3a4 (patch)
tree1c63e7c2f879cd322ca785671b74b4ff796dd24c /kernel
parenta5e6ced58d423cb09c4fc0087dcfdb0b5deb5e1c (diff)
parentf3b8436ad9a8ad36b3c9fa1fe030c7f38e5d3d0b (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-upstream
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile6
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/async.c346
-rw-r--r--kernel/capability.c4
-rw-r--r--kernel/cgroup.c276
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/cpuset.c253
-rw-r--r--kernel/cred.c5
-rw-r--r--kernel/exec_domain.c3
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/fork.c12
-rw-r--r--kernel/futex.c17
-rw-r--r--kernel/hrtimer.c4
-rw-r--r--kernel/irq/autoprobe.c5
-rw-r--r--kernel/itimer.c7
-rw-r--r--kernel/kexec.c5
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/ns_cgroup.c2
-rw-r--r--kernel/pid.c8
-rw-r--r--kernel/posix-timers.c43
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/disk.c6
-rw-r--r--kernel/power/snapshot.c370
-rw-r--r--kernel/power/swsusp.c122
-rw-r--r--kernel/printk.c9
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcutorture.c113
-rw-r--r--kernel/res_counter.c44
-rw-r--r--kernel/resource.c62
-rw-r--r--kernel/sched.c61
-rw-r--r--kernel/sched_debug.c21
-rw-r--r--kernel/sched_fair.c39
-rw-r--r--kernel/signal.c59
-rw-r--r--kernel/sys.c70
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c27
-rw-r--r--kernel/time.c14
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/timer.c18
-rw-r--r--kernel/trace/ring_buffer.c8
-rw-r--r--kernel/uid16.c39
-rw-r--r--kernel/up.c21
43 files changed, 1485 insertions, 661 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e1c5bf3365c..170a9213c1b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
+ async.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
@@ -40,6 +41,9 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ifneq ($(CONFIG_SMP),y)
+obj-y += up.o
+endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/acct.c b/kernel/acct.c
index d57b7cbb98b..7afa3156416 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -277,7 +277,7 @@ static int acct_on(char *name)
* should be written. If the filename is NULL, accounting will be
* shutdown.
*/
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
{
int error;
diff --git a/kernel/async.c b/kernel/async.c
new file mode 100644
index 00000000000..608b32b4281
--- /dev/null
+++ b/kernel/async.c
@@ -0,0 +1,346 @@
+/*
+ * async.c: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+/*
+
+Goals and Theory of Operation
+
+The primary goal of this feature is to reduce the kernel boot time,
+by doing various independent hardware delays and discovery operations
+decoupled and not strictly serialized.
+
+More specifically, the asynchronous function call concept allows
+certain operations (primarily during system boot) to happen
+asynchronously, out of order, while these operations still
+have their externally visible parts happen sequentially and in-order.
+(not unlike how out-of-order CPUs retire their instructions in order)
+
+Key to the asynchronous function call implementation is the concept of
+a "sequence cookie" (which, although it has an abstracted type, can be
+thought of as a monotonically incrementing number).
+
+The async core will assign each scheduled event such a sequence cookie and
+pass this to the called functions.
+
+The asynchronously called function should before doing a globally visible
+operation, such as registering device numbers, call the
+async_synchronize_cookie() function and pass in its own cookie. The
+async_synchronize_cookie() function will make sure that all asynchronous
+operations that were scheduled prior to the operation corresponding with the
+cookie have completed.
+
+Subsystem/driver initialization code that scheduled asynchronous probe
+functions, but which shares global resources with other drivers/subsystems
+that do not use the asynchronous call feature, need to do a full
+synchronization with the async_synchronize_full() function, before returning
+from their init function. This is to maintain strict ordering between the
+asynchronous and synchronous parts of the kernel.
+
+*/
+
+#include <linux/async.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <asm/atomic.h>
+
+static async_cookie_t next_cookie = 1;
+
+#define MAX_THREADS 256
+#define MAX_WORK 32768
+
+static LIST_HEAD(async_pending);
+static LIST_HEAD(async_running);
+static DEFINE_SPINLOCK(async_lock);
+
+static int async_enabled = 0;
+
+struct async_entry {
+ struct list_head list;
+ async_cookie_t cookie;
+ async_func_ptr *func;
+ void *data;
+ struct list_head *running;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(async_done);
+static DECLARE_WAIT_QUEUE_HEAD(async_new);
+
+static atomic_t entry_count;
+static atomic_t thread_count;
+
+extern int initcall_debug;
+
+
+/*
+ * MUST be called with the lock held!
+ */
+static async_cookie_t __lowest_in_progress(struct list_head *running)
+{
+ struct async_entry *entry;
+ if (!list_empty(running)) {
+ entry = list_first_entry(running,
+ struct async_entry, list);
+ return entry->cookie;
+ } else if (!list_empty(&async_pending)) {
+ entry = list_first_entry(&async_pending,
+ struct async_entry, list);
+ return entry->cookie;
+ } else {
+ /* nothing in progress... next_cookie is "infinity" */
+ return next_cookie;
+ }
+
+}
+
+static async_cookie_t lowest_in_progress(struct list_head *running)
+{
+ unsigned long flags;
+ async_cookie_t ret;
+
+ spin_lock_irqsave(&async_lock, flags);
+ ret = __lowest_in_progress(running);
+ spin_unlock_irqrestore(&async_lock, flags);
+ return ret;
+}
+/*
+ * pick the first pending entry and run it
+ */
+static void run_one_entry(void)
+{
+ unsigned long flags;
+ struct async_entry *entry;
+ ktime_t calltime, delta, rettime;
+
+ /* 1) pick one task from the pending queue */
+
+ spin_lock_irqsave(&async_lock, flags);
+ if (list_empty(&async_pending))
+ goto out;
+ entry = list_first_entry(&async_pending, struct async_entry, list);
+
+ /* 2) move it to the running queue */
+ list_del(&entry->list);
+ list_add_tail(&entry->list, &async_running);
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* 3) run it (and print duration)*/
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
+ printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+ calltime = ktime_get();
+ }
+ entry->func(entry->data, entry->cookie);
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
+ entry->func, ktime_to_ns(delta) >> 10);
+ }
+
+ /* 4) remove it from the running queue */
+ spin_lock_irqsave(&async_lock, flags);
+ list_del(&entry->list);
+
+ /* 5) free the entry */
+ kfree(entry);
+ atomic_dec(&entry_count);
+
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* 6) wake up any waiters. */
+ wake_up(&async_done);
+ return;
+
+out:
+ spin_unlock_irqrestore(&async_lock, flags);
+}
+
+
+static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+ struct async_entry *entry;
+ unsigned long flags;
+ async_cookie_t newcookie;
+
+
+ /* allow irq-off callers */
+ entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
+
+ /*
+ * If we're out of memory or if there's too much work
+ * pending already, we execute synchronously.
+ */
+ if (!async_enabled || !entry || atomic_read(&entry_count) > MAX_WORK) {
+ kfree(entry);
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = next_cookie++;
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* low on memory.. run synchronously */
+ ptr(data, newcookie);
+ return newcookie;
+ }
+ entry->func = ptr;
+ entry->data = data;
+ entry->running = running;
+
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = entry->cookie = next_cookie++;
+ list_add_tail(&entry->list, &async_pending);
+ atomic_inc(&entry_count);
+ spin_unlock_irqrestore(&async_lock, flags);
+ wake_up(&async_new);
+ return newcookie;
+}
+
+async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
+{
+ return __async_schedule(ptr, data, &async_pending);
+}
+EXPORT_SYMBOL_GPL(async_schedule);
+
+async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+ return __async_schedule(ptr, data, running);
+}
+EXPORT_SYMBOL_GPL(async_schedule_special);
+
+void async_synchronize_full(void)
+{
+ do {
+ async_synchronize_cookie(next_cookie);
+ } while (!list_empty(&async_running) || !list_empty(&async_pending));
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full);
+
+void async_synchronize_full_special(struct list_head *list)
+{
+ async_synchronize_cookie_special(next_cookie, list);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+
+void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+{
+ ktime_t starttime, delta, endtime;
+
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
+ printk("async_waiting @ %i\n", task_pid_nr(current));
+ starttime = ktime_get();
+ }
+
+ wait_event(async_done, lowest_in_progress(running) >= cookie);
+
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
+ endtime = ktime_get();
+ delta = ktime_sub(endtime, starttime);
+
+ printk("async_continuing @ %i after %lli usec\n",
+ task_pid_nr(current), ktime_to_ns(delta) >> 10);
+ }
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+
+void async_synchronize_cookie(async_cookie_t cookie)
+{
+ async_synchronize_cookie_special(cookie, &async_running);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+
+static int async_thread(void *unused)
+{
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int ret = HZ;
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * check the list head without lock.. false positives
+ * are dealt with inside run_one_entry() while holding
+ * the lock.
+ */
+ rmb();
+ if (!list_empty(&async_pending))
+ run_one_entry();
+ else
+ ret = schedule_timeout(HZ);
+
+ if (ret == 0) {
+ /*
+ * we timed out, this means we as thread are redundant.
+ * we sign off and die, but we to avoid any races there
+ * is a last-straw check to see if work snuck in.
+ */
+ atomic_dec(&thread_count);
+ wmb(); /* manager must see our departure first */
+ if (list_empty(&async_pending))
+ break;
+ /*
+ * woops work came in between us timing out and us
+ * signing off; we need to stay alive and keep working.
+ */
+ atomic_inc(&thread_count);
+ }
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+
+static int async_manager_thread(void *unused)
+{
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int tc, ec;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ tc = atomic_read(&thread_count);
+ rmb();
+ ec = atomic_read(&entry_count);
+
+ while (tc < ec && tc < MAX_THREADS) {
+ kthread_run(async_thread, NULL, "async/%i", tc);
+ atomic_inc(&thread_count);
+ tc++;
+ }
+
+ schedule();
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+
+static int __init async_init(void)
+{
+ if (async_enabled)
+ kthread_run(async_manager_thread, NULL, "async/mgr");
+ return 0;
+}
+
+static int __init setup_async(char *str)
+{
+ async_enabled = 1;
+ return 1;
+}
+
+__setup("fastboot", setup_async);
+
+
+core_initcall(async_init);
diff --git a/kernel/capability.c b/kernel/capability.c
index 688926e496b..4e17041963f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
*
* Returns 0 on success and < 0 on error.
*/
-asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
+SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
{
int ret = 0;
pid_t pid;
@@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
*
* Returns 0 on success and < 0 on error.
*/
-asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
+SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
{
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f221446aa02..c29831076e7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -84,7 +84,7 @@ struct cgroupfs_root {
/* Tracks how many cgroups are currently defined in hierarchy.*/
int number_of_cgroups;
- /* A list running through the mounted hierarchies */
+ /* A list running through the active hierarchies */
struct list_head root_list;
/* Hierarchy-specific flags */
@@ -148,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp)
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)
-/* for_each_root() allows you to iterate across the active hierarchies */
-#define for_each_root(_root) \
+/* for_each_active_root() allows you to iterate across the active hierarchies */
+#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
/* the list of cgroups eligible for automatic release. Protected by
@@ -271,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup *cgrp = cg->subsys[i]->cgroup;
+ struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
@@ -384,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp)
return 0;
}
+/**
+ * link_css_set - a helper function to link a css_set to a cgroup
+ * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
+ * @cg: the css_set to be linked
+ * @cgrp: the destination cgroup
+ */
+static void link_css_set(struct list_head *tmp_cg_links,
+ struct css_set *cg, struct cgroup *cgrp)
+{
+ struct cg_cgroup_link *link;
+
+ BUG_ON(list_empty(tmp_cg_links));
+ link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
+ cgrp_link_list);
+ link->cg = cg;
+ list_move(&link->cgrp_link_list, &cgrp->css_sets);
+ list_add(&link->cg_link_list, &cg->cg_links);
+}
+
/*
* find_css_set() takes an existing cgroup group and a
* cgroup object, and returns a css_set object that's
@@ -399,7 +418,6 @@ static struct css_set *find_css_set(
int i;
struct list_head tmp_cg_links;
- struct cg_cgroup_link *link;
struct hlist_head *hhead;
@@ -444,26 +462,11 @@ static struct css_set *find_css_set(
* only do it for the first subsystem in each
* hierarchy
*/
- if (ss->root->subsys_list.next == &ss->sibling) {
- BUG_ON(list_empty(&tmp_cg_links));
- link = list_entry(tmp_cg_links.next,
- struct cg_cgroup_link,
- cgrp_link_list);
- list_del(&link->cgrp_link_list);
- list_add(&link->cgrp_link_list, &cgrp->css_sets);
- link->cg = res;
- list_add(&link->cg_link_list, &res->cg_links);
- }
- }
- if (list_empty(&rootnode.subsys_list)) {
- link = list_entry(tmp_cg_links.next,
- struct cg_cgroup_link,
- cgrp_link_list);
- list_del(&link->cgrp_link_list);
- list_add(&link->cgrp_link_list, &dummytop->css_sets);
- link->cg = res;
- list_add(&link->cg_link_list, &res->cg_links);
+ if (ss->root->subsys_list.next == &ss->sibling)
+ link_css_set(&tmp_cg_links, res, cgrp);
}
+ if (list_empty(&rootnode.subsys_list))
+ link_css_set(&tmp_cg_links, res, dummytop);
BUG_ON(!list_empty(&tmp_cg_links));
@@ -586,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
for_each_subsys(cgrp->root, ss)
- if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
+ if (ss->pre_destroy)
ss->pre_destroy(ss, cgrp);
return;
}
+static void free_cgroup_rcu(struct rcu_head *obj)
+{
+ struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
+
+ kfree(cgrp);
+}
+
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -610,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
/*
* Release the subsystem state objects.
*/
- for_each_subsys(cgrp->root, ss) {
- if (cgrp->subsys[ss->subsys_id])
- ss->destroy(ss, cgrp);
- }
+ for_each_subsys(cgrp->root, ss)
+ ss->destroy(ss, cgrp);
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
- /* Drop the active superblock reference that we took when we
- * created the cgroup */
+ /*
+ * Drop the active superblock reference that we took when we
+ * created the cgroup
+ */
deactivate_super(cgrp->root->sb);
- kfree(cgrp);
+ call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
}
iput(inode);
}
@@ -712,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
BUG_ON(cgrp->subsys[i]);
BUG_ON(!dummytop->subsys[i]);
BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+ mutex_lock(&ss->hierarchy_mutex);
cgrp->subsys[i] = dummytop->subsys[i];
cgrp->subsys[i]->cgroup = cgrp;
- list_add(&ss->sibling, &root->subsys_list);
- rcu_assign_pointer(ss->root, root);
+ list_move(&ss->sibling, &root->subsys_list);
+ ss->root = root;
if (ss->bind)
ss->bind(ss, cgrp);
-
+ mutex_unlock(&ss->hierarchy_mutex);
} else if (bit & removed_bits) {
/* We're removing this subsystem */
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+ mutex_lock(&ss->hierarchy_mutex);
if (ss->bind)
ss->bind(ss, dummytop);
dummytop->subsys[i]->cgroup = dummytop;
cgrp->subsys[i] = NULL;
- rcu_assign_pointer(subsys[i]->root, &rootnode);
- list_del(&ss->sibling);
+ subsys[i]->root = &rootnode;
+ list_move(&ss->sibling, &rootnode.subsys_list);
+ mutex_unlock(&ss->hierarchy_mutex);
} else if (bit & final_bits) {
/* Subsystem state should already exist */
BUG_ON(!cgrp->subsys[i]);
@@ -990,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
root = NULL;
} else {
/* New superblock */
- struct cgroup *cgrp = &root->top_cgroup;
+ struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
int i;
@@ -1031,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
list_add(&root->root_list, &roots);
root_count++;
- sb->s_root->d_fsdata = &root->top_cgroup;
+ sb->s_root->d_fsdata = root_cgrp;
root->top_cgroup.dentry = sb->s_root;
/* Link the top cgroup in this hierarchy into all
@@ -1042,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
struct hlist_node *node;
struct css_set *cg;
- hlist_for_each_entry(cg, node, hhead, hlist) {
- struct cg_cgroup_link *link;
-
- BUG_ON(list_empty(&tmp_cg_links));
- link = list_entry(tmp_cg_links.next,
- struct cg_cgroup_link,
- cgrp_link_list);
- list_del(&link->cgrp_link_list);
- link->cg = cg;
- list_add(&link->cgrp_link_list,
- &root->top_cgroup.css_sets);
- list_add(&link->cg_link_list, &cg->cg_links);
- }
+ hlist_for_each_entry(cg, node, hhead, hlist)
+ link_css_set(&tmp_cg_links, cg, root_cgrp);
}
write_unlock(&css_set_lock);
free_cg_links(&tmp_cg_links);
- BUG_ON(!list_empty(&cgrp->sibling));
- BUG_ON(!list_empty(&cgrp->children));
+ BUG_ON(!list_empty(&root_cgrp->sibling));
+ BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
- cgroup_populate_dir(cgrp);
+ cgroup_populate_dir(root_cgrp);
mutex_unlock(&inode->i_mutex);
mutex_unlock(&cgroup_mutex);
}
@@ -1113,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
}
write_unlock(&css_set_lock);
- if (!list_empty(&root->root_list)) {
- list_del(&root->root_list);
- root_count--;
- }
+ list_del(&root->root_list);
+ root_count--;
+
mutex_unlock(&cgroup_mutex);
kfree(root);
@@ -1145,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
- * Called with cgroup_mutex held. Writes path of cgroup into buf.
- * Returns 0 on success, -errno on error.
+ * Called with cgroup_mutex held or else with an RCU-protected cgroup
+ * reference. Writes path of cgroup into buf. Returns 0 on success,
+ * -errno on error.
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
char *start;
+ struct dentry *dentry = rcu_dereference(cgrp->dentry);
- if (cgrp == dummytop) {
+ if (!dentry || cgrp == dummytop) {
/*
* Inactive subsystems have no dentry for their root
* cgroup
@@ -1165,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
*--start = '\0';
for (;;) {
- int len = cgrp->dentry->d_name.len;
+ int len = dentry->d_name.len;
if ((start -= len) < buf)
return -ENAMETOOLONG;
memcpy(start, cgrp->dentry->d_name.name, len);
cgrp = cgrp->parent;
if (!cgrp)
break;
+ dentry = rcu_dereference(cgrp->dentry);
if (!cgrp->parent)
continue;
if (--start < buf)
@@ -1216,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
int retval = 0;
struct cgroup_subsys *ss;
struct cgroup *oldcgrp;
- struct css_set *cg = tsk->cgroups;
+ struct css_set *cg;
struct css_set *newcg;
struct cgroupfs_root *root = cgrp->root;
int subsys_id;
@@ -1236,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
}
}
+ task_lock(tsk);
+ cg = tsk->cgroups;
+ get_css_set(cg);
+ task_unlock(tsk);
/*
* Locate or allocate a new css_set for this task,
* based on its final set of cgroups
*/
newcg = find_css_set(cg, cgrp);
+ put_css_set(cg);
if (!newcg)
return -ENOMEM;
@@ -1445,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- if (!cft || cgroup_is_removed(cgrp))
+ if (cgroup_is_removed(cgrp))
return -ENODEV;
if (cft->write)
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1490,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- if (!cft || cgroup_is_removed(cgrp))
+ if (cgroup_is_removed(cgrp))
return -ENODEV;
if (cft->read)
@@ -1554,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
err = generic_file_open(inode, file);
if (err)
return err;
-
cft = __d_cft(file->f_dentry);
- if (!cft)
- return -ENODEV;
+
if (cft->read_map || cft->read_seq_string) {
struct cgroup_seqfile_state *state =
kzalloc(sizeof(*state), GFP_USER);
@@ -1671,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
if (!error) {
dentry->d_fsdata = cgrp;
inc_nlink(parent->d_inode);
- cgrp->dentry = dentry;
+ rcu_assign_pointer(cgrp->dentry, dentry);
dget(dentry);
}
dput(dentry);
@@ -1812,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
{
struct task_struct *res;
struct list_head *l = it->task;
+ struct cg_cgroup_link *link;
/* If the iterator cg is NULL, we have no tasks */
if (!it->cg_link)
@@ -1819,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
res = list_entry(l, struct task_struct, cg_list);
/* Advance iterator to find next entry */
l = l->next;
- if (l == &res->cgroups->tasks) {
+ link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
+ if (l == &link->cg->tasks) {
/* We reached the end of this task list - move on to
* the next cg_cgroup_link */
cgroup_advance_iter(cgrp, it);
@@ -2013,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
*/
static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
{
- int n = 0;
+ int n = 0, pid;
struct cgroup_iter it;
struct task_struct *tsk;
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
if (unlikely(n == npids))
break;
- pidarray[n++] = task_pid_vnr(tsk);
+ pid = task_pid_vnr(tsk);
+ if (pid > 0)
+ pidarray[n++] = pid;
}
cgroup_iter_end(cgrp, &it);
return n;
@@ -2052,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
ret = 0;
cgrp = dentry->d_fsdata;
- rcu_read_lock();
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
@@ -2077,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
}
cgroup_iter_end(cgrp, &it);
- rcu_read_unlock();
err:
return ret;
}
@@ -2324,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
struct cgroup *cgrp)
{
css->cgroup = cgrp;
- atomic_set(&css->refcnt, 0);
+ atomic_set(&css->refcnt, 1);
css->flags = 0;
if (cgrp == dummytop)
set_bit(CSS_ROOT, &css->flags);
@@ -2332,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
cgrp->subsys[ss->subsys_id] = css;
}
+static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
+{
+ /* We need to take each hierarchy_mutex in a consistent order */
+ int i;
+
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (ss->root == root)
+ mutex_lock_nested(&ss->hierarchy_mutex, i);
+ }
+}
+
+static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
+{
+ int i;
+
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (ss->root == root)
+ mutex_unlock(&ss->hierarchy_mutex);
+ }
+}
+
/*
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
@@ -2380,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
init_cgroup_css(css, ss, cgrp);
}
+ cgroup_lock_hierarchy(root);
list_add(&cgrp->sibling, &cgrp->parent->children);
+ cgroup_unlock_hierarchy(root);
root->number_of_cgroups++;
err = cgroup_create_dir(cgrp, dentry, mode);
@@ -2431,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
{
/* Check the reference count on each subsystem. Since we
* already established that there are no tasks in the
- * cgroup, if the css refcount is also 0, then there should
+ * cgroup, if the css refcount is also 1, then there should
* be no outstanding references, so the subsystem is safe to
* destroy. We scan across all subsystems rather than using
* the per-hierarchy linked list of mounted subsystems since
@@ -2452,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
* matter, since it can only happen if the cgroup
* has been deleted and hence no longer needs the
* release agent to be called anyway. */
- if (css && atomic_read(&css->refcnt))
+ if (css && (atomic_read(&css->refcnt) > 1))
return 1;
}
return 0;
}
+/*
+ * Atomically mark all (or else none) of the cgroup's CSS objects as
+ * CSS_REMOVED. Return true on success, or false if the cgroup has
+ * busy subsystems. Call with cgroup_mutex held
+ */
+
+static int cgroup_clear_css_refs(struct cgroup *cgrp)
+{
+ struct cgroup_subsys *ss;
+ unsigned long flags;
+ bool failed = false;
+ local_irq_save(flags);
+ for_each_subsys(cgrp->root, ss) {
+ struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+ int refcnt;
+ do {
+ /* We can only remove a CSS with a refcnt==1 */
+ refcnt = atomic_read(&css->refcnt);
+ if (refcnt > 1) {
+ failed = true;
+ goto done;
+ }
+ BUG_ON(!refcnt);
+ /*
+ * Drop the refcnt to 0 while we check other
+ * subsystems. This will cause any racing
+ * css_tryget() to spin until we set the
+ * CSS_REMOVED bits or abort
+ */
+ } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+ }
+ done:
+ for_each_subsys(cgrp->root, ss) {
+ struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+ if (failed) {
+ /*
+ * Restore old refcnt if we previously managed
+ * to clear it from 1 to 0
+ */
+ if (!atomic_read(&css->refcnt))
+ atomic_set(&css->refcnt, 1);
+ } else {
+ /* Commit the fact that the CSS is removed */
+ set_bit(CSS_REMOVED, &css->flags);
+ }
+ }
+ local_irq_restore(flags);
+ return !failed;
+}
+
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
struct cgroup *cgrp = dentry->d_fsdata;
struct dentry *d;
struct cgroup *parent;
- struct super_block *sb;
- struct cgroupfs_root *root;
/* the vfs holds both inode->i_mutex already */
@@ -2487,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
- root = cgrp->root;
- sb = root->sb;
if (atomic_read(&cgrp->count)
|| !list_empty(&cgrp->children)
- || cgroup_has_css_refs(cgrp)) {
+ || !cgroup_clear_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
@@ -2502,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
if (!list_empty(&cgrp->release_list))
list_del(&cgrp->release_list);
spin_unlock(&release_list_lock);
- /* delete my sibling from parent->children */
+
+ cgroup_lock_hierarchy(cgrp->root);
+ /* delete this cgroup from parent->children */
list_del(&cgrp->sibling);
+ cgroup_unlock_hierarchy(cgrp->root);
+
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
spin_unlock(&d->d_lock);
@@ -2525,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
/* Create the top cgroup state for this subsystem */
+ list_add(&ss->sibling, &rootnode.subsys_list);
ss->root = &rootnode;
css = ss->create(ss, dummytop);
/* We don't handle early failures gracefully */
@@ -2544,6 +2629,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
+ mutex_init(&ss->hierarchy_mutex);
ss->active = 1;
}
@@ -2562,7 +2648,6 @@ int __init cgroup_init_early(void)
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&rootnode);
- list_add(&rootnode.root_list, &roots);
root_count = 1;
init_task.cgroups = &init_css_set;
@@ -2669,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
mutex_lock(&cgroup_mutex);
- for_each_root(root) {
+ for_each_active_root(root) {
struct cgroup_subsys *ss;
struct cgroup *cgrp;
int subsys_id;
int count = 0;
- /* Skip this hierarchy if it has no active subsystems */
- if (!root->actual_subsys_bits)
- continue;
seq_printf(m, "%lu:", root->subsys_bits);
for_each_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -2800,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child)
{
if (use_task_css_set_links) {
write_lock(&css_set_lock);
+ task_lock(child);
if (list_empty(&child->cg_list))
list_add(&child->cg_list, &child->cgroups->tasks);
+ task_unlock(child);
write_unlock(&css_set_lock);
}
}
@@ -2907,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&cgroup_mutex);
return 0;
}
+ task_lock(tsk);
cg = tsk->cgroups;
parent = task_cgroup(tsk, subsys->subsys_id);
@@ -2919,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
/* Keep the cgroup alive */
get_css_set(cg);
+ task_unlock(tsk);
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
@@ -2937,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
}
/* Create the cgroup directory, which also creates the cgroup */
- ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+ ret = vfs_mkdir(inode, dentry, 0755);
child = __d_cgrp(dentry);
dput(dentry);
if (ret) {
@@ -2947,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
goto out_release;
}
- if (!child) {
- printk(KERN_INFO
- "Couldn't find new cgroup %s\n", nodename);
- ret = -ENOMEM;
- goto out_release;
- }
-
/* The cgroup now exists. Retake cgroup_mutex and check
* that we're still in the same state that we thought we
* were. */
@@ -3049,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
- if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+ if ((atomic_dec_return(&css->refcnt) == 1) &&
+ notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 30e74dd6d01..79e40f00dcb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -379,8 +379,11 @@ static cpumask_var_t frozen_cpus;
int disable_nonboot_cpus(void)
{
- int cpu, first_cpu, error = 0;
+ int cpu, first_cpu, error;
+ error = stop_machine_create();
+ if (error)
+ return error;
cpu_maps_update_begin();
first_cpu = cpumask_first(cpu_online_mask);
/* We take down all of the non-boot CPUs in one shot to avoid races
@@ -409,6 +412,7 @@ int disable_nonboot_cpus(void)
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
+ stop_machine_destroy();
return error;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 345ace5117d..a85678865c5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -84,7 +84,7 @@ struct cpuset {
struct cgroup_subsys_state css;
unsigned long flags; /* "unsigned long" so bitops work */
- cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
+ cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
struct cpuset *parent; /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
- .cpus_allowed = CPU_MASK_ALL,
- .mems_allowed = NODE_MASK_ALL,
};
/*
@@ -278,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
};
/*
- * Return in *pmask the portion of a cpusets's cpus_allowed that
+ * Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus. If we get
* all the way to the top and still haven't found any online cpus,
@@ -291,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
* Call with callback_mutex held.
*/
-static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
+static void guarantee_online_cpus(const struct cpuset *cs,
+ struct cpumask *pmask)
{
- while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
+ while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
cs = cs->parent;
if (cs)
- cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
+ cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
else
- *pmask = cpu_online_map;
- BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
+ cpumask_copy(pmask, cpu_online_mask);
+ BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
}
/*
@@ -375,14 +374,9 @@ void cpuset_update_task_memory_state(void)
struct task_struct *tsk = current;
struct cpuset *cs;
- if (task_cs(tsk) == &top_cpuset) {
- /* Don't need rcu for top_cpuset. It's never freed. */
- my_cpusets_mem_gen = top_cpuset.mems_generation;
- } else {
- rcu_read_lock();
- my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
- rcu_read_unlock();
- }
+ rcu_read_lock();
+ my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
+ rcu_read_unlock();
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
mutex_lock(&callback_mutex);
@@ -414,12 +408,43 @@ void cpuset_update_task_memory_state(void)
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
- return cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
+ return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
nodes_subset(p->mems_allowed, q->mems_allowed) &&
is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
is_mem_exclusive(p) <= is_mem_exclusive(q);
}
+/**
+ * alloc_trial_cpuset - allocate a trial cpuset
+ * @cs: the cpuset that the trial cpuset duplicates
+ */
+static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
+{
+ struct cpuset *trial;
+
+ trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
+ if (!trial)
+ return NULL;
+
+ if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
+ kfree(trial);
+ return NULL;
+ }
+ cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+
+ return trial;
+}
+
+/**
+ * free_trial_cpuset - free the trial cpuset
+ * @trial: the trial cpuset to be freed
+ */
+static void free_trial_cpuset(struct cpuset *trial)
+{
+ free_cpumask_var(trial->cpus_allowed);
+ kfree(trial);
+}
+
/*
* validate_change() - Used to validate that any proposed cpuset change
* follows the structural rules for cpusets.
@@ -469,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
c = cgroup_cs(cont);
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur &&
- cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+ cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
return -EINVAL;
if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
c != cur &&
@@ -479,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
if (cgroup_task_count(cur->css.cgroup)) {
- if (cpus_empty(trial->cpus_allowed) ||
+ if (cpumask_empty(trial->cpus_allowed) ||
nodes_empty(trial->mems_allowed)) {
return -ENOSPC;
}
@@ -494,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
*/
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
- return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+ return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
}
static void
@@ -519,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
- if (cpus_empty(cp->cpus_allowed))
+ if (cpumask_empty(cp->cpus_allowed))
continue;
if (is_sched_load_balance(cp))
@@ -543,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
@@ -586,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* element of the partition (one sched domain) to be passed to
* partition_sched_domains().
*/
-static int generate_sched_domains(cpumask_t **domains,
+/* FIXME: see the FIXME in partition_sched_domains() */
+static int generate_sched_domains(struct cpumask **domains,
struct sched_domain_attr **attributes)
{
LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -594,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
- cpumask_t *doms; /* resulting partition; i.e. sched domains */
+ struct cpumask *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
- int nslot; /* next empty doms[] cpumask_t slot */
+ int nslot; /* next empty doms[] struct cpumask slot */
doms = NULL;
dattr = NULL;
@@ -605,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
- doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ doms = kmalloc(cpumask_size(), GFP_KERNEL);
if (!doms)
goto done;
@@ -614,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
- *doms = top_cpuset.cpus_allowed;
+ cpumask_copy(doms, top_cpuset.cpus_allowed);
ndoms = 1;
goto done;
@@ -633,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
- if (cpus_empty(cp->cpus_allowed))
+ if (cpumask_empty(cp->cpus_allowed))
continue;
/*
@@ -684,7 +710,7 @@ restart:
* Now we know how many domains to create.
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
- doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
+ doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
if (!doms)
goto done;
@@ -696,7 +722,7 @@ restart:
for (nslot = 0, i = 0; i < csn; i++) {
struct cpuset *a = csa[i];
- cpumask_t *dp;
+ struct cpumask *dp;
int apn = a->pn;
if (apn < 0) {
@@ -719,14 +745,14 @@ restart:
continue;
}
- cpus_clear(*dp);
+ cpumask_clear(dp);
if (dattr)
*(dattr + nslot) = SD_ATTR_INIT;
for (j = i; j < csn; j++) {
struct cpuset *b = csa[j];
if (apn == b->pn) {
- cpus_or(*dp, *dp, b->cpus_allowed);
+ cpumask_or(dp, dp, b->cpus_allowed);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -766,7 +792,7 @@ done:
static void do_rebuild_sched_domains(struct work_struct *unused)
{
struct sched_domain_attr *attr;
- cpumask_t *doms;
+ struct cpumask *doms;
int ndoms;
get_online_cpus();
@@ -835,7 +861,7 @@ void rebuild_sched_domains(void)
static int cpuset_test_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- return !cpus_equal(tsk->cpus_allowed,
+ return !cpumask_equal(&tsk->cpus_allowed,
(cgroup_cs(scan->cg))->cpus_allowed);
}
@@ -853,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
static void cpuset_change_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
+ set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
}
/**
@@ -885,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
-static int update_cpumask(struct cpuset *cs, const char *buf)
+static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+ const char *buf)
{
struct ptr_heap heap;
- struct cpuset trialcs;
int retval;
int is_load_balanced;
@@ -896,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
- trialcs = *cs;
-
/*
* An empty cpus_allowed is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
@@ -905,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
* with tasks have cpus.
*/
if (!*buf) {
- cpus_clear(trialcs.cpus_allowed);
+ cpumask_clear(trialcs->cpus_allowed);
} else {
- retval = cpulist_parse(buf, &trialcs.cpus_allowed);
+ retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0)
return retval;
- if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
+ if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
return -EINVAL;
}
- retval = validate_change(cs, &trialcs);
+ retval = validate_change(cs, trialcs);
if (retval < 0)
return retval;
/* Nothing to do if the cpus didn't change */
- if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
return 0;
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
- is_load_balanced = is_sched_load_balance(&trialcs);
+ is_load_balanced = is_sched_load_balance(trialcs);
mutex_lock(&callback_mutex);
- cs->cpus_allowed = trialcs.cpus_allowed;
+ cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
mutex_unlock(&callback_mutex);
/*
@@ -1017,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
fudge = 10; /* spare mmarray[] slots */
- fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+ fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
retval = -ENOMEM;
/*
@@ -1104,9 +1128,9 @@ done:
* lock each such tasks mm->mmap_sem, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed.
*/
-static int update_nodemask(struct cpuset *cs, const char *buf)
+static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
+ const char *buf)
{
- struct cpuset trialcs;
nodemask_t oldmem;
int retval;
@@ -1117,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
- trialcs = *cs;
-
/*
* An empty mems_allowed is ok iff there are no tasks in the cpuset.
* Since nodelist_parse() fails on an empty mask, we special case
@@ -1126,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
* with tasks have memory.
*/
if (!*buf) {
- nodes_clear(trialcs.mems_allowed);
+ nodes_clear(trialcs->mems_allowed);
} else {
- retval = nodelist_parse(buf, trialcs.mems_allowed);
+ retval = nodelist_parse(buf, trialcs->mems_allowed);
if (retval < 0)
goto done;
- if (!nodes_subset(trialcs.mems_allowed,
+ if (!nodes_subset(trialcs->mems_allowed,
node_states[N_HIGH_MEMORY]))
return -EINVAL;
}
oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ if (nodes_equal(oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
- retval = validate_change(cs, &trialcs);
+ retval = validate_change(cs, trialcs);
if (retval < 0)
goto done;
mutex_lock(&callback_mutex);
- cs->mems_allowed = trialcs.mems_allowed;
+ cs->mems_allowed = trialcs->mems_allowed;
cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
@@ -1167,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
- if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+ if (!cpumask_empty(cs->cpus_allowed) &&
+ is_sched_load_balance(cs))
async_rebuild_sched_domains();
}
@@ -1186,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on)
{
- struct cpuset trialcs;
+ struct cpuset *trialcs;
int err;
int balance_flag_changed;
- trialcs = *cs;
+ trialcs = alloc_trial_cpuset(cs);
+ if (!trialcs)
+ return -ENOMEM;
+
if (turning_on)
- set_bit(bit, &trialcs.flags);
+ set_bit(bit, &trialcs->flags);
else
- clear_bit(bit, &trialcs.flags);
+ clear_bit(bit, &trialcs->flags);
- err = validate_change(cs, &trialcs);
+ err = validate_change(cs, trialcs);
if (err < 0)
- return err;
+ goto out;
balance_flag_changed = (is_sched_load_balance(cs) !=
- is_sched_load_balance(&trialcs));
+ is_sched_load_balance(trialcs));
mutex_lock(&callback_mutex);
- cs->flags = trialcs.flags;
+ cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
- if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
+ if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
- return 0;
+out:
+ free_trial_cpuset(trialcs);
+ return err;
}
/*
@@ -1311,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
return val;
}
+/* Protected by cgroup_lock */
+static cpumask_var_t cpus_attach;
+
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct task_struct *tsk)
{
struct cpuset *cs = cgroup_cs(cont);
+ int ret = 0;
- if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+ if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
- if (tsk->flags & PF_THREAD_BOUND) {
- cpumask_t mask;
+ if (tsk->flags & PF_THREAD_BOUND) {
mutex_lock(&callback_mutex);
- mask = cs->cpus_allowed;
+ if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
+ ret = -EINVAL;
mutex_unlock(&callback_mutex);
- if (!cpus_equal(tsk->cpus_allowed, mask))
- return -EINVAL;
}
- return security_task_setscheduler(tsk, 0, NULL);
+ return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
}
static void cpuset_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct cgroup *oldcont,
struct task_struct *tsk)
{
- cpumask_t cpus;
nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
int err;
- mutex_lock(&callback_mutex);
- guarantee_online_cpus(cs, &cpus);
- err = set_cpus_allowed_ptr(tsk, &cpus);
- mutex_unlock(&callback_mutex);
+ if (cs == &top_cpuset) {
+ cpumask_copy(cpus_attach, cpu_possible_mask);
+ } else {
+ mutex_lock(&callback_mutex);
+ guarantee_online_cpus(cs, cpus_attach);
+ mutex_unlock(&callback_mutex);
+ }
+ err = set_cpus_allowed_ptr(tsk, cpus_attach);
if (err)
return;
@@ -1359,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
cpuset_migrate_mm(mm, &from, &to);
mmput(mm);
}
-
}
/* The various types of files and directories in a cpuset file system */
@@ -1454,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int retval = 0;
+ struct cpuset *cs = cgroup_cs(cgrp);
+ struct cpuset *trialcs;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
+ trialcs = alloc_trial_cpuset(cs);
+ if (!trialcs)
+ return -ENOMEM;
+
switch (cft->private) {
case FILE_CPULIST:
- retval = update_cpumask(cgroup_cs(cgrp), buf);
+ retval = update_cpumask(cs, trialcs, buf);
break;
case FILE_MEMLIST:
- retval = update_nodemask(cgroup_cs(cgrp), buf);
+ retval = update_nodemask(cs, trialcs, buf);
break;
default:
retval = -EINVAL;
break;
}
+
+ free_trial_cpuset(trialcs);
cgroup_unlock();
return retval;
}
@@ -1487,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
- cpumask_t mask;
+ int ret;
mutex_lock(&callback_mutex);
- mask = cs->cpus_allowed;
+ ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
- return cpulist_scnprintf(page, PAGE_SIZE, &mask);
+ return ret;
}
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1729,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
parent_cs = cgroup_cs(parent);
cs->mems_allowed = parent_cs->mems_allowed;
- cs->cpus_allowed = parent_cs->cpus_allowed;
+ cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
return;
}
@@ -1755,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
cs = kmalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
+ if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
+ kfree(cs);
+ return ERR_PTR(-ENOMEM);
+ }
cpuset_update_task_memory_state();
cs->flags = 0;
@@ -1763,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
- cpus_clear(cs->cpus_allowed);
+ cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
cs->mems_generation = cpuset_mems_generation++;
fmeter_init(&cs->fmeter);
@@ -1790,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
number_of_cpusets--;
+ free_cpumask_var(cs->cpus_allowed);
kfree(cs);
}
@@ -1813,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
int __init cpuset_init_early(void)
{
+ alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+
top_cpuset.mems_generation = cpuset_mems_generation++;
return 0;
}
@@ -1828,7 +1875,7 @@ int __init cpuset_init(void)
{
int err = 0;
- cpus_setall(top_cpuset.cpus_allowed);
+ cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
fmeter_init(&top_cpuset.fmeter);
@@ -1840,6 +1887,9 @@ int __init cpuset_init(void)
if (err < 0)
return err;
+ if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
+ BUG();
+
number_of_cpusets = 1;
return 0;
}
@@ -1914,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
* has online cpus, so can't be empty).
*/
parent = cs->parent;
- while (cpus_empty(parent->cpus_allowed) ||
+ while (cpumask_empty(parent->cpus_allowed) ||
nodes_empty(parent->mems_allowed))
parent = parent->parent;
@@ -1955,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
}
/* Continue past cpusets with all cpus, mems online */
- if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
+ if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
@@ -1963,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
- cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
+ cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+ cpu_online_mask);
nodes_and(cp->mems_allowed, cp->mems_allowed,
node_states[N_HIGH_MEMORY]);
mutex_unlock(&callback_mutex);
/* Move tasks from the empty cpuset to a parent */
- if (cpus_empty(cp->cpus_allowed) ||
+ if (cpumask_empty(cp->cpus_allowed) ||
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
else {
@@ -1995,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
{
struct sched_domain_attr *attr;
- cpumask_t *doms;
+ struct cpumask *doms;
int ndoms;
switch (phase) {
@@ -2010,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
}
cgroup_lock();
- top_cpuset.cpus_allowed = cpu_online_map;
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
scan_for_empty_cpusets(&top_cpuset);
ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
@@ -2055,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
void __init cpuset_init_smp(void)
{
- top_cpuset.cpus_allowed = cpu_online_map;
+ cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2065,15 +2116,15 @@ void __init cpuset_init_smp(void)
/**
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
*
- * Description: Returns the cpumask_t cpus_allowed of the cpuset
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
* subset of cpu_online_map, even if this means going outside the
* tasks cpuset.
**/
-void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
mutex_lock(&callback_mutex);
cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2084,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
* cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
* Must be called with callback_mutex held.
**/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
{
task_lock(tsk);
guarantee_online_cpus(task_cs(tsk), pmask);
diff --git a/kernel/cred.c b/kernel/cred.c
index ff7bc071991..3a039189d70 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -372,7 +372,8 @@ int commit_creds(struct cred *new)
old->fsuid != new->fsuid ||
old->fsgid != new->fsgid ||
!cap_issubset(new->cap_permitted, old->cap_permitted)) {
- set_dumpable(task->mm, suid_dumpable);
+ if (task->mm)
+ set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
smp_wmb();
}
@@ -506,6 +507,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
else
old = get_cred(&init_cred);
+ *new = *old;
get_uid(new->user);
get_group_info(new->group_info);
@@ -529,6 +531,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
error:
put_cred(new);
+ put_cred(old);
return NULL;
}
EXPORT_SYMBOL(prepare_kernel_cred);
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0511716e942..667c841c295 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void)
module_init(proc_execdomains_init);
#endif
-asmlinkage long
-sys_personality(u_long personality)
+SYSCALL_DEFINE1(personality, u_long, personality)
{
u_long old = current->personality;
diff --git a/kernel/exit.c b/kernel/exit.c
index c7740fa3252..f80dec3f187 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1141,7 +1141,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code)
EXPORT_SYMBOL(complete_and_exit);
-asmlinkage long sys_exit(int error_code)
+SYSCALL_DEFINE1(exit, int, error_code)
{
do_exit((error_code&0xff)<<8);
}
@@ -1182,9 +1182,11 @@ do_group_exit(int exit_code)
* wait4()-ing process will get the correct exit code - even if this
* thread is not the thread group leader.
*/
-asmlinkage void sys_exit_group(int error_code)
+SYSCALL_DEFINE1(exit_group, int, error_code)
{
do_group_exit((error_code & 0xff) << 8);
+ /* NOTREACHED */
+ return 0;
}
static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
@@ -1752,9 +1754,8 @@ end:
return retval;
}
-asmlinkage long sys_waitid(int which, pid_t upid,
- struct siginfo __user *infop, int options,
- struct rusage __user *ru)
+SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
+ infop, int, options, struct rusage __user *, ru)
{
struct pid *pid = NULL;
enum pid_type type;
@@ -1793,8 +1794,8 @@ asmlinkage long sys_waitid(int which, pid_t upid,
return ret;
}
-asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
- int options, struct rusage __user *ru)
+SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+ int, options, struct rusage __user *, ru)
{
struct pid *pid = NULL;
enum pid_type type;
@@ -1831,7 +1832,7 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
* sys_waitpid() remains for compatibility. waitpid() should be
* implemented by calling sys_wait4() from libc.a.
*/
-asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
+SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
return sys_wait4(pid, stat_addr, options, NULL);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b8f2a78be3..bf0cef8bbdf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -901,7 +901,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
clear_freeze_flag(p);
}
-asmlinkage long sys_set_tid_address(int __user *tidptr)
+SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
current->clear_child_tid = tidptr;
@@ -1126,12 +1126,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (pid != &init_struct_pid) {
retval = -ENOMEM;
- pid = alloc_pid(task_active_pid_ns(p));
+ pid = alloc_pid(p->nsproxy->pid_ns);
if (!pid)
goto bad_fork_cleanup_io;
if (clone_flags & CLONE_NEWPID) {
- retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+ retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
if (retval < 0)
goto bad_fork_free_pid;
}
@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void)
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- vm_area_cachep = kmem_cache_create("vm_area_struct",
- sizeof(struct vm_area_struct), 0,
- SLAB_PANIC, NULL);
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ mmap_init();
}
/*
@@ -1605,7 +1603,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
* constructed. Here we are modifying the current, active,
* task_struct.
*/
-asmlinkage long sys_unshare(unsigned long unshare_flags)
+SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
int err = 0;
struct fs_struct *fs, *new_fs = NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index 002aa189eb0..f89d373a9c6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1733,9 +1733,8 @@ pi_faulted:
* @head: pointer to the list-head
* @len: length of the list-head, as userspace expects
*/
-asmlinkage long
-sys_set_robust_list(struct robust_list_head __user *head,
- size_t len)
+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+ size_t, len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -1756,9 +1755,9 @@ sys_set_robust_list(struct robust_list_head __user *head,
* @head_ptr: pointer to a list-head pointer, the kernel fills it in
* @len_ptr: pointer to a length field, the kernel fills in the header size
*/
-asmlinkage long
-sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
- size_t __user *len_ptr)
+SYSCALL_DEFINE3(get_robust_list, int, pid,
+ struct robust_list_head __user * __user *, head_ptr,
+ size_t __user *, len_ptr)
{
struct robust_list_head __user *head;
unsigned long ret;
@@ -1978,9 +1977,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
}
-asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
- struct timespec __user *utime, u32 __user *uaddr2,
- u32 val3)
+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+ struct timespec __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
{
struct timespec ts;
ktime_t t, *tp = NULL;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1455b7651b6..2dc30c59c5f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1467,8 +1467,8 @@ out:
return ret;
}
-asmlinkage long
-sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
+SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
+ struct timespec __user *, rmtp)
{
struct timespec tu;
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f7321b8c..1de9700f416 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/async.h>
#include "internals.h"
@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void)
unsigned int status;
int i;
+ /*
+ * quiesce the kernel, or at least the asynchronous portion
+ */
+ async_synchronize_full();
mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
diff --git a/kernel/itimer.c b/kernel/itimer.c
index db7c358b9a0..6a5fe93dd8b 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -100,7 +100,7 @@ int do_getitimer(int which, struct itimerval *value)
return 0;
}
-asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
+SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
{
int error = -EFAULT;
struct itimerval get_buffer;
@@ -260,9 +260,8 @@ unsigned int alarm_setitimer(unsigned int seconds)
return it_old.it_value.tv_sec;
}
-asmlinkage long sys_setitimer(int which,
- struct itimerval __user *value,
- struct itimerval __user *ovalue)
+SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
+ struct itimerval __user *, ovalue)
{
struct itimerval set_buffer, get_buffer;
int error;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 3fb855ad6aa..8a6d7b08864 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -934,9 +934,8 @@ struct kimage *kexec_crash_image;
static DEFINE_MUTEX(kexec_mutex);
-asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
- struct kexec_segment __user *segments,
- unsigned long flags)
+SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
+ struct kexec_segment __user *, segments, unsigned long, flags)
{
struct kimage **dest_image, *image;
int result;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1b9cbdc0127..7ba8cd9845c 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -123,7 +123,7 @@ static int collect_garbage_slots(void);
static int __kprobes check_safety(void)
{
int ret = 0;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
ret = freeze_processes();
if (ret == 0) {
struct task_struct *p, *q;
diff --git a/kernel/module.c b/kernel/module.c
index 496dcb57b60..e8b51d41dd7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -50,6 +50,7 @@
#include <asm/sections.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
#if 0
#define DEBUGP printk
@@ -742,8 +743,8 @@ static void wait_for_zero_refcount(struct module *mod)
mutex_lock(&module_mutex);
}
-asmlinkage long
-sys_delete_module(const char __user *name_user, unsigned int flags)
+SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
+ unsigned int, flags)
{
struct module *mod;
char name[MODULE_NAME_LEN];
@@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
mod->exit();
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
+ async_synchronize_full();
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
@@ -2294,10 +2296,8 @@ static noinline struct module *load_module(void __user *umod,
}
/* This is where the real work happens */
-asmlinkage long
-sys_init_module(void __user *umod,
- unsigned long len,
- const char __user *uargs)
+SYSCALL_DEFINE3(init_module, void __user *, umod,
+ unsigned long, len, const char __user *, uargs)
{
struct module *mod;
int ret = 0;
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 43c2111cd54..78bc3fdac0d 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -13,7 +13,6 @@
struct ns_cgroup {
struct cgroup_subsys_state css;
- spinlock_t lock;
};
struct cgroup_subsys ns_subsys;
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
if (!ns_cgroup)
return ERR_PTR(-ENOMEM);
- spin_lock_init(&ns_cgroup->lock);
return &ns_cgroup->css;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 064e76afa50..1b3586fe753 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -474,8 +474,14 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
}
EXPORT_SYMBOL(task_session_nr_ns);
+struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
+{
+ return ns_of_pid(task_pid(tsk));
+}
+EXPORT_SYMBOL_GPL(task_active_pid_ns);
+
/*
- * Used by proc to find the first pid that is greater then or equal to nr.
+ * Used by proc to find the first pid that is greater than or equal to nr.
*
* If there is a pid at nr this function is exactly the same as find_pid_ns.
*/
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 887c63787de..052ec4d195c 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -477,10 +477,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
/* Create a POSIX.1b interval timer. */
-asmlinkage long
-sys_timer_create(const clockid_t which_clock,
- struct sigevent __user *timer_event_spec,
- timer_t __user * created_timer_id)
+SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
+ struct sigevent __user *, timer_event_spec,
+ timer_t __user *, created_timer_id)
{
struct k_itimer *new_timer;
int error, new_timer_id;
@@ -661,8 +660,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
}
/* Get the time remaining on a POSIX.1b interval timer. */
-asmlinkage long
-sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting)
+SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
+ struct itimerspec __user *, setting)
{
struct k_itimer *timr;
struct itimerspec cur_setting;
@@ -691,8 +690,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting)
* the call back to do_schedule_next_timer(). So all we need to do is
* to pick up the frozen overrun.
*/
-asmlinkage long
-sys_timer_getoverrun(timer_t timer_id)
+SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
{
struct k_itimer *timr;
int overrun;
@@ -760,10 +758,9 @@ common_timer_set(struct k_itimer *timr, int flags,
}
/* Set a POSIX.1b interval timer */
-asmlinkage long
-sys_timer_settime(timer_t timer_id, int flags,
- const struct itimerspec __user *new_setting,
- struct itimerspec __user *old_setting)
+SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
+ const struct itimerspec __user *, new_setting,
+ struct itimerspec __user *, old_setting)
{
struct k_itimer *timr;
struct itimerspec new_spec, old_spec;
@@ -816,8 +813,7 @@ static inline int timer_delete_hook(struct k_itimer *timer)
}
/* Delete a POSIX.1b interval timer. */
-asmlinkage long
-sys_timer_delete(timer_t timer_id)
+SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
{
struct k_itimer *timer;
unsigned long flags;
@@ -903,8 +899,8 @@ int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
}
EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
-asmlinkage long sys_clock_settime(const clockid_t which_clock,
- const struct timespec __user *tp)
+SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
+ const struct timespec __user *, tp)
{
struct timespec new_tp;
@@ -916,8 +912,8 @@ asmlinkage long sys_clock_settime(const clockid_t which_clock,
return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
}
-asmlinkage long
-sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp)
+SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
+ struct timespec __user *,tp)
{
struct timespec kernel_tp;
int error;
@@ -933,8 +929,8 @@ sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp)
}
-asmlinkage long
-sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
+SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
+ struct timespec __user *, tp)
{
struct timespec rtn_tp;
int error;
@@ -963,10 +959,9 @@ static int common_nsleep(const clockid_t which_clock, int flags,
which_clock);
}
-asmlinkage long
-sys_clock_nanosleep(const clockid_t which_clock, int flags,
- const struct timespec __user *rqtp,
- struct timespec __user *rmtp)
+SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
+ const struct timespec __user *, rqtp,
+ struct timespec __user *, rmtp)
{
struct timespec t;
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 597823b5b70..d7a10167a25 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -4,7 +4,8 @@ EXTRA_CFLAGS += -DDEBUG
endif
obj-y := main.o
-obj-$(CONFIG_PM_SLEEP) += process.o console.o
+obj-$(CONFIG_PM_SLEEP) += console.o
+obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f77d3819ef5..45e8541ab7e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -258,12 +258,12 @@ int hibernation_snapshot(int platform_mode)
{
int error;
- /* Free memory before shutting down devices. */
- error = swsusp_shrink_memory();
+ error = platform_begin(platform_mode);
if (error)
return error;
- error = platform_begin(platform_mode);
+ /* Free memory before shutting down devices. */
+ error = swsusp_shrink_memory();
if (error)
goto Close;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5d2ab836e99..f5fc2d7680f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -25,6 +25,7 @@
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
+#include <linux/list.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
return ret;
}
-static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
-{
- free_list_of_pages(ca->chain, clear_page_nosave);
- memset(ca, 0, sizeof(struct chain_allocator));
-}
-
/**
* Data types related to memory bitmaps.
*
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
struct bm_block {
- struct bm_block *next; /* next element of the list */
+ struct list_head hook; /* hook into a list of bitmap blocks */
unsigned long start_pfn; /* pfn represented by the first bit */
unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
unsigned long *data; /* bitmap representing pages */
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb)
return bb->end_pfn - bb->start_pfn;
}
-struct zone_bitmap {
- struct zone_bitmap *next; /* next element of the list */
- unsigned long start_pfn; /* minimal pfn in this zone */
- unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
- struct bm_block *bm_blocks; /* list of bitmap blocks */
- struct bm_block *cur_block; /* recently used bitmap block */
-};
-
/* strcut bm_position is used for browsing memory bitmaps */
struct bm_position {
- struct zone_bitmap *zone_bm;
struct bm_block *block;
int bit;
};
struct memory_bitmap {
- struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */
+ struct list_head blocks; /* list of bitmap blocks */
struct linked_page *p_list; /* list of pages used to store zone
* bitmap objects and bitmap block
* objects
@@ -273,11 +259,7 @@ struct memory_bitmap {
static void memory_bm_position_reset(struct memory_bitmap *bm)
{
- struct zone_bitmap *zone_bm;
-
- zone_bm = bm->zone_bm_list;
- bm->cur.zone_bm = zone_bm;
- bm->cur.block = zone_bm->bm_blocks;
+ bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
bm->cur.bit = 0;
}
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
/**
* create_bm_block_list - create a list of block bitmap objects
+ * @nr_blocks - number of blocks to allocate
+ * @list - list to put the allocated blocks into
+ * @ca - chain allocator to be used for allocating memory
*/
-
-static inline struct bm_block *
-create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
+static int create_bm_block_list(unsigned long pages,
+ struct list_head *list,
+ struct chain_allocator *ca)
{
- struct bm_block *bblist = NULL;
+ unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
while (nr_blocks-- > 0) {
struct bm_block *bb;
bb = chain_alloc(ca, sizeof(struct bm_block));
if (!bb)
- return NULL;
-
- bb->next = bblist;
- bblist = bb;
+ return -ENOMEM;
+ list_add(&bb->hook, list);
}
- return bblist;
+
+ return 0;
}
+struct mem_extent {
+ struct list_head hook;
+ unsigned long start;
+ unsigned long end;
+};
+
/**
- * create_zone_bm_list - create a list of zone bitmap objects
+ * free_mem_extents - free a list of memory extents
+ * @list - list of extents to empty
*/
+static void free_mem_extents(struct list_head *list)
+{
+ struct mem_extent *ext, *aux;
-static inline struct zone_bitmap *
-create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
+ list_for_each_entry_safe(ext, aux, list, hook) {
+ list_del(&ext->hook);
+ kfree(ext);
+ }
+}
+
+/**
+ * create_mem_extents - create a list of memory extents representing
+ * contiguous ranges of PFNs
+ * @list - list to put the extents into
+ * @gfp_mask - mask to use for memory allocations
+ */
+static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
- struct zone_bitmap *zbmlist = NULL;
+ struct zone *zone;
- while (nr_zones-- > 0) {
- struct zone_bitmap *zbm;
+ INIT_LIST_HEAD(list);
- zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
- if (!zbm)
- return NULL;
+ for_each_zone(zone) {
+ unsigned long zone_start, zone_end;
+ struct mem_extent *ext, *cur, *aux;
+
+ if (!populated_zone(zone))
+ continue;
- zbm->next = zbmlist;
- zbmlist = zbm;
+ zone_start = zone->zone_start_pfn;
+ zone_end = zone->zone_start_pfn + zone->spanned_pages;
+
+ list_for_each_entry(ext, list, hook)
+ if (zone_start <= ext->end)
+ break;
+
+ if (&ext->hook == list || zone_end < ext->start) {
+ /* New extent is necessary */
+ struct mem_extent *new_ext;
+
+ new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
+ if (!new_ext) {
+ free_mem_extents(list);
+ return -ENOMEM;
+ }
+ new_ext->start = zone_start;
+ new_ext->end = zone_end;
+ list_add_tail(&new_ext->hook, &ext->hook);
+ continue;
+ }
+
+ /* Merge this zone's range of PFNs with the existing one */
+ if (zone_start < ext->start)
+ ext->start = zone_start;
+ if (zone_end > ext->end)
+ ext->end = zone_end;
+
+ /* More merging may be possible */
+ cur = ext;
+ list_for_each_entry_safe_continue(cur, aux, list, hook) {
+ if (zone_end < cur->start)
+ break;
+ if (zone_end < cur->end)
+ ext->end = cur->end;
+ list_del(&cur->hook);
+ kfree(cur);
+ }
}
- return zbmlist;
+
+ return 0;
}
/**
* memory_bm_create - allocate memory for a memory bitmap
*/
-
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
struct chain_allocator ca;
- struct zone *zone;
- struct zone_bitmap *zone_bm;
- struct bm_block *bb;
- unsigned int nr;
+ struct list_head mem_extents;
+ struct mem_extent *ext;
+ int error;
chain_init(&ca, gfp_mask, safe_needed);
+ INIT_LIST_HEAD(&bm->blocks);
- /* Compute the number of zones */
- nr = 0;
- for_each_zone(zone)
- if (populated_zone(zone))
- nr++;
-
- /* Allocate the list of zones bitmap objects */
- zone_bm = create_zone_bm_list(nr, &ca);
- bm->zone_bm_list = zone_bm;
- if (!zone_bm) {
- chain_free(&ca, PG_UNSAFE_CLEAR);
- return -ENOMEM;
- }
-
- /* Initialize the zone bitmap objects */
- for_each_zone(zone) {
- unsigned long pfn;
+ error = create_mem_extents(&mem_extents, gfp_mask);
+ if (error)
+ return error;
- if (!populated_zone(zone))
- continue;
+ list_for_each_entry(ext, &mem_extents, hook) {
+ struct bm_block *bb;
+ unsigned long pfn = ext->start;
+ unsigned long pages = ext->end - ext->start;
- zone_bm->start_pfn = zone->zone_start_pfn;
- zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
- /* Allocate the list of bitmap block objects */
- nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
- bb = create_bm_block_list(nr, &ca);
- zone_bm->bm_blocks = bb;
- zone_bm->cur_block = bb;
- if (!bb)
- goto Free;
+ bb = list_entry(bm->blocks.prev, struct bm_block, hook);
- nr = zone->spanned_pages;
- pfn = zone->zone_start_pfn;
- /* Initialize the bitmap block objects */
- while (bb) {
- unsigned long *ptr;
+ error = create_bm_block_list(pages, bm->blocks.prev, &ca);
+ if (error)
+ goto Error;
- ptr = get_image_page(gfp_mask, safe_needed);
- bb->data = ptr;
- if (!ptr)
- goto Free;
+ list_for_each_entry_continue(bb, &bm->blocks, hook) {
+ bb->data = get_image_page(gfp_mask, safe_needed);
+ if (!bb->data) {
+ error = -ENOMEM;
+ goto Error;
+ }
bb->start_pfn = pfn;
- if (nr >= BM_BITS_PER_BLOCK) {
+ if (pages >= BM_BITS_PER_BLOCK) {
pfn += BM_BITS_PER_BLOCK;
- nr -= BM_BITS_PER_BLOCK;
+ pages -= BM_BITS_PER_BLOCK;
} else {
/* This is executed only once in the loop */
- pfn += nr;
+ pfn += pages;
}
bb->end_pfn = pfn;
- bb = bb->next;
}
- zone_bm = zone_bm->next;
}
+
bm->p_list = ca.chain;
memory_bm_position_reset(bm);
- return 0;
+ Exit:
+ free_mem_extents(&mem_extents);
+ return error;
- Free:
+ Error:
bm->p_list = ca.chain;
memory_bm_free(bm, PG_UNSAFE_CLEAR);
- return -ENOMEM;
+ goto Exit;
}
/**
* memory_bm_free - free memory occupied by the memory bitmap @bm
*/
-
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
- struct zone_bitmap *zone_bm;
+ struct bm_block *bb;
- /* Free the list of bit blocks for each zone_bitmap object */
- zone_bm = bm->zone_bm_list;
- while (zone_bm) {
- struct bm_block *bb;
+ list_for_each_entry(bb, &bm->blocks, hook)
+ if (bb->data)
+ free_image_page(bb->data, clear_nosave_free);
- bb = zone_bm->bm_blocks;
- while (bb) {
- if (bb->data)
- free_image_page(bb->data, clear_nosave_free);
- bb = bb->next;
- }
- zone_bm = zone_bm->next;
- }
free_list_of_pages(bm->p_list, clear_nosave_free);
- bm->zone_bm_list = NULL;
+
+ INIT_LIST_HEAD(&bm->blocks);
}
/**
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
* to given pfn. The cur_zone_bm member of @bm and the cur_block member
* of @bm->cur_zone_bm are updated.
*/
-
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
void **addr, unsigned int *bit_nr)
{
- struct zone_bitmap *zone_bm;
struct bm_block *bb;
- /* Check if the pfn is from the current zone */
- zone_bm = bm->cur.zone_bm;
- if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
- zone_bm = bm->zone_bm_list;
- /* We don't assume that the zones are sorted by pfns */
- while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
- zone_bm = zone_bm->next;
-
- if (!zone_bm)
- return -EFAULT;
- }
- bm->cur.zone_bm = zone_bm;
- }
- /* Check if the pfn corresponds to the current bitmap block */
- bb = zone_bm->cur_block;
+ /*
+ * Check if the pfn corresponds to the current bitmap block and find
+ * the block where it fits if this is not the case.
+ */
+ bb = bm->cur.block;
if (pfn < bb->start_pfn)
- bb = zone_bm->bm_blocks;
+ list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
+ if (pfn >= bb->start_pfn)
+ break;
- while (pfn >= bb->end_pfn) {
- bb = bb->next;
+ if (pfn >= bb->end_pfn)
+ list_for_each_entry_continue(bb, &bm->blocks, hook)
+ if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
+ break;
- BUG_ON(!bb);
- }
- zone_bm->cur_block = bb;
+ if (&bb->hook == &bm->blocks)
+ return -EFAULT;
+
+ /* The block has been found */
+ bm->cur.block = bb;
pfn -= bb->start_pfn;
+ bm->cur.bit = pfn + 1;
*bit_nr = pfn;
*addr = bb->data;
return 0;
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
return test_bit(bit, addr);
}
+static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+
+ return !memory_bm_find_bit(bm, pfn, &addr, &bit);
+}
+
/**
* memory_bm_next_pfn - find the pfn that corresponds to the next set bit
* in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
- struct zone_bitmap *zone_bm;
struct bm_block *bb;
int bit;
+ bb = bm->cur.block;
do {
- bb = bm->cur.block;
- do {
- bit = bm->cur.bit;
- bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
- if (bit < bm_block_bits(bb))
- goto Return_pfn;
-
- bb = bb->next;
- bm->cur.block = bb;
- bm->cur.bit = 0;
- } while (bb);
- zone_bm = bm->cur.zone_bm->next;
- if (zone_bm) {
- bm->cur.zone_bm = zone_bm;
- bm->cur.block = zone_bm->bm_blocks;
- bm->cur.bit = 0;
- }
- } while (zone_bm);
+ bit = bm->cur.bit;
+ bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
+ if (bit < bm_block_bits(bb))
+ goto Return_pfn;
+
+ bb = list_entry(bb->hook.next, struct bm_block, hook);
+ bm->cur.block = bb;
+ bm->cur.bit = 0;
+ } while (&bb->hook != &bm->blocks);
+
memory_bm_position_reset(bm);
return BM_END_OF_MAP;
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void)
* We should save the page if it isn't Nosave or NosaveFree, or Reserved,
* and it isn't a part of a free chunk of pages.
*/
-
-static struct page *saveable_highmem_page(unsigned long pfn)
+static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
struct page *page;
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
return NULL;
page = pfn_to_page(pfn);
+ if (page_zone(page) != zone)
+ return NULL;
BUG_ON(!PageHighMem(page));
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void)
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
- if (saveable_highmem_page(pfn))
+ if (saveable_highmem_page(zone, pfn))
n++;
}
return n;
}
#else
-static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
+static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
+{
+ return NULL;
+}
#endif /* CONFIG_HIGHMEM */
/**
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
* of pages statically defined as 'unsaveable', and it isn't a part of
* a free chunk of pages.
*/
-
-static struct page *saveable_page(unsigned long pfn)
+static struct page *saveable_page(struct zone *zone, unsigned long pfn)
{
struct page *page;
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn)
return NULL;
page = pfn_to_page(pfn);
+ if (page_zone(page) != zone)
+ return NULL;
BUG_ON(PageHighMem(page));
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void)
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
- if(saveable_page(pfn))
+ if (saveable_page(zone, pfn))
n++;
}
return n;
@@ -944,7 +959,7 @@ static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
return is_highmem(zone) ?
- saveable_highmem_page(pfn) : saveable_page(pfn);
+ saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}
static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
* data modified by kmap_atomic()
*/
safe_copy_page(buffer, s_page);
- dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
+ dst = kmap_atomic(d_page, KM_USER0);
memcpy(dst, buffer, PAGE_SIZE);
kunmap_atomic(dst, KM_USER0);
} else {
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
}
}
#else
-#define page_is_saveable(zone, pfn) saveable_page(pfn)
+#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info)
* unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
* the corresponding bit in the memory bitmap @bm
*/
-
-static inline void
-unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
int j;
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
if (unlikely(buf[j] == BM_END_OF_MAP))
break;
- memory_bm_set_bit(bm, buf[j]);
+ if (memory_bm_pfn_present(bm, buf[j]))
+ memory_bm_set_bit(bm, buf[j]);
+ else
+ return -EFAULT;
}
+
+ return 0;
}
/* List of "safe" pages that may be used to store data loaded from the suspend
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
if (!pbe) {
swsusp_free();
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
pbe->orig_page = page;
if (safe_highmem_pages > 0) {
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
static inline void *
get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
{
- return NULL;
+ return ERR_PTR(-EINVAL);
}
static inline void copy_last_highmem_page(void) {}
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
struct pbe *pbe;
- struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
+ struct page *page;
+ unsigned long pfn = memory_bm_next_pfn(bm);
+ if (pfn == BM_END_OF_MAP)
+ return ERR_PTR(-EFAULT);
+
+ page = pfn_to_page(pfn);
if (PageHighMem(page))
return get_highmem_page_buffer(page, ca);
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
pbe = chain_alloc(ca, sizeof(struct pbe));
if (!pbe) {
swsusp_free();
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
pbe->orig_address = page_address(page);
pbe->address = safe_pages_list;
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return error;
} else if (handle->prev <= nr_meta_pages) {
- unpack_orig_pfns(buffer, &copy_bm);
+ error = unpack_orig_pfns(buffer, &copy_bm);
+ if (error)
+ return error;
+
if (handle->prev == nr_meta_pages) {
error = prepare_image(&orig_bm, &copy_bm);
if (error)
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
restore_pblist = NULL;
handle->buffer = get_buffer(&orig_bm, &ca);
handle->sync_read = 0;
- if (!handle->buffer)
- return -ENOMEM;
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
}
} else {
copy_last_highmem_page();
handle->buffer = get_buffer(&orig_bm, &ca);
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
if (handle->buffer != buffer)
handle->sync_read = 0;
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 023ff2a31d8..a92c9145155 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void)
return 0;
}
+
+/*
+ * Platforms, like ACPI, may want us to save some memory used by them during
+ * hibernation and to restore the contents of this memory during the subsequent
+ * resume. The code below implements a mechanism allowing us to do that.
+ */
+
+struct nvs_page {
+ unsigned long phys_start;
+ unsigned int size;
+ void *kaddr;
+ void *data;
+ struct list_head node;
+};
+
+static LIST_HEAD(nvs_list);
+
+/**
+ * hibernate_nvs_register - register platform NVS memory region to save
+ * @start - physical address of the region
+ * @size - size of the region
+ *
+ * The NVS region need not be page-aligned (both ends) and we arrange
+ * things so that the data from page-aligned addresses in this region will
+ * be copied into separate RAM pages.
+ */
+int hibernate_nvs_register(unsigned long start, unsigned long size)
+{
+ struct nvs_page *entry, *next;
+
+ while (size > 0) {
+ unsigned int nr_bytes;
+
+ entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
+ if (!entry)
+ goto Error;
+
+ list_add_tail(&entry->node, &nvs_list);
+ entry->phys_start = start;
+ nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
+ entry->size = (size < nr_bytes) ? size : nr_bytes;
+
+ start += entry->size;
+ size -= entry->size;
+ }
+ return 0;
+
+ Error:
+ list_for_each_entry_safe(entry, next, &nvs_list, node) {
+ list_del(&entry->node);
+ kfree(entry);
+ }
+ return -ENOMEM;
+}
+
+/**
+ * hibernate_nvs_free - free data pages allocated for saving NVS regions
+ */
+void hibernate_nvs_free(void)
+{
+ struct nvs_page *entry;
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data) {
+ free_page((unsigned long)entry->data);
+ entry->data = NULL;
+ if (entry->kaddr) {
+ iounmap(entry->kaddr);
+ entry->kaddr = NULL;
+ }
+ }
+}
+
+/**
+ * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ */
+int hibernate_nvs_alloc(void)
+{
+ struct nvs_page *entry;
+
+ list_for_each_entry(entry, &nvs_list, node) {
+ entry->data = (void *)__get_free_page(GFP_KERNEL);
+ if (!entry->data) {
+ hibernate_nvs_free();
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/**
+ * hibernate_nvs_save - save NVS memory regions
+ */
+void hibernate_nvs_save(void)
+{
+ struct nvs_page *entry;
+
+ printk(KERN_INFO "PM: Saving platform NVS memory\n");
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data) {
+ entry->kaddr = ioremap(entry->phys_start, entry->size);
+ memcpy(entry->data, entry->kaddr, entry->size);
+ }
+}
+
+/**
+ * hibernate_nvs_restore - restore NVS memory regions
+ *
+ * This function is going to be called with interrupts disabled, so it
+ * cannot iounmap the virtual addresses used to access the NVS region.
+ */
+void hibernate_nvs_restore(void)
+{
+ struct nvs_page *entry;
+
+ printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+
+ list_for_each_entry(entry, &nvs_list, node)
+ if (entry->data)
+ memcpy(entry->kaddr, entry->data, entry->size);
+}
diff --git a/kernel/printk.c b/kernel/printk.c
index e651ab05655..69188f226a9 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -382,7 +382,7 @@ out:
return error;
}
-asmlinkage long sys_syslog(int type, char __user *buf, int len)
+SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
return do_syslog(type, buf, len);
}
@@ -619,7 +619,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu)
static const char recursion_bug_msg [] =
KERN_CRIT "BUG: recent printk recursion!\n";
static int recursion_bug;
- static int new_text_line = 1;
+static int new_text_line = 1;
static char printk_buf[1024];
asmlinkage int vprintk(const char *fmt, va_list args)
@@ -742,11 +742,6 @@ EXPORT_SYMBOL(vprintk);
#else
-asmlinkage long sys_syslog(int type, char __user *buf, int len)
-{
- return -ENOSYS;
-}
-
static void call_console_drivers(unsigned start, unsigned end)
{
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 29dc700e198..c9cf48b21f0 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -574,7 +574,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
#define arch_ptrace_attach(child) do { } while (0)
#endif
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
{
struct task_struct *child;
long ret;
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 1cff28db56b..7c4142a79f0 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -136,29 +136,47 @@ static int stutter_pause_test = 0;
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
-#define FULLSTOP_SHUTDOWN 1 /* Bail due to system shutdown/panic. */
-#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */
-static int fullstop; /* stop generating callbacks at test end. */
-DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */
- /* spawning of kthreads. */
+/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
+
+#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
+#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */
+#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */
+static int fullstop = FULLSTOP_RMMOD;
+DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */
+ /* of kthreads. */
/*
- * Detect and respond to a signal-based shutdown.
+ * Detect and respond to a system shutdown.
*/
static int
rcutorture_shutdown_notify(struct notifier_block *unused1,
unsigned long unused2, void *unused3)
{
- if (fullstop)
- return NOTIFY_DONE;
mutex_lock(&fullstop_mutex);
- if (!fullstop)
+ if (fullstop == FULLSTOP_DONTSTOP)
fullstop = FULLSTOP_SHUTDOWN;
+ else
+ printk(KERN_WARNING /* but going down anyway, so... */
+ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
return NOTIFY_DONE;
}
/*
+ * Absorb kthreads into a kernel function that won't return, so that
+ * they won't ever access module text or data again.
+ */
+static void rcutorture_shutdown_absorb(char *title)
+{
+ if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
+ printk(KERN_NOTICE
+ "rcutorture thread %s parking due to system shutdown\n",
+ title);
+ schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
+ }
+}
+
+/*
* Allocate an element from the rcu_tortures pool.
*/
static struct rcu_torture *
@@ -219,13 +237,14 @@ rcu_random(struct rcu_random_state *rrsp)
}
static void
-rcu_stutter_wait(void)
+rcu_stutter_wait(char *title)
{
- while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) {
+ while (stutter_pause_test || !rcutorture_runnable) {
if (rcutorture_runnable)
schedule_timeout_interruptible(1);
else
schedule_timeout_interruptible(round_jiffies_relative(HZ));
+ rcutorture_shutdown_absorb(title);
}
}
@@ -287,7 +306,7 @@ rcu_torture_cb(struct rcu_head *p)
int i;
struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
- if (fullstop) {
+ if (fullstop != FULLSTOP_DONTSTOP) {
/* Test is ending, just drop callbacks on the floor. */
/* The next initialization will pick up the pieces. */
return;
@@ -619,10 +638,11 @@ rcu_torture_writer(void *arg)
}
rcu_torture_current_version++;
oldbatch = cur_ops->completed();
- rcu_stutter_wait();
- } while (!kthread_should_stop() && !fullstop);
+ rcu_stutter_wait("rcu_torture_writer");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
- while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN)
+ rcutorture_shutdown_absorb("rcu_torture_writer");
+ while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -643,11 +663,12 @@ rcu_torture_fakewriter(void *arg)
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
cur_ops->sync();
- rcu_stutter_wait();
- } while (!kthread_should_stop() && !fullstop);
+ rcu_stutter_wait("rcu_torture_fakewriter");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
- while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN)
+ rcutorture_shutdown_absorb("rcu_torture_fakewriter");
+ while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -752,12 +773,13 @@ rcu_torture_reader(void *arg)
preempt_enable();
cur_ops->readunlock(idx);
schedule();
- rcu_stutter_wait();
- } while (!kthread_should_stop() && !fullstop);
+ rcu_stutter_wait("rcu_torture_reader");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
+ rcutorture_shutdown_absorb("rcu_torture_reader");
if (irqreader && cur_ops->irqcapable)
del_timer_sync(&t);
- while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN)
+ while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -854,7 +876,8 @@ rcu_torture_stats(void *arg)
do {
schedule_timeout_interruptible(stat_interval * HZ);
rcu_torture_stats_print();
- } while (!kthread_should_stop() && !fullstop);
+ rcutorture_shutdown_absorb("rcu_torture_stats");
+ } while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
return 0;
}
@@ -866,52 +889,49 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
*/
static void rcu_torture_shuffle_tasks(void)
{
- cpumask_var_t tmp_mask;
+ cpumask_t tmp_mask;
int i;
- if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
- BUG();
-
- cpumask_setall(tmp_mask);
+ cpus_setall(tmp_mask);
get_online_cpus();
/* No point in shuffling if there is only one online CPU (ex: UP) */
- if (num_online_cpus() == 1)
- goto out;
+ if (num_online_cpus() == 1) {
+ put_online_cpus();
+ return;
+ }
if (rcu_idle_cpu != -1)
- cpumask_clear_cpu(rcu_idle_cpu, tmp_mask);
+ cpu_clear(rcu_idle_cpu, tmp_mask);
- set_cpus_allowed_ptr(current, tmp_mask);
+ set_cpus_allowed_ptr(current, &tmp_mask);
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
if (reader_tasks[i])
set_cpus_allowed_ptr(reader_tasks[i],
- tmp_mask);
+ &tmp_mask);
}
if (fakewriter_tasks) {
for (i = 0; i < nfakewriters; i++)
if (fakewriter_tasks[i])
set_cpus_allowed_ptr(fakewriter_tasks[i],
- tmp_mask);
+ &tmp_mask);
}
if (writer_task)
- set_cpus_allowed_ptr(writer_task, tmp_mask);
+ set_cpus_allowed_ptr(writer_task, &tmp_mask);
if (stats_task)
- set_cpus_allowed_ptr(stats_task, tmp_mask);
+ set_cpus_allowed_ptr(stats_task, &tmp_mask);
if (rcu_idle_cpu == -1)
rcu_idle_cpu = num_online_cpus() - 1;
else
rcu_idle_cpu--;
-out:
put_online_cpus();
- free_cpumask_var(tmp_mask);
}
/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
@@ -925,7 +945,8 @@ rcu_torture_shuffle(void *arg)
do {
schedule_timeout_interruptible(shuffle_interval * HZ);
rcu_torture_shuffle_tasks();
- } while (!kthread_should_stop() && !fullstop);
+ rcutorture_shutdown_absorb("rcu_torture_shuffle");
+ } while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
return 0;
}
@@ -940,10 +961,11 @@ rcu_torture_stutter(void *arg)
do {
schedule_timeout_interruptible(stutter * HZ);
stutter_pause_test = 1;
- if (!kthread_should_stop() && !fullstop)
+ if (!kthread_should_stop())
schedule_timeout_interruptible(stutter * HZ);
stutter_pause_test = 0;
- } while (!kthread_should_stop() && !fullstop);
+ rcutorture_shutdown_absorb("rcu_torture_stutter");
+ } while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
return 0;
}
@@ -970,15 +992,16 @@ rcu_torture_cleanup(void)
int i;
mutex_lock(&fullstop_mutex);
- if (!fullstop) {
- /* If being signaled, let it happen, then exit. */
+ if (fullstop == FULLSTOP_SHUTDOWN) {
+ printk(KERN_WARNING /* but going down anyway, so... */
+ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
- schedule_timeout_interruptible(10 * HZ);
+ schedule_timeout_uninterruptible(10);
if (cur_ops->cb_barrier != NULL)
cur_ops->cb_barrier();
return;
}
- fullstop = FULLSTOP_CLEANUP;
+ fullstop = FULLSTOP_RMMOD;
mutex_unlock(&fullstop_mutex);
unregister_reboot_notifier(&rcutorture_nb);
if (stutter_task) {
@@ -1078,7 +1101,7 @@ rcu_torture_init(void)
else
nrealreaders = 2 * num_online_cpus();
rcu_torture_print_module_parms("Start of test");
- fullstop = 0;
+ fullstop = FULLSTOP_DONTSTOP;
/* Set up the freelist. */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index f275c8eca77..bf8e7534c80 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -15,10 +15,11 @@
#include <linux/uaccess.h>
#include <linux/mm.h>
-void res_counter_init(struct res_counter *counter)
+void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{
spin_lock_init(&counter->lock);
counter->limit = (unsigned long long)LLONG_MAX;
+ counter->parent = parent;
}
int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
return 0;
}
-int res_counter_charge(struct res_counter *counter, unsigned long val)
+int res_counter_charge(struct res_counter *counter, unsigned long val,
+ struct res_counter **limit_fail_at)
{
int ret;
unsigned long flags;
-
- spin_lock_irqsave(&counter->lock, flags);
- ret = res_counter_charge_locked(counter, val);
- spin_unlock_irqrestore(&counter->lock, flags);
+ struct res_counter *c, *u;
+
+ *limit_fail_at = NULL;
+ local_irq_save(flags);
+ for (c = counter; c != NULL; c = c->parent) {
+ spin_lock(&c->lock);
+ ret = res_counter_charge_locked(c, val);
+ spin_unlock(&c->lock);
+ if (ret < 0) {
+ *limit_fail_at = c;
+ goto undo;
+ }
+ }
+ ret = 0;
+ goto done;
+undo:
+ for (u = counter; u != c; u = u->parent) {
+ spin_lock(&u->lock);
+ res_counter_uncharge_locked(u, val);
+ spin_unlock(&u->lock);
+ }
+done:
+ local_irq_restore(flags);
return ret;
}
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
void res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
unsigned long flags;
+ struct res_counter *c;
- spin_lock_irqsave(&counter->lock, flags);
- res_counter_uncharge_locked(counter, val);
- spin_unlock_irqrestore(&counter->lock, flags);
+ local_irq_save(flags);
+ for (c = counter; c != NULL; c = c->parent) {
+ spin_lock(&c->lock);
+ res_counter_uncharge_locked(c, val);
+ spin_unlock(&c->lock);
+ }
+ local_irq_restore(flags);
}
diff --git a/kernel/resource.c b/kernel/resource.c
index e633106b12f..fd5d7d574bb 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -620,10 +620,11 @@ resource_size_t resource_alignment(struct resource *res)
* @start: resource start address
* @n: resource region size
* @name: reserving caller's ID string
+ * @flags: IO resource flags
*/
struct resource * __request_region(struct resource *parent,
resource_size_t start, resource_size_t n,
- const char *name)
+ const char *name, int flags)
{
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
@@ -634,6 +635,7 @@ struct resource * __request_region(struct resource *parent,
res->start = start;
res->end = start + n - 1;
res->flags = IORESOURCE_BUSY;
+ res->flags |= flags;
write_lock(&resource_lock);
@@ -679,7 +681,7 @@ int __check_region(struct resource *parent, resource_size_t start,
{
struct resource * res;
- res = __request_region(parent, start, n, "check-region");
+ res = __request_region(parent, start, n, "check-region", 0);
if (!res)
return -EBUSY;
@@ -776,7 +778,7 @@ struct resource * __devm_request_region(struct device *dev,
dr->start = start;
dr->n = n;
- res = __request_region(parent, start, n, name);
+ res = __request_region(parent, start, n, name, 0);
if (res)
devres_add(dev, dr);
else
@@ -876,3 +878,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
return err;
}
+
+#ifdef CONFIG_STRICT_DEVMEM
+static int strict_iomem_checks = 1;
+#else
+static int strict_iomem_checks;
+#endif
+
+/*
+ * check if an address is reserved in the iomem resource tree
+ * returns 1 if reserved, 0 if not reserved.
+ */
+int iomem_is_exclusive(u64 addr)
+{
+ struct resource *p = &iomem_resource;
+ int err = 0;
+ loff_t l;
+ int size = PAGE_SIZE;
+
+ if (!strict_iomem_checks)
+ return 0;
+
+ addr = addr & PAGE_MASK;
+
+ read_lock(&resource_lock);
+ for (p = p->child; p ; p = r_next(NULL, p, &l)) {
+ /*
+ * We can probably skip the resources without
+ * IORESOURCE_IO attribute?
+ */
+ if (p->start >= addr + size)
+ break;
+ if (p->end < addr)
+ continue;
+ if (p->flags & IORESOURCE_BUSY &&
+ p->flags & IORESOURCE_EXCLUSIVE) {
+ err = 1;
+ break;
+ }
+ }
+ read_unlock(&resource_lock);
+
+ return err;
+}
+
+static int __init strict_iomem(char *str)
+{
+ if (strstr(str, "relaxed"))
+ strict_iomem_checks = 0;
+ if (strstr(str, "strict"))
+ strict_iomem_checks = 1;
+ return 1;
+}
+
+__setup("iomem=", strict_iomem);
diff --git a/kernel/sched.c b/kernel/sched.c
index 2e3545f57e7..52bbf1c842a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -125,6 +125,9 @@ DEFINE_TRACE(sched_switch);
DEFINE_TRACE(sched_migrate_task);
#ifdef CONFIG_SMP
+
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
* Since cpu_power is a 'constant', we can use a reciprocal divide.
@@ -1320,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
* slice expiry etc.
*/
-#define WEIGHT_IDLEPRIO 2
-#define WMULT_IDLEPRIO (1 << 31)
+#define WEIGHT_IDLEPRIO 3
+#define WMULT_IDLEPRIO 1431655765
/*
* Nice levels are multiplicative, with a gentle 10% change for every
@@ -3728,8 +3731,13 @@ redo:
}
double_unlock_balance(this_rq, busiest);
+ /*
+ * Should not call ttwu while holding a rq->lock
+ */
+ spin_unlock(&this_rq->lock);
if (active_balance)
wake_up_process(busiest->migration_thread);
+ spin_lock(&this_rq->lock);
} else
sd->nr_balance_failed = 0;
@@ -4432,7 +4440,7 @@ void __kprobes sub_preempt_count(int val)
/*
* Underflow?
*/
- if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
return;
/*
* Is the spinlock portion underflowing?
@@ -5118,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice)
* sys_setpriority is a more generic, but much slower function that
* does similar things.
*/
-asmlinkage long sys_nice(int increment)
+SYSCALL_DEFINE1(nice, int, increment)
{
long nice, retval;
@@ -5425,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @policy: new policy.
* @param: structure containing the new RT priority.
*/
-asmlinkage long
-sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
+ struct sched_param __user *, param)
{
/* negative values for policy are not valid */
if (policy < 0)
@@ -5440,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
* @pid: the pid in question.
* @param: structure containing the new RT priority.
*/
-asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
{
return do_sched_setscheduler(pid, -1, param);
}
@@ -5449,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
* sys_sched_getscheduler - get the policy (scheduling class) of a thread
* @pid: the pid in question.
*/
-asmlinkage long sys_sched_getscheduler(pid_t pid)
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
{
struct task_struct *p;
int retval;
@@ -5474,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid)
* @pid: the pid in question.
* @param: structure containing the RT priority.
*/
-asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
{
struct sched_param lp;
struct task_struct *p;
@@ -5592,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to the new cpu mask
*/
-asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
cpumask_var_t new_mask;
int retval;
@@ -5640,8 +5648,8 @@ out_unlock:
* @len: length in bytes of the bitmask pointed to by user_mask_ptr
* @user_mask_ptr: user-space pointer to hold the current cpu mask
*/
-asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
- unsigned long __user *user_mask_ptr)
+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ unsigned long __user *, user_mask_ptr)
{
int ret;
cpumask_var_t mask;
@@ -5670,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
* This function yields the current CPU to other tasks. If there are no
* other threads running on this CPU then this function will return.
*/
-asmlinkage long sys_sched_yield(void)
+SYSCALL_DEFINE0(sched_yield)
{
struct rq *rq = this_rq_lock();
@@ -5811,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout)
* this syscall returns the maximum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_max(int policy)
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
{
int ret = -EINVAL;
@@ -5836,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
* this syscall returns the minimum rt_priority that can be used
* by a given scheduling class.
*/
-asmlinkage long sys_sched_get_priority_min(int policy)
+SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
{
int ret = -EINVAL;
@@ -5861,8 +5869,8 @@ asmlinkage long sys_sched_get_priority_min(int policy)
* this syscall writes the default timeslice value of a given process
* into the user-space timespec buffer. A value of '0' means infinity.
*/
-asmlinkage
-long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ struct timespec __user *, interval)
{
struct task_struct *p;
unsigned int time_slice;
@@ -7277,10 +7285,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
* groups, so roll our own. Now each node has its own list of groups which
* gets dynamically allocated.
*/
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
static struct sched_group ***sched_group_nodes_bycpu;
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
@@ -7555,7 +7563,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
#ifdef CONFIG_NUMA
if (cpumask_weight(cpu_map) >
SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- sd = &per_cpu(allnodes_domains, i);
+ sd = &per_cpu(allnodes_domains, i).sd;
SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
cpumask_copy(sched_domain_span(sd), cpu_map);
@@ -7565,7 +7573,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
} else
p = NULL;
- sd = &per_cpu(node_domains, i);
+ sd = &per_cpu(node_domains, i).sd;
SD_INIT(sd, NODE);
set_domain_attribute(sd, attr);
sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
@@ -7683,7 +7691,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
for_each_cpu(j, nodemask) {
struct sched_domain *sd;
- sd = &per_cpu(node_domains, j);
+ sd = &per_cpu(node_domains, j).sd;
sd->groups = sg;
}
sg->__cpu_power = 0;
@@ -9042,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data)
runtime = d->rt_runtime;
}
+#ifdef CONFIG_USER_SCHED
+ if (tg == &root_task_group) {
+ period = global_rt_period();
+ runtime = global_rt_runtime();
+ }
+#endif
+
/*
* Cannot have more runtime than the period.
*/
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4293cfa9681..16eeba4e416 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -145,6 +145,19 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
read_unlock_irqrestore(&tasklist_lock, flags);
}
+#if defined(CONFIG_CGROUP_SCHED) && \
+ (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
+static void task_group_path(struct task_group *tg, char *buf, int buflen)
+{
+ /* may be NULL if the underlying cgroup isn't fully-created yet */
+ if (!tg->css.cgroup) {
+ buf[0] = '\0';
+ return;
+ }
+ cgroup_path(tg->css.cgroup, buf, buflen);
+}
+#endif
+
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -154,10 +167,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
unsigned long flags;
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
- char path[128] = "";
+ char path[128];
struct task_group *tg = cfs_rq->tg;
- cgroup_path(tg->css.cgroup, path, sizeof(path));
+ task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
@@ -208,10 +221,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
- char path[128] = "";
+ char path[128];
struct task_group *tg = rt_rq->tg;
- cgroup_path(tg->css.cgroup, path, sizeof(path));
+ task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
#else
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e0c0b4bc3f0..5cc1c162044 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
struct sched_entity,
run_node);
- if (vruntime == cfs_rq->min_vruntime)
+ if (!cfs_rq->curr)
vruntime = se->vruntime;
else
vruntime = min_vruntime(vruntime, se->vruntime);
@@ -429,7 +429,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
for_each_sched_entity(se) {
- struct load_weight *load = &cfs_rq->load;
+ struct load_weight *load;
+
+ cfs_rq = cfs_rq_of(se);
+ load = &cfs_rq->load;
if (unlikely(!se->on_rq)) {
struct load_weight lw = cfs_rq->load;
@@ -677,9 +680,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
unsigned long thresh = sysctl_sched_latency;
/*
- * convert the sleeper threshold into virtual time
+ * Convert the sleeper threshold into virtual time.
+ * SCHED_IDLE is a special sub-class. We care about
+ * fairness only relative to other SCHED_IDLE tasks,
+ * all of which have the same weight.
*/
- if (sched_feat(NORMALIZED_SLEEPER))
+ if (sched_feat(NORMALIZED_SLEEPER) &&
+ task_of(se)->policy != SCHED_IDLE)
thresh = calc_delta_fair(thresh, se);
vruntime -= thresh;
@@ -1340,14 +1347,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
static void set_last_buddy(struct sched_entity *se)
{
- for_each_sched_entity(se)
- cfs_rq_of(se)->last = se;
+ if (likely(task_of(se)->policy != SCHED_IDLE)) {
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->last = se;
+ }
}
static void set_next_buddy(struct sched_entity *se)
{
- for_each_sched_entity(se)
- cfs_rq_of(se)->next = se;
+ if (likely(task_of(se)->policy != SCHED_IDLE)) {
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->next = se;
+ }
}
/*
@@ -1393,12 +1404,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
return;
/*
- * Batch tasks do not preempt (their preemption is driven by
+ * Batch and idle tasks do not preempt (their preemption is driven by
* the tick):
*/
- if (unlikely(p->policy == SCHED_BATCH))
+ if (unlikely(p->policy != SCHED_NORMAL))
return;
+ /* Idle tasks are by definition preempted by everybody. */
+ if (unlikely(curr->policy == SCHED_IDLE)) {
+ resched_task(curr);
+ return;
+ }
+
if (!sched_feat(WAKEUP_PREEMPT))
return;
@@ -1617,8 +1634,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
}
}
-#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
-
/*
* Share the fairness runtime between parent and child, thus the
* total amount of pressure for CPU stays equal - new tasks
diff --git a/kernel/signal.c b/kernel/signal.c
index 3152ac3b62e..e73759783dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1961,7 +1961,7 @@ EXPORT_SYMBOL(unblock_all_signals);
* System call entry points.
*/
-asmlinkage long sys_restart_syscall(void)
+SYSCALL_DEFINE0(restart_syscall)
{
struct restart_block *restart = &current_thread_info()->restart_block;
return restart->fn(restart);
@@ -2014,8 +2014,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
return error;
}
-asmlinkage long
-sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize)
+SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
+ sigset_t __user *, oset, size_t, sigsetsize)
{
int error = -EINVAL;
sigset_t old_set, new_set;
@@ -2074,8 +2074,7 @@ out:
return error;
}
-asmlinkage long
-sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize)
+SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
{
return do_sigpending(set, sigsetsize);
}
@@ -2146,11 +2145,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
#endif
-asmlinkage long
-sys_rt_sigtimedwait(const sigset_t __user *uthese,
- siginfo_t __user *uinfo,
- const struct timespec __user *uts,
- size_t sigsetsize)
+SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
+ siginfo_t __user *, uinfo, const struct timespec __user *, uts,
+ size_t, sigsetsize)
{
int ret, sig;
sigset_t these;
@@ -2223,8 +2220,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
return ret;
}
-asmlinkage long
-sys_kill(pid_t pid, int sig)
+SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
@@ -2283,7 +2279,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
* exists but it's not belonging to the target process anymore. This
* method solves the problem of threads exiting and PIDs getting reused.
*/
-asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
+SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
{
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
@@ -2295,8 +2291,7 @@ asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
/*
* Send a signal to only one task, even if it's a CLONE_THREAD task.
*/
-asmlinkage long
-sys_tkill(pid_t pid, int sig)
+SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
{
/* This is only valid for single tasks */
if (pid <= 0)
@@ -2305,8 +2300,8 @@ sys_tkill(pid_t pid, int sig)
return do_tkill(0, pid, sig);
}
-asmlinkage long
-sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
+SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
+ siginfo_t __user *, uinfo)
{
siginfo_t info;
@@ -2434,8 +2429,7 @@ out:
#ifdef __ARCH_WANT_SYS_SIGPENDING
-asmlinkage long
-sys_sigpending(old_sigset_t __user *set)
+SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
{
return do_sigpending(set, sizeof(*set));
}
@@ -2446,8 +2440,8 @@ sys_sigpending(old_sigset_t __user *set)
/* Some platforms have their own version with special arguments others
support only sys_rt_sigprocmask. */
-asmlinkage long
-sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset)
+SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
+ old_sigset_t __user *, oset)
{
int error;
old_sigset_t old_set, new_set;
@@ -2497,11 +2491,10 @@ out:
#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
#ifdef __ARCH_WANT_SYS_RT_SIGACTION
-asmlinkage long
-sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize)
+SYSCALL_DEFINE4(rt_sigaction, int, sig,
+ const struct sigaction __user *, act,
+ struct sigaction __user *, oact,
+ size_t, sigsetsize)
{
struct k_sigaction new_sa, old_sa;
int ret = -EINVAL;
@@ -2531,15 +2524,13 @@ out:
/*
* For backwards compatibility. Functionality superseded by sigprocmask.
*/
-asmlinkage long
-sys_sgetmask(void)
+SYSCALL_DEFINE0(sgetmask)
{
/* SMP safe */
return current->blocked.sig[0];
}
-asmlinkage long
-sys_ssetmask(int newmask)
+SYSCALL_DEFINE1(ssetmask, int, newmask)
{
int old;
@@ -2559,8 +2550,7 @@ sys_ssetmask(int newmask)
/*
* For backwards compatibility. Functionality superseded by sigaction.
*/
-asmlinkage unsigned long
-sys_signal(int sig, __sighandler_t handler)
+SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
{
struct k_sigaction new_sa, old_sa;
int ret;
@@ -2577,8 +2567,7 @@ sys_signal(int sig, __sighandler_t handler)
#ifdef __ARCH_WANT_SYS_PAUSE
-asmlinkage long
-sys_pause(void)
+SYSCALL_DEFINE0(pause)
{
current->state = TASK_INTERRUPTIBLE;
schedule();
@@ -2588,7 +2577,7 @@ sys_pause(void)
#endif
#ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize)
+SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
{
sigset_t newset;
diff --git a/kernel/sys.c b/kernel/sys.c
index 763c3c17ded..e7dc0e10a48 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -143,7 +143,7 @@ out:
return error;
}
-asmlinkage long sys_setpriority(int which, int who, int niceval)
+SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
struct task_struct *g, *p;
struct user_struct *user;
@@ -208,7 +208,7 @@ out:
* has been offset by 20 (ie it returns 40..1 instead of -20..19)
* to stay compatible.
*/
-asmlinkage long sys_getpriority(int which, int who)
+SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
struct task_struct *g, *p;
struct user_struct *user;
@@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
*
* reboot doesn't sync: do that yourself before calling this.
*/
-asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
+SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
+ void __user *, arg)
{
char buffer[256];
@@ -478,7 +479,7 @@ void ctrl_alt_del(void)
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
-asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
+SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
const struct cred *old;
struct cred *new;
@@ -529,7 +530,7 @@ error:
*
* SMP: Same implicit races as above.
*/
-asmlinkage long sys_setgid(gid_t gid)
+SYSCALL_DEFINE1(setgid, gid_t, gid)
{
const struct cred *old;
struct cred *new;
@@ -597,7 +598,7 @@ static int set_user(struct cred *new)
* 100% compatible with BSD. A program which uses just setuid() will be
* 100% compatible with POSIX with saved IDs.
*/
-asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
+SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
const struct cred *old;
struct cred *new;
@@ -661,7 +662,7 @@ error:
* will allow a root program to temporarily drop privileges and be able to
* regain them by swapping the real and effective uid.
*/
-asmlinkage long sys_setuid(uid_t uid)
+SYSCALL_DEFINE1(setuid, uid_t, uid)
{
const struct cred *old;
struct cred *new;
@@ -705,7 +706,7 @@ error:
* This function implements a generic ability to update ruid, euid,
* and suid. This allows you to implement the 4.4 compatible seteuid().
*/
-asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
const struct cred *old;
struct cred *new;
@@ -756,7 +757,7 @@ error:
return retval;
}
-asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
+SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
{
const struct cred *cred = current_cred();
int retval;
@@ -771,7 +772,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us
/*
* Same as above, but for rgid, egid, sgid.
*/
-asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
+SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
const struct cred *old;
struct cred *new;
@@ -814,7 +815,7 @@ error:
return retval;
}
-asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
+SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
{
const struct cred *cred = current_cred();
int retval;
@@ -833,7 +834,7 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us
* whatever uid it wants to). It normally shadows "euid", except when
* explicitly set by setfsuid() or for access..
*/
-asmlinkage long sys_setfsuid(uid_t uid)
+SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
const struct cred *old;
struct cred *new;
@@ -870,7 +871,7 @@ change_okay:
/*
* Samma på svenska..
*/
-asmlinkage long sys_setfsgid(gid_t gid)
+SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
const struct cred *old;
struct cred *new;
@@ -919,7 +920,7 @@ void do_sys_times(struct tms *tms)
tms->tms_cstime = cputime_to_clock_t(cstime);
}
-asmlinkage long sys_times(struct tms __user * tbuf)
+SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
if (tbuf) {
struct tms tmp;
@@ -944,7 +945,7 @@ asmlinkage long sys_times(struct tms __user * tbuf)
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
* LBT 04.03.94
*/
-asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
+SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
struct task_struct *p;
struct task_struct *group_leader = current->group_leader;
@@ -1015,7 +1016,7 @@ out:
return err;
}
-asmlinkage long sys_getpgid(pid_t pid)
+SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
struct task_struct *p;
struct pid *grp;
@@ -1045,14 +1046,14 @@ out:
#ifdef __ARCH_WANT_SYS_GETPGRP
-asmlinkage long sys_getpgrp(void)
+SYSCALL_DEFINE0(getpgrp)
{
return sys_getpgid(0);
}
#endif
-asmlinkage long sys_getsid(pid_t pid)
+SYSCALL_DEFINE1(getsid, pid_t, pid)
{
struct task_struct *p;
struct pid *sid;
@@ -1080,7 +1081,7 @@ out:
return retval;
}
-asmlinkage long sys_setsid(void)
+SYSCALL_DEFINE0(setsid)
{
struct task_struct *group_leader = current->group_leader;
struct pid *sid = task_pid(group_leader);
@@ -1311,7 +1312,7 @@ int set_current_groups(struct group_info *group_info)
EXPORT_SYMBOL(set_current_groups);
-asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
+SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
int i;
@@ -1340,7 +1341,7 @@ out:
* without another task interfering.
*/
-asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
+SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
struct group_info *group_info;
int retval;
@@ -1394,7 +1395,7 @@ EXPORT_SYMBOL(in_egroup_p);
DECLARE_RWSEM(uts_sem);
-asmlinkage long sys_newuname(struct new_utsname __user * name)
+SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
int errno = 0;
@@ -1405,7 +1406,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
return errno;
}
-asmlinkage long sys_sethostname(char __user *name, int len)
+SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
int errno;
char tmp[__NEW_UTS_LEN];
@@ -1429,7 +1430,7 @@ asmlinkage long sys_sethostname(char __user *name, int len)
#ifdef __ARCH_WANT_SYS_GETHOSTNAME
-asmlinkage long sys_gethostname(char __user *name, int len)
+SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
int i, errno;
struct new_utsname *u;
@@ -1454,7 +1455,7 @@ asmlinkage long sys_gethostname(char __user *name, int len)
* Only setdomainname; getdomainname can be implemented by calling
* uname()
*/
-asmlinkage long sys_setdomainname(char __user *name, int len)
+SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
int errno;
char tmp[__NEW_UTS_LEN];
@@ -1477,7 +1478,7 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
return errno;
}
-asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
+SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
if (resource >= RLIM_NLIMITS)
return -EINVAL;
@@ -1496,7 +1497,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
* Back compatibility for getrlimit. Needed for some apps.
*/
-asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
+SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
+ struct rlimit __user *, rlim)
{
struct rlimit x;
if (resource >= RLIM_NLIMITS)
@@ -1514,7 +1516,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
#endif
-asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
struct rlimit new_rlim, *old_rlim;
int retval;
@@ -1687,7 +1689,7 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
-asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
+SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
who != RUSAGE_THREAD)
@@ -1695,14 +1697,14 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
return getrusage(current, who, ru);
}
-asmlinkage long sys_umask(int mask)
+SYSCALL_DEFINE1(umask, int, mask)
{
mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
return mask;
}
-asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
+SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ unsigned long, arg4, unsigned long, arg5)
{
struct task_struct *me = current;
unsigned char comm[sizeof(me->comm)];
@@ -1815,8 +1817,8 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
return error;
}
-asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
- struct getcpu_cache __user *unused)
+SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
+ struct getcpu_cache __user *, unused)
{
int err = 0;
int cpu = raw_smp_processor_id();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e14a2328170..27dad296738 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -131,6 +131,7 @@ cond_syscall(sys_io_destroy);
cond_syscall(sys_io_submit);
cond_syscall(sys_io_cancel);
cond_syscall(sys_io_getevents);
+cond_syscall(sys_syslog);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 92f6e5bc3c2..368d1638ee7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifndef CONFIG_MMU
+extern int sysctl_nr_trim_pages;
+#endif
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
@@ -141,6 +144,7 @@ extern int acct_parm[];
#ifdef CONFIG_IA64
extern int no_unaligned_warning;
+extern int unaligned_dump_stack;
#endif
#ifdef CONFIG_RT_MUTEXES
@@ -778,6 +782,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "unaligned-dump-stack",
+ .data = &unaligned_dump_stack,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
@@ -1102,6 +1114,17 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+#else
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
#endif
{
.ctl_name = VM_LAPTOP_MODE,
@@ -1674,7 +1697,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
return error;
}
-asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
{
struct __sysctl_args tmp;
int error;
@@ -2975,7 +2998,7 @@ int sysctl_ms_jiffies(struct ctl_table *table,
#else /* CONFIG_SYSCTL_SYSCALL */
-asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
{
struct __sysctl_args tmp;
int error;
diff --git a/kernel/time.c b/kernel/time.c
index 4886e3ce83a..29511943871 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(sys_tz);
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
-asmlinkage long sys_time(time_t __user * tloc)
+SYSCALL_DEFINE1(time, time_t __user *, tloc)
{
time_t i = get_seconds();
@@ -79,7 +79,7 @@ asmlinkage long sys_time(time_t __user * tloc)
* architectures that need it).
*/
-asmlinkage long sys_stime(time_t __user *tptr)
+SYSCALL_DEFINE1(stime, time_t __user *, tptr)
{
struct timespec tv;
int err;
@@ -99,8 +99,8 @@ asmlinkage long sys_stime(time_t __user *tptr)
#endif /* __ARCH_WANT_SYS_TIME */
-asmlinkage long sys_gettimeofday(struct timeval __user *tv,
- struct timezone __user *tz)
+SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
+ struct timezone __user *, tz)
{
if (likely(tv != NULL)) {
struct timeval ktv;
@@ -184,8 +184,8 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
return 0;
}
-asmlinkage long sys_settimeofday(struct timeval __user *tv,
- struct timezone __user *tz)
+SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
+ struct timezone __user *, tz)
{
struct timeval user_tv;
struct timespec new_ts;
@@ -205,7 +205,7 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv,
return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
}
-asmlinkage long sys_adjtimex(struct timex __user *txc_p)
+SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
{
struct timex txc; /* Local copy of parameter */
int ret;
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 1ca99557e92..06f197560f3 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -45,7 +45,7 @@
*
* The value 8 is somewhat carefully chosen, as anything
* larger can result in overflows. NSEC_PER_JIFFY grows as
- * HZ shrinks, so values greater then 8 overflow 32bits when
+ * HZ shrinks, so values greater than 8 overflow 32bits when
* HZ=100.
*/
#define JIFFIES_SHIFT 8
diff --git a/kernel/timer.c b/kernel/timer.c
index dee3f641a7a..13dd64fe143 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks)
* For backwards compatibility? This can be done in libc so Alpha
* and all newer ports shouldn't need it.
*/
-asmlinkage unsigned long sys_alarm(unsigned int seconds)
+SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
return alarm_setitimer(seconds);
}
@@ -1152,7 +1152,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
*
* This is SMP safe as current->tgid does not change.
*/
-asmlinkage long sys_getpid(void)
+SYSCALL_DEFINE0(getpid)
{
return task_tgid_vnr(current);
}
@@ -1163,7 +1163,7 @@ asmlinkage long sys_getpid(void)
* value of ->real_parent under rcu_read_lock(), see
* release_task()->call_rcu(delayed_put_task_struct).
*/
-asmlinkage long sys_getppid(void)
+SYSCALL_DEFINE0(getppid)
{
int pid;
@@ -1174,25 +1174,25 @@ asmlinkage long sys_getppid(void)
return pid;
}
-asmlinkage long sys_getuid(void)
+SYSCALL_DEFINE0(getuid)
{
/* Only we change this so SMP safe */
return current_uid();
}
-asmlinkage long sys_geteuid(void)
+SYSCALL_DEFINE0(geteuid)
{
/* Only we change this so SMP safe */
return current_euid();
}
-asmlinkage long sys_getgid(void)
+SYSCALL_DEFINE0(getgid)
{
/* Only we change this so SMP safe */
return current_gid();
}
-asmlinkage long sys_getegid(void)
+SYSCALL_DEFINE0(getegid)
{
/* Only we change this so SMP safe */
return current_egid();
@@ -1308,7 +1308,7 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
/* Thread ID - the internal kernel "pid" */
-asmlinkage long sys_gettid(void)
+SYSCALL_DEFINE0(gettid)
{
return task_pid_vnr(current);
}
@@ -1400,7 +1400,7 @@ out:
return 0;
}
-asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
struct sysinfo val;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a9d9760dc7b..8b0daf0662e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -168,7 +168,13 @@ rb_event_length(struct ring_buffer_event *event)
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
- return rb_event_length(event);
+ unsigned length = rb_event_length(event);
+ if (event->type != RINGBUF_TYPE_DATA)
+ return length;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
+ length -= sizeof(event->array[0]);
+ return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 2460c3199b5..0314501688b 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -17,7 +17,7 @@
#include <asm/uaccess.h>
-asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group)
+SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
{
long ret = sys_chown(filename, low2highuid(user), low2highgid(group));
/* avoid REGPARM breakage on x86: */
@@ -25,7 +25,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi
return ret;
}
-asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group)
+SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group)
{
long ret = sys_lchown(filename, low2highuid(user), low2highgid(group));
/* avoid REGPARM breakage on x86: */
@@ -33,7 +33,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g
return ret;
}
-asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
+SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group)
{
long ret = sys_fchown(fd, low2highuid(user), low2highgid(group));
/* avoid REGPARM breakage on x86: */
@@ -41,7 +41,7 @@ asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
return ret;
}
-asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
+SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid)
{
long ret = sys_setregid(low2highgid(rgid), low2highgid(egid));
/* avoid REGPARM breakage on x86: */
@@ -49,7 +49,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
return ret;
}
-asmlinkage long sys_setgid16(old_gid_t gid)
+SYSCALL_DEFINE1(setgid16, old_gid_t, gid)
{
long ret = sys_setgid(low2highgid(gid));
/* avoid REGPARM breakage on x86: */
@@ -57,7 +57,7 @@ asmlinkage long sys_setgid16(old_gid_t gid)
return ret;
}
-asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
+SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid)
{
long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid));
/* avoid REGPARM breakage on x86: */
@@ -65,7 +65,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
return ret;
}
-asmlinkage long sys_setuid16(old_uid_t uid)
+SYSCALL_DEFINE1(setuid16, old_uid_t, uid)
{
long ret = sys_setuid(low2highuid(uid));
/* avoid REGPARM breakage on x86: */
@@ -73,7 +73,7 @@ asmlinkage long sys_setuid16(old_uid_t uid)
return ret;
}
-asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
+SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid)
{
long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid),
low2highuid(suid));
@@ -82,7 +82,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
return ret;
}
-asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
+SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid)
{
const struct cred *cred = current_cred();
int retval;
@@ -94,7 +94,7 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid,
return retval;
}
-asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
+SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid)
{
long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid),
low2highgid(sgid));
@@ -103,7 +103,8 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
return ret;
}
-asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
+
+SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid)
{
const struct cred *cred = current_cred();
int retval;
@@ -115,7 +116,7 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid,
return retval;
}
-asmlinkage long sys_setfsuid16(old_uid_t uid)
+SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid)
{
long ret = sys_setfsuid(low2highuid(uid));
/* avoid REGPARM breakage on x86: */
@@ -123,7 +124,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid)
return ret;
}
-asmlinkage long sys_setfsgid16(old_gid_t gid)
+SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid)
{
long ret = sys_setfsgid(low2highgid(gid));
/* avoid REGPARM breakage on x86: */
@@ -161,7 +162,7 @@ static int groups16_from_user(struct group_info *group_info,
return 0;
}
-asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
+SYSCALL_DEFINE2(getgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
int i;
@@ -184,7 +185,7 @@ out:
return i;
}
-asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
+SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
{
struct group_info *group_info;
int retval;
@@ -209,22 +210,22 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
return retval;
}
-asmlinkage long sys_getuid16(void)
+SYSCALL_DEFINE0(getuid16)
{
return high2lowuid(current_uid());
}
-asmlinkage long sys_geteuid16(void)
+SYSCALL_DEFINE0(geteuid16)
{
return high2lowuid(current_euid());
}
-asmlinkage long sys_getgid16(void)
+SYSCALL_DEFINE0(getgid16)
{
return high2lowgid(current_gid());
}
-asmlinkage long sys_getegid16(void)
+SYSCALL_DEFINE0(getegid16)
{
return high2lowgid(current_egid());
}
diff --git a/kernel/up.c b/kernel/up.c
new file mode 100644
index 00000000000..1ff27a28bb7
--- /dev/null
+++ b/kernel/up.c
@@ -0,0 +1,21 @@
+/*
+ * Uniprocessor-only support functions. The counterpart to kernel/smp.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int wait)
+{
+ WARN_ON(cpu != 0);
+
+ local_irq_disable();
+ (func)(info);
+ local_irq_enable();
+
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);