aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_tree.c13
-rw-r--r--kernel/auditsc.c1
-rw-r--r--kernel/cpu.c24
-rw-r--r--kernel/kfifo.c361
-rw-r--r--kernel/kthread.c23
-rw-r--r--kernel/perf_event.c34
-rw-r--r--kernel/resource.c32
-rw-r--r--kernel/sched.c307
-rw-r--r--kernel/sched_clock.c23
-rw-r--r--kernel/sched_fair.c53
-rw-r--r--kernel/sched_rt.c4
-rw-r--r--kernel/signal.c25
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/time.c1
-rw-r--r--kernel/time/clockevents.c18
-rw-r--r--kernel/time/timekeeping.c27
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/trace_kprobe.c31
-rw-r--r--kernel/trace/trace_sysprof.c1
19 files changed, 660 insertions, 322 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f328..4b05bd9479d 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
owner->root = NULL;
}
- for (i = j = 0; i < size; i++, j++) {
+ for (i = j = 0; j <= size; i++, j++) {
struct audit_tree *s;
if (&chunk->owners[j] == p) {
list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
if (!s) /* result of earlier fallback */
continue;
get_tree(s);
- list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+ list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
- put_inotify_watch(watch);
+ put_inotify_watch(&old->watch);
return 0;
}
}
spin_unlock(&hash_lock);
chunk = alloc_chunk(old->count + 1);
- if (!chunk)
+ if (!chunk) {
+ put_inotify_watch(&old->watch);
return -ENOMEM;
+ }
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
- put_inotify_watch(&old->watch);
+ put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+ put_inotify_watch(&old->watch); /* and kill it */
return 0;
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 267e484f019..fc0f928167e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -250,7 +250,6 @@ struct audit_context {
#endif
};
-#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
static inline int open_arg(int flags, int mask)
{
int n = ACC_MODE(flags);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 291ac586f37..1c8ddd6ee94 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
return -ENOMEM;
cpu_hotplug_begin();
+ set_cpu_active(cpu, false);
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
@@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu)
goto out;
}
- set_cpu_active(cpu, false);
-
- /*
- * Make sure the all cpus did the reschedule and are not
- * using stale version of the cpu_active_mask.
- * This is not strictly necessary becuase stop_machine()
- * that we run down the line already provides the required
- * synchronization. But it's really a side effect and we do not
- * want to depend on the innards of the stop_machine here.
- */
- synchronize_sched();
-
err = _cpu_down(cpu, 0);
out:
@@ -382,19 +371,12 @@ int disable_nonboot_cpus(void)
return error;
cpu_maps_update_begin();
first_cpu = cpumask_first(cpu_online_mask);
- /* We take down all of the non-boot CPUs in one shot to avoid races
+ /*
+ * We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
*/
cpumask_clear(frozen_cpus);
- for_each_online_cpu(cpu) {
- if (cpu == first_cpu)
- continue;
- set_cpu_active(cpu, false);
- }
-
- synchronize_sched();
-
printk("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 3765ff3c1bb..e92d519f93b 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -1,6 +1,7 @@
/*
- * A simple kernel FIFO implementation.
+ * A generic kernel FIFO implementation.
*
+ * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
* Copyright (C) 2004 Stelian Pop <stelian@popies.net>
*
* This program is free software; you can redistribute it and/or modify
@@ -25,50 +26,48 @@
#include <linux/err.h>
#include <linux/kfifo.h>
#include <linux/log2.h>
+#include <linux/uaccess.h>
+
+static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+ unsigned int size)
+{
+ fifo->buffer = buffer;
+ fifo->size = size;
+
+ kfifo_reset(fifo);
+}
/**
- * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * kfifo_init - initialize a FIFO using a preallocated buffer
+ * @fifo: the fifo to assign the buffer
* @buffer: the preallocated buffer to be used.
* @size: the size of the internal buffer, this have to be a power of 2.
- * @gfp_mask: get_free_pages mask, passed to kmalloc()
- * @lock: the lock to be used to protect the fifo buffer
*
- * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
- * &struct kfifo with kfree().
*/
-struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
- gfp_t gfp_mask, spinlock_t *lock)
+void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
{
- struct kfifo *fifo;
-
/* size must be a power of 2 */
BUG_ON(!is_power_of_2(size));
- fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
- if (!fifo)
- return ERR_PTR(-ENOMEM);
-
- fifo->buffer = buffer;
- fifo->size = size;
- fifo->in = fifo->out = 0;
- fifo->lock = lock;
-
- return fifo;
+ _kfifo_init(fifo, buffer, size);
}
EXPORT_SYMBOL(kfifo_init);
/**
- * kfifo_alloc - allocates a new FIFO and its internal buffer
- * @size: the size of the internal buffer to be allocated.
+ * kfifo_alloc - allocates a new FIFO internal buffer
+ * @fifo: the fifo to assign then new buffer
+ * @size: the size of the buffer to be allocated, this have to be a power of 2.
* @gfp_mask: get_free_pages mask, passed to kmalloc()
- * @lock: the lock to be used to protect the fifo buffer
+ *
+ * This function dynamically allocates a new fifo internal buffer
*
* The size will be rounded-up to a power of 2.
+ * The buffer will be release with kfifo_free().
+ * Return 0 if no error, otherwise the an error code
*/
-struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
+int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
{
unsigned char *buffer;
- struct kfifo *ret;
/*
* round up to the next power of 2, since our 'let the indices
@@ -80,48 +79,91 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
}
buffer = kmalloc(size, gfp_mask);
- if (!buffer)
- return ERR_PTR(-ENOMEM);
-
- ret = kfifo_init(buffer, size, gfp_mask, lock);
+ if (!buffer) {
+ _kfifo_init(fifo, 0, 0);
+ return -ENOMEM;
+ }
- if (IS_ERR(ret))
- kfree(buffer);
+ _kfifo_init(fifo, buffer, size);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(kfifo_alloc);
/**
- * kfifo_free - frees the FIFO
+ * kfifo_free - frees the FIFO internal buffer
* @fifo: the fifo to be freed.
*/
void kfifo_free(struct kfifo *fifo)
{
kfree(fifo->buffer);
- kfree(fifo);
}
EXPORT_SYMBOL(kfifo_free);
/**
- * __kfifo_put - puts some data into the FIFO, no locking version
+ * kfifo_skip - skip output data
* @fifo: the fifo to be used.
- * @buffer: the data to be added.
- * @len: the length of the data to be added.
- *
- * This function copies at most @len bytes from the @buffer into
- * the FIFO depending on the free space, and returns the number of
- * bytes copied.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
+ * @len: number of bytes to skip
*/
-unsigned int __kfifo_put(struct kfifo *fifo,
- const unsigned char *buffer, unsigned int len)
+void kfifo_skip(struct kfifo *fifo, unsigned int len)
+{
+ if (len < kfifo_len(fifo)) {
+ __kfifo_add_out(fifo, len);
+ return;
+ }
+ kfifo_reset_out(fifo);
+}
+EXPORT_SYMBOL(kfifo_skip);
+
+static inline void __kfifo_in_data(struct kfifo *fifo,
+ const void *from, unsigned int len, unsigned int off)
{
unsigned int l;
- len = min(len, fifo->size - fifo->in + fifo->out);
+ /*
+ * Ensure that we sample the fifo->out index -before- we
+ * start putting bytes into the kfifo.
+ */
+
+ smp_mb();
+
+ off = __kfifo_off(fifo, fifo->in + off);
+
+ /* first put the data starting from fifo->in to buffer end */
+ l = min(len, fifo->size - off);
+ memcpy(fifo->buffer + off, from, l);
+
+ /* then put the rest (if any) at the beginning of the buffer */
+ memcpy(fifo->buffer, from + l, len - l);
+}
+
+static inline void __kfifo_out_data(struct kfifo *fifo,
+ void *to, unsigned int len, unsigned int off)
+{
+ unsigned int l;
+
+ /*
+ * Ensure that we sample the fifo->in index -before- we
+ * start removing bytes from the kfifo.
+ */
+
+ smp_rmb();
+
+ off = __kfifo_off(fifo, fifo->out + off);
+
+ /* first get the data from fifo->out until the end of the buffer */
+ l = min(len, fifo->size - off);
+ memcpy(to, fifo->buffer + off, l);
+
+ /* then get the rest (if any) from the beginning of the buffer */
+ memcpy(to + l, fifo->buffer, len - l);
+}
+
+static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned int off)
+{
+ unsigned int l;
+ int ret;
/*
* Ensure that we sample the fifo->out index -before- we
@@ -130,68 +172,229 @@ unsigned int __kfifo_put(struct kfifo *fifo,
smp_mb();
+ off = __kfifo_off(fifo, fifo->in + off);
+
/* first put the data starting from fifo->in to buffer end */
- l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
- memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+ l = min(len, fifo->size - off);
+ ret = copy_from_user(fifo->buffer + off, from, l);
+
+ if (unlikely(ret))
+ return ret + len - l;
/* then put the rest (if any) at the beginning of the buffer */
- memcpy(fifo->buffer, buffer + l, len - l);
+ return copy_from_user(fifo->buffer, from + l, len - l);
+}
+
+static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
+ void __user *to, unsigned int len, unsigned int off)
+{
+ unsigned int l;
+ int ret;
/*
- * Ensure that we add the bytes to the kfifo -before-
- * we update the fifo->in index.
+ * Ensure that we sample the fifo->in index -before- we
+ * start removing bytes from the kfifo.
*/
- smp_wmb();
+ smp_rmb();
+
+ off = __kfifo_off(fifo, fifo->out + off);
+
+ /* first get the data from fifo->out until the end of the buffer */
+ l = min(len, fifo->size - off);
+ ret = copy_to_user(to, fifo->buffer + off, l);
+
+ if (unlikely(ret))
+ return ret + len - l;
+
+ /* then get the rest (if any) from the beginning of the buffer */
+ return copy_to_user(to + l, fifo->buffer, len - l);
+}
+
+unsigned int __kfifo_in_n(struct kfifo *fifo,
+ const void *from, unsigned int len, unsigned int recsize)
+{
+ if (kfifo_avail(fifo) < len + recsize)
+ return len + 1;
+
+ __kfifo_in_data(fifo, from, len, recsize);
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_in_n);
- fifo->in += len;
+/**
+ * kfifo_in - puts some data into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from buffer into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+ unsigned int len)
+{
+ len = min(kfifo_avail(fifo), len);
+ __kfifo_in_data(fifo, from, len, 0);
+ __kfifo_add_in(fifo, len);
return len;
}
-EXPORT_SYMBOL(__kfifo_put);
+EXPORT_SYMBOL(kfifo_in);
+
+unsigned int __kfifo_in_generic(struct kfifo *fifo,
+ const void *from, unsigned int len, unsigned int recsize)
+{
+ return __kfifo_in_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_in_generic);
+
+unsigned int __kfifo_out_n(struct kfifo *fifo,
+ void *to, unsigned int len, unsigned int recsize)
+{
+ if (kfifo_len(fifo) < len + recsize)
+ return len;
+
+ __kfifo_out_data(fifo, to, len, recsize);
+ __kfifo_add_out(fifo, len + recsize);
+ return 0;
+}
+EXPORT_SYMBOL(__kfifo_out_n);
/**
- * __kfifo_get - gets some data from the FIFO, no locking version
+ * kfifo_out - gets some data from the FIFO
* @fifo: the fifo to be used.
- * @buffer: where the data must be copied.
+ * @to: where the data must be copied.
* @len: the size of the destination buffer.
*
* This function copies at most @len bytes from the FIFO into the
- * @buffer and returns the number of copied bytes.
+ * @to buffer and returns the number of copied bytes.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
-unsigned int __kfifo_get(struct kfifo *fifo,
- unsigned char *buffer, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
{
- unsigned int l;
+ len = min(kfifo_len(fifo), len);
- len = min(len, fifo->in - fifo->out);
+ __kfifo_out_data(fifo, to, len, 0);
+ __kfifo_add_out(fifo, len);
- /*
- * Ensure that we sample the fifo->in index -before- we
- * start removing bytes from the kfifo.
- */
+ return len;
+}
+EXPORT_SYMBOL(kfifo_out);
- smp_rmb();
+unsigned int __kfifo_out_generic(struct kfifo *fifo,
+ void *to, unsigned int len, unsigned int recsize,
+ unsigned int *total)
+{
+ return __kfifo_out_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_out_generic);
- /* first get the data from fifo->out until the end of the buffer */
- l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
- memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+unsigned int __kfifo_from_user_n(struct kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned int recsize)
+{
+ if (kfifo_avail(fifo) < len + recsize)
+ return len + 1;
- /* then get the rest (if any) from the beginning of the buffer */
- memcpy(buffer + l, fifo->buffer, len - l);
+ return __kfifo_from_user_data(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_n);
- /*
- * Ensure that we remove the bytes from the kfifo -before-
- * we update the fifo->out index.
- */
+/**
+ * kfifo_from_user - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from into the
+ * FIFO depending and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_from_user(struct kfifo *fifo,
+ const void __user *from, unsigned int len)
+{
+ len = min(kfifo_avail(fifo), len);
+ len -= __kfifo_from_user_data(fifo, from, len, 0);
+ __kfifo_add_in(fifo, len);
+ return len;
+}
+EXPORT_SYMBOL(kfifo_from_user);
- smp_mb();
+unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
+ const void __user *from, unsigned int len, unsigned int recsize)
+{
+ return __kfifo_from_user_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_generic);
- fifo->out += len;
+unsigned int __kfifo_to_user_n(struct kfifo *fifo,
+ void __user *to, unsigned int len, unsigned int reclen,
+ unsigned int recsize)
+{
+ unsigned int ret;
+
+ if (kfifo_len(fifo) < reclen + recsize)
+ return len;
+
+ ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+ if (likely(ret == 0))
+ __kfifo_add_out(fifo, reclen + recsize);
+
+ return ret;
+}
+EXPORT_SYMBOL(__kfifo_to_user_n);
+
+/**
+ * kfifo_to_user - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most @len bytes from the FIFO into the
+ * @to buffer and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_to_user(struct kfifo *fifo,
+ void __user *to, unsigned int len)
+{
+ len = min(kfifo_len(fifo), len);
+ len -= __kfifo_to_user_data(fifo, to, len, 0);
+ __kfifo_add_out(fifo, len);
return len;
}
-EXPORT_SYMBOL(__kfifo_get);
+EXPORT_SYMBOL(kfifo_to_user);
+
+unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
+ void __user *to, unsigned int len, unsigned int recsize,
+ unsigned int *total)
+{
+ return __kfifo_to_user_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_to_user_generic);
+
+unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize)
+{
+ if (recsize == 0)
+ return kfifo_avail(fifo);
+
+ return __kfifo_peek_n(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_peek_generic);
+
+void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize)
+{
+ __kfifo_skip_rec(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_skip_generic);
+
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57773e..fbb6222fe7e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
EXPORT_SYMBOL(kthread_create);
/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @p: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @k to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create()).
+ */
+void kthread_bind(struct task_struct *p, unsigned int cpu)
+{
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
+}
+EXPORT_SYMBOL(kthread_bind);
+
+/**
* kthread_stop - stop a thread created by kthread_create().
* @k: thread created by kthread_create().
*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 97d1a3dd7a5..1f38270f08c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1381,6 +1381,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ continue;
+
hwc = &event->hw;
interrupts = hwc->interrupts;
@@ -3265,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event,
static int perf_event_task_match(struct perf_event *event)
{
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ return 0;
+
if (event->attr.comm || event->attr.mmap || event->attr.task)
return 1;
@@ -3290,12 +3296,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_task_ctx(&cpuctx->ctx, task_event);
- put_cpu_var(perf_cpu_context);
-
if (!ctx)
ctx = rcu_dereference(task_event->task->perf_event_ctxp);
if (ctx)
perf_event_task_ctx(ctx, task_event);
+ put_cpu_var(perf_cpu_context);
rcu_read_unlock();
}
@@ -3372,6 +3377,9 @@ static void perf_event_comm_output(struct perf_event *event,
static int perf_event_comm_match(struct perf_event *event)
{
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ return 0;
+
if (event->attr.comm)
return 1;
@@ -3408,15 +3416,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
- put_cpu_var(perf_cpu_context);
-
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
perf_event_comm_ctx(ctx, comm_event);
+ put_cpu_var(perf_cpu_context);
rcu_read_unlock();
}
@@ -3491,6 +3494,9 @@ static void perf_event_mmap_output(struct perf_event *event,
static int perf_event_mmap_match(struct perf_event *event,
struct perf_mmap_event *mmap_event)
{
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ return 0;
+
if (event->attr.mmap)
return 1;
@@ -3564,15 +3570,10 @@ got_name:
rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
- put_cpu_var(perf_cpu_context);
-
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
perf_event_mmap_ctx(ctx, mmap_event);
+ put_cpu_var(perf_cpu_context);
rcu_read_unlock();
kfree(buf);
@@ -3863,6 +3864,9 @@ static int perf_swevent_match(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ if (event->cpu != -1 && event->cpu != smp_processor_id())
+ return 0;
+
if (!perf_swevent_is_counting(event))
return 0;
@@ -4720,7 +4724,7 @@ SYSCALL_DEFINE5(perf_event_open,
if (IS_ERR(event))
goto err_put_context;
- err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0);
+ err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
if (err < 0)
goto err_free_put_context;
diff --git a/kernel/resource.c b/kernel/resource.c
index dc15686b7a7..af96c1e4b54 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,37 +308,37 @@ static int find_resource(struct resource *root, struct resource *new,
void *alignf_data)
{
struct resource *this = root->child;
- resource_size_t start, end;
+ struct resource tmp = *new;
- start = root->start;
+ tmp.start = root->start;
/*
* Skip past an allocated resource that starts at 0, since the assignment
- * of this->start - 1 to new->end below would cause an underflow.
+ * of this->start - 1 to tmp->end below would cause an underflow.
*/
if (this && this->start == 0) {
- start = this->end + 1;
+ tmp.start = this->end + 1;
this = this->sibling;
}
for(;;) {
if (this)
- end = this->start - 1;
+ tmp.end = this->start - 1;
else
- end = root->end;
- if (start < min)
- start = min;
- if (end > max)
- end = max;
- start = ALIGN(start, align);
+ tmp.end = root->end;
+ if (tmp.start < min)
+ tmp.start = min;
+ if (tmp.end > max)
+ tmp.end = max;
+ tmp.start = ALIGN(tmp.start, align);
if (alignf)
- alignf(alignf_data, new, size, align);
- if (start < end && end - start >= size - 1) {
- new->start = start;
- new->end = start + size - 1;
+ alignf(alignf_data, &tmp, size, align);
+ if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) {
+ new->start = tmp.start;
+ new->end = tmp.start + size - 1;
return 0;
}
if (!this)
break;
- start = this->end + 1;
+ tmp.start = this->end + 1;
this = this->sibling;
}
return -EBUSY;
diff --git a/kernel/sched.c b/kernel/sched.c
index 18cceeecce3..c535cc4f642 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2002,39 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
p->sched_class->prio_changed(rq, p, oldprio, running);
}
-/**
- * kthread_bind - bind a just-created kthread to a cpu.
- * @p: thread created by kthread_create().
- * @cpu: cpu (might not be online, must be possible) for @k to run on.
- *
- * Description: This function is equivalent to set_cpus_allowed(),
- * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create()).
- *
- * Function lives here instead of kthread.c because it messes with
- * scheduler internals which require locking.
- */
-void kthread_bind(struct task_struct *p, unsigned int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- unsigned long flags;
-
- /* Must have done schedule() in kthread() before we set_task_cpu */
- if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
- WARN_ON(1);
- return;
- }
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- update_rq_clock(rq);
- set_task_cpu(p, cpu);
- p->cpus_allowed = cpumask_of_cpu(cpu);
- p->rt.nr_cpus_allowed = 1;
- p->flags |= PF_THREAD_BOUND;
- raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-EXPORT_SYMBOL(kthread_bind);
-
#ifdef CONFIG_SMP
/*
* Is this task likely cache-hot:
@@ -2044,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
{
s64 delta;
+ if (p->sched_class != &fair_sched_class)
+ return 0;
+
/*
* Buddy candidates are cache hot:
*/
@@ -2052,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
&p->se == cfs_rq_of(&p->se)->last))
return 1;
- if (p->sched_class != &fair_sched_class)
- return 0;
-
if (sysctl_sched_migration_cost == -1)
return 1;
if (sysctl_sched_migration_cost == 0)
@@ -2065,22 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
return delta < (s64)sysctl_sched_migration_cost;
}
-
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
- int old_cpu = task_cpu(p);
- struct cfs_rq *old_cfsrq = task_cfs_rq(p),
- *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+#ifdef CONFIG_SCHED_DEBUG
+ /*
+ * We should never call set_task_cpu() on a blocked task,
+ * ttwu() will sort out the placement.
+ */
+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+ !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+#endif
trace_sched_migrate_task(p, new_cpu);
- if (old_cpu != new_cpu) {
+ if (task_cpu(p) != new_cpu) {
p->se.nr_migrations++;
- perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
- 1, 1, NULL, 0);
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
}
- p->se.vruntime -= old_cfsrq->min_vruntime -
- new_cfsrq->min_vruntime;
__set_task_cpu(p, new_cpu);
}
@@ -2105,13 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
/*
* If the task is not on a runqueue (and not running), then
- * it is sufficient to simply update the task's cpu field.
+ * the next wake-up will properly place the task.
*/
- if (!p->se.on_rq && !task_running(rq, p)) {
- update_rq_clock(rq);
- set_task_cpu(p, dest_cpu);
+ if (!p->se.on_rq && !task_running(rq, p))
return 0;
- }
init_completion(&req->done);
req->task = p;
@@ -2317,10 +2282,73 @@ void task_oncpu_function_call(struct task_struct *p,
}
#ifdef CONFIG_SMP
+static int select_fallback_rq(int cpu, struct task_struct *p)
+{
+ int dest_cpu;
+ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+
+ /* Look for allowed, online CPU in same node. */
+ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ return dest_cpu;
+
+ /* Any allowed, online CPU? */
+ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+ if (dest_cpu < nr_cpu_ids)
+ return dest_cpu;
+
+ /* No more Mr. Nice Guy. */
+ if (dest_cpu >= nr_cpu_ids) {
+ rcu_read_lock();
+ cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+ rcu_read_unlock();
+ dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
+ }
+ }
+
+ return dest_cpu;
+}
+
+/*
+ * Called from:
+ *
+ * - fork, @p is stable because it isn't on the tasklist yet
+ *
+ * - exec, @p is unstable, retry loop
+ *
+ * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
+ * we should be good.
+ */
static inline
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
{
- return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+ int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+
+ /*
+ * In order not to call set_task_cpu() on a blocking task we need
+ * to rely on ttwu() to place the task on a valid ->cpus_allowed
+ * cpu.
+ *
+ * Since this is common to all placement strategies, this lives here.
+ *
+ * [ this allows ->select_task() to simply return task_cpu(p) and
+ * not worry about this generic constraint ]
+ */
+ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+ !cpu_online(cpu)))
+ cpu = select_fallback_rq(task_cpu(p), p);
+
+ return cpu;
}
#endif
@@ -2375,6 +2403,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
p->state = TASK_WAKING;
+
+ if (p->sched_class->task_waking)
+ p->sched_class->task_waking(rq, p);
+
__task_rq_unlock(rq);
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2438,8 +2470,8 @@ out_running:
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
- if (p->sched_class->task_wake_up)
- p->sched_class->task_wake_up(rq, p);
+ if (p->sched_class->task_woken)
+ p->sched_class->task_woken(rq, p);
if (unlikely(rq->idle_stamp)) {
u64 delta = rq->clock - rq->idle_stamp;
@@ -2538,14 +2570,6 @@ static void __sched_fork(struct task_struct *p)
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
-
- /*
- * We mark the process as running here, but have not actually
- * inserted it onto the runqueue yet. This guarantees that
- * nobody will actually run it, and a signal or other external
- * event cannot wake it up and insert it on the runqueue either.
- */
- p->state = TASK_RUNNING;
}
/*
@@ -2556,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
int cpu = get_cpu();
__sched_fork(p);
+ /*
+ * We mark the process as waking here. This guarantees that
+ * nobody will actually run it, and a signal or other external
+ * event cannot wake it up and insert it on the runqueue either.
+ */
+ p->state = TASK_WAKING;
/*
* Revert to default priority/policy on fork if requested.
@@ -2624,14 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
struct rq *rq;
rq = task_rq_lock(p, &flags);
- BUG_ON(p->state != TASK_RUNNING);
+ BUG_ON(p->state != TASK_WAKING);
+ p->state = TASK_RUNNING;
update_rq_clock(rq);
activate_task(rq, p, 0);
trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
- if (p->sched_class->task_wake_up)
- p->sched_class->task_wake_up(rq, p);
+ if (p->sched_class->task_woken)
+ p->sched_class->task_woken(rq, p);
#endif
task_rq_unlock(rq, &flags);
}
@@ -3101,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
}
/*
- * If dest_cpu is allowed for this process, migrate the task to it.
- * This is accomplished by forcing the cpu_allowed mask to only
- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
- * the cpu_allowed mask is restored.
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
*/
-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+void sched_exec(void)
{
+ struct task_struct *p = current;
struct migration_req req;
+ int dest_cpu, this_cpu;
unsigned long flags;
struct rq *rq;
+again:
+ this_cpu = get_cpu();
+ dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+ if (dest_cpu == this_cpu) {
+ put_cpu();
+ return;
+ }
+
rq = task_rq_lock(p, &flags);
+ put_cpu();
+
+ /*
+ * select_task_rq() can race against ->cpus_allowed
+ */
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
- || unlikely(!cpu_active(dest_cpu)))
- goto out;
+ || unlikely(!cpu_active(dest_cpu))) {
+ task_rq_unlock(rq, &flags);
+ goto again;
+ }
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
@@ -3130,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
return;
}
-out:
task_rq_unlock(rq, &flags);
}
/*
- * sched_exec - execve() is a valuable balancing opportunity, because at
- * this point the task has the smallest effective memory and cache footprint.
- */
-void sched_exec(void)
-{
- int new_cpu, this_cpu = get_cpu();
- new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
- put_cpu();
- if (new_cpu != this_cpu)
- sched_migrate_task(current, new_cpu);
-}
-
-/*
* pull_task - move a task from a remote runqueue to the local runqueue.
* Both runqueues must be locked.
*/
@@ -5911,14 +5943,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
*/
bool try_wait_for_completion(struct completion *x)
{
+ unsigned long flags;
int ret = 1;
- spin_lock_irq(&x->wait.lock);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -5933,12 +5966,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
*/
bool completion_done(struct completion *x)
{
+ unsigned long flags;
int ret = 1;
- spin_lock_irq(&x->wait.lock);
+ spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
- spin_unlock_irq(&x->wait.lock);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(completion_done);
@@ -6457,7 +6491,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
return -EINVAL;
retval = -ESRCH;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_process_by_pid(pid);
if (p) {
retval = security_task_getscheduler(p);
@@ -6465,7 +6499,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
retval = p->policy
| (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return retval;
}
@@ -6483,7 +6517,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
if (!param || pid < 0)
return -EINVAL;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_process_by_pid(pid);
retval = -ESRCH;
if (!p)
@@ -6494,7 +6528,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
goto out_unlock;
lp.sched_priority = p->rt_priority;
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
/*
* This one might sleep, we cannot do it with a spinlock held ...
@@ -6504,7 +6538,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
return retval;
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return retval;
}
@@ -6515,22 +6549,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
int retval;
get_online_cpus();
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_process_by_pid(pid);
if (!p) {
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
put_online_cpus();
return -ESRCH;
}
- /*
- * It is not safe to call set_cpus_allowed with the
- * tasklist_lock held. We will bump the task_struct's
- * usage count and then drop tasklist_lock.
- */
+ /* Prevent p going away */
get_task_struct(p);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
retval = -ENOMEM;
@@ -6616,7 +6646,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
int retval;
get_online_cpus();
- read_lock(&tasklist_lock);
+ rcu_read_lock();
retval = -ESRCH;
p = find_process_by_pid(pid);
@@ -6632,7 +6662,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
task_rq_unlock(rq, &flags);
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
put_online_cpus();
return retval;
@@ -6876,7 +6906,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
return -EINVAL;
retval = -ESRCH;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_process_by_pid(pid);
if (!p)
goto out_unlock;
@@ -6889,13 +6919,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
time_slice = p->sched_class->get_rr_interval(rq, p);
task_rq_unlock(rq, &flags);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
jiffies_to_timespec(time_slice, &t);
retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
return retval;
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return retval;
}
@@ -6986,6 +7016,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
raw_spin_lock_irqsave(&rq->lock, flags);
__sched_fork(idle);
+ idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
@@ -7100,7 +7131,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
struct rq *rq;
int ret = 0;
+ /*
+ * Since we rely on wake-ups to migrate sleeping tasks, don't change
+ * the ->cpus_allowed mask from under waking tasks, which would be
+ * possible when we change rq->lock in ttwu(), so synchronize against
+ * TASK_WAKING to avoid that.
+ */
+again:
+ while (p->state == TASK_WAKING)
+ cpu_relax();
+
rq = task_rq_lock(p, &flags);
+
+ if (p->state == TASK_WAKING) {
+ task_rq_unlock(rq, &flags);
+ goto again;
+ }
+
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
goto out;
@@ -7156,7 +7203,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
struct rq *rq_dest, *rq_src;
- int ret = 0, on_rq;
+ int ret = 0;
if (unlikely(!cpu_active(dest_cpu)))
return ret;
@@ -7172,12 +7219,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
goto fail;
- on_rq = p->se.on_rq;
- if (on_rq)
+ /*
+ * If we're not on a rq, the next wake-up will ensure we're
+ * placed properly.
+ */
+ if (p->se.on_rq) {
deactivate_task(rq_src, p, 0);
-
- set_task_cpu(p, dest_cpu);
- if (on_rq) {
+ set_task_cpu(p, dest_cpu);
activate_task(rq_dest, p, 0);
check_preempt_curr(rq_dest, p, 0);
}
@@ -7273,37 +7321,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
int dest_cpu;
- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
again:
- /* Look for allowed, online CPU in same node. */
- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
- goto move;
-
- /* Any allowed, online CPU? */
- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
- if (dest_cpu < nr_cpu_ids)
- goto move;
-
- /* No more Mr. Nice Guy. */
- if (dest_cpu >= nr_cpu_ids) {
- cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
-
- /*
- * Don't tell them about moving exiting tasks or
- * kernel threads (both mm NULL), since they never
- * leave kernel.
- */
- if (p->mm && printk_ratelimit()) {
- printk(KERN_INFO "process %d (%s) no "
- "longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, dead_cpu);
- }
- }
+ dest_cpu = select_fallback_rq(dead_cpu, p);
-move:
/* It can have affinity changed while we were choosing. */
if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
goto again;
@@ -9668,7 +9689,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
- int nested = preempt_count() & ~PREEMPT_ACTIVE;
+ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
}
@@ -10083,7 +10104,7 @@ void sched_move_task(struct task_struct *tsk)
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->moved_group)
- tsk->sched_class->moved_group(tsk);
+ tsk->sched_class->moved_group(tsk, on_rq);
#endif
if (unlikely(running))
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 479ce5682d7..5b496132c28 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+unsigned long long cpu_clock(int cpu)
+{
+ unsigned long long clock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ clock = sched_clock_cpu(cpu);
+ local_irq_restore(flags);
+
+ return clock;
+}
+
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
void sched_clock_init(void)
@@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu)
return sched_clock();
}
-#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
unsigned long long cpu_clock(int cpu)
{
- unsigned long long clock;
- unsigned long flags;
+ return sched_clock_cpu(cpu);
+}
- local_irq_save(flags);
- clock = sched_clock_cpu(cpu);
- local_irq_restore(flags);
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
- return clock;
-}
EXPORT_SYMBOL_GPL(cpu_clock);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5bedf6e3ebf..42ac3c9f66f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
curr->vruntime += delta_exec_weighted;
update_min_vruntime(cfs_rq);
}
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
se->vruntime = vruntime;
}
+#define ENQUEUE_WAKEUP 1
+#define ENQUEUE_MIGRATE 2
+
static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
+ * Update the normalized vruntime before updating min_vruntime
+ * through callig update_curr().
+ */
+ if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+ se->vruntime += cfs_rq->min_vruntime;
+
+ /*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
account_entity_enqueue(cfs_rq, se);
- if (wakeup) {
+ if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
enqueue_sleeper(cfs_rq, se);
}
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
__dequeue_entity(cfs_rq, se);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
+
+ /*
+ * Normalize the entity after updating the min_vruntime because the
+ * update can refer to the ->curr item and we need to reflect this
+ * movement in our normalized position.
+ */
+ if (!sleep)
+ se->vruntime -= cfs_rq->min_vruntime;
}
/*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int flags = 0;
+
+ if (wakeup)
+ flags |= ENQUEUE_WAKEUP;
+ if (p->state == TASK_WAKING)
+ flags |= ENQUEUE_MIGRATE;
for_each_sched_entity(se) {
if (se->on_rq)
break;
cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, wakeup);
- wakeup = 1;
+ enqueue_entity(cfs_rq, se, flags);
+ flags = ENQUEUE_WAKEUP;
}
hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
#ifdef CONFIG_SMP
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ se->vruntime -= cfs_rq->min_vruntime;
+}
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* effective_load() calculates the load change as seen from the root_task_group
@@ -1429,6 +1462,9 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
}
for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+
/*
* If power savings logic is enabled for a domain, see if we
* are not overloaded, if so, don't balance wider.
@@ -1975,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p)
resched_task(rq->curr);
}
+ se->vruntime -= cfs_rq->min_vruntime;
+
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -2028,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
update_curr(cfs_rq);
- place_entity(cfs_rq, &p->se, 1);
+ if (!on_rq)
+ place_entity(cfs_rq, &p->se, 1);
}
#endif
@@ -2073,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
.move_one_task = move_one_task_fair,
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,
+
+ .task_waking = task_waking_fair,
#endif
.set_curr_task = set_curr_task_fair,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d2ea2828164..f48328ac216 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
* If we are not running and we are not going to reschedule soon, we should
* try to push tasks away now
*/
-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
.rq_offline = rq_offline_rt,
.pre_schedule = pre_schedule_rt,
.post_schedule = post_schedule_rt,
- .task_wake_up = task_wake_up_rt,
+ .task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 1814e68e4de..d09692b4037 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
struct user_struct *user;
/*
- * We won't get problems with the target's UID changing under us
- * because changing it requires RCU be used, and if t != current, the
- * caller must be holding the RCU readlock (by way of a spinlock) and
- * we use RCU protection here
+ * Protect access to @t credentials. This can go away when all
+ * callers hold rcu read lock.
*/
+ rcu_read_lock();
user = get_uid(__task_cred(t)->user);
atomic_inc(&user->sigpending);
+ rcu_read_unlock();
if (override_rlimit ||
atomic_read(&user->sigpending) <=
@@ -1179,11 +1179,12 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
int ret = -EINVAL;
struct task_struct *p;
const struct cred *pcred;
+ unsigned long flags;
if (!valid_signal(sig))
return ret;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = pid_task(pid, PIDTYPE_PID);
if (!p) {
ret = -ESRCH;
@@ -1199,14 +1200,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
ret = security_task_kill(p, info, sig, secid);
if (ret)
goto out_unlock;
- if (sig && p->sighand) {
- unsigned long flags;
- spin_lock_irqsave(&p->sighand->siglock, flags);
- ret = __send_signal(sig, info, p, 1, 0);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
+
+ if (sig) {
+ if (lock_task_sighand(p, &flags)) {
+ ret = __send_signal(sig, info, p, 1, 0);
+ unlock_task_sighand(p, &flags);
+ } else
+ ret = -ESRCH;
}
out_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
diff --git a/kernel/sys.c b/kernel/sys.c
index 20ccfb5da6a..26a6b73a6b8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
if (niceval > 19)
niceval = 19;
+ rcu_read_lock();
read_lock(&tasklist_lock);
switch (which) {
case PRIO_PROCESS:
@@ -199,6 +200,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
}
out_unlock:
read_unlock(&tasklist_lock);
+ rcu_read_unlock();
out:
return error;
}
diff --git a/kernel/time.c b/kernel/time.c
index c6324d96009..804798005d1 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -136,6 +136,7 @@ static inline void warp_clock(void)
write_seqlock_irq(&xtime_lock);
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+ update_xtime_cache(0);
write_sequnlock_irq(&xtime_lock);
clock_was_set();
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 3d5fc0fd1cc..6f740d9f094 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -238,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
void clockevents_notify(unsigned long reason, void *arg)
{
- struct list_head *node, *tmp;
+ struct clock_event_device *dev, *tmp;
unsigned long flags;
+ int cpu;
raw_spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
@@ -250,8 +251,19 @@ void clockevents_notify(unsigned long reason, void *arg)
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
- list_for_each_safe(node, tmp, &clockevents_released)
- list_del(node);
+ list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+ list_del(&dev->list);
+ /*
+ * Now check whether the CPU has left unused per cpu devices
+ */
+ cpu = *((int *)arg);
+ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+ if (cpumask_test_cpu(cpu, dev->cpumask) &&
+ cpumask_weight(dev->cpumask) == 1) {
+ BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ list_del(&dev->list);
+ }
+ }
break;
default:
break;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index af4135f0582..7faaa32fbf4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,6 +165,13 @@ struct timespec raw_time;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
+static struct timespec xtime_cache __attribute__ ((aligned (16)));
+void update_xtime_cache(u64 nsec)
+{
+ xtime_cache = xtime;
+ timespec_add_ns(&xtime_cache, nsec);
+}
+
/* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
@@ -325,6 +332,8 @@ int do_settimeofday(struct timespec *tv)
xtime = *tv;
+ update_xtime_cache(0);
+
timekeeper.ntp_error = 0;
ntp_clear();
@@ -550,6 +559,7 @@ void __init timekeeping_init(void)
}
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
+ update_xtime_cache(0);
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -583,6 +593,7 @@ static int timekeeping_resume(struct sys_device *dev)
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts);
}
+ update_xtime_cache(0);
/* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
@@ -722,6 +733,7 @@ static void timekeeping_adjust(s64 offset)
timekeeper.ntp_error_shift;
}
+
/**
* logarithmic_accumulation - shifted accumulation of cycles
*
@@ -765,6 +777,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
return offset;
}
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -774,6 +787,7 @@ void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
+ u64 nsecs;
int shift = 0, maxshift;
/* Make sure we're fully resumed: */
@@ -839,6 +853,9 @@ void update_wall_time(void)
timekeeper.ntp_error += timekeeper.xtime_nsec <<
timekeeper.ntp_error_shift;
+ nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
+ update_xtime_cache(nsecs);
+
/* check to see if there is a new clocksource to use */
update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
@@ -875,13 +892,13 @@ void monotonic_to_bootbased(struct timespec *ts)
unsigned long get_seconds(void)
{
- return xtime.tv_sec;
+ return xtime_cache.tv_sec;
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
- return xtime;
+ return xtime_cache;
}
struct timespec current_kernel_time(void)
@@ -891,7 +908,8 @@ struct timespec current_kernel_time(void)
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime;
+
+ now = xtime_cache;
} while (read_seqretry(&xtime_lock, seq));
return now;
@@ -905,7 +923,8 @@ struct timespec get_monotonic_coarse(void)
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime;
+
+ now = xtime_cache;
mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d2681..15533b79239 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- new_base = __get_cpu_var(tvec_bases);
-
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ecab06547a..375f81a568d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -282,6 +282,18 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
+/* Check the name is good for event/group */
+static int check_event_name(const char *name)
+{
+ if (!isalpha(*name) && *name != '_')
+ return 0;
+ while (*++name != '\0') {
+ if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+ return 0;
+ }
+ return 1;
+}
+
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -293,10 +305,11 @@ static struct trace_probe *alloc_trace_probe(const char *group,
int nargs, int is_return)
{
struct trace_probe *tp;
+ int ret = -ENOMEM;
tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
if (!tp)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
if (symbol) {
tp->symbol = kstrdup(symbol, GFP_KERNEL);
@@ -312,14 +325,20 @@ static struct trace_probe *alloc_trace_probe(const char *group,
else
tp->rp.kp.pre_handler = kprobe_dispatcher;
- if (!event)
+ if (!event || !check_event_name(event)) {
+ ret = -EINVAL;
goto error;
+ }
+
tp->call.name = kstrdup(event, GFP_KERNEL);
if (!tp->call.name)
goto error;
- if (!group)
+ if (!group || !check_event_name(group)) {
+ ret = -EINVAL;
goto error;
+ }
+
tp->call.system = kstrdup(group, GFP_KERNEL);
if (!tp->call.system)
goto error;
@@ -330,7 +349,7 @@ error:
kfree(tp->call.name);
kfree(tp->symbol);
kfree(tp);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
static void free_probe_arg(struct probe_arg *arg)
@@ -695,10 +714,10 @@ static int create_trace_probe(int argc, char **argv)
if (!event) {
/* Make a new event name */
if (symbol)
- snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
is_return ? 'r' : 'p', symbol, offset);
else
- snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
+ snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
is_return ? 'r' : 'p', addr);
event = buf;
}
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f6693969287..a7974a552ca 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = {
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
+ .walk_stack = print_context_stack,
};
static int