aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/capability.c21
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/kexec.c66
-rw-r--r--kernel/lockdep.c14
-rw-r--r--kernel/lockdep_internals.h13
-rw-r--r--kernel/lockdep_proc.c15
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/nsproxy.c1
-rw-r--r--kernel/power/disk.c13
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/swap.c1
-rw-r--r--kernel/ptrace.c5
-rw-r--r--kernel/rcupdate.c1
-rw-r--r--kernel/resource.c88
-rw-r--r--kernel/sched.c54
-rw-r--r--kernel/sched_clock.c84
-rw-r--r--kernel/sched_features.h2
-rw-r--r--kernel/sched_rt.c15
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/smp.c10
-rw-r--r--kernel/softlockup.c3
-rw-r--r--kernel/spinlock.c3
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/time/tick-sched.c6
-rw-r--r--kernel/user_namespace.c1
-rw-r--r--kernel/utsname.c1
-rw-r--r--kernel/utsname_sysctl.c1
27 files changed, 263 insertions, 181 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index 0101e847603..33e51e78c2d 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -486,17 +486,22 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
return ret;
}
-int __capable(struct task_struct *t, int cap)
+/**
+ * capable - Determine if the current task has a superior capability in effect
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+int capable(int cap)
{
- if (security_capable(t, cap) == 0) {
- t->flags |= PF_SUPERPRIV;
+ if (has_capability(current, cap)) {
+ current->flags |= PF_SUPERPRIV;
return 1;
}
return 0;
}
-
-int capable(int cap)
-{
- return __capable(current, cap);
-}
EXPORT_SYMBOL(capable);
diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec4063014..75c64738763 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -918,8 +918,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/* mt-exec, de_thread() is waiting for us */
if (thread_group_leader(tsk) &&
- tsk->signal->notify_count < 0 &&
- tsk->signal->group_exit_task)
+ tsk->signal->group_exit_task &&
+ tsk->signal->notify_count < 0)
wake_up_process(tsk->signal->group_exit_task);
write_unlock_irq(&tasklist_lock);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8a4370e2a3..59f3f0df35d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -12,7 +12,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
@@ -77,7 +77,7 @@ int kexec_should_crash(struct task_struct *p)
*
* The code for the transition from the current kernel to the
* the new kernel is placed in the control_code_buffer, whose size
- * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
+ * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
* page of memory is necessary, but some architectures require more.
* Because this memory must be identity mapped in the transition from
* virtual to physical addresses it must live in the range
@@ -242,7 +242,7 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
*/
result = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
- get_order(KEXEC_CONTROL_CODE_SIZE));
+ get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
printk(KERN_ERR "Could not allocate control_code_buffer\n");
goto out;
@@ -317,7 +317,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
*/
result = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
- get_order(KEXEC_CONTROL_CODE_SIZE));
+ get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
printk(KERN_ERR "Could not allocate control_code_buffer\n");
goto out;
@@ -924,19 +924,14 @@ static int kimage_load_segment(struct kimage *image,
*/
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
-/*
- * A home grown binary mutex.
- * Nothing can wait so this mutex is safe to use
- * in interrupt context :)
- */
-static int kexec_lock;
+
+static DEFINE_MUTEX(kexec_mutex);
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
struct kexec_segment __user *segments,
unsigned long flags)
{
struct kimage **dest_image, *image;
- int locked;
int result;
/* We only trust the superuser with rebooting the system. */
@@ -972,8 +967,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
*
* KISS: always take the mutex.
*/
- locked = xchg(&kexec_lock, 1);
- if (locked)
+ if (!mutex_trylock(&kexec_mutex))
return -EBUSY;
dest_image = &kexec_image;
@@ -1015,8 +1009,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
image = xchg(dest_image, image);
out:
- locked = xchg(&kexec_lock, 0); /* Release the mutex */
- BUG_ON(!locked);
+ mutex_unlock(&kexec_mutex);
kimage_free(image);
return result;
@@ -1063,10 +1056,7 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
void crash_kexec(struct pt_regs *regs)
{
- int locked;
-
-
- /* Take the kexec_lock here to prevent sys_kexec_load
+ /* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -1074,8 +1064,7 @@ void crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- locked = xchg(&kexec_lock, 1);
- if (!locked) {
+ if (mutex_trylock(&kexec_mutex)) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
crash_setup_regs(&fixed_regs, regs);
@@ -1083,8 +1072,7 @@ void crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- locked = xchg(&kexec_lock, 0);
- BUG_ON(!locked);
+ mutex_unlock(&kexec_mutex);
}
}
@@ -1426,25 +1414,23 @@ static int __init crash_save_vmcoreinfo_init(void)
module_init(crash_save_vmcoreinfo_init)
-/**
- * kernel_kexec - reboot the system
- *
- * Move into place and start executing a preloaded standalone
- * executable. If nothing was preloaded return an error.
+/*
+ * Move into place and start executing a preloaded standalone
+ * executable. If nothing was preloaded return an error.
*/
int kernel_kexec(void)
{
int error = 0;
- if (xchg(&kexec_lock, 1))
+ if (!mutex_trylock(&kexec_mutex))
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
goto Unlock;
}
- if (kexec_image->preserve_context) {
#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context) {
mutex_lock(&pm_mutex);
pm_prepare_console();
error = freeze_processes();
@@ -1459,6 +1445,7 @@ int kernel_kexec(void)
error = disable_nonboot_cpus();
if (error)
goto Resume_devices;
+ device_pm_lock();
local_irq_disable();
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
@@ -1470,26 +1457,22 @@ int kernel_kexec(void)
error = device_power_down(PMSG_FREEZE);
if (error)
goto Enable_irqs;
- save_processor_state();
+ } else
#endif
- } else {
- blocking_notifier_call_chain(&reboot_notifier_list,
- SYS_RESTART, NULL);
- system_state = SYSTEM_RESTART;
- device_shutdown();
- sysdev_shutdown();
+ {
+ kernel_restart_prepare(NULL);
printk(KERN_EMERG "Starting new kernel\n");
machine_shutdown();
}
machine_kexec(kexec_image);
- if (kexec_image->preserve_context) {
#ifdef CONFIG_KEXEC_JUMP
- restore_processor_state();
+ if (kexec_image->preserve_context) {
device_power_up(PMSG_RESTORE);
Enable_irqs:
local_irq_enable();
+ device_pm_unlock();
enable_nonboot_cpus();
Resume_devices:
device_resume(PMSG_RESTORE);
@@ -1499,11 +1482,10 @@ int kernel_kexec(void)
Restore_console:
pm_restore_console();
mutex_unlock(&pm_mutex);
-#endif
}
+#endif
Unlock:
- xchg(&kexec_lock, 0);
-
+ mutex_unlock(&kexec_mutex);
return error;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1aa91fd6b06..dbda475b13b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -875,11 +875,11 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
if (!entry)
return 0;
- entry->class = this;
- entry->distance = distance;
if (!save_trace(&entry->trace))
return 0;
+ entry->class = this;
+ entry->distance = distance;
/*
* Since we never remove from the dependency list, the list can
* be walked lockless by other CPUs, it's only allocation
@@ -1759,11 +1759,10 @@ static void check_chain_key(struct task_struct *curr)
hlock = curr->held_locks + i;
if (chain_key != hlock->prev_chain_key) {
debug_locks_off();
- printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
+ WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n",
curr->lockdep_depth, i,
(unsigned long long)chain_key,
(unsigned long long)hlock->prev_chain_key);
- WARN_ON(1);
return;
}
id = hlock->class_idx - 1;
@@ -1778,11 +1777,10 @@ static void check_chain_key(struct task_struct *curr)
}
if (chain_key != curr->curr_chain_key) {
debug_locks_off();
- printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
+ WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n",
curr->lockdep_depth, i,
(unsigned long long)chain_key,
(unsigned long long)curr->curr_chain_key);
- WARN_ON(1);
}
#endif
}
@@ -2584,7 +2582,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->trylock = trylock;
hlock->read = read;
hlock->check = check;
- hlock->hardirqs_off = hardirqs_off;
+ hlock->hardirqs_off = !!hardirqs_off;
#ifdef CONFIG_LOCK_STAT
hlock->waittime_stamp = 0;
hlock->holdtime_stamp = sched_clock();
@@ -3031,7 +3029,7 @@ found_it:
stats = get_lock_stats(hlock_class(hlock));
if (point < ARRAY_SIZE(stats->contention_point))
- stats->contention_point[i]++;
+ stats->contention_point[point]++;
if (lock->cpu != smp_processor_id())
stats->bounces[bounce_contended + !!hlock->read]++;
put_lock_stats(stats);
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index 55db193d366..56b196932c0 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -50,8 +50,21 @@ extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int max_recursion_depth;
+#ifdef CONFIG_PROVE_LOCKING
extern unsigned long lockdep_count_forward_deps(struct lock_class *);
extern unsigned long lockdep_count_backward_deps(struct lock_class *);
+#else
+static inline unsigned long
+lockdep_count_forward_deps(struct lock_class *class)
+{
+ return 0;
+}
+static inline unsigned long
+lockdep_count_backward_deps(struct lock_class *class)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_DEBUG_LOCKDEP
/*
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index fa19aee604c..20dbcbf9c7d 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -82,7 +82,6 @@ static void print_name(struct seq_file *m, struct lock_class *class)
static int l_show(struct seq_file *m, void *v)
{
- unsigned long nr_forward_deps, nr_backward_deps;
struct lock_class *class = v;
struct lock_list *entry;
char c1, c2, c3, c4;
@@ -96,11 +95,10 @@ static int l_show(struct seq_file *m, void *v)
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
#endif
- nr_forward_deps = lockdep_count_forward_deps(class);
- seq_printf(m, " FD:%5ld", nr_forward_deps);
-
- nr_backward_deps = lockdep_count_backward_deps(class);
- seq_printf(m, " BD:%5ld", nr_backward_deps);
+#ifdef CONFIG_PROVE_LOCKING
+ seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
+ seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
+#endif
get_usage_chars(class, &c1, &c2, &c3, &c4);
seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
@@ -325,7 +323,9 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
nr_hardirq_read_unsafe++;
+#ifdef CONFIG_PROVE_LOCKING
sum_forward_deps += lockdep_count_forward_deps(class);
+#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
@@ -472,8 +472,9 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr)
{
unsigned long rem;
+ nr += 5; /* for display rounding */
rem = do_div(nr, 1000); /* XXX: do_div_signed */
- snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10);
+ snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
}
static void seq_time(struct seq_file *m, s64 time)
diff --git a/kernel/module.c b/kernel/module.c
index 08864d257eb..9db11911e04 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size)
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
-static struct module *load_module(void __user *umod,
+static noinline struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 21575fc46d0..1d3ef29a258 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -14,7 +14,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/nsproxy.h>
#include <linux/init_task.h>
#include <linux/mnt_namespace.h>
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f011e0870b5..bbd85c60f74 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
+#include <linux/ftrace.h>
#include "power.h"
@@ -255,7 +256,7 @@ static int create_image(int platform_mode)
int hibernation_snapshot(int platform_mode)
{
- int error;
+ int error, ftrace_save;
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
@@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode)
goto Close;
suspend_console();
+ ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_FREEZE);
if (error)
goto Recover_platform;
@@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode)
Resume_devices:
device_resume(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+ __ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
platform_end(platform_mode);
@@ -366,10 +369,11 @@ static int resume_target_kernel(void)
int hibernation_restore(int platform_mode)
{
- int error;
+ int error, ftrace_save;
pm_prepare_console();
suspend_console();
+ ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_QUIESCE);
if (error)
goto Finish;
@@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode)
platform_restore_cleanup(platform_mode);
device_resume(PMSG_RECOVER);
Finish:
+ __ftrace_enabled_restore(ftrace_save);
resume_console();
pm_restore_console();
return error;
@@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode)
int hibernation_platform_enter(void)
{
- int error;
+ int error, ftrace_save;
if (!hibernation_ops)
return -ENOSYS;
@@ -411,6 +416,7 @@ int hibernation_platform_enter(void)
goto Close;
suspend_console();
+ ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
@@ -445,6 +451,7 @@ int hibernation_platform_enter(void)
hibernation_ops->finish();
Resume_devices:
device_resume(PMSG_RESTORE);
+ __ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0b7476f5d2a..540b16b6856 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -21,6 +21,7 @@
#include <linux/freezer.h>
#include <linux/vmstat.h>
#include <linux/syscalls.h>
+#include <linux/ftrace.h>
#include "power.h"
@@ -310,7 +311,7 @@ static int suspend_enter(suspend_state_t state)
*/
int suspend_devices_and_enter(suspend_state_t state)
{
- int error;
+ int error, ftrace_save;
if (!suspend_ops)
return -ENOSYS;
@@ -321,6 +322,7 @@ int suspend_devices_and_enter(suspend_state_t state)
goto Close;
}
suspend_console();
+ ftrace_save = __ftrace_enabled_save();
suspend_test_start();
error = device_suspend(PMSG_SUSPEND);
if (error) {
@@ -352,6 +354,7 @@ int suspend_devices_and_enter(suspend_state_t state)
suspend_test_start();
device_resume(PMSG_RESUME);
suspend_test_finish("resume devices");
+ __ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
if (suspend_ops->end)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a0abf9a463f..80ccac849e4 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/file.h>
#include <linux/utsname.h>
-#include <linux/version.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 082b3fcb32a..356699a96d5 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -140,7 +140,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
if (!dumpable && !capable(CAP_SYS_PTRACE))
return -EPERM;
- return security_ptrace(current, task, mode);
+ return security_ptrace_may_access(task, mode);
}
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
@@ -499,8 +499,7 @@ repeat:
goto repeat;
}
- ret = security_ptrace(current->parent, current,
- PTRACE_MODE_ATTACH);
+ ret = security_ptrace_traceme(current->parent);
/*
* Set the ptrace bit in the process ptrace flags.
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f14f372cf6f..467d5940f62 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -77,6 +77,7 @@ void wakeme_after_rcu(struct rcu_head *head)
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
+void synchronize_rcu(void); /* Makes kernel-doc tools happy */
synchronize_rcu_xxx(synchronize_rcu, call_rcu)
EXPORT_SYMBOL_GPL(synchronize_rcu);
diff --git a/kernel/resource.c b/kernel/resource.c
index f5b518eabef..03d796c1b2e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -362,35 +362,21 @@ int allocate_resource(struct resource *root, struct resource *new,
EXPORT_SYMBOL(allocate_resource);
-/**
- * insert_resource - Inserts a resource in the resource tree
- * @parent: parent of the new resource
- * @new: new resource to insert
- *
- * Returns 0 on success, -EBUSY if the resource can't be inserted.
- *
- * This function is equivalent to request_resource when no conflict
- * happens. If a conflict happens, and the conflicting resources
- * entirely fit within the range of the new resource, then the new
- * resource is inserted and the conflicting resources become children of
- * the new resource.
+/*
+ * Insert a resource into the resource tree. If successful, return NULL,
+ * otherwise return the conflicting resource (compare to __request_resource())
*/
-int insert_resource(struct resource *parent, struct resource *new)
+static struct resource * __insert_resource(struct resource *parent, struct resource *new)
{
- int result;
struct resource *first, *next;
- write_lock(&resource_lock);
-
for (;; parent = first) {
- result = 0;
first = __request_resource(parent, new);
if (!first)
- goto out;
+ return first;
- result = -EBUSY;
if (first == parent)
- goto out;
+ return first;
if ((first->start > new->start) || (first->end < new->end))
break;
@@ -401,15 +387,13 @@ int insert_resource(struct resource *parent, struct resource *new)
for (next = first; ; next = next->sibling) {
/* Partial overlap? Bad, and unfixable */
if (next->start < new->start || next->end > new->end)
- goto out;
+ return next;
if (!next->sibling)
break;
if (next->sibling->start > new->end)
break;
}
- result = 0;
-
new->parent = parent;
new->sibling = next->sibling;
new->child = first;
@@ -426,10 +410,64 @@ int insert_resource(struct resource *parent, struct resource *new)
next = next->sibling;
next->sibling = new;
}
+ return NULL;
+}
- out:
+/**
+ * insert_resource - Inserts a resource in the resource tree
+ * @parent: parent of the new resource
+ * @new: new resource to insert
+ *
+ * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ *
+ * This function is equivalent to request_resource when no conflict
+ * happens. If a conflict happens, and the conflicting resources
+ * entirely fit within the range of the new resource, then the new
+ * resource is inserted and the conflicting resources become children of
+ * the new resource.
+ */
+int insert_resource(struct resource *parent, struct resource *new)
+{
+ struct resource *conflict;
+
+ write_lock(&resource_lock);
+ conflict = __insert_resource(parent, new);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+/**
+ * insert_resource_expand_to_fit - Insert a resource into the resource tree
+ * @root: root resource descriptor
+ * @new: new resource to insert
+ *
+ * Insert a resource into the resource tree, possibly expanding it in order
+ * to make it encompass any conflicting resources.
+ */
+void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
+{
+ if (new->parent)
+ return;
+
+ write_lock(&resource_lock);
+ for (;;) {
+ struct resource *conflict;
+
+ conflict = __insert_resource(root, new);
+ if (!conflict)
+ break;
+ if (conflict == root)
+ break;
+
+ /* Ok, expand resource to cover the conflict, then try again .. */
+ if (conflict->start < new->start)
+ new->start = conflict->start;
+ if (conflict->end > new->end)
+ new->end = conflict->end;
+
+ printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
+ }
write_unlock(&resource_lock);
- return result;
}
/**
diff --git a/kernel/sched.c b/kernel/sched.c
index d601fb0406c..9a1ddb84e26 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -808,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
* ratelimit for updating the group shares.
- * default: 0.5ms
+ * default: 0.25ms
*/
-const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+unsigned int sysctl_sched_shares_ratelimit = 250000;
/*
* period over which we measure -rt task cpu usage in us.
@@ -4669,6 +4669,52 @@ int __sched wait_for_completion_killable(struct completion *x)
}
EXPORT_SYMBOL(wait_for_completion_killable);
+/**
+ * try_wait_for_completion - try to decrement a completion without blocking
+ * @x: completion structure
+ *
+ * Returns: 0 if a decrement cannot be done without blocking
+ * 1 if a decrement succeeded.
+ *
+ * If a completion is being used as a counting completion,
+ * attempt to decrement the counter without blocking. This
+ * enables us to avoid waiting if the resource the completion
+ * is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+ int ret = 1;
+
+ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+ spin_unlock_irq(&x->wait.lock);
+ return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
+
+/**
+ * completion_done - Test to see if a completion has any waiters
+ * @x: completion structure
+ *
+ * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ * 1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+ int ret = 1;
+
+ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ spin_unlock_irq(&x->wait.lock);
+ return ret;
+}
+EXPORT_SYMBOL(completion_done);
+
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
@@ -5740,6 +5786,8 @@ static inline void sched_init_granularity(void)
sysctl_sched_latency = limit;
sysctl_sched_wakeup_granularity *= factor;
+
+ sysctl_sched_shares_ratelimit *= factor;
}
#ifdef CONFIG_SMP
@@ -8462,8 +8510,8 @@ struct task_group *sched_create_group(struct task_group *parent)
WARN_ON(!parent); /* root should already exist */
tg->parent = parent;
- list_add_rcu(&tg->siblings, &parent->children);
INIT_LIST_HEAD(&tg->children);
+ list_add_rcu(&tg->siblings, &parent->children);
spin_unlock_irqrestore(&task_group_lock, flags);
return tg;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 204991a0bfa..e8ab096ddfe 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -12,19 +12,17 @@
*
* Create a semi stable clock from a mixture of other events, including:
* - gtod
- * - jiffies
* - sched_clock()
* - explicit idle events
*
* We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window. This window
- * is set up using jiffies.
+ * making it monotonic and keeping it within an expected window.
*
* Furthermore, explicit sleep and wakeup hooks allow us to account for time
* that is otherwise invisible (TSC gets stopped).
*
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 1 jiffies difference).
+ * consistent between cpus (never more than 2 jiffies difference).
*/
#include <linux/sched.h>
#include <linux/percpu.h>
@@ -54,7 +52,6 @@ struct sched_clock_data {
*/
raw_spinlock_t lock;
- unsigned long tick_jiffies;
u64 tick_raw;
u64 tick_gtod;
u64 clock;
@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
void sched_clock_init(void)
{
u64 ktime_now = ktime_to_ns(ktime_get());
- unsigned long now_jiffies = jiffies;
int cpu;
for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu);
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
- scd->tick_jiffies = now_jiffies;
scd->tick_raw = 0;
scd->tick_gtod = ktime_now;
scd->clock = ktime_now;
@@ -92,46 +87,51 @@ void sched_clock_init(void)
}
/*
+ * min,max except they take wrapping into account
+ */
+
+static inline u64 wrap_min(u64 x, u64 y)
+{
+ return (s64)(x - y) < 0 ? x : y;
+}
+
+static inline u64 wrap_max(u64 x, u64 y)
+{
+ return (s64)(x - y) > 0 ? x : y;
+}
+
+/*
* update the percpu scd from the raw @now value
*
* - filter out backward motion
- * - use jiffies to generate a min,max window to clip the raw values
+ * - use the GTOD tick value to create a window to filter crazy TSC values
*/
static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
{
- unsigned long now_jiffies = jiffies;
- long delta_jiffies = now_jiffies - scd->tick_jiffies;
- u64 clock = scd->clock;
- u64 min_clock, max_clock;
s64 delta = now - scd->tick_raw;
+ u64 clock, min_clock, max_clock;
WARN_ON_ONCE(!irqs_disabled());
- min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
- if (unlikely(delta < 0)) {
- clock++;
- goto out;
- }
+ if (unlikely(delta < 0))
+ delta = 0;
- max_clock = min_clock + TICK_NSEC;
+ /*
+ * scd->clock = clamp(scd->tick_gtod + delta,
+ * max(scd->tick_gtod, scd->clock),
+ * scd->tick_gtod + TICK_NSEC);
+ */
- if (unlikely(clock + delta > max_clock)) {
- if (clock < max_clock)
- clock = max_clock;
- else
- clock++;
- } else {
- clock += delta;
- }
+ clock = scd->tick_gtod + delta;
+ min_clock = wrap_max(scd->tick_gtod, scd->clock);
+ max_clock = scd->tick_gtod + TICK_NSEC;
- out:
- if (unlikely(clock < min_clock))
- clock = min_clock;
+ clock = wrap_max(clock, min_clock);
+ clock = wrap_min(clock, max_clock);
- scd->tick_jiffies = now_jiffies;
scd->clock = clock;
- return clock;
+ return scd->clock;
}
static void lock_double_clock(struct sched_clock_data *data1,
@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu)
* larger time as the latest time for both
* runqueues. (this creates monotonic movement)
*/
- if (likely(remote_clock < this_clock)) {
+ if (likely((s64)(remote_clock - this_clock) < 0)) {
clock = this_clock;
scd->clock = clock;
} else {
@@ -207,14 +207,9 @@ void sched_clock_tick(void)
now = sched_clock();
__raw_spin_lock(&scd->lock);
- __update_sched_clock(scd, now);
- /*
- * update tick_gtod after __update_sched_clock() because that will
- * already observe 1 new jiffy; adding a new tick_gtod to that would
- * increase the clock 2 jiffies.
- */
scd->tick_raw = now;
scd->tick_gtod = now_gtod;
+ __update_sched_clock(scd, now);
__raw_spin_unlock(&scd->lock);
}
@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
*/
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
- struct sched_clock_data *scd = this_scd();
-
- /*
- * Override the previous timestamp and ignore all
- * sched_clock() deltas that occured while we idled,
- * and use the PM-provided delta_ns to advance the
- * rq clock:
- */
- __raw_spin_lock(&scd->lock);
- scd->clock += delta_ns;
- __raw_spin_unlock(&scd->lock);
-
+ sched_clock_tick();
touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 862b06bd560..9353ca78154 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -8,6 +8,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
SCHED_FEAT(HRTICK, 1)
SCHED_FEAT(DOUBLE_TICK, 0)
SCHED_FEAT(ASYM_GRAN, 1)
-SCHED_FEAT(LB_BIAS, 0)
+SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 6163e4cf885..552310798da 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
+ if (rt_rq->rt_nr_running)
+ resched_task(rq_of_rt_rq(rt_rq)->curr);
}
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -298,7 +300,7 @@ static void __disable_runtime(struct rq *rq)
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
s64 diff;
- if (iter == rt_rq)
+ if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
continue;
spin_lock(&iter->rt_runtime_lock);
@@ -438,9 +440,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
u64 runtime = sched_rt_runtime(rt_rq);
- if (runtime == RUNTIME_INF)
- return 0;
-
if (rt_rq->rt_throttled)
return rt_rq_throttled(rt_rq);
@@ -491,9 +490,11 @@ static void update_curr_rt(struct rq *rq)
rt_rq = rt_rq_of_se(rt_se);
spin_lock(&rt_rq->rt_runtime_lock);
- rt_rq->rt_time += delta_exec;
- if (sched_rt_runtime_exceeded(rt_rq))
- resched_task(curr);
+ if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+ rt_rq->rt_time += delta_exec;
+ if (sched_rt_runtime_exceeded(rt_rq))
+ resched_task(curr);
+ }
spin_unlock(&rt_rq->rt_runtime_lock);
}
}
diff --git a/kernel/signal.c b/kernel/signal.c
index c539f60c6f4..e661b01d340 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
struct siginfo info;
unsigned long flags;
struct sighand_struct *psig;
+ int ret = sig;
BUG_ON(sig == -1);
@@ -1402,7 +1403,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
* is implementation-defined: we do (if you don't want
* it, just use SIG_IGN instead).
*/
- tsk->exit_signal = -1;
+ ret = tsk->exit_signal = -1;
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
sig = -1;
}
@@ -1411,7 +1412,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);
- return sig;
+ return ret;
}
static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
diff --git a/kernel/smp.c b/kernel/smp.c
index 782e2b93e46..f362a855377 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -210,8 +210,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
{
struct call_single_data d;
unsigned long flags;
- /* prevent preemption and reschedule on another processor */
+ /* prevent preemption and reschedule on another processor,
+ as well as CPU removal */
int me = get_cpu();
+ int err = 0;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -220,7 +222,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
local_irq_save(flags);
func(info);
local_irq_restore(flags);
- } else {
+ } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
struct call_single_data *data = NULL;
if (!wait) {
@@ -236,10 +238,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
data->func = func;
data->info = info;
generic_exec_single(cpu, data);
+ } else {
+ err = -ENXIO; /* CPU not online */
}
put_cpu();
- return 0;
+ return err;
}
EXPORT_SYMBOL(smp_call_function_single);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index b75b492fbfc..cb838ee93a8 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -233,7 +233,8 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
do_each_thread(g, t) {
if (!--max_count)
goto unlock;
- if (t->state & TASK_UNINTERRUPTIBLE)
+ /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+ if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, now);
} while_each_thread(g, t);
unlock:
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 44baeea94ab..29ab20749dd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -290,7 +290,6 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
}
-
EXPORT_SYMBOL(_spin_lock_nested);
unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
@@ -312,7 +311,6 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
#endif
return flags;
}
-
EXPORT_SYMBOL(_spin_lock_irqsave_nested);
void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
@@ -322,7 +320,6 @@ void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
}
-
EXPORT_SYMBOL(_spin_lock_nest_lock);
#endif
diff --git a/kernel/sys.c b/kernel/sys.c
index c01858090a9..038a7bc0901 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -169,9 +169,9 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
- do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+ do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
- } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+ } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
user = current->user;
@@ -229,11 +229,11 @@ asmlinkage long sys_getpriority(int which, int who)
pgrp = find_vpid(who);
else
pgrp = task_pgrp(current);
- do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+ do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
niceval = 20 - task_nice(p);
if (niceval > retval)
retval = niceval;
- } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+ } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case PRIO_USER:
user = current->user;
@@ -274,7 +274,7 @@ void emergency_restart(void)
}
EXPORT_SYMBOL_GPL(emergency_restart);
-static void kernel_restart_prepare(char *cmd)
+void kernel_restart_prepare(char *cmd)
{
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
system_state = SYSTEM_RESTART;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f5da526424a..7a46bde78c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -643,17 +643,21 @@ void tick_setup_sched_timer(void)
ts->nohz_mode = NOHZ_MODE_HIGHRES;
#endif
}
+#endif /* HIGH_RES_TIMERS */
+#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
+# endif
ts->nohz_mode = NOHZ_MODE_INACTIVE;
}
-#endif /* HIGH_RES_TIMERS */
+#endif
/**
* Async notification about clocksource changes
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a9ab0596de4..532858fa5b8 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -6,7 +6,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 64d398f1244..815237a55af 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
-#include <linux/version.h>
#include <linux/err.h>
#include <linux/slab.h>
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index fe3a56c2256..4ab9659d269 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
-#include <linux/version.h>
#include <linux/sysctl.h>
static void *get_uts(ctl_table *table, int write)