aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/compat.c35
-rw-r--r--kernel/cpu.c38
-rw-r--r--kernel/delayacct.c15
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c12
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/irq/chip.c35
-rw-r--r--kernel/irq/handle.c4
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/kmod.c8
-rw-r--r--kernel/lockdep.c21
-rw-r--r--kernel/module.c35
-rw-r--r--kernel/mutex-debug.c2
-rw-r--r--kernel/nsproxy.c6
-rw-r--r--kernel/posix-cpu-timers.c27
-rw-r--r--kernel/power/disk.c37
-rw-r--r--kernel/power/swap.c3
-rw-r--r--kernel/printk.c21
-rw-r--r--kernel/sched.c18
-rw-r--r--kernel/signal.c15
-rw-r--r--kernel/spinlock.c21
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/sysctl.c34
-rw-r--r--kernel/taskstats.c94
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/tsacct.c17
-rw-r--r--kernel/unwind.c327
-rw-r--r--kernel/user.c11
-rw-r--r--kernel/workqueue.c6
30 files changed, 642 insertions, 223 deletions
diff --git a/kernel/compat.c b/kernel/compat.c
index 75573e5d27b..6952dd05730 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -678,7 +678,7 @@ int get_compat_sigevent(struct sigevent *event,
? -EFAULT : 0;
}
-long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask,
+long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
unsigned long bitmap_size)
{
int i, j;
@@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
}
return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
+
+asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
+ compat_ulong_t maxnode,
+ const compat_ulong_t __user *old_nodes,
+ const compat_ulong_t __user *new_nodes)
+{
+ unsigned long __user *old = NULL;
+ unsigned long __user *new = NULL;
+ nodemask_t tmp_mask;
+ unsigned long nr_bits;
+ unsigned long size;
+
+ nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
+ size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+ if (old_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
+ return -EFAULT;
+ old = compat_alloc_user_space(new_nodes ? size * 2 : size);
+ if (new_nodes)
+ new = old + size / sizeof(unsigned long);
+ if (copy_to_user(old, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ if (new_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
+ return -EFAULT;
+ if (new == NULL)
+ new = compat_alloc_user_space(size);
+ if (copy_to_user(new, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ return sys_migrate_pages(pid, nr_bits + 1, old, new);
+}
#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 32c96628463..272254f20d9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -19,7 +19,7 @@
static DEFINE_MUTEX(cpu_add_remove_lock);
static DEFINE_MUTEX(cpu_bitmask_lock);
-static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
+static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
@@ -58,8 +58,8 @@ void unlock_cpu_hotplug(void)
recursive_depth--;
return;
}
- mutex_unlock(&cpu_bitmask_lock);
recursive = NULL;
+ mutex_unlock(&cpu_bitmask_lock);
}
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
@@ -68,7 +68,11 @@ EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
- return blocking_notifier_chain_register(&cpu_chain, nb);
+ int ret;
+ mutex_lock(&cpu_add_remove_lock);
+ ret = raw_notifier_chain_register(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
+ return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -77,7 +81,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
- blocking_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_lock(&cpu_add_remove_lock);
+ raw_notifier_chain_unregister(&cpu_chain, nb);
+ mutex_unlock(&cpu_add_remove_lock);
}
EXPORT_SYMBOL(unregister_cpu_notifier);
@@ -126,7 +132,7 @@ static int _cpu_down(unsigned int cpu)
if (!cpu_online(cpu))
return -EINVAL;
- err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
+ err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
@@ -144,18 +150,18 @@ static int _cpu_down(unsigned int cpu)
p = __stop_machine_run(take_cpu_down, NULL, cpu);
mutex_unlock(&cpu_bitmask_lock);
- if (IS_ERR(p)) {
+ if (IS_ERR(p) || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
- err = PTR_ERR(p);
- goto out_allowed;
- }
-
- if (cpu_online(cpu))
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+ goto out_allowed;
+ }
goto out_thread;
+ }
/* Wait for it to sleep (leaving idle task). */
while (!idle_cpu(cpu))
@@ -169,7 +175,7 @@ static int _cpu_down(unsigned int cpu)
put_cpu();
/* CPU is completely dead: tell everyone. Too late to complain. */
- if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD,
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
@@ -206,7 +212,7 @@ static int __devinit _cpu_up(unsigned int cpu)
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
- ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+ ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
printk("%s: attempt to bring up CPU %u failed\n",
__FUNCTION__, cpu);
@@ -223,11 +229,11 @@ static int __devinit _cpu_up(unsigned int cpu)
BUG_ON(!cpu_online(cpu));
/* Now call notifier in preparation. */
- blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
out_notify:
if (ret != 0)
- blocking_notifier_call_chain(&cpu_chain,
+ raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED, hcpu);
return ret;
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 36752f124c6..66a0ea48751 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -66,6 +66,7 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
{
struct timespec ts;
s64 ns;
+ unsigned long flags;
do_posix_clock_monotonic_gettime(end);
ts = timespec_sub(*end, *start);
@@ -73,10 +74,10 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
if (ns < 0)
return;
- spin_lock(&current->delays->lock);
+ spin_lock_irqsave(&current->delays->lock, flags);
*total += ns;
(*count)++;
- spin_unlock(&current->delays->lock);
+ spin_unlock_irqrestore(&current->delays->lock, flags);
}
void __delayacct_blkio_start(void)
@@ -104,6 +105,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
s64 tmp;
struct timespec ts;
unsigned long t1,t2,t3;
+ unsigned long flags;
/* Though tsk->delays accessed later, early exit avoids
* unnecessary returning of other data
@@ -136,14 +138,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
- spin_lock(&tsk->delays->lock);
+ spin_lock_irqsave(&tsk->delays->lock, flags);
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
- spin_unlock(&tsk->delays->lock);
+ spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
return 0;
@@ -152,11 +154,12 @@ done:
__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
{
__u64 ret;
+ unsigned long flags;
- spin_lock(&tsk->delays->lock);
+ spin_lock_irqsave(&tsk->delays->lock, flags);
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
tsk->delays->swapin_delay);
- spin_unlock(&tsk->delays->lock);
+ spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index f250a5e3e28..06de6c4e8ca 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -128,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
flush_sigqueue(&tsk->pending);
if (sig) {
flush_sigqueue(&sig->shared_pending);
+ taskstats_tgid_free(sig);
__cleanup_signal(sig);
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7dc6140baac..8cdd3e72ba5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -830,7 +830,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
if (clone_flags & CLONE_THREAD) {
atomic_inc(&current->signal->count);
atomic_inc(&current->signal->live);
- taskstats_tgid_alloc(current->signal);
+ taskstats_tgid_alloc(current);
return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -897,7 +897,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
void __cleanup_signal(struct signal_struct *sig)
{
exit_thread_group_keys(sig);
- taskstats_tgid_free(sig);
kmem_cache_free(signal_cachep, sig);
}
@@ -984,6 +983,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
+ rt_mutex_init_task(p);
+
#ifdef CONFIG_TRACE_IRQFLAGS
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
@@ -1088,8 +1089,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->lockdep_recursion = 0;
#endif
- rt_mutex_init_task(p);
-
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
@@ -1316,9 +1315,8 @@ struct task_struct * __devinit fork_idle(int cpu)
struct pt_regs regs;
task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
- if (!task)
- return ERR_PTR(-ENOMEM);
- init_idle(task, cpu);
+ if (!IS_ERR(task))
+ init_idle(task, cpu);
return task;
}
diff --git a/kernel/futex.c b/kernel/futex.c
index b364e002619..93ef30ba209 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1507,6 +1507,13 @@ static int futex_fd(u32 __user *uaddr, int signal)
struct futex_q *q;
struct file *filp;
int ret, err;
+ static unsigned long printk_interval;
+
+ if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
+ printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
+ }
ret = -EINVAL;
if (!valid_signal(signal))
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 11c99697acf..ebfd24a4185 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -233,6 +233,8 @@ void irq_chip_set_defaults(struct irq_chip *chip)
chip->shutdown = chip->disable;
if (!chip->name)
chip->name = chip->typename;
+ if (!chip->end)
+ chip->end = dummy_irq_chip.end;
}
static inline void mask_ack_irq(struct irq_desc *desc, int irq)
@@ -499,7 +501,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
#endif /* CONFIG_SMP */
void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained)
+__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
{
struct irq_desc *desc;
unsigned long flags;
@@ -540,6 +543,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained)
desc->depth = 1;
}
desc->handle_irq = handle;
+ desc->name = name;
if (handle != handle_bad_irq && is_chained) {
desc->status &= ~IRQ_DISABLED;
@@ -555,30 +559,13 @@ set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle)
{
set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0);
+ __set_irq_handler(irq, handle, 0, NULL);
}
-/*
- * Get a descriptive string for the highlevel handler, for
- * /proc/interrupts output:
- */
-const char *
-handle_irq_name(irq_flow_handler_t handle)
+void
+set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle, const char *name)
{
- if (handle == handle_level_irq)
- return "level ";
- if (handle == handle_fasteoi_irq)
- return "fasteoi";
- if (handle == handle_edge_irq)
- return "edge ";
- if (handle == handle_simple_irq)
- return "simple ";
-#ifdef CONFIG_SMP
- if (handle == handle_percpu_irq)
- return "percpu ";
-#endif
- if (handle == handle_bad_irq)
- return "bad ";
-
- return NULL;
+ set_irq_chip(irq, chip);
+ __set_irq_handler(irq, handle, 0, name);
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 42aa6f1a3f0..a681912bc89 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -231,10 +231,10 @@ fastcall unsigned int __do_IRQ(unsigned int irq)
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
-
- spin_lock(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
+
+ spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 6879202afe9..b385878c6e8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -216,6 +216,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
{
struct irq_desc *desc = irq_desc + irq;
struct irqaction *old, **p;
+ const char *old_name = NULL;
unsigned long flags;
int shared = 0;
@@ -255,8 +256,10 @@ int setup_irq(unsigned int irq, struct irqaction *new)
* set the trigger type must match.
*/
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK))
+ ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) {
+ old_name = old->name;
goto mismatch;
+ }
#if defined(CONFIG_IRQ_PER_CPU)
/* All handlers must agree on per-cpuness */
@@ -322,11 +325,13 @@ int setup_irq(unsigned int irq, struct irqaction *new)
return 0;
mismatch:
- spin_unlock_irqrestore(&desc->lock, flags);
if (!(new->flags & IRQF_PROBE_SHARED)) {
printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
+ if (old_name)
+ printk(KERN_ERR "current handler: %s\n", old_name);
dump_stack();
}
+ spin_unlock_irqrestore(&desc->lock, flags);
return -EBUSY;
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bb4e29d924e..2b76dee2849 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -307,14 +307,14 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
return 0;
f = create_write_pipe();
- if (!f)
- return -ENOMEM;
+ if (IS_ERR(f))
+ return PTR_ERR(f);
*filp = f;
f = create_read_pipe(f);
- if (!f) {
+ if (IS_ERR(f)) {
free_write_pipe(*filp);
- return -ENOMEM;
+ return PTR_ERR(f);
}
sub_info.stdin = f;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 805a322a565..c9fefdb1a7d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -575,6 +575,8 @@ static noinline int print_circular_bug_tail(void)
return 0;
}
+#define RECURSION_LIMIT 40
+
static int noinline print_infinite_recursion_bug(void)
{
__raw_spin_unlock(&hash_lock);
@@ -595,7 +597,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
debug_atomic_inc(&nr_cyclic_check_recursions);
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
/*
* Check this lock's dependency list:
@@ -645,7 +647,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_forwards_checks);
@@ -684,7 +686,7 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
if (depth > max_recursion_depth)
max_recursion_depth = depth;
- if (depth >= 20)
+ if (depth >= RECURSION_LIMIT)
return print_infinite_recursion_bug();
debug_atomic_inc(&nr_find_usage_backwards_checks);
@@ -1079,7 +1081,8 @@ static int static_obj(void *obj)
*/
for_each_possible_cpu(i) {
start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
- end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
+ end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
+ + per_cpu_offset(i);
if ((addr >= start) && (addr < end))
return 1;
@@ -1174,7 +1177,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
* itself, so actual lookup of the hash should be once per lock object.
*/
static inline struct lock_class *
-register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
{
struct lockdep_subclass_key *key;
struct list_head *hash_head;
@@ -1246,7 +1249,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass)
out_unlock_set:
__raw_spin_unlock(&hash_lock);
- if (!subclass)
+ if (!subclass || force)
lock->class_cache = class;
DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
@@ -1934,7 +1937,7 @@ void trace_softirqs_off(unsigned long ip)
* Initialize a lock instance's lock-class mapping info:
*/
void lockdep_init_map(struct lockdep_map *lock, const char *name,
- struct lock_class_key *key)
+ struct lock_class_key *key, int subclass)
{
if (unlikely(!debug_locks))
return;
@@ -1954,6 +1957,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
lock->name = name;
lock->key = key;
lock->class_cache = NULL;
+ if (subclass)
+ register_lock_class(lock, subclass, 1);
}
EXPORT_SYMBOL_GPL(lockdep_init_map);
@@ -1992,7 +1997,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* Not cached yet or subclass?
*/
if (unlikely(!class)) {
- class = register_lock_class(lock, subclass);
+ class = register_lock_class(lock, subclass, 0);
if (!class)
return 0;
}
diff --git a/kernel/module.c b/kernel/module.c
index 67009bd56c5..45e01cb6010 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1086,22 +1086,35 @@ static int mod_sysfs_setup(struct module *mod,
goto out;
kobj_set_kset_s(&mod->mkobj, module_subsys);
mod->mkobj.mod = mod;
- err = kobject_register(&mod->mkobj.kobj);
+
+ /* delay uevent until full sysfs population */
+ kobject_init(&mod->mkobj.kobj);
+ err = kobject_add(&mod->mkobj.kobj);
if (err)
goto out;
+ mod->drivers_dir = kobject_add_dir(&mod->mkobj.kobj, "drivers");
+ if (!mod->drivers_dir)
+ goto out_unreg;
+
err = module_param_sysfs_setup(mod, kparam, num_params);
if (err)
- goto out_unreg;
+ goto out_unreg_drivers;
err = module_add_modinfo_attrs(mod);
if (err)
- goto out_unreg;
+ goto out_unreg_param;
+ kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
return 0;
+out_unreg_drivers:
+ kobject_unregister(mod->drivers_dir);
+out_unreg_param:
+ module_param_sysfs_remove(mod);
out_unreg:
- kobject_unregister(&mod->mkobj.kobj);
+ kobject_del(&mod->mkobj.kobj);
+ kobject_put(&mod->mkobj.kobj);
out:
return err;
}
@@ -1110,6 +1123,7 @@ static void mod_kobject_remove(struct module *mod)
{
module_remove_modinfo_attrs(mod);
module_param_sysfs_remove(mod);
+ kobject_unregister(mod->drivers_dir);
kobject_unregister(&mod->mkobj.kobj);
}
@@ -1342,7 +1356,7 @@ static void set_license(struct module *mod, const char *license)
if (!license_is_gpl_compatible(license)) {
if (!(tainted & TAINT_PROPRIETARY_MODULE))
- printk(KERN_WARNING "%s: module license '%s' taints"
+ printk(KERN_WARNING "%s: module license '%s' taints "
"kernel.\n", mod->name, license);
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
}
@@ -1718,7 +1732,7 @@ static struct module *load_module(void __user *umod,
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
if (strcmp(mod->name, "ndiswrapper") == 0)
- add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+ add_taint(TAINT_PROPRIETARY_MODULE);
if (strcmp(mod->name, "driverloader") == 0)
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
@@ -2275,11 +2289,14 @@ void print_modules(void)
void module_add_driver(struct module *mod, struct device_driver *drv)
{
+ int no_warn;
+
if (!mod || !drv)
return;
- /* Don't check return code; this call is idempotent */
- sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
+ /* Don't check return codes; these calls are idempotent */
+ no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
+ no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, drv->name);
}
EXPORT_SYMBOL(module_add_driver);
@@ -2288,6 +2305,8 @@ void module_remove_driver(struct device_driver *drv)
if (!drv)
return;
sysfs_remove_link(&drv->kobj, "module");
+ if (drv->owner && drv->owner->drivers_dir)
+ sysfs_remove_link(drv->owner->drivers_dir, drv->name);
}
EXPORT_SYMBOL(module_remove_driver);
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e3203c654dd..18651641a7b 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -91,7 +91,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
- lockdep_init_map(&lock->dep_map, name, key);
+ lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->owner = NULL;
lock->magic = lock;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 6ebdb82a0ce..674aceb7335 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -44,11 +44,9 @@ static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
{
struct nsproxy *ns;
- ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
- if (ns) {
- memcpy(ns, orig, sizeof(struct nsproxy));
+ ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
+ if (ns)
atomic_set(&ns->count, 1);
- }
return ns;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 479b16b44f7..7c3e1e6dfb5 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -88,6 +88,19 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
}
/*
+ * Divide and limit the result to res >= 1
+ *
+ * This is necessary to prevent signal delivery starvation, when the result of
+ * the division would be rounded down to 0.
+ */
+static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
+{
+ cputime_t res = cputime_div(time, div);
+
+ return max_t(cputime_t, res, 1);
+}
+
+/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
@@ -483,8 +496,8 @@ static void process_timer_rebalance(struct task_struct *p,
BUG();
break;
case CPUCLOCK_PROF:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(prof_ticks(t), left);
@@ -498,8 +511,8 @@ static void process_timer_rebalance(struct task_struct *p,
} while (t != p);
break;
case CPUCLOCK_VIRT:
- left = cputime_div(cputime_sub(expires.cpu, val.cpu),
- nthreads);
+ left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
+ nthreads);
do {
if (likely(!(t->flags & PF_EXITING))) {
ticks = cputime_add(virt_ticks(t), left);
@@ -515,6 +528,7 @@ static void process_timer_rebalance(struct task_struct *p,
case CPUCLOCK_SCHED:
nsleft = expires.sched - val.sched;
do_div(nsleft, nthreads);
+ nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
ns = t->sched_time + nsleft;
@@ -1159,12 +1173,13 @@ static void check_process_timers(struct task_struct *tsk,
prof_left = cputime_sub(prof_expires, utime);
prof_left = cputime_sub(prof_left, stime);
- prof_left = cputime_div(prof_left, nthreads);
+ prof_left = cputime_div_non_zero(prof_left, nthreads);
virt_left = cputime_sub(virt_expires, utime);
- virt_left = cputime_div(virt_left, nthreads);
+ virt_left = cputime_div_non_zero(virt_left, nthreads);
if (sched_expires) {
sched_left = sched_expires - sched_time;
do_div(sched_left, nthreads);
+ sched_left = max_t(unsigned long long, sched_left, 1);
} else {
sched_left = 0;
}
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d3a158a6031..b1fb7866b0b 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,7 +71,7 @@ static inline void platform_finish(void)
static int prepare_processes(void)
{
- int error;
+ int error = 0;
pm_prepare_console();
@@ -84,6 +84,12 @@ static int prepare_processes(void)
goto thaw;
}
+ if (pm_disk_mode == PM_DISK_TESTPROC) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto thaw;
+ }
+
/* Free memory before shutting down devices. */
if (!(error = swsusp_shrink_memory()))
return 0;
@@ -120,13 +126,21 @@ int pm_suspend_disk(void)
if (error)
return error;
+ if (pm_disk_mode == PM_DISK_TESTPROC)
+ goto Thaw;
+
suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error) {
resume_console();
printk("Some devices failed to suspend\n");
- unprepare_processes();
- return error;
+ goto Thaw;
+ }
+
+ if (pm_disk_mode == PM_DISK_TEST) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto Done;
}
pr_debug("PM: snapshotting memory.\n");
@@ -143,16 +157,17 @@ int pm_suspend_disk(void)
power_down(pm_disk_mode);
else {
swsusp_free();
- unprepare_processes();
- return error;
+ goto Thaw;
}
- } else
+ } else {
pr_debug("PM: Image restored successfully.\n");
+ }
swsusp_free();
Done:
device_resume();
resume_console();
+ Thaw:
unprepare_processes();
return error;
}
@@ -249,6 +264,8 @@ static const char * const pm_disk_modes[] = {
[PM_DISK_PLATFORM] = "platform",
[PM_DISK_SHUTDOWN] = "shutdown",
[PM_DISK_REBOOT] = "reboot",
+ [PM_DISK_TEST] = "test",
+ [PM_DISK_TESTPROC] = "testproc",
};
/**
@@ -303,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
}
}
if (mode) {
- if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
+ if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT ||
+ mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) {
pm_disk_mode = mode;
- else {
+ } else {
if (pm_ops && pm_ops->enter &&
(mode == pm_ops->pm_disk_mode))
pm_disk_mode = mode;
else
error = -EINVAL;
}
- } else
+ } else {
error = -EINVAL;
+ }
pr_debug("PM: suspend-to-disk mode set to '%s'\n",
pm_disk_modes[mode]);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 9b2ee5344de..1a3b0dd2c3f 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -425,7 +425,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio_set_pages_dirty(bio);
bio_put(bio);
} else {
- get_page(page);
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
diff --git a/kernel/printk.c b/kernel/printk.c
index f7d427ef503..66426552fbf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
+#include <linux/jiffies.h>
#include <asm/uaccess.h>
@@ -1101,3 +1102,23 @@ int printk_ratelimit(void)
printk_ratelimit_burst);
}
EXPORT_SYMBOL(printk_ratelimit);
+
+/**
+ * printk_timed_ratelimit - caller-controlled printk ratelimiting
+ * @caller_jiffies: pointer to caller's state
+ * @interval_msecs: minimum interval between prints
+ *
+ * printk_timed_ratelimit() returns true if more than @interval_msecs
+ * milliseconds have elapsed since the last time printk_timed_ratelimit()
+ * returned true.
+ */
+bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ unsigned int interval_msecs)
+{
+ if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) {
+ *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(printk_timed_ratelimit);
diff --git a/kernel/sched.c b/kernel/sched.c
index 094b5687eef..3399701c680 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -160,15 +160,6 @@
#define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio)
-/*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
- */
-
#define SCALE_PRIO(x, prio) \
max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
@@ -180,6 +171,15 @@ static unsigned int static_prio_timeslice(int static_prio)
return SCALE_PRIO(DEF_TIMESLICE, static_prio);
}
+/*
+ * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
+ * to time slice values: [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+
static inline unsigned int task_timeslice(struct task_struct *p)
{
return static_prio_timeslice(p->static_prio);
diff --git a/kernel/signal.c b/kernel/signal.c
index 7ed8d5304be..df18c167a2a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -267,18 +267,25 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
int override_rlimit)
{
struct sigqueue *q = NULL;
+ struct user_struct *user;
- atomic_inc(&t->user->sigpending);
+ /*
+ * In order to avoid problems with "switch_user()", we want to make
+ * sure that the compiler doesn't re-load "t->user"
+ */
+ user = t->user;
+ barrier();
+ atomic_inc(&user->sigpending);
if (override_rlimit ||
- atomic_read(&t->user->sigpending) <=
+ atomic_read(&user->sigpending) <=
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
q = kmem_cache_alloc(sigqueue_cachep, flags);
if (unlikely(q == NULL)) {
- atomic_dec(&t->user->sigpending);
+ atomic_dec(&user->sigpending);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
- q->user = get_uid(t->user);
+ q->user = get_uid(user);
}
return(q);
}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 476c3741511..2c6c2bf8551 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -293,6 +293,27 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_spin_lock_nested);
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_disable();
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ /*
+ * On lockdep we dont want the hand-coded irq-enable of
+ * _raw_spin_lock_flags() code, because lockdep assumes
+ * that interrupts are not re-enabled during lock-acquire:
+ */
+#ifdef CONFIG_PROVE_SPIN_LOCKING
+ _raw_spin_lock(lock);
+#else
+ _raw_spin_lock_flags(lock, &flags);
+#endif
+ return flags;
+}
+
+EXPORT_SYMBOL(_spin_lock_irqsave_nested);
#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7a3b2e75f04..d7306d0f3df 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -49,6 +49,7 @@ cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
+cond_syscall(sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
@@ -134,6 +135,7 @@ cond_syscall(sys_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
+cond_syscall(compat_sys_migrate_pages);
/* block-layer dependent */
cond_syscall(sys_bdflush);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8020fb273c4..09e569f4792 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,8 +136,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
@@ -542,6 +544,7 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_PROC_SYSCTL
{
.ctl_name = KERN_CADPID,
.procname = "cad_pid",
@@ -550,6 +553,7 @@ static ctl_table kern_table[] = {
.mode = 0600,
.proc_handler = &proc_do_cad_pid,
},
+#endif
{
.ctl_name = KERN_MAX_THREADS,
.procname = "threads-max",
@@ -1311,7 +1315,9 @@ repeat:
return -ENOTDIR;
if (get_user(n, name))
return -EFAULT;
- for ( ; table->ctl_name; table++) {
+ for ( ; table->ctl_name || table->procname; table++) {
+ if (!table->ctl_name)
+ continue;
if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
int error;
if (table->child) {
@@ -1528,7 +1534,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root,
int len;
mode_t mode;
- for (; table->ctl_name; table++) {
+ for (; table->ctl_name || table->procname; table++) {
/* Can't do anything without a proc name. */
if (!table->procname)
continue;
@@ -1575,7 +1581,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root,
static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
struct proc_dir_entry *de;
- for (; table->ctl_name; table++) {
+ for (; table->ctl_name || table->procname; table++) {
if (!(de = table->de))
continue;
if (de->mode & S_IFDIR) {
@@ -2676,13 +2682,33 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
{
static int msg_count;
+ struct __sysctl_args tmp;
+ int name[CTL_MAXNAME];
+ int i;
+
+ /* Read in the sysctl name for better debug message logging */
+ if (copy_from_user(&tmp, args, sizeof(tmp)))
+ return -EFAULT;
+ if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+ for (i = 0; i < tmp.nlen; i++)
+ if (get_user(name[i], tmp.name + i))
+ return -EFAULT;
+
+ /* Ignore accesses to kernel.version */
+ if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
+ goto out;
if (msg_count < 5) {
msg_count++;
printk(KERN_INFO
"warning: process `%s' used the removed sysctl "
- "system call\n", current->comm);
+ "system call with ", current->comm);
+ for (i = 0; i < tmp.nlen; i++)
+ printk("%d.", name[i]);
+ printk("\n");
}
+out:
return -ENOSYS;
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 5d6a8c54ee8..d3d28919d4b 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -77,7 +77,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
/*
* If new attributes are added, please revisit this allocation
*/
- skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL);
+ skb = genlmsg_new(size, GFP_KERNEL);
if (!skb)
return -ENOMEM;
@@ -85,13 +85,9 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
int seq = get_cpu_var(taskstats_seqnum)++;
put_cpu_var(taskstats_seqnum);
- reply = genlmsg_put(skb, 0, seq,
- family.id, 0, 0,
- cmd, family.version);
+ reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
} else
- reply = genlmsg_put(skb, info->snd_pid, info->snd_seq,
- family.id, 0, 0,
- cmd, family.version);
+ reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
if (reply == NULL) {
nlmsg_free(skb);
return -EINVAL;
@@ -174,21 +170,19 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
up_write(&listeners->sem);
}
-static int fill_pid(pid_t pid, struct task_struct *pidtsk,
+static int fill_pid(pid_t pid, struct task_struct *tsk,
struct taskstats *stats)
{
int rc = 0;
- struct task_struct *tsk = pidtsk;
- if (!pidtsk) {
- read_lock(&tasklist_lock);
+ if (!tsk) {
+ rcu_read_lock();
tsk = find_task_by_pid(pid);
- if (!tsk) {
- read_unlock(&tasklist_lock);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
return -ESRCH;
- }
- get_task_struct(tsk);
- read_unlock(&tasklist_lock);
} else
get_task_struct(tsk);
@@ -214,39 +208,30 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
}
-static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
+static int fill_tgid(pid_t tgid, struct task_struct *first,
struct taskstats *stats)
{
- struct task_struct *tsk, *first;
+ struct task_struct *tsk;
unsigned long flags;
+ int rc = -ESRCH;
/*
* Add additional stats from live tasks except zombie thread group
* leaders who are already counted with the dead tasks
*/
- first = tgidtsk;
- if (!first) {
- read_lock(&tasklist_lock);
+ rcu_read_lock();
+ if (!first)
first = find_task_by_pid(tgid);
- if (!first) {
- read_unlock(&tasklist_lock);
- return -ESRCH;
- }
- get_task_struct(first);
- read_unlock(&tasklist_lock);
- } else
- get_task_struct(first);
- /* Start with stats from dead tasks */
- spin_lock_irqsave(&first->signal->stats_lock, flags);
+ if (!first || !lock_task_sighand(first, &flags))
+ goto out;
+
if (first->signal->stats)
memcpy(stats, first->signal->stats, sizeof(*stats));
- spin_unlock_irqrestore(&first->signal->stats_lock, flags);
tsk = first;
- read_lock(&tasklist_lock);
do {
- if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+ if (tsk->exit_state)
continue;
/*
* Accounting subsystem can call its functions here to
@@ -257,15 +242,18 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
delayacct_add_tsk(stats, tsk);
} while_each_thread(first, tsk);
- read_unlock(&tasklist_lock);
- stats->version = TASKSTATS_VERSION;
+ unlock_task_sighand(first, &flags);
+ rc = 0;
+out:
+ rcu_read_unlock();
+
+ stats->version = TASKSTATS_VERSION;
/*
* Accounting subsytems can also add calls here to modify
* fields of taskstats.
*/
-
- return 0;
+ return rc;
}
@@ -273,7 +261,7 @@ static void fill_tgid_exit(struct task_struct *tsk)
{
unsigned long flags;
- spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+ spin_lock_irqsave(&tsk->sighand->siglock, flags);
if (!tsk->signal->stats)
goto ret;
@@ -285,7 +273,7 @@ static void fill_tgid_exit(struct task_struct *tsk)
*/
delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
- spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+ spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
return;
}
@@ -419,7 +407,7 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
return send_reply(rep_skb, info->snd_pid);
nla_put_failure:
- return genlmsg_cancel(rep_skb, reply);
+ rc = genlmsg_cancel(rep_skb, reply);
err:
nlmsg_free(rep_skb);
return rc;
@@ -461,24 +449,26 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
size_t size;
int is_thread_group;
struct nlattr *na;
- unsigned long flags;
- if (!family_registered || !tidstats)
+ if (!family_registered)
return;
- spin_lock_irqsave(&tsk->signal->stats_lock, flags);
- is_thread_group = tsk->signal->stats ? 1 : 0;
- spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
-
- rc = 0;
/*
* Size includes space for nested attributes
*/
size = nla_total_size(sizeof(u32)) +
nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
- if (is_thread_group)
- size = 2 * size; /* PID + STATS + TGID + STATS */
+ is_thread_group = (tsk->signal->stats != NULL);
+ if (is_thread_group) {
+ /* PID + STATS + TGID + STATS */
+ size = 2 * size;
+ /* fill the tsk->signal->stats structure */
+ fill_tgid_exit(tsk);
+ }
+
+ if (!tidstats)
+ return;
rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size);
if (rc < 0)
@@ -498,11 +488,8 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
goto send;
/*
- * tsk has/had a thread group so fill the tsk->signal->stats structure
* Doesn't matter if tsk is the leader or the last group member leaving
*/
-
- fill_tgid_exit(tsk);
if (!group_dead)
goto send;
@@ -519,7 +506,6 @@ send:
nla_put_failure:
genlmsg_cancel(rep_skb, reply);
- goto ret;
err_skb:
nlmsg_free(rep_skb);
ret:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 126bb30c4af..a99b2a6e6a0 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -57,7 +57,7 @@ static cycle_t jiffies_read(void)
struct clocksource clocksource_jiffies = {
.name = "jiffies",
- .rating = 0, /* lowest rating*/
+ .rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 47195fa0ec4..3afeaa3a73f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -161,9 +161,9 @@ void second_overflow(void)
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
} else {
- time_adjust = 0;
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
HZ) << TICK_LENGTH_SHIFT;
+ time_adjust = 0;
}
}
}
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index db443221ba5..96f77013d3f 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -36,7 +36,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
/* calculate task elapsed time in timespec */
do_posix_clock_monotonic_gettime(&uptime);
- ts = timespec_sub(uptime, current->group_leader->start_time);
+ ts = timespec_sub(uptime, tsk->start_time);
/* rebase elapsed time to usec */
ac_etime = timespec_to_ns(&ts);
do_div(ac_etime, NSEC_PER_USEC);
@@ -58,7 +58,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
stats->ac_uid = tsk->uid;
stats->ac_gid = tsk->gid;
stats->ac_pid = tsk->pid;
- stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0;
+ rcu_read_lock();
+ stats->ac_ppid = pid_alive(tsk) ?
+ rcu_dereference(tsk->real_parent)->tgid : 0;
+ rcu_read_unlock();
stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
stats->ac_minflt = tsk->min_flt;
@@ -77,13 +80,17 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
*/
void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{
+ struct mm_struct *mm;
+
/* convert pages-jiffies to Mbyte-usec */
stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
- if (p->mm) {
+ mm = get_task_mm(p);
+ if (mm) {
/* adjust to KB unit */
- stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB;
- stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB;
+ stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB;
+ stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB;
+ mmput(mm);
}
stats->read_char = p->rchar;
stats->write_char = p->wchar;
diff --git a/kernel/unwind.c b/kernel/unwind.c
index 2e2368607aa..ed0a21d4a90 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -11,13 +11,15 @@
#include <linux/unwind.h>
#include <linux/module.h>
-#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
extern char __start_unwind[], __end_unwind[];
+extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
#define MAX_STACK_DEPTH 8
@@ -100,6 +102,8 @@ static struct unwind_table {
} core, init;
const void *address;
unsigned long size;
+ const unsigned char *header;
+ unsigned long hdrsz;
struct unwind_table *link;
const char *name;
} root_table;
@@ -145,6 +149,10 @@ static struct unwind_table *find_table(unsigned long pc)
return table;
}
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType);
+
static void init_unwind_table(struct unwind_table *table,
const char *name,
const void *core_start,
@@ -152,14 +160,30 @@ static void init_unwind_table(struct unwind_table *table,
const void *init_start,
unsigned long init_size,
const void *table_start,
- unsigned long table_size)
+ unsigned long table_size,
+ const u8 *header_start,
+ unsigned long header_size)
{
+ const u8 *ptr = header_start + 4;
+ const u8 *end = header_start + header_size;
+
table->core.pc = (unsigned long)core_start;
table->core.range = core_size;
table->init.pc = (unsigned long)init_start;
table->init.range = init_size;
table->address = table_start;
table->size = table_size;
+ /* See if the linker provided table looks valid. */
+ if (header_size <= 4
+ || header_start[0] != 1
+ || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
+ || header_start[2] == DW_EH_PE_omit
+ || read_pointer(&ptr, end, header_start[2]) <= 0
+ || header_start[3] == DW_EH_PE_omit)
+ header_start = NULL;
+ table->hdrsz = header_size;
+ smp_wmb();
+ table->header = header_start;
table->link = NULL;
table->name = name;
}
@@ -169,7 +193,143 @@ void __init unwind_init(void)
init_unwind_table(&root_table, "kernel",
_text, _end - _text,
NULL, 0,
- __start_unwind, __end_unwind - __start_unwind);
+ __start_unwind, __end_unwind - __start_unwind,
+ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
+}
+
+static const u32 bad_cie, not_fde;
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static signed fde_pointer_type(const u32 *cie);
+
+struct eh_frame_hdr_table_entry {
+ unsigned long start, fde;
+};
+
+static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
+{
+ const struct eh_frame_hdr_table_entry *e1 = p1;
+ const struct eh_frame_hdr_table_entry *e2 = p2;
+
+ return (e1->start > e2->start) - (e1->start < e2->start);
+}
+
+static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
+{
+ struct eh_frame_hdr_table_entry *e1 = p1;
+ struct eh_frame_hdr_table_entry *e2 = p2;
+ unsigned long v;
+
+ v = e1->start;
+ e1->start = e2->start;
+ e2->start = v;
+ v = e1->fde;
+ e1->fde = e2->fde;
+ e2->fde = v;
+}
+
+static void __init setup_unwind_table(struct unwind_table *table,
+ void *(*alloc)(unsigned long))
+{
+ const u8 *ptr;
+ unsigned long tableSize = table->size, hdrSize;
+ unsigned n;
+ const u32 *fde;
+ struct {
+ u8 version;
+ u8 eh_frame_ptr_enc;
+ u8 fde_count_enc;
+ u8 table_enc;
+ unsigned long eh_frame_ptr;
+ unsigned int fde_count;
+ struct eh_frame_hdr_table_entry table[];
+ } __attribute__((__packed__)) *header;
+
+ if (table->header)
+ return;
+
+ if (table->hdrsz)
+ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
+ table->name);
+
+ if (tableSize & (sizeof(*fde) - 1))
+ return;
+
+ for (fde = table->address, n = 0;
+ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = cie_for_fde(fde, table);
+ signed ptrType;
+
+ if (cie == &not_fde)
+ continue;
+ if (cie == NULL
+ || cie == &bad_cie
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ return;
+ ptr = (const u8 *)(fde + 2);
+ if (!read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType))
+ return;
+ ++n;
+ }
+
+ if (tableSize || !n)
+ return;
+
+ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+ + 2 * n * sizeof(unsigned long);
+ header = alloc(hdrSize);
+ if (!header)
+ return;
+ header->version = 1;
+ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
+ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
+ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
+ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
+ % __alignof(typeof(header->fde_count)));
+ header->fde_count = n;
+
+ BUILD_BUG_ON(offsetof(typeof(*header), table)
+ % __alignof(typeof(*header->table)));
+ for (fde = table->address, tableSize = table->size, n = 0;
+ tableSize;
+ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
+ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
+
+ if (!fde[1])
+ continue; /* this is a CIE */
+ ptr = (const u8 *)(fde + 2);
+ header->table[n].start = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ fde_pointer_type(cie));
+ header->table[n].fde = (unsigned long)fde;
+ ++n;
+ }
+ WARN_ON(n != header->fde_count);
+
+ sort(header->table,
+ n,
+ sizeof(*header->table),
+ cmp_eh_frame_hdr_table_entries,
+ swap_eh_frame_hdr_table_entries);
+
+ table->hdrsz = hdrSize;
+ smp_wmb();
+ table->header = (const void *)header;
+}
+
+static void *__init balloc(unsigned long sz)
+{
+ return __alloc_bootmem_nopanic(sz,
+ sizeof(unsigned int),
+ __pa(MAX_DMA_ADDRESS));
+}
+
+void __init unwind_setup(void)
+{
+ setup_unwind_table(&root_table, balloc);
}
#ifdef CONFIG_MODULES
@@ -193,7 +353,8 @@ void *unwind_add_table(struct module *module,
init_unwind_table(table, module->name,
module->module_core, module->core_size,
module->module_init, module->init_size,
- table_start, table_size);
+ table_start, table_size,
+ NULL, 0);
if (last_table)
last_table->link = table;
@@ -303,6 +464,26 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
return value;
}
+static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
+{
+ const u32 *cie;
+
+ if (!*fde || (*fde & (sizeof(*fde) - 1)))
+ return &bad_cie;
+ if (!fde[1])
+ return &not_fde; /* this is a CIE */
+ if ((fde[1] & (sizeof(*fde) - 1))
+ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
+ return NULL; /* this is not a valid FDE */
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+ if (*cie <= sizeof(*cie) + 4
+ || *cie >= fde[1] - sizeof(*fde)
+ || (*cie & (sizeof(*cie) - 1))
+ || cie[1])
+ return NULL; /* this is not a (valid) CIE */
+ return cie;
+}
+
static unsigned long read_pointer(const u8 **pLoc,
const void *end,
signed ptrType)
@@ -610,49 +791,108 @@ int unwind(struct unwind_frame_info *frame)
unsigned i;
signed ptrType = -1;
uleb128_t retAddrReg = 0;
- struct unwind_table *table;
+ const struct unwind_table *table;
struct unwind_state state;
if (UNW_PC(frame) == 0)
return -EINVAL;
if ((table = find_table(pc)) != NULL
&& !(table->size & (sizeof(*fde) - 1))) {
- unsigned long tableSize = table->size;
-
- for (fde = table->address;
- tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
- tableSize -= sizeof(*fde) + *fde,
- fde += 1 + *fde / sizeof(*fde)) {
- if (!*fde || (*fde & (sizeof(*fde) - 1)))
- break;
- if (!fde[1])
- continue; /* this is a CIE */
- if ((fde[1] & (sizeof(*fde) - 1))
- || fde[1] > (unsigned long)(fde + 1)
- - (unsigned long)table->address)
- continue; /* this is not a valid FDE */
- cie = fde + 1 - fde[1] / sizeof(*fde);
- if (*cie <= sizeof(*cie) + 4
- || *cie >= fde[1] - sizeof(*fde)
- || (*cie & (sizeof(*cie) - 1))
- || cie[1]
- || (ptrType = fde_pointer_type(cie)) < 0) {
- cie = NULL; /* this is not a (valid) CIE */
- continue;
+ const u8 *hdr = table->header;
+ unsigned long tableSize;
+
+ smp_rmb();
+ if (hdr && hdr[0] == 1) {
+ switch(hdr[3] & DW_EH_PE_FORM) {
+ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
+ case DW_EH_PE_data2: tableSize = 2; break;
+ case DW_EH_PE_data4: tableSize = 4; break;
+ case DW_EH_PE_data8: tableSize = 8; break;
+ default: tableSize = 0; break;
}
+ ptr = hdr + 4;
+ end = hdr + table->hdrsz;
+ if (tableSize
+ && read_pointer(&ptr, end, hdr[1])
+ == (unsigned long)table->address
+ && (i = read_pointer(&ptr, end, hdr[2])) > 0
+ && i == (end - ptr) / (2 * tableSize)
+ && !((end - ptr) % (2 * tableSize))) {
+ do {
+ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
+
+ startLoc = read_pointer(&cur,
+ cur + tableSize,
+ hdr[3]);
+ if (pc < startLoc)
+ i /= 2;
+ else {
+ ptr = cur - tableSize;
+ i = (i + 1) / 2;
+ }
+ } while (startLoc && i > 1);
+ if (i == 1
+ && (startLoc = read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3])) != 0
+ && pc >= startLoc)
+ fde = (void *)read_pointer(&ptr,
+ ptr + tableSize,
+ hdr[3]);
+ }
+ }
+
+ if (fde != NULL) {
+ cie = cie_for_fde(fde, table);
ptr = (const u8 *)(fde + 2);
- startLoc = read_pointer(&ptr,
- (const u8 *)(fde + 1) + *fde,
- ptrType);
- endLoc = startLoc
- + read_pointer(&ptr,
- (const u8 *)(fde + 1) + *fde,
- ptrType & DW_EH_PE_indirect
- ? ptrType
- : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
- if (pc >= startLoc && pc < endLoc)
- break;
- cie = NULL;
+ if(cie != NULL
+ && cie != &bad_cie
+ && cie != &not_fde
+ && (ptrType = fde_pointer_type(cie)) >= 0
+ && read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType) == startLoc) {
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if(pc >= endLoc)
+ fde = NULL;
+ } else
+ fde = NULL;
+ }
+ if (fde == NULL) {
+ for (fde = table->address, tableSize = table->size;
+ cie = NULL, tableSize > sizeof(*fde)
+ && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde,
+ fde += 1 + *fde / sizeof(*fde)) {
+ cie = cie_for_fde(fde, table);
+ if (cie == &bad_cie) {
+ cie = NULL;
+ break;
+ }
+ if (cie == NULL
+ || cie == &not_fde
+ || (ptrType = fde_pointer_type(cie)) < 0)
+ continue;
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if (!startLoc)
+ continue;
+ if (!(ptrType & DW_EH_PE_indirect))
+ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ if (pc >= startLoc && pc < endLoc)
+ break;
+ }
}
}
if (cie != NULL) {
@@ -698,8 +938,11 @@ int unwind(struct unwind_frame_info *frame)
else {
retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
/* skip augmentation */
- if (((const char *)(cie + 2))[1] == 'z')
- ptr += get_uleb128(&ptr, end);
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ ptr += augSize;
+ }
if (ptr > end
|| retAddrReg >= ARRAY_SIZE(reg_info)
|| REG_INVALID(retAddrReg)
@@ -723,9 +966,7 @@ int unwind(struct unwind_frame_info *frame)
if (cie == NULL || fde == NULL) {
#ifdef CONFIG_FRAME_POINTER
unsigned long top, bottom;
-#endif
-#ifdef CONFIG_FRAME_POINTER
top = STACK_TOP(frame->task);
bottom = STACK_BOTTOM(frame->task);
# if FRAME_RETADDR_OFFSET < 0
diff --git a/kernel/user.c b/kernel/user.c
index 6408c042429..220e586127a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -187,6 +187,17 @@ void switch_uid(struct user_struct *new_user)
atomic_dec(&old_user->processes);
switch_uid_keyring(new_user);
current->user = new_user;
+
+ /*
+ * We need to synchronize with __sigqueue_alloc()
+ * doing a get_uid(p->user).. If that saw the old
+ * user value, we need to wait until it has exited
+ * its critical region before we can free the old
+ * structure.
+ */
+ smp_mb();
+ spin_unlock_wait(&current->sighand->siglock);
+
free_uid(old_user);
suid_keys(current);
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3df9bfc7ff7..17c2f03d2c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -99,7 +99,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
* @wq: workqueue to use
* @work: work to queue
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*
* We queue the work to the CPU it was submitted, but there is no
* guarantee that it will be processed by that CPU.
@@ -138,7 +138,7 @@ static void delayed_work_timer_fn(unsigned long __data)
* @work: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*/
int fastcall queue_delayed_work(struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)
@@ -169,7 +169,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work);
* @work: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns non-zero if it was successfully added.
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
*/
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)