diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 6 | ||||
-rw-r--r-- | kernel/auditsc.c | 5 | ||||
-rw-r--r-- | kernel/capability.c | 2 | ||||
-rw-r--r-- | kernel/compat.c | 35 | ||||
-rw-r--r-- | kernel/cpuset.c | 106 | ||||
-rw-r--r-- | kernel/exit.c | 40 | ||||
-rw-r--r-- | kernel/fork.c | 15 | ||||
-rw-r--r-- | kernel/futex.c | 10 | ||||
-rw-r--r-- | kernel/hrtimer.c | 20 | ||||
-rw-r--r-- | kernel/irq/chip.c | 6 | ||||
-rw-r--r-- | kernel/kexec.c | 8 | ||||
-rw-r--r-- | kernel/kfifo.c | 28 | ||||
-rw-r--r-- | kernel/kmod.c | 12 | ||||
-rw-r--r-- | kernel/lockdep.c | 17 | ||||
-rw-r--r-- | kernel/module.c | 26 | ||||
-rw-r--r-- | kernel/panic.c | 1 | ||||
-rw-r--r-- | kernel/params.c | 15 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 101 | ||||
-rw-r--r-- | kernel/posix-timers.c | 21 | ||||
-rw-r--r-- | kernel/ptrace.c | 1 | ||||
-rw-r--r-- | kernel/rcutorture.c | 8 | ||||
-rw-r--r-- | kernel/relay.c | 36 | ||||
-rw-r--r-- | kernel/rtmutex.c | 51 | ||||
-rw-r--r-- | kernel/sched.c | 74 | ||||
-rw-r--r-- | kernel/signal.c | 6 | ||||
-rw-r--r-- | kernel/softirq.c | 4 | ||||
-rw-r--r-- | kernel/softlockup.c | 3 | ||||
-rw-r--r-- | kernel/spinlock.c | 11 | ||||
-rw-r--r-- | kernel/stop_machine.c | 3 | ||||
-rw-r--r-- | kernel/sys.c | 9 | ||||
-rw-r--r-- | kernel/sys_ni.c | 5 | ||||
-rw-r--r-- | kernel/sysctl.c | 13 | ||||
-rw-r--r-- | kernel/timer.c | 53 | ||||
-rw-r--r-- | kernel/unwind.c | 4 |
34 files changed, 511 insertions, 244 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2a7c933651c..f4330acead4 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -483,10 +483,14 @@ static void do_acct_process(struct file *file) ac.ac_ppid = current->parent->tgid; #endif - read_lock(&tasklist_lock); /* pin current->signal */ + mutex_lock(&tty_mutex); + /* FIXME: Whoever is responsible for current->signal locking needs + to use the same locking all over the kernel and document it */ + read_lock(&tasklist_lock); ac.ac_tty = current->signal->tty ? old_encode_dev(tty_devnum(current->signal->tty)) : 0; read_unlock(&tasklist_lock); + mutex_unlock(&tty_mutex); spin_lock_irq(¤t->sighand->siglock); ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fb83c5cb8c3..10514763175 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -817,6 +817,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_format(ab, " success=%s exit=%ld", (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", context->return_code); + + mutex_lock(&tty_mutex); if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) tty = tsk->signal->tty->name; else @@ -838,6 +840,9 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->gid, context->euid, context->suid, context->fsuid, context->egid, context->sgid, context->fsgid, tty); + + mutex_unlock(&tty_mutex); + audit_log_task_info(ab, tsk); if (context->filterkey) { audit_log_format(ab, " key="); diff --git a/kernel/capability.c b/kernel/capability.c index c7685ad00a9..edb845a6e84 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective, int found = 0; do_each_thread(g, target) { - if (target == current || target->pid == 1) + if (target == current || is_init(target)) continue; found = 1; if (security_capset_check(target, effective, inheritable, diff --git a/kernel/compat.c b/kernel/compat.c index 126dee9530a..b4fbd838cd7 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -22,9 +22,12 @@ #include <linux/security.h> #include <linux/timex.h> #include <linux/migrate.h> +#include <linux/posix-timers.h> #include <asm/uaccess.h> +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || @@ -601,6 +604,30 @@ long compat_sys_clock_getres(clockid_t which_clock, return err; } +static long compat_clock_nanosleep_restart(struct restart_block *restart) +{ + long err; + mm_segment_t oldfs; + struct timespec tu; + struct compat_timespec *rmtp = (struct compat_timespec *)(restart->arg1); + + restart->arg1 = (unsigned long) &tu; + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = clock_nanosleep_restart(restart); + set_fs(oldfs); + + if ((err == -ERESTART_RESTARTBLOCK) && rmtp && + put_compat_timespec(&tu, rmtp)) + return -EFAULT; + + if (err == -ERESTART_RESTARTBLOCK) { + restart->fn = compat_clock_nanosleep_restart; + restart->arg1 = (unsigned long) rmtp; + } + return err; +} + long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp) @@ -608,6 +635,7 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, long err; mm_segment_t oldfs; struct timespec in, out; + struct restart_block *restart; if (get_compat_timespec(&in, rqtp)) return -EFAULT; @@ -618,9 +646,16 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, (struct timespec __user *) &in, (struct timespec __user *) &out); set_fs(oldfs); + if ((err == -ERESTART_RESTARTBLOCK) && rmtp && put_compat_timespec(&out, rmtp)) return -EFAULT; + + if (err == -ERESTART_RESTARTBLOCK) { + restart = ¤t_thread_info()->restart_block; + restart->fn = compat_clock_nanosleep_restart; + restart->arg1 = (unsigned long) rmtp; + } return err; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1b32c2c04c1..8c3c400cce9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -240,7 +240,7 @@ static struct super_block *cpuset_sb; * A cpuset can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cpusets is empty. Since all * tasks in the system use _some_ cpuset, and since there is always at - * least one task in the system (init, pid == 1), therefore, top_cpuset + * least one task in the system (init), therefore, top_cpuset * always has either children cpusets and/or using tasks. So we don't * need a special hack to ensure that top_cpuset cannot be deleted. * @@ -912,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) int fudge; int retval; + /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ + if (cs == &top_cpuset) + return -EACCES; + trialcs = *cs; retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) @@ -1221,7 +1225,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) task_lock(tsk); oldcs = tsk->cpuset; - if (!oldcs) { + /* + * After getting 'oldcs' cpuset ptr, be sure still not exiting. + * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack + * then fail this attach_task(), to avoid breaking top_cpuset.count. + */ + if (tsk->flags & PF_EXITING) { task_unlock(tsk); mutex_unlock(&callback_mutex); put_task_struct(tsk); @@ -2036,33 +2045,104 @@ out: return err; } +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) /* - * The top_cpuset tracks what CPUs and Memory Nodes are online, - * period. This is necessary in order to make cpusets transparent - * (of no affect) on systems that are actively using CPU hotplug - * but making no active use of cpusets. - * - * This handles CPU hotplug (cpuhp) events. If someday Memory - * Nodes can be hotplugged (dynamically changing node_online_map) - * then we should handle that too, perhaps in a similar way. + * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs + * or memory nodes, we need to walk over the cpuset hierarchy, + * removing that CPU or node from all cpusets. If this removes the + * last CPU or node from a cpuset, then the guarantee_online_cpus() + * or guarantee_online_mems() code will use that emptied cpusets + * parent online CPUs or nodes. Cpusets that were already empty of + * CPUs or nodes are left empty. + * + * This routine is intentionally inefficient in a couple of regards. + * It will check all cpusets in a subtree even if the top cpuset of + * the subtree has no offline CPUs or nodes. It checks both CPUs and + * nodes, even though the caller could have been coded to know that + * only one of CPUs or nodes needed to be checked on a given call. + * This was done to minimize text size rather than cpu cycles. + * + * Call with both manage_mutex and callback_mutex held. + * + * Recursive, on depth of cpuset subtree. */ -#ifdef CONFIG_HOTPLUG_CPU -static int cpuset_handle_cpuhp(struct notifier_block *nb, - unsigned long phase, void *cpu) +static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) +{ + struct cpuset *c; + + /* Each of our child cpusets mems must be online */ + list_for_each_entry(c, &cur->children, sibling) { + guarantee_online_cpus_mems_in_subtree(c); + if (!cpus_empty(c->cpus_allowed)) + guarantee_online_cpus(c, &c->cpus_allowed); + if (!nodes_empty(c->mems_allowed)) + guarantee_online_mems(c, &c->mems_allowed); + } +} + +/* + * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track + * cpu_online_map and node_online_map. Force the top cpuset to track + * whats online after any CPU or memory node hotplug or unplug event. + * + * To ensure that we don't remove a CPU or node from the top cpuset + * that is currently in use by a child cpuset (which would violate + * the rule that cpusets must be subsets of their parent), we first + * call the recursive routine guarantee_online_cpus_mems_in_subtree(). + * + * Since there are two callers of this routine, one for CPU hotplug + * events and one for memory node hotplug events, we could have coded + * two separate routines here. We code it as a single common routine + * in order to minimize text size. + */ + +static void common_cpu_mem_hotplug_unplug(void) { mutex_lock(&manage_mutex); mutex_lock(&callback_mutex); + guarantee_online_cpus_mems_in_subtree(&top_cpuset); top_cpuset.cpus_allowed = cpu_online_map; + top_cpuset.mems_allowed = node_online_map; mutex_unlock(&callback_mutex); mutex_unlock(&manage_mutex); +} +#endif + +#ifdef CONFIG_HOTPLUG_CPU +/* + * The top_cpuset tracks what CPUs and Memory Nodes are online, + * period. This is necessary in order to make cpusets transparent + * (of no affect) on systems that are actively using CPU hotplug + * but making no active use of cpusets. + * + * This routine ensures that top_cpuset.cpus_allowed tracks + * cpu_online_map on each CPU hotplug (cpuhp) event. + */ +static int cpuset_handle_cpuhp(struct notifier_block *nb, + unsigned long phase, void *cpu) +{ + common_cpu_mem_hotplug_unplug(); return 0; } #endif +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * Keep top_cpuset.mems_allowed tracking node_online_map. + * Call this routine anytime after you change node_online_map. + * See also the previous routine cpuset_handle_cpuhp(). + */ + +void cpuset_track_online_nodes() +{ + common_cpu_mem_hotplug_unplug(); +} +#endif + /** * cpuset_init_smp - initialize cpus_allowed * diff --git a/kernel/exit.c b/kernel/exit.c index d891883420f..c189de2927a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -38,6 +38,7 @@ #include <linux/pipe_fs_i.h> #include <linux/audit.h> /* for audit_free() */ #include <linux/resource.h> +#include <linux/blkdev.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -219,7 +220,7 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p == ignored_task || p->exit_state - || p->real_parent->pid == 1) + || is_init(p->real_parent)) continue; if (process_group(p->real_parent) != pgrp && p->real_parent->signal->session == p->signal->session) { @@ -249,17 +250,6 @@ static int has_stopped_jobs(int pgrp) do_each_task_pid(pgrp, PIDTYPE_PGID, p) { if (p->state != TASK_STOPPED) continue; - - /* If p is stopped by a debugger on a signal that won't - stop it, then don't count p as stopped. This isn't - perfect but it's a good approximation. */ - if (unlikely (p->ptrace) - && p->exit_code != SIGSTOP - && p->exit_code != SIGTSTP - && p->exit_code != SIGTTOU - && p->exit_code != SIGTTIN) - continue; - retval = 1; break; } while_each_task_pid(pgrp, PIDTYPE_PGID, p); @@ -292,9 +282,7 @@ static void reparent_to_init(void) /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; - if ((current->policy == SCHED_NORMAL || - current->policy == SCHED_BATCH) - && (task_nice(current) < 0)) + if (!has_rt_policy(current) && (task_nice(current) < 0)) set_user_nice(current, 0); /* cpus_allowed? */ /* rt_priority? */ @@ -487,6 +475,18 @@ void fastcall put_files_struct(struct files_struct *files) EXPORT_SYMBOL(put_files_struct); +void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +{ + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} +EXPORT_SYMBOL(reset_files_struct); + static inline void __exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; @@ -954,15 +954,15 @@ fastcall NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); - BUG_ON(tsk->flags & PF_DEAD); - tsk->flags |= PF_DEAD; + /* causes final put_task_struct in finish_task_switch(). */ + tsk->state = TASK_DEAD; schedule(); BUG(); /* Avoid "noreturn function does return". */ - for (;;) ; + for (;;) + cpu_relax(); /* For when BUG is null */ } EXPORT_SYMBOL_GPL(do_exit); @@ -971,7 +971,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) { if (comp) complete(comp); - + do_exit(code); } diff --git a/kernel/fork.c b/kernel/fork.c index a0dad84567c..1c999f3e0b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -183,7 +183,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); +#ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; +#endif tsk->splice_pipe = NULL; return tsk; } @@ -1061,7 +1063,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + p->hardirqs_enabled = 1; +#else p->hardirqs_enabled = 0; +#endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; @@ -1144,7 +1150,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ - p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ @@ -1167,6 +1172,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); + /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ + p->ioprio = current->ioprio; + /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. @@ -1226,11 +1234,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } - /* - * inherit ioprio - */ - p->ioprio = current->ioprio; - if (likely(p->pid)) { add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) diff --git a/kernel/futex.c b/kernel/futex.c index 9d260e838cf..4b6770e9806 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -389,7 +389,7 @@ static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_pid(pid); if (!p) goto out_unlock; @@ -403,7 +403,7 @@ static struct task_struct * futex_find_get_task(pid_t pid) } get_task_struct(p); out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return p; } @@ -1624,7 +1624,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, struct task_struct *p; ret = -ESRCH; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_pid(pid); if (!p) goto err_unlock; @@ -1633,7 +1633,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; - read_unlock(&tasklist_lock); + rcu_read_unlock(); } if (put_user(sizeof(*head), len_ptr)) @@ -1641,7 +1641,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, return put_user(head, head_ptr); err_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 21c38a7e666..d0ba190dfeb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -693,7 +693,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod return t->task == NULL; } -static long __sched nanosleep_restart(struct restart_block *restart) +long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; struct timespec __user *rmtp; @@ -702,13 +702,13 @@ static long __sched nanosleep_restart(struct restart_block *restart) restart->fn = do_no_restart_syscall; - hrtimer_init(&t.timer, restart->arg3, HRTIMER_ABS); - t.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; + hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS); + t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; if (do_nanosleep(&t, HRTIMER_ABS)) return 0; - rmtp = (struct timespec __user *) restart->arg2; + rmtp = (struct timespec __user *) restart->arg1; if (rmtp) { time = ktime_sub(t.timer.expires, t.timer.base->get_time()); if (time.tv64 <= 0) @@ -718,7 +718,7 @@ static long __sched nanosleep_restart(struct restart_block *restart) return -EFAULT; } - restart->fn = nanosleep_restart; + restart->fn = hrtimer_nanosleep_restart; /* The other values in restart are already filled in */ return -ERESTART_RESTARTBLOCK; @@ -751,11 +751,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, } restart = ¤t_thread_info()->restart_block; - restart->fn = nanosleep_restart; - restart->arg0 = t.timer.expires.tv64 & 0xFFFFFFFF; - restart->arg1 = t.timer.expires.tv64 >> 32; - restart->arg2 = (unsigned long) rmtp; - restart->arg3 = (unsigned long) t.timer.base->index; + restart->fn = hrtimer_nanosleep_restart; + restart->arg0 = (unsigned long) t.timer.base->index; + restart->arg1 = (unsigned long) rmtp; + restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF; + restart->arg3 = t.timer.expires.tv64 >> 32; return -ERESTART_RESTARTBLOCK; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ac1f850d493..736cb0bd498 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -40,10 +40,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) spin_lock_irqsave(&desc->lock, flags); irq_chip_set_defaults(chip); desc->chip = chip; - /* - * For compatibility only: - */ - desc->chip = chip; spin_unlock_irqrestore(&desc->lock, flags); return 0; @@ -146,7 +142,7 @@ static void default_disable(unsigned int irq) struct irq_desc *desc = irq_desc + irq; if (!(desc->status & IRQ_DELAYED_DISABLE)) - irq_desc[irq].chip->mask(irq); + desc->chip->mask(irq); } /* diff --git a/kernel/kexec.c b/kernel/kexec.c index 50087ecf337..fcdd5d2bc3f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -40,7 +40,7 @@ struct resource crashk_res = { int kexec_should_crash(struct task_struct *p) { - if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) + if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) return 1; return 0; } @@ -995,7 +995,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, image = xchg(dest_image, image); out: - xchg(&kexec_lock, 0); /* Release the mutex */ + locked = xchg(&kexec_lock, 0); /* Release the mutex */ + BUG_ON(!locked); kimage_free(image); return result; @@ -1061,7 +1062,8 @@ void crash_kexec(struct pt_regs *regs) machine_crash_shutdown(&fixed_regs); machine_kexec(kexec_crash_image); } - xchg(&kexec_lock, 0); + locked = xchg(&kexec_lock, 0); + BUG_ON(!locked); } } diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 64ab045c3d9..5d1d907378a 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -122,6 +122,13 @@ unsigned int __kfifo_put(struct kfifo *fifo, len = min(len, fifo->size - fifo->in + fifo->out); + /* + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. + */ + + smp_mb(); + /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); @@ -129,6 +136,13 @@ unsigned int __kfifo_put(struct kfifo *fifo, /* then put the rest (if any) at the beginning of the buffer */ memcpy(fifo->buffer, buffer + l, len - l); + /* + * Ensure that we add the bytes to the kfifo -before- + * we update the fifo->in index. + */ + + smp_wmb(); + fifo->in += len; return len; @@ -154,6 +168,13 @@ unsigned int __kfifo_get(struct kfifo *fifo, len = min(len, fifo->in - fifo->out); + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. + */ + + smp_rmb(); + /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); @@ -161,6 +182,13 @@ unsigned int __kfifo_get(struct kfifo *fifo, /* then get the rest (if any) from the beginning of the buffer */ memcpy(buffer + l, fifo->buffer, len - l); + /* + * Ensure that we remove the bytes from the kfifo -before- + * we update the fifo->out index. + */ + + smp_mb(); + fifo->out += len; return len; diff --git a/kernel/kmod.c b/kernel/kmod.c index 5c470c57fb5..842f8015d7f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -176,6 +176,8 @@ static int wait_for_helper(void *data) if (pid < 0) { sub_info->retval = pid; } else { + int ret; + /* * Normally it is bogus to call wait4() from in-kernel because * wait4() wants to write the exit code to a userspace address. @@ -185,7 +187,15 @@ static int wait_for_helper(void *data) * * Thus the __user pointer cast is valid here. */ - sys_wait4(pid, (int __user *) &sub_info->retval, 0, NULL); + sys_wait4(pid, (int __user *)&ret, 0, NULL); + + /* + * If ret is 0, either ____call_usermodehelper failed and the + * real error code is already in sub_info->retval or + * sub_info->retval is 0 anyway, so don't mess with it then. + */ + if (ret) + sub_info->retval = ret; } complete(sub_info->complete); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c088e5542e8..e596525669e 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -36,6 +36,7 @@ #include <linux/stacktrace.h> #include <linux/debug_locks.h> #include <linux/irqflags.h> +#include <linux/utsname.h> #include <asm/sections.h> @@ -121,8 +122,8 @@ static struct list_head chainhash_table[CHAINHASH_SIZE]; * unique. */ #define iterate_chain_key(key1, key2) \ - (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \ - ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \ + (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ + ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ (key2)) void lockdep_off(void) @@ -515,6 +516,13 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth) return 0; } +static void print_kernel_version(void) +{ + printk("%s %.*s\n", system_utsname.release, + (int)strcspn(system_utsname.version, " "), + system_utsname.version); +} + /* * When a circular dependency is detected, print the * header first: @@ -531,6 +539,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) printk("\n=======================================================\n"); printk( "[ INFO: possible circular locking dependency detected ]\n"); + print_kernel_version(); printk( "-------------------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, curr->pid); @@ -712,6 +721,7 @@ print_bad_irq_dependency(struct task_struct *curr, printk("\n======================================================\n"); printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", irqclass, irqclass); + print_kernel_version(); printk( "------------------------------------------------------\n"); printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, curr->pid, @@ -793,6 +803,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, printk("\n=============================================\n"); printk( "[ INFO: possible recursive locking detected ]\n"); + print_kernel_version(); printk( "---------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, curr->pid); @@ -1375,6 +1386,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, printk("\n=========================================================\n"); printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); + print_kernel_version(); printk( "---------------------------------------------------------\n"); printk("%s/%d just changed the state of lock:\n", curr->comm, curr->pid); @@ -1469,6 +1481,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, printk("\n=================================\n"); printk( "[ INFO: inconsistent lock state ]\n"); + print_kernel_version(); printk( "---------------------------------\n"); printk("inconsistent {%s} -> {%s} usage.\n", diff --git a/kernel/module.c b/kernel/module.c index b7fe6e84096..05625d5dc75 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -933,6 +933,15 @@ static ssize_t module_sect_show(struct module_attribute *mattr, return sprintf(buf, "0x%lx\n", sattr->address); } +static void free_sect_attrs(struct module_sect_attrs *sect_attrs) +{ + int section; + + for (section = 0; section < sect_attrs->nsections; section++) + kfree(sect_attrs->attrs[section].name); + kfree(sect_attrs); +} + static void add_sect_attrs(struct module *mod, unsigned int nsect, char *secstrings, Elf_Shdr *sechdrs) { @@ -949,21 +958,26 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, + nloaded * sizeof(sect_attrs->attrs[0]), sizeof(sect_attrs->grp.attrs[0])); size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); - if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL))) + sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); + if (sect_attrs == NULL) return; /* Setup section attributes. */ sect_attrs->grp.name = "sections"; sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; + sect_attrs->nsections = 0; sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; sattr->address = sechdrs[i].sh_addr; - strlcpy(sattr->name, secstrings + sechdrs[i].sh_name, - MODULE_SECT_NAME_LEN); + sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, + GFP_KERNEL); + if (sattr->name == NULL) + goto out; + sect_attrs->nsections++; sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; @@ -979,7 +993,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, mod->sect_attrs = sect_attrs; return; out: - kfree(sect_attrs); + free_sect_attrs(sect_attrs); } static void remove_sect_attrs(struct module *mod) @@ -989,13 +1003,13 @@ static void remove_sect_attrs(struct module *mod) &mod->sect_attrs->grp); /* We are positive that no one is using any sect attrs * at this point. Deallocate immediately. */ - kfree(mod->sect_attrs); + free_sect_attrs(mod->sect_attrs); mod->sect_attrs = NULL; } } - #else + static inline void add_sect_attrs(struct module *mod, unsigned int nsect, char *sectstrings, Elf_Shdr *sechdrs) { diff --git a/kernel/panic.c b/kernel/panic.c index 6ceb664fb52..525e365f723 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,7 +21,6 @@ #include <linux/debug_locks.h> int panic_on_oops; -int panic_on_unrecovered_nmi; int tainted; static int pause_on_oops; static int pause_on_oops_flag; diff --git a/kernel/params.c b/kernel/params.c index 91aea7aa532..f406655d665 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -547,6 +547,7 @@ static void __init kernel_param_sysfs_setup(const char *name, unsigned int name_skip) { struct module_kobject *mk; + int ret; mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); BUG_ON(!mk); @@ -554,7 +555,8 @@ static void __init kernel_param_sysfs_setup(const char *name, mk->mod = THIS_MODULE; kobj_set_kset_s(mk, module_subsys); kobject_set_name(&mk->kobj, name); - kobject_register(&mk->kobj); + ret = kobject_register(&mk->kobj); + BUG_ON(ret < 0); /* no need to keep the kobject if no parameter is exported */ if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) { @@ -684,13 +686,20 @@ decl_subsys(module, &module_ktype, NULL); */ static int __init param_sysfs_init(void) { - subsystem_register(&module_subsys); + int ret; + + ret = subsystem_register(&module_subsys); + if (ret < 0) { + printk(KERN_WARNING "%s (%d): subsystem_register error: %d\n", + __FILE__, __LINE__, ret); + return ret; + } param_sysfs_builtin(); return 0; } -__initcall(param_sysfs_init); +subsys_initcall(param_sysfs_init); EXPORT_SYMBOL(param_set_byte); EXPORT_SYMBOL(param_get_byte); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d38d9ec3276..479b16b44f7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1393,25 +1393,13 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } } -static long posix_cpu_clock_nanosleep_restart(struct restart_block *); - -int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp) +static int do_cpu_nanosleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct itimerspec *it) { - struct restart_block *restart_block = - ¤t_thread_info()->restart_block; struct k_itimer timer; int error; /* - * Diagnose required errors first. - */ - if (CPUCLOCK_PERTHREAD(which_clock) && - (CPUCLOCK_PID(which_clock) == 0 || - CPUCLOCK_PID(which_clock) == current->pid)) - return -EINVAL; - - /* * Set up a temporary timer and then wait for it to go off. */ memset(&timer, 0, sizeof timer); @@ -1422,11 +1410,12 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, timer.it_process = current; if (!error) { static struct itimerspec zero_it; - struct itimerspec it = { .it_value = *rqtp, - .it_interval = {} }; + + memset(it, 0, sizeof *it); + it->it_value = *rqtp; spin_lock_irq(&timer.it_lock); - error = posix_cpu_timer_set(&timer, flags, &it, NULL); + error = posix_cpu_timer_set(&timer, flags, it, NULL); if (error) { spin_unlock_irq(&timer.it_lock); return error; @@ -1454,49 +1443,89 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, &it); + posix_cpu_timer_set(&timer, 0, &zero_it, it); spin_unlock_irq(&timer.it_lock); - if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. */ return 0; } + error = -ERESTART_RESTARTBLOCK; + } + + return error; +} + +int posix_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *rqtp, struct timespec __user *rmtp) +{ + struct restart_block *restart_block = + ¤t_thread_info()->restart_block; + struct itimerspec it; + int error; + + /* + * Diagnose required errors first. + */ + if (CPUCLOCK_PERTHREAD(which_clock) && + (CPUCLOCK_PID(which_clock) == 0 || + CPUCLOCK_PID(which_clock) == current->pid)) + return -EINVAL; + + error = do_cpu_nanosleep(which_clock, flags, rqtp, &it); + + if (error == -ERESTART_RESTARTBLOCK) { + + if (flags & TIMER_ABSTIME) + return -ERESTARTNOHAND; /* - * Report back to the user the time still remaining. - */ - if (rmtp != NULL && !(flags & TIMER_ABSTIME) && - copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + * Report back to the user the time still remaining. + */ + if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; - restart_block->fn = posix_cpu_clock_nanosleep_restart; - /* Caller already set restart_block->arg1 */ + restart_block->fn = posix_cpu_nsleep_restart; restart_block->arg0 = which_clock; restart_block->arg1 = (unsigned long) rmtp; restart_block->arg2 = rqtp->tv_sec; restart_block->arg3 = rqtp->tv_nsec; - - error = -ERESTART_RESTARTBLOCK; } - return error; } -static long -posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) +long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; struct timespec __user *rmtp; struct timespec t; + struct itimerspec it; + int error; rmtp = (struct timespec __user *) restart_block->arg1; t.tv_sec = restart_block->arg2; t.tv_nsec = restart_block->arg3; restart_block->fn = do_no_restart_syscall; - return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t, rmtp); + error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); + + if (error == -ERESTART_RESTARTBLOCK) { + /* + * Report back to the user the time still remaining. + */ + if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + return -EFAULT; + + restart_block->fn = posix_cpu_nsleep_restart; + restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; + restart_block->arg2 = t.tv_sec; + restart_block->arg3 = t.tv_nsec; + } + return error; + } @@ -1524,6 +1553,10 @@ static int process_cpu_nsleep(const clockid_t which_clock, int flags, { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); } +static long process_cpu_nsleep_restart(struct restart_block *restart_block) +{ + return -EINVAL; +} static int thread_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { @@ -1544,6 +1577,10 @@ static int thread_cpu_nsleep(const clockid_t which_clock, int flags, { return -EINVAL; } +static long thread_cpu_nsleep_restart(struct restart_block *restart_block) +{ + return -EINVAL; +} static __init int init_posix_cpu_timers(void) { @@ -1553,6 +1590,7 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, + .nsleep_restart = process_cpu_nsleep_restart, }; struct k_clock thread = { .clock_getres = thread_cpu_clock_getres, @@ -1560,6 +1598,7 @@ static __init int init_posix_cpu_timers(void) .clock_set = do_posix_clock_nosettime, .timer_create = thread_cpu_timer_create, .nsleep = thread_cpu_nsleep, + .nsleep_restart = thread_cpu_nsleep_restart, }; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ac6dc874442..e5ebcc1ec3a 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -973,3 +973,24 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags, return CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t, rmtp)); } + +/* + * nanosleep_restart for monotonic and realtime clocks + */ +static int common_nsleep_restart(struct restart_block *restart_block) +{ + return hrtimer_nanosleep_restart(restart_block); +} + +/* + * This will restart clock_nanosleep. This is required only by + * compat_clock_nanosleep_restart for now. + */ +long +clock_nanosleep_restart(struct restart_block *restart_block) +{ + clockid_t which_clock = restart_block->arg0; + + return CLOCK_DISPATCH(which_clock, nsleep_restart, + (restart_block)); +} diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8aad0331d82..4d50e06fd74 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -440,6 +440,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) child = find_task_by_pid(pid); if (child) get_task_struct(child); + read_unlock(&tasklist_lock); if (!child) return ERR_PTR(-ESRCH); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 4d1c3d24712..4f2c4272d59 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -192,13 +192,13 @@ static struct rcu_torture_ops *cur_ops = NULL; * Definitions for rcu torture testing. */ -static int rcu_torture_read_lock(void) +static int rcu_torture_read_lock(void) __acquires(RCU) { rcu_read_lock(); return 0; } -static void rcu_torture_read_unlock(int idx) +static void rcu_torture_read_unlock(int idx) __releases(RCU) { rcu_read_unlock(); } @@ -250,13 +250,13 @@ static struct rcu_torture_ops rcu_ops = { * Definitions for rcu_bh torture testing. */ -static int rcu_bh_torture_read_lock(void) +static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH) { rcu_read_lock_bh(); return 0; } -static void rcu_bh_torture_read_unlock(int idx) +static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) { rcu_read_unlock_bh(); } diff --git a/kernel/relay.c b/kernel/relay.c index 85786ff2a4f..1d63ecddfa7 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -95,7 +95,7 @@ int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) * @buf: the buffer struct * @size: total size of the buffer * - * Returns a pointer to the resulting buffer, NULL if unsuccessful. The + * Returns a pointer to the resulting buffer, %NULL if unsuccessful. The * passed in size will get page aligned, if it isn't already. */ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) @@ -132,10 +132,9 @@ depopulate: /** * relay_create_buf - allocate and initialize a channel buffer - * @alloc_size: size of the buffer to allocate - * @n_subbufs: number of sub-buffers in the channel + * @chan: the relay channel * - * Returns channel buffer if successful, NULL otherwise + * Returns channel buffer if successful, %NULL otherwise. */ struct rchan_buf *relay_create_buf(struct rchan *chan) { @@ -163,6 +162,7 @@ free_buf: /** * relay_destroy_channel - free the channel struct + * @kref: target kernel reference that contains the relay channel * * Should only be called from kref_put(). */ @@ -194,6 +194,7 @@ void relay_destroy_buf(struct rchan_buf *buf) /** * relay_remove_buf - remove a channel buffer + * @kref: target kernel reference that contains the relay buffer * * Removes the file from the fileystem, which also frees the * rchan_buf_struct and the channel buffer. Should only be called from @@ -374,7 +375,7 @@ void relay_reset(struct rchan *chan) } EXPORT_SYMBOL_GPL(relay_reset); -/** +/* * relay_open_buf - create a new relay channel buffer * * Internal - used by relay_open(). @@ -448,12 +449,12 @@ static inline void setup_callbacks(struct rchan *chan, /** * relay_open - create a new relay channel * @base_filename: base name of files to create - * @parent: dentry of parent directory, NULL for root directory + * @parent: dentry of parent directory, %NULL for root directory * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions * - * Returns channel pointer if successful, NULL otherwise. + * Returns channel pointer if successful, %NULL otherwise. * * Creates a channel buffer for each cpu using the sizes and * attributes specified. The created channel buffer files @@ -585,7 +586,7 @@ EXPORT_SYMBOL_GPL(relay_switch_subbuf); * subbufs_consumed should be the number of sub-buffers newly consumed, * not the total consumed. * - * NOTE: kernel clients don't need to call this function if the channel + * NOTE: Kernel clients don't need to call this function if the channel * mode is 'overwrite'. */ void relay_subbufs_consumed(struct rchan *chan, @@ -641,7 +642,7 @@ EXPORT_SYMBOL_GPL(relay_close); * relay_flush - close the channel * @chan: the channel * - * Flushes all channel buffers i.e. forces buffer switch. + * Flushes all channel buffers, i.e. forces buffer switch. */ void relay_flush(struct rchan *chan) { @@ -729,7 +730,7 @@ static int relay_file_release(struct inode *inode, struct file *filp) return 0; } -/** +/* * relay_file_read_consume - update the consumed count for the buffer */ static void relay_file_read_consume(struct rchan_buf *buf, @@ -756,7 +757,7 @@ static void relay_file_read_consume(struct rchan_buf *buf, } } -/** +/* * relay_file_read_avail - boolean, are there unconsumed bytes available? */ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) @@ -793,6 +794,8 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) /** * relay_file_read_subbuf_avail - return bytes available in sub-buffer + * @read_pos: file read position + * @buf: relay channel buffer */ static size_t relay_file_read_subbuf_avail(size_t read_pos, struct rchan_buf *buf) @@ -818,6 +821,8 @@ static size_t relay_file_read_subbuf_avail(size_t read_pos, /** * relay_file_read_start_pos - find the first available byte to read + * @read_pos: file read position + * @buf: relay channel buffer * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise @@ -844,6 +849,9 @@ static size_t relay_file_read_start_pos(size_t read_pos, /** * relay_file_read_end_pos - return the new read position + * @read_pos: file read position + * @buf: relay channel buffer + * @count: number of bytes to be read */ static size_t relay_file_read_end_pos(struct rchan_buf *buf, size_t read_pos, @@ -865,7 +873,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, return end_pos; } -/** +/* * subbuf_read_actor - read up to one subbuf's worth of data */ static int subbuf_read_actor(size_t read_start, @@ -890,7 +898,7 @@ static int subbuf_read_actor(size_t read_start, return ret; } -/** +/* * subbuf_send_actor - send up to one subbuf's worth of data */ static int subbuf_send_actor(size_t read_start, @@ -933,7 +941,7 @@ typedef int (*subbuf_actor_t) (size_t read_start, read_descriptor_t *desc, read_actor_t actor); -/** +/* * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries */ static inline ssize_t relay_file_read_subbufs(struct file *filp, diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 3e13a1e5856..4ab17da46fd 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -251,6 +251,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* Grab the next task */ task = rt_mutex_owner(lock); + get_task_struct(task); spin_lock_irqsave(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { @@ -269,7 +270,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, __rt_mutex_adjust_prio(task); } - get_task_struct(task); spin_unlock_irqrestore(&task->pi_lock, flags); top_waiter = rt_mutex_top_waiter(lock); @@ -409,7 +409,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; unsigned long flags; - int boost = 0, res; + int chain_walk = 0, res; spin_lock_irqsave(¤t->pi_lock, flags); __rt_mutex_adjust_prio(current); @@ -433,25 +433,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, plist_add(&waiter->pi_list_entry, &owner->pi_waiters); __rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) { - boost = 1; - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(owner); - } - spin_unlock_irqrestore(&owner->pi_lock, flags); - } - else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { - spin_lock_irqsave(&owner->pi_lock, flags); - if (owner->pi_blocked_on) { - boost = 1; - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(owner); - } + if (owner->pi_blocked_on) + chain_walk = 1; spin_unlock_irqrestore(&owner->pi_lock, flags); } - if (!boost) + else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) + chain_walk = 1; + + if (!chain_walk) return 0; + /* + * The owner can't disappear while holding a lock, + * so the owner struct is protected by wait_lock. + * Gets dropped in rt_mutex_adjust_prio_chain()! + */ + get_task_struct(owner); + spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, @@ -532,7 +530,7 @@ static void remove_waiter(struct rt_mutex *lock, int first = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); unsigned long flags; - int boost = 0; + int chain_walk = 0; spin_lock_irqsave(¤t->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); @@ -554,19 +552,20 @@ static void remove_waiter(struct rt_mutex *lock, } __rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) { - boost = 1; - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(owner); - } + if (owner->pi_blocked_on) + chain_walk = 1; + spin_unlock_irqrestore(&owner->pi_lock, flags); } WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - if (!boost) + if (!chain_walk) return; + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(owner); + spin_unlock(&lock->wait_lock); rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); @@ -592,10 +591,10 @@ void rt_mutex_adjust_pi(struct task_struct *task) return; } - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(task); spin_unlock_irqrestore(&task->pi_lock, flags); + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } diff --git a/kernel/sched.c b/kernel/sched.c index 5c848fd4e46..74f169ac077 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1755,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; - unsigned long prev_task_flags; + long prev_state; rq->prev_mm = NULL; /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and - * calls schedule one last time. The schedule call will never return, - * and the scheduled task must drop that reference. - * The test for EXIT_ZOMBIE must occur while the runqueue locks are + * If a task dies, then it sets TASK_DEAD in tsk->state and calls + * schedule one last time. The schedule call will never return, and + * the scheduled task must drop that reference. + * The test for TASK_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. * Manfred Spraul <manfred@colorfullife.com> */ - prev_task_flags = prev->flags; + prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_task_flags & PF_DEAD)) { + if (unlikely(prev_state == TASK_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -3348,9 +3348,6 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); - if (unlikely(prev->flags & PF_DEAD)) - prev->state = EXIT_DEAD; - switch_count = &prev->nivcsw; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { switch_count = &prev->nvcsw; @@ -4080,6 +4077,8 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) * @p: the task in question. * @policy: new policy. * @param: structure containing the new RT priority. + * + * NOTE: the task may be already dead */ int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) @@ -4107,28 +4106,32 @@ recheck: (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) return -EINVAL; - if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) - != (param->sched_priority == 0)) + if (is_rt_policy(policy) != (param->sched_priority != 0)) return -EINVAL; /* * Allow unprivileged RT tasks to decrease priority: */ if (!capable(CAP_SYS_NICE)) { - /* - * can't change policy, except between SCHED_NORMAL - * and SCHED_BATCH: - */ - if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) && - (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) && - !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) - return -EPERM; - /* can't increase priority */ - if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) && - param->sched_priority > p->rt_priority && - param->sched_priority > - p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) - return -EPERM; + if (is_rt_policy(policy)) { + unsigned long rlim_rtprio; + unsigned long flags; + + if (!lock_task_sighand(p, &flags)) + return -ESRCH; + rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur; + unlock_task_sighand(p, &flags); + + /* can't set/change the rt policy */ + if (policy != p->policy && !rlim_rtprio) + return -EPERM; + + /* can't increase priority */ + if (param->sched_priority > p->rt_priority && + param->sched_priority > rlim_rtprio) + return -EPERM; + } + /* can't change other user's priorities */ if ((current->euid != p->euid) && (current->euid != p->uid)) @@ -4193,14 +4196,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) return -EINVAL; if (copy_from_user(&lparam, param, sizeof(struct sched_param))) return -EFAULT; - read_lock_irq(&tasklist_lock); + + rcu_read_lock(); + retval = -ESRCH; p = find_process_by_pid(pid); - if (!p) { - read_unlock_irq(&tasklist_lock); - return -ESRCH; - } - retval = sched_setscheduler(p, policy, &lparam); - read_unlock_irq(&tasklist_lock); + if (p != NULL) + retval = sched_setscheduler(p, policy, &lparam); + rcu_read_unlock(); return retval; } @@ -5151,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->flags & PF_DEAD); + BUG_ON(p->state == TASK_DEAD); get_task_struct(p); @@ -5272,9 +5274,11 @@ static struct notifier_block __cpuinitdata migration_notifier = { int __init migration_init(void) { void *cpu = (void *)(long)smp_processor_id(); + int err; /* Start one for the boot CPU: */ - migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); + err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); + BUG_ON(err == NOTIFY_BAD); migration_call(&migration_notifier, CPU_ONLINE, cpu); register_cpu_notifier(&migration_notifier); diff --git a/kernel/signal.c b/kernel/signal.c index 05853a7337e..fb5da6d19f1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -417,9 +417,8 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info) { - int sig = 0; + int sig = next_signal(pending, mask); - sig = next_signal(pending, mask); if (sig) { if (current->notifier) { if (sigismember(current->notifier_mask, sig)) { @@ -432,9 +431,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, if (!collect_signal(sig, pending, info)) sig = 0; - } - recalc_sigpending(); return sig; } @@ -451,6 +448,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!signr) signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); + recalc_sigpending_tsk(tsk); if (signr && unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our diff --git a/kernel/softirq.c b/kernel/softirq.c index 3789ca98197..bf25015dce1 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -612,7 +612,9 @@ static struct notifier_block __cpuinitdata cpu_nfb = { __init int spawn_ksoftirqd(void) { void *cpu = (void *)(long)smp_processor_id(); - cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + + BUG_ON(err == NOTIFY_BAD); cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); return 0; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 03e6a2b0b78..50afeb81330 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -149,8 +149,9 @@ static struct notifier_block __cpuinitdata cpu_nfb = { __init void spawn_softlockup_task(void) { void *cpu = (void *)(long)smp_processor_id(); + int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + BUG_ON(err == NOTIFY_BAD); cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 9644a41e0be..d48143eafbf 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,17 +21,6 @@ #include <linux/debug_locks.h> #include <linux/module.h> -/* - * Generic declaration of the raw read_trylock() function, - * architectures are supposed to optimize this: - */ -int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock) -{ - __raw_read_lock(lock); - return 1; -} -EXPORT_SYMBOL(generic__raw_read_trylock); - int __lockfunc _spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 51cacd111db..12458040e66 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -1,3 +1,6 @@ +/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. + * GPL v2 and any later version. + */ #include <linux/stop_machine.h> #include <linux/kthread.h> #include <linux/sched.h> diff --git a/kernel/sys.c b/kernel/sys.c index 3f894775488..b88806c6624 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -612,7 +612,6 @@ void kernel_restart(char *cmd) } else { printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); } - printk(".\n"); machine_restart(cmd); } EXPORT_SYMBOL_GPL(kernel_restart); @@ -2084,12 +2083,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, * padding */ unsigned long t0, t1; - get_user(t0, &cache->t0); - get_user(t1, &cache->t1); + get_user(t0, &cache->blob[0]); + get_user(t1, &cache->blob[1]); t0++; t1++; - put_user(t0, &cache->t0); - put_user(t1, &cache->t1); + put_user(t0, &cache->blob[0]); + put_user(t1, &cache->blob[1]); } return err ? -EFAULT : 0; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 6991bece67e..7a3b2e75f04 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -134,3 +134,8 @@ cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); cond_syscall(compat_sys_move_pages); + +/* block-layer dependent */ +cond_syscall(sys_bdflush); +cond_syscall(sys_ioprio_set); +cond_syscall(sys_ioprio_get); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bfa7d117c5..c57c4532e29 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -52,6 +52,10 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_X86 +#include <asm/nmi.h> +#endif + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -74,13 +78,6 @@ extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) -int unknown_nmi_panic; -int nmi_watchdog_enabled; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, - void __user *, size_t *, loff_t *); -#endif - /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; @@ -1915,7 +1912,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, return -EPERM; } - op = (current->pid == 1) ? OP_SET : OP_AND; + op = is_init(current) ? OP_SET : OP_AND; return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, do_proc_dointvec_bset_conv,&op); } diff --git a/kernel/timer.c b/kernel/timer.c index 1d7dd6267c2..4f55622b0d3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -136,7 +136,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } -/*** +/** * init_timer - initialize a timer. * @timer: the timer to be initialized * @@ -175,6 +175,7 @@ static inline void detach_timer(struct timer_list *timer, */ static tvec_base_t *lock_timer_base(struct timer_list *timer, unsigned long *flags) + __acquires(timer->base->lock) { tvec_base_t *base; @@ -235,7 +236,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(__mod_timer); -/*** +/** * add_timer_on - start a timer on a particular CPU * @timer: the timer to be added * @cpu: the CPU to start it on @@ -255,9 +256,10 @@ void add_timer_on(struct timer_list *timer, int cpu) } -/*** +/** * mod_timer - modify a timer's timeout * @timer: the timer to be modified + * @expires: new timeout in jiffies * * mod_timer is a more efficient way to update the expire field of an * active timer (if the timer is inactive it will be activated) @@ -291,7 +293,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) EXPORT_SYMBOL(mod_timer); -/*** +/** * del_timer - deactive a timer. * @timer: the timer to be deactivated * @@ -323,7 +325,10 @@ int del_timer(struct timer_list *timer) EXPORT_SYMBOL(del_timer); #ifdef CONFIG_SMP -/* +/** + * try_to_del_timer_sync - Try to deactivate a timer + * @timer: timer do del + * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. * @@ -351,7 +356,7 @@ out: return ret; } -/*** +/** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated * @@ -401,15 +406,15 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) return index; } -/*** +#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) + +/** * __run_timers - run all expired timers (if any) on this CPU. * @base: the timer vector to be processed. * * This function cascades all vectors and executes all expired timer * vectors. */ -#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) - static inline void __run_timers(tvec_base_t *base) { struct timer_list *timer; @@ -970,7 +975,7 @@ void __init timekeeping_init(void) static int timekeeping_suspended; -/* +/** * timekeeping_resume - Resumes the generic timekeeping subsystem. * @dev: unused * @@ -1106,7 +1111,7 @@ static void clocksource_adjust(struct clocksource *clock, s64 offset) clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); } -/* +/** * update_wall_time - Uses the current clocksource to increment the wall time * * Called from the timer interrupt, must hold a write on xtime_lock. @@ -1217,10 +1222,8 @@ static inline void calc_load(unsigned long ticks) unsigned long active_tasks; /* fixed-point */ static int count = LOAD_FREQ; - count -= ticks; - if (count < 0) { - count += LOAD_FREQ; - active_tasks = count_active_tasks(); + active_tasks = count_active_tasks(); + for (count -= ticks; count < 0; count += LOAD_FREQ) { CALC_LOAD(avenrun[0], EXP_1, active_tasks); CALC_LOAD(avenrun[1], EXP_5, active_tasks); CALC_LOAD(avenrun[2], EXP_15, active_tasks); @@ -1265,11 +1268,8 @@ void run_local_timers(void) * Called by the timer interrupt. xtime_lock must already be taken * by the timer IRQ! */ -static inline void update_times(void) +static inline void update_times(unsigned long ticks) { - unsigned long ticks; - - ticks = jiffies - wall_jiffies; wall_jiffies += ticks; update_wall_time(); calc_load(ticks); @@ -1281,12 +1281,10 @@ static inline void update_times(void) * jiffies is defined in the linker script... */ -void do_timer(struct pt_regs *regs) +void do_timer(unsigned long ticks) { - jiffies_64++; - /* prevent loading jiffies before storing new jiffies_64 value. */ - barrier(); - update_times(); + jiffies_64 += ticks; + update_times(ticks); } #ifdef __ARCH_WANT_SYS_ALARM @@ -1470,8 +1468,9 @@ asmlinkage long sys_gettid(void) return current->pid; } -/* +/** * sys_sysinfo - fill in sysinfo struct + * @info: pointer to buffer to fill */ asmlinkage long sys_sysinfo(struct sysinfo __user *info) { @@ -1688,8 +1687,10 @@ static struct notifier_block __cpuinitdata timers_nb = { void __init init_timers(void) { - timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, + int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + + BUG_ON(err == NOTIFY_BAD); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); } diff --git a/kernel/unwind.c b/kernel/unwind.c index 3430475fcd8..2e2368607aa 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -102,7 +102,7 @@ static struct unwind_table { unsigned long size; struct unwind_table *link; const char *name; -} root_table, *last_table; +} root_table; struct unwind_item { enum item_location { @@ -174,6 +174,8 @@ void __init unwind_init(void) #ifdef CONFIG_MODULES +static struct unwind_table *last_table; + /* Must be called with module_mutex held. */ void *unwind_add_table(struct module *module, const void *table_start, |