aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/capability.c16
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/cpuset.c152
-rw-r--r--kernel/exec_domain.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/irq/Makefile3
-rw-r--r--kernel/irq/manage.c23
-rw-r--r--kernel/irq/migration.c65
-rw-r--r--kernel/itimer.c103
-rw-r--r--kernel/ksysfs.c4
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/module.c202
-rw-r--r--kernel/params.c12
-rw-r--r--kernel/power/smp.c4
-rw-r--r--kernel/printk.c76
-rw-r--r--kernel/rcupdate.c5
-rw-r--r--kernel/rcutorture.c33
-rw-r--r--kernel/softlockup.c55
-rw-r--r--kernel/sys.c68
-rw-r--r--kernel/sysctl.c19
-rw-r--r--kernel/time.c59
-rw-r--r--kernel/timer.c74
-rw-r--r--kernel/user.c10
23 files changed, 549 insertions, 441 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index bfa3c92e16f..1a4d8a40d3f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -233,3 +233,19 @@ out:
return ret;
}
+
+int __capable(struct task_struct *t, int cap)
+{
+ if (security_capable(t, cap) == 0) {
+ t->flags |= PF_SUPERPRIV;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(__capable);
+
+int capable(int cap)
+{
+ return __capable(current, cap);
+}
+EXPORT_SYMBOL(capable);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e882c6babf4..8be22bd8093 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -223,8 +223,7 @@ int __devinit cpu_up(unsigned int cpu)
ret = __cpu_up(cpu);
if (ret != 0)
goto out_notify;
- if (!cpu_online(cpu))
- BUG();
+ BUG_ON(!cpu_online(cpu));
/* Now call notifier in preparation. */
notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c86ee051b73..18aea1bd128 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,15 +4,14 @@
* Processor and Memory placement constraints for sets of tasks.
*
* Copyright (C) 2003 BULL SA.
- * Copyright (C) 2004 Silicon Graphics, Inc.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc.
*
* Portions derived from Patrick Mochel's sysfs code.
* sysfs is Copyright (c) 2001-3 Patrick Mochel
- * Portions Copyright (c) 2004 Silicon Graphics, Inc.
*
- * 2003-10-10 Written by Simon Derr <simon.derr@bull.net>
+ * 2003-10-10 Written by Simon Derr.
* 2003-10-22 Updates by Stephen Hemminger.
- * 2004 May-July Rework by Paul Jackson <pj@sgi.com>
+ * 2004 May-July Rework by Paul Jackson.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
@@ -108,37 +107,49 @@ typedef enum {
CS_MEM_EXCLUSIVE,
CS_MEMORY_MIGRATE,
CS_REMOVED,
- CS_NOTIFY_ON_RELEASE
+ CS_NOTIFY_ON_RELEASE,
+ CS_SPREAD_PAGE,
+ CS_SPREAD_SLAB,
} cpuset_flagbits_t;
/* convenient tests for these bits */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
- return !!test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
+ return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}
static inline int is_mem_exclusive(const struct cpuset *cs)
{
- return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
+ return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}
static inline int is_removed(const struct cpuset *cs)
{
- return !!test_bit(CS_REMOVED, &cs->flags);
+ return test_bit(CS_REMOVED, &cs->flags);
}
static inline int notify_on_release(const struct cpuset *cs)
{
- return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+ return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
}
static inline int is_memory_migrate(const struct cpuset *cs)
{
- return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+ return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+}
+
+static inline int is_spread_page(const struct cpuset *cs)
+{
+ return test_bit(CS_SPREAD_PAGE, &cs->flags);
+}
+
+static inline int is_spread_slab(const struct cpuset *cs)
+{
+ return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
/*
- * Increment this atomic integer everytime any cpuset changes its
+ * Increment this integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
* number, and avoid having to lock and reload mems_allowed unless
* the cpuset they're using changes generation.
@@ -152,8 +163,11 @@ static inline int is_memory_migrate(const struct cpuset *cs)
* on every visit to __alloc_pages(), to efficiently check whether
* its current->cpuset->mems_allowed has changed, requiring an update
* of its current->mems_allowed.
+ *
+ * Since cpuset_mems_generation is guarded by manage_mutex,
+ * there is no need to mark it atomic.
*/
-static atomic_t cpuset_mems_generation = ATOMIC_INIT(1);
+static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
@@ -657,6 +671,14 @@ void cpuset_update_task_memory_state(void)
cs = tsk->cpuset; /* Maybe changed when task not locked */
guarantee_online_mems(cs, &tsk->mems_allowed);
tsk->cpuset_mems_generation = cs->mems_generation;
+ if (is_spread_page(cs))
+ tsk->flags |= PF_SPREAD_PAGE;
+ else
+ tsk->flags &= ~PF_SPREAD_PAGE;
+ if (is_spread_slab(cs))
+ tsk->flags |= PF_SPREAD_SLAB;
+ else
+ tsk->flags &= ~PF_SPREAD_SLAB;
task_unlock(tsk);
mutex_unlock(&callback_mutex);
mpol_rebind_task(tsk, &tsk->mems_allowed);
@@ -858,8 +880,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
mutex_lock(&callback_mutex);
cs->mems_allowed = trialcs.mems_allowed;
- atomic_inc(&cpuset_mems_generation);
- cs->mems_generation = atomic_read(&cpuset_mems_generation);
+ cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */
@@ -957,7 +978,8 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
+ * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
+ * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
* cs: the cpuset to update
* buf: the buffer where we read the 0 or 1
*
@@ -1188,6 +1210,8 @@ typedef enum {
FILE_NOTIFY_ON_RELEASE,
FILE_MEMORY_PRESSURE_ENABLED,
FILE_MEMORY_PRESSURE,
+ FILE_SPREAD_PAGE,
+ FILE_SPREAD_SLAB,
FILE_TASKLIST,
} cpuset_filetype_t;
@@ -1247,6 +1271,14 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
case FILE_MEMORY_PRESSURE:
retval = -EACCES;
break;
+ case FILE_SPREAD_PAGE:
+ retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
+ cs->mems_generation = cpuset_mems_generation++;
+ break;
+ case FILE_SPREAD_SLAB:
+ retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
+ cs->mems_generation = cpuset_mems_generation++;
+ break;
case FILE_TASKLIST:
retval = attach_task(cs, buffer, &pathbuf);
break;
@@ -1356,6 +1388,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
case FILE_MEMORY_PRESSURE:
s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
break;
+ case FILE_SPREAD_PAGE:
+ *s++ = is_spread_page(cs) ? '1' : '0';
+ break;
+ case FILE_SPREAD_SLAB:
+ *s++ = is_spread_slab(cs) ? '1' : '0';
+ break;
default:
retval = -EINVAL;
goto out;
@@ -1719,6 +1757,16 @@ static struct cftype cft_memory_pressure = {
.private = FILE_MEMORY_PRESSURE,
};
+static struct cftype cft_spread_page = {
+ .name = "memory_spread_page",
+ .private = FILE_SPREAD_PAGE,
+};
+
+static struct cftype cft_spread_slab = {
+ .name = "memory_spread_slab",
+ .private = FILE_SPREAD_SLAB,
+};
+
static int cpuset_populate_dir(struct dentry *cs_dentry)
{
int err;
@@ -1737,6 +1785,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
return err;
+ if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+ return err;
+ if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
+ return err;
if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
return err;
return 0;
@@ -1765,13 +1817,16 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
cs->flags = 0;
if (notify_on_release(parent))
set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+ if (is_spread_page(parent))
+ set_bit(CS_SPREAD_PAGE, &cs->flags);
+ if (is_spread_slab(parent))
+ set_bit(CS_SPREAD_SLAB, &cs->flags);
cs->cpus_allowed = CPU_MASK_NONE;
cs->mems_allowed = NODE_MASK_NONE;
atomic_set(&cs->count, 0);
INIT_LIST_HEAD(&cs->sibling);
INIT_LIST_HEAD(&cs->children);
- atomic_inc(&cpuset_mems_generation);
- cs->mems_generation = atomic_read(&cpuset_mems_generation);
+ cs->mems_generation = cpuset_mems_generation++;
fmeter_init(&cs->fmeter);
cs->parent = parent;
@@ -1861,7 +1916,7 @@ int __init cpuset_init_early(void)
struct task_struct *tsk = current;
tsk->cpuset = &top_cpuset;
- tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation);
+ tsk->cpuset->mems_generation = cpuset_mems_generation++;
return 0;
}
@@ -1880,8 +1935,7 @@ int __init cpuset_init(void)
top_cpuset.mems_allowed = NODE_MASK_ALL;
fmeter_init(&top_cpuset.fmeter);
- atomic_inc(&cpuset_mems_generation);
- top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation);
+ top_cpuset.mems_generation = cpuset_mems_generation++;
init_task.cpuset = &top_cpuset;
@@ -1972,7 +2026,7 @@ void cpuset_fork(struct task_struct *child)
* because tsk is already marked PF_EXITING, so attach_task() won't
* mess with it, or task is a failed fork, never visible to attach_task.
*
- * Hack:
+ * the_top_cpuset_hack:
*
* Set the exiting tasks cpuset to the root cpuset (top_cpuset).
*
@@ -2011,7 +2065,7 @@ void cpuset_exit(struct task_struct *tsk)
struct cpuset *cs;
cs = tsk->cpuset;
- tsk->cpuset = &top_cpuset; /* Hack - see comment above */
+ tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */
if (notify_on_release(cs)) {
char *pathbuf = NULL;
@@ -2151,7 +2205,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
int node; /* node that zone z is on */
const struct cpuset *cs; /* current cpuset ancestors */
- int allowed = 1; /* is allocation in zone z allowed? */
+ int allowed; /* is allocation in zone z allowed? */
if (in_interrupt())
return 1;
@@ -2204,6 +2258,44 @@ void cpuset_unlock(void)
}
/**
+ * cpuset_mem_spread_node() - On which node to begin search for a page
+ *
+ * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
+ * tasks in a cpuset with is_spread_page or is_spread_slab set),
+ * and if the memory allocation used cpuset_mem_spread_node()
+ * to determine on which node to start looking, as it will for
+ * certain page cache or slab cache pages such as used for file
+ * system buffers and inode caches, then instead of starting on the
+ * local node to look for a free page, rather spread the starting
+ * node around the tasks mems_allowed nodes.
+ *
+ * We don't have to worry about the returned node being offline
+ * because "it can't happen", and even if it did, it would be ok.
+ *
+ * The routines calling guarantee_online_mems() are careful to
+ * only set nodes in task->mems_allowed that are online. So it
+ * should not be possible for the following code to return an
+ * offline node. But if it did, that would be ok, as this routine
+ * is not returning the node where the allocation must be, only
+ * the node where the search should start. The zonelist passed to
+ * __alloc_pages() will include all nodes. If the slab allocator
+ * is passed an offline node, it will fall back to the local node.
+ * See kmem_cache_alloc_node().
+ */
+
+int cpuset_mem_spread_node(void)
+{
+ int node;
+
+ node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
+ if (node == MAX_NUMNODES)
+ node = first_node(current->mems_allowed);
+ current->cpuset_mem_spread_rotor = node;
+ return node;
+}
+EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
+
+/**
* cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
* @p: pointer to task_struct of some other task.
*
@@ -2284,12 +2376,12 @@ void __cpuset_memory_pressure_bump(void)
* - No need to task_lock(tsk) on this tsk->cpuset reference, as it
* doesn't really matter if tsk->cpuset changes after we read it,
* and we take manage_mutex, keeping attach_task() from changing it
- * anyway.
+ * anyway. No need to check that tsk->cpuset != NULL, thanks to
+ * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
+ * cpuset to top_cpuset.
*/
-
static int proc_cpuset_show(struct seq_file *m, void *v)
{
- struct cpuset *cs;
struct task_struct *tsk;
char *buf;
int retval = 0;
@@ -2300,13 +2392,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
tsk = m->private;
mutex_lock(&manage_mutex);
- cs = tsk->cpuset;
- if (!cs) {
- retval = -EINVAL;
- goto out;
- }
-
- retval = cpuset_path(cs, buf, PAGE_SIZE);
+ retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
if (retval < 0)
goto out;
seq_puts(m, buf);
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 867d6dbeb57..c01cead2cfd 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -140,6 +140,7 @@ __set_personality(u_long personality)
ep = lookup_exec_domain(personality);
if (ep == current_thread_info()->exec_domain) {
current->personality = personality;
+ module_put(ep->module);
return 0;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index c21bae8c93b..a02063903aa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
p->mempolicy = NULL;
goto bad_fork_cleanup_cpuset;
}
+ mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_DEBUG_MUTEXES
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 49378738ff5..2b33f852be3 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,4 @@
-obj-y := handle.o manage.o spurious.o
+obj-y := handle.o manage.o spurious.o migration.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
-
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 97d5559997d..6edfcef291e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -204,10 +204,14 @@ int setup_irq(unsigned int irq, struct irqaction * new)
p = &desc->action;
if ((old = *p) != NULL) {
/* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&desc->lock,flags);
- return -EBUSY;
- }
+ if (!(old->flags & new->flags & SA_SHIRQ))
+ goto mismatch;
+
+#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
+ /* All handlers must agree on per-cpuness */
+ if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU))
+ goto mismatch;
+#endif
/* add new interrupt at end of irq queue */
do {
@@ -218,7 +222,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
}
*p = new;
-
+#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
+ if (new->flags & SA_PERCPU_IRQ)
+ desc->status |= IRQ_PER_CPU;
+#endif
if (!shared) {
desc->depth = 0;
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
@@ -236,6 +243,12 @@ int setup_irq(unsigned int irq, struct irqaction * new)
register_handler_proc(irq, new);
return 0;
+
+mismatch:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+ dump_stack();
+ return -EBUSY;
}
/**
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
new file mode 100644
index 00000000000..52a8655fa08
--- /dev/null
+++ b/kernel/irq/migration.c
@@ -0,0 +1,65 @@
+#include <linux/irq.h>
+
+#if defined(CONFIG_GENERIC_PENDING_IRQ)
+
+void set_pending_irq(unsigned int irq, cpumask_t mask)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->move_irq = 1;
+ pending_irq_cpumask[irq] = mask;
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+void move_native_irq(int irq)
+{
+ cpumask_t tmp;
+ irq_desc_t *desc = irq_descp(irq);
+
+ if (likely(!desc->move_irq))
+ return;
+
+ /*
+ * Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
+ */
+ if (CHECK_IRQ_PER_CPU(desc->status)) {
+ WARN_ON(1);
+ return;
+ }
+
+ desc->move_irq = 0;
+
+ if (likely(cpus_empty(pending_irq_cpumask[irq])))
+ return;
+
+ if (!desc->handler->set_affinity)
+ return;
+
+ assert_spin_locked(&desc->lock);
+
+ cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+
+ /*
+ * If there was a valid mask to work with, please
+ * do the disable, re-program, enable sequence.
+ * This is *not* particularly important for level triggered
+ * but in a edge trigger case, we might be setting rte
+ * when an active trigger is comming in. This could
+ * cause some ioapics to mal-function.
+ * Being paranoid i guess!
+ */
+ if (unlikely(!cpus_empty(tmp))) {
+ if (likely(!(desc->status & IRQ_DISABLED)))
+ desc->handler->disable(irq);
+
+ desc->handler->set_affinity(irq,tmp);
+
+ if (likely(!(desc->status & IRQ_DISABLED)))
+ desc->handler->enable(irq);
+ }
+ cpus_clear(pending_irq_cpumask[irq]);
+}
+
+#endif
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 379be2f8c84..680e6b70c87 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -143,6 +143,60 @@ int it_real_fn(void *data)
return HRTIMER_NORESTART;
}
+/*
+ * We do not care about correctness. We just sanitize the values so
+ * the ktime_t operations which expect normalized values do not
+ * break. This converts negative values to long timeouts similar to
+ * the code in kernel versions < 2.6.16
+ *
+ * Print a limited number of warning messages when an invalid timeval
+ * is detected.
+ */
+static void fixup_timeval(struct timeval *tv, int interval)
+{
+ static int warnlimit = 10;
+ unsigned long tmp;
+
+ if (warnlimit > 0) {
+ warnlimit--;
+ printk(KERN_WARNING
+ "setitimer: %s (pid = %d) provided "
+ "invalid timeval %s: tv_sec = %ld tv_usec = %ld\n",
+ current->comm, current->pid,
+ interval ? "it_interval" : "it_value",
+ tv->tv_sec, (long) tv->tv_usec);
+ }
+
+ tmp = tv->tv_usec;
+ if (tmp >= USEC_PER_SEC) {
+ tv->tv_usec = tmp % USEC_PER_SEC;
+ tv->tv_sec += tmp / USEC_PER_SEC;
+ }
+
+ tmp = tv->tv_sec;
+ if (tmp > LONG_MAX)
+ tv->tv_sec = LONG_MAX;
+}
+
+/*
+ * Returns true if the timeval is in canonical form
+ */
+#define timeval_valid(t) \
+ (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
+
+/*
+ * Check for invalid timevals, sanitize them and print a limited
+ * number of warnings.
+ */
+static void check_itimerval(struct itimerval *value) {
+
+ if (unlikely(!timeval_valid(&value->it_value)))
+ fixup_timeval(&value->it_value, 0);
+
+ if (unlikely(!timeval_valid(&value->it_interval)))
+ fixup_timeval(&value->it_interval, 1);
+}
+
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
struct task_struct *tsk = current;
@@ -150,6 +204,18 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
ktime_t expires;
cputime_t cval, cinterval, nval, ninterval;
+ /*
+ * Validate the timevals in value.
+ *
+ * Note: Although the spec requires that invalid values shall
+ * return -EINVAL, we just fixup the value and print a limited
+ * number of warnings in order not to break users of this
+ * historical misfeature.
+ *
+ * Scheduled for replacement in March 2007
+ */
+ check_itimerval(value);
+
switch (which) {
case ITIMER_REAL:
again:
@@ -226,6 +292,43 @@ again:
return 0;
}
+/**
+ * alarm_setitimer - set alarm in seconds
+ *
+ * @seconds: number of seconds until alarm
+ * 0 disables the alarm
+ *
+ * Returns the remaining time in seconds of a pending timer or 0 when
+ * the timer is not active.
+ *
+ * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
+ * negative timeval settings which would cause immediate expiry.
+ */
+unsigned int alarm_setitimer(unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+
+#if BITS_PER_LONG < 64
+ if (seconds > INT_MAX)
+ seconds = INT_MAX;
+#endif
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+
+ /*
+ * We can't return 0 if we have an alarm pending ... And we'd
+ * better return too much than too little anyway
+ */
+ if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
+ it_old.it_value.tv_usec >= 500000)
+ it_old.it_value.tv_sec++;
+
+ return it_old.it_value.tv_sec;
+}
+
asmlinkage long sys_setitimer(int which,
struct itimerval __user *value,
struct itimerval __user *ovalue)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f2690ed7453..f119e098e67 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -22,7 +22,7 @@ static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
static struct subsys_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
/* current uevent sequence number */
static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
{
@@ -52,7 +52,7 @@ decl_subsys(kernel, NULL, NULL);
EXPORT_SYMBOL_GPL(kernel_subsys);
static struct attribute * kernel_attrs[] = {
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
&uevent_seqnum_attr.attr,
&uevent_helper_attr.attr,
#endif
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 6a5373868a9..c5f3c6613b6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -115,7 +115,9 @@ static void keventd_create_kthread(void *_create)
create->result = ERR_PTR(pid);
} else {
wait_for_completion(&create->started);
+ read_lock(&tasklist_lock);
create->result = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
}
complete(&create->done);
}
diff --git a/kernel/module.c b/kernel/module.c
index fb404299082..ddfe45ac2fd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -233,24 +233,6 @@ static unsigned long __find_symbol(const char *name,
return 0;
}
-/* Find a symbol in this elf symbol table */
-static unsigned long find_local_symbol(Elf_Shdr *sechdrs,
- unsigned int symindex,
- const char *strtab,
- const char *name)
-{
- unsigned int i;
- Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
-
- /* Search (defined) internal symbols first. */
- for (i = 1; i < sechdrs[symindex].sh_size/sizeof(*sym); i++) {
- if (sym[i].st_shndx != SHN_UNDEF
- && strcmp(name, strtab + sym[i].st_name) == 0)
- return sym[i].st_value;
- }
- return 0;
-}
-
/* Search for module by name: must hold module_mutex. */
static struct module *find_module(const char *name)
{
@@ -785,139 +767,6 @@ static struct module_attribute *modinfo_attrs[] = {
NULL,
};
-#ifdef CONFIG_OBSOLETE_MODPARM
-/* Bounds checking done below */
-static int obsparm_copy_string(const char *val, struct kernel_param *kp)
-{
- strcpy(kp->arg, val);
- return 0;
-}
-
-static int set_obsolete(const char *val, struct kernel_param *kp)
-{
- unsigned int min, max;
- unsigned int size, maxsize;
- int dummy;
- char *endp;
- const char *p;
- struct obsolete_modparm *obsparm = kp->arg;
-
- if (!val) {
- printk(KERN_ERR "Parameter %s needs an argument\n", kp->name);
- return -EINVAL;
- }
-
- /* type is: [min[-max]]{b,h,i,l,s} */
- p = obsparm->type;
- min = simple_strtol(p, &endp, 10);
- if (endp == obsparm->type)
- min = max = 1;
- else if (*endp == '-') {
- p = endp+1;
- max = simple_strtol(p, &endp, 10);
- } else
- max = min;
- switch (*endp) {
- case 'b':
- return param_array(kp->name, val, min, max, obsparm->addr,
- 1, param_set_byte, &dummy);
- case 'h':
- return param_array(kp->name, val, min, max, obsparm->addr,
- sizeof(short), param_set_short, &dummy);
- case 'i':
- return param_array(kp->name, val, min, max, obsparm->addr,
- sizeof(int), param_set_int, &dummy);
- case 'l':
- return param_array(kp->name, val, min, max, obsparm->addr,
- sizeof(long), param_set_long, &dummy);
- case 's':
- return param_array(kp->name, val, min, max, obsparm->addr,
- sizeof(char *), param_set_charp, &dummy);
-
- case 'c':
- /* Undocumented: 1-5c50 means 1-5 strings of up to 49 chars,
- and the decl is "char xxx[5][50];" */
- p = endp+1;
- maxsize = simple_strtol(p, &endp, 10);
- /* We check lengths here (yes, this is a hack). */
- p = val;
- while (p[size = strcspn(p, ",")]) {
- if (size >= maxsize)
- goto oversize;
- p += size+1;
- }
- if (size >= maxsize)
- goto oversize;
- return param_array(kp->name, val, min, max, obsparm->addr,
- maxsize, obsparm_copy_string, &dummy);
- }
- printk(KERN_ERR "Unknown obsolete parameter type %s\n", obsparm->type);
- return -EINVAL;
- oversize:
- printk(KERN_ERR
- "Parameter %s doesn't fit in %u chars.\n", kp->name, maxsize);
- return -EINVAL;
-}
-
-static int obsolete_params(const char *name,
- char *args,
- struct obsolete_modparm obsparm[],
- unsigned int num,
- Elf_Shdr *sechdrs,
- unsigned int symindex,
- const char *strtab)
-{
- struct kernel_param *kp;
- unsigned int i;
- int ret;
-
- kp = kmalloc(sizeof(kp[0]) * num, GFP_KERNEL);
- if (!kp)
- return -ENOMEM;
-
- for (i = 0; i < num; i++) {
- char sym_name[128 + sizeof(MODULE_SYMBOL_PREFIX)];
-
- snprintf(sym_name, sizeof(sym_name), "%s%s",
- MODULE_SYMBOL_PREFIX, obsparm[i].name);
-
- kp[i].name = obsparm[i].name;
- kp[i].perm = 000;
- kp[i].set = set_obsolete;
- kp[i].get = NULL;
- obsparm[i].addr
- = (void *)find_local_symbol(sechdrs, symindex, strtab,
- sym_name);
- if (!obsparm[i].addr) {
- printk("%s: falsely claims to have parameter %s\n",
- name, obsparm[i].name);
- ret = -EINVAL;
- goto out;
- }
- kp[i].arg = &obsparm[i];
- }
-
- ret = parse_args(name, args, kp, num, NULL);
- out:
- kfree(kp);
- return ret;
-}
-#else
-static int obsolete_params(const char *name,
- char *args,
- struct obsolete_modparm obsparm[],
- unsigned int num,
- Elf_Shdr *sechdrs,
- unsigned int symindex,
- const char *strtab)
-{
- if (num != 0)
- printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
- name);
- return 0;
-}
-#endif /* CONFIG_OBSOLETE_MODPARM */
-
static const char vermagic[] = VERMAGIC_STRING;
#ifdef CONFIG_MODVERSIONS
@@ -1572,7 +1421,6 @@ static struct module *load_module(void __user *umod,
exportindex, modindex, obsparmindex, infoindex, gplindex,
crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
gplfuturecrcindex;
- long arglen;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1691,23 +1539,11 @@ static struct module *load_module(void __user *umod,
}
/* Now copy in args */
- arglen = strlen_user(uargs);
- if (!arglen) {
- err = -EFAULT;
- goto free_hdr;
- }
- args = kmalloc(arglen, GFP_KERNEL);
- if (!args) {
- err = -ENOMEM;
+ args = strndup_user(uargs, ~0UL >> 1);
+ if (IS_ERR(args)) {
+ err = PTR_ERR(args);
goto free_hdr;
}
- if (copy_from_user(args, uargs, arglen) != 0) {
- err = -EFAULT;
- goto free_mod;
- }
-
- /* Userspace could have altered the string after the strlen_user() */
- args[arglen - 1] = '\0';
if (find_module(mod->name)) {
err = -EEXIST;
@@ -1887,27 +1723,17 @@ static struct module *load_module(void __user *umod,
set_fs(old_fs);
mod->args = args;
- if (obsparmindex) {
- err = obsolete_params(mod->name, mod->args,
- (struct obsolete_modparm *)
- sechdrs[obsparmindex].sh_addr,
- sechdrs[obsparmindex].sh_size
- / sizeof(struct obsolete_modparm),
- sechdrs, symindex,
- (char *)sechdrs[strindex].sh_addr);
- if (setupindex)
- printk(KERN_WARNING "%s: Ignoring new-style "
- "parameters in presence of obsolete ones\n",
- mod->name);
- } else {
- /* Size of section 0 is 0, so this works well if no params */
- err = parse_args(mod->name, mod->args,
- (struct kernel_param *)
- sechdrs[setupindex].sh_addr,
- sechdrs[setupindex].sh_size
- / sizeof(struct kernel_param),
- NULL);
- }
+ if (obsparmindex)
+ printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
+ mod->name);
+
+ /* Size of section 0 is 0, so this works well if no params */
+ err = parse_args(mod->name, mod->args,
+ (struct kernel_param *)
+ sechdrs[setupindex].sh_addr,
+ sechdrs[setupindex].sh_size
+ / sizeof(struct kernel_param),
+ NULL);
if (err < 0)
goto arch_cleanup;
diff --git a/kernel/params.c b/kernel/params.c
index a2915058231..9de637a5c8b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -265,12 +265,12 @@ int param_get_invbool(char *buffer, struct kernel_param *kp)
}
/* We cheat here and temporarily mangle the string. */
-int param_array(const char *name,
- const char *val,
- unsigned int min, unsigned int max,
- void *elem, int elemsize,
- int (*set)(const char *, struct kernel_param *kp),
- int *num)
+static int param_array(const char *name,
+ const char *val,
+ unsigned int min, unsigned int max,
+ void *elem, int elemsize,
+ int (*set)(const char *, struct kernel_param *kp),
+ int *num)
{
int ret;
struct kernel_param kp;
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
index 911fc62b822..5957312b2d6 100644
--- a/kernel/power/smp.c
+++ b/kernel/power/smp.c
@@ -49,9 +49,7 @@ void enable_nonboot_cpus(void)
printk("Thawing cpus ...\n");
for_each_cpu_mask(cpu, frozen_cpus) {
- error = smp_prepare_cpu(cpu);
- if (!error)
- error = cpu_up(cpu);
+ error = cpu_up(cpu);
if (!error) {
printk("CPU%d is up\n", cpu);
continue;
diff --git a/kernel/printk.c b/kernel/printk.c
index 13ced0f7828..8cc19431e74 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -122,44 +122,6 @@ static char *log_buf = __log_buf;
static int log_buf_len = __LOG_BUF_LEN;
static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
-/*
- * Setup a list of consoles. Called from init/main.c
- */
-static int __init console_setup(char *str)
-{
- char name[sizeof(console_cmdline[0].name)];
- char *s, *options;
- int idx;
-
- /*
- * Decode str into name, index, options.
- */
- if (str[0] >= '0' && str[0] <= '9') {
- strcpy(name, "ttyS");
- strncpy(name + 4, str, sizeof(name) - 5);
- } else
- strncpy(name, str, sizeof(name) - 1);
- name[sizeof(name) - 1] = 0;
- if ((options = strchr(str, ',')) != NULL)
- *(options++) = 0;
-#ifdef __sparc__
- if (!strcmp(str, "ttya"))
- strcpy(name, "ttyS0");
- if (!strcmp(str, "ttyb"))
- strcpy(name, "ttyS1");
-#endif
- for (s = name; *s; s++)
- if ((*s >= '0' && *s <= '9') || *s == ',')
- break;
- idx = simple_strtoul(s, NULL, 10);
- *s = 0;
-
- add_preferred_console(name, idx, options);
- return 1;
-}
-
-__setup("console=", console_setup);
-
static int __init log_buf_len_setup(char *str)
{
unsigned long size = memparse(str, &str);
@@ -659,6 +621,44 @@ static void call_console_drivers(unsigned long start, unsigned long end)
#endif
+/*
+ * Set up a list of consoles. Called from init/main.c
+ */
+static int __init console_setup(char *str)
+{
+ char name[sizeof(console_cmdline[0].name)];
+ char *s, *options;
+ int idx;
+
+ /*
+ * Decode str into name, index, options.
+ */
+ if (str[0] >= '0' && str[0] <= '9') {
+ strcpy(name, "ttyS");
+ strncpy(name + 4, str, sizeof(name) - 5);
+ } else {
+ strncpy(name, str, sizeof(name) - 1);
+ }
+ name[sizeof(name) - 1] = 0;
+ if ((options = strchr(str, ',')) != NULL)
+ *(options++) = 0;
+#ifdef __sparc__
+ if (!strcmp(str, "ttya"))
+ strcpy(name, "ttyS0");
+ if (!strcmp(str, "ttyb"))
+ strcpy(name, "ttyS1");
+#endif
+ for (s = name; *s; s++)
+ if ((*s >= '0' && *s <= '9') || *s == ',')
+ break;
+ idx = simple_strtoul(s, NULL, 10);
+ *s = 0;
+
+ add_preferred_console(name, idx, options);
+ return 1;
+}
+__setup("console=", console_setup);
+
/**
* add_preferred_console - add a device to the list of preferred consoles.
* @name: device name
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6df1559b1c0..13458bbaa1b 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -416,8 +416,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
rdp->curtail = &rdp->curlist;
}
- local_irq_disable();
if (rdp->nxtlist && !rdp->curlist) {
+ local_irq_disable();
rdp->curlist = rdp->nxtlist;
rdp->curtail = rdp->nxttail;
rdp->nxtlist = NULL;
@@ -442,9 +442,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
rcu_start_batch(rcp);
spin_unlock(&rcp->lock);
}
- } else {
- local_irq_enable();
}
+
rcu_check_quiescent_state(rcp, rdp);
if (rdp->donelist)
rcu_do_batch(rdp);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 7712912dbc8..b4b362b5baf 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -54,15 +54,15 @@ static int verbose; /* Print more debug info. */
static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
-MODULE_PARM(nreaders, "i");
+module_param(nreaders, int, 0);
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
-MODULE_PARM(stat_interval, "i");
+module_param(stat_interval, int, 0);
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
-MODULE_PARM(verbose, "i");
+module_param(verbose, bool, 0);
MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
-MODULE_PARM(test_no_idle_hz, "i");
+module_param(test_no_idle_hz, bool, 0);
MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
-MODULE_PARM(shuffle_interval, "i");
+module_param(shuffle_interval, int, 0);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
#define TORTURE_FLAG "rcutorture: "
#define PRINTK_STRING(s) \
@@ -441,6 +441,16 @@ rcu_torture_shuffle(void *arg)
return 0;
}
+static inline void
+rcu_torture_print_module_parms(char *tag)
+{
+ printk(KERN_ALERT TORTURE_FLAG "--- %s: nreaders=%d "
+ "stat_interval=%d verbose=%d test_no_idle_hz=%d "
+ "shuffle_interval = %d\n",
+ tag, nrealreaders, stat_interval, verbose, test_no_idle_hz,
+ shuffle_interval);
+}
+
static void
rcu_torture_cleanup(void)
{
@@ -483,9 +493,10 @@ rcu_torture_cleanup(void)
rcu_barrier();
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
- printk(KERN_ALERT TORTURE_FLAG
- "--- End of test: %s\n",
- atomic_read(&n_rcu_torture_error) == 0 ? "SUCCESS" : "FAILURE");
+ if (atomic_read(&n_rcu_torture_error))
+ rcu_torture_print_module_parms("End of test: FAILURE");
+ else
+ rcu_torture_print_module_parms("End of test: SUCCESS");
}
static int
@@ -501,11 +512,7 @@ rcu_torture_init(void)
nrealreaders = nreaders;
else
nrealreaders = 2 * num_online_cpus();
- printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d "
- "stat_interval=%d verbose=%d test_no_idle_hz=%d "
- "shuffle_interval = %d\n",
- nrealreaders, stat_interval, verbose, test_no_idle_hz,
- shuffle_interval);
+ rcu_torture_print_module_parms("Start of test");
fullstop = 0;
/* Set up the freelist. */
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c67189a25d5..d9b3d5847ed 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -1,12 +1,11 @@
/*
* Detect Soft Lockups
*
- * started by Ingo Molnar, (C) 2005, Red Hat
+ * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
*
* this code detects soft lockups: incidents in where on a CPU
* the kernel does not reschedule for 10 seconds or more.
*/
-
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/init.h>
@@ -17,13 +16,14 @@
static DEFINE_SPINLOCK(print_lock);
-static DEFINE_PER_CPU(unsigned long, timestamp) = 0;
-static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0;
+static DEFINE_PER_CPU(unsigned long, touch_timestamp);
+static DEFINE_PER_CPU(unsigned long, print_timestamp);
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
static int did_panic = 0;
-static int softlock_panic(struct notifier_block *this, unsigned long event,
- void *ptr)
+
+static int
+softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = {
void touch_softlockup_watchdog(void)
{
- per_cpu(timestamp, raw_smp_processor_id()) = jiffies;
+ per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -44,25 +44,35 @@ EXPORT_SYMBOL(touch_softlockup_watchdog);
* This callback runs from the timer interrupt, and checks
* whether the watchdog thread has hung or not:
*/
-void softlockup_tick(struct pt_regs *regs)
+void softlockup_tick(void)
{
int this_cpu = smp_processor_id();
- unsigned long timestamp = per_cpu(timestamp, this_cpu);
+ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
- if (per_cpu(print_timestamp, this_cpu) == timestamp)
+ /* prevent double reports: */
+ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp ||
+ did_panic ||
+ !per_cpu(watchdog_task, this_cpu))
return;
- /* Do not cause a second panic when there already was one */
- if (did_panic)
+ /* do not print during early bootup: */
+ if (unlikely(system_state != SYSTEM_RUNNING)) {
+ touch_softlockup_watchdog();
return;
+ }
- if (time_after(jiffies, timestamp + 10*HZ)) {
- per_cpu(print_timestamp, this_cpu) = timestamp;
+ /* Wake up the high-prio watchdog task every second: */
+ if (time_after(jiffies, touch_timestamp + HZ))
+ wake_up_process(per_cpu(watchdog_task, this_cpu));
+
+ /* Warn about unreasonable 10+ seconds delays: */
+ if (time_after(jiffies, touch_timestamp + 10*HZ)) {
+ per_cpu(print_timestamp, this_cpu) = touch_timestamp;
spin_lock(&print_lock);
printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
this_cpu);
- show_regs(regs);
+ dump_stack();
spin_unlock(&print_lock);
}
}
@@ -77,18 +87,16 @@ static int watchdog(void * __bind_cpu)
sched_setscheduler(current, SCHED_FIFO, &param);
current->flags |= PF_NOFREEZE;
- set_current_state(TASK_INTERRUPTIBLE);
-
/*
- * Run briefly once per second - if this gets delayed for
- * more than 10 seconds then the debug-printout triggers
- * in softlockup_tick():
+ * Run briefly once per second to reset the softlockup timestamp.
+ * If this gets delayed for more than 10 seconds then the
+ * debug-printout triggers in softlockup_tick().
*/
while (!kthread_should_stop()) {
- msleep_interruptible(1000);
+ set_current_state(TASK_INTERRUPTIBLE);
touch_softlockup_watchdog();
+ schedule();
}
- __set_current_state(TASK_RUNNING);
return 0;
}
@@ -110,11 +118,11 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
printk("watchdog for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
+ per_cpu(touch_timestamp, hotcpu) = jiffies;
per_cpu(watchdog_task, hotcpu) = p;
kthread_bind(p, hotcpu);
break;
case CPU_ONLINE:
-
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
@@ -146,4 +154,3 @@ __init void spawn_softlockup_task(void)
notifier_chain_register(&panic_notifier_list, &panic_block);
}
-
diff --git a/kernel/sys.c b/kernel/sys.c
index c0fcad9f826..38bc73ede2b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -224,18 +224,6 @@ int unregister_reboot_notifier(struct notifier_block * nb)
EXPORT_SYMBOL(unregister_reboot_notifier);
-#ifndef CONFIG_SECURITY
-int capable(int cap)
-{
- if (cap_raised(current->cap_effective, cap)) {
- current->flags |= PF_SUPERPRIV;
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(capable);
-#endif
-
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
int no_nice;
@@ -1375,7 +1363,7 @@ static void groups_sort(struct group_info *group_info)
/* a simple bsearch */
int groups_search(struct group_info *group_info, gid_t grp)
{
- int left, right;
+ unsigned int left, right;
if (!group_info)
return 0;
@@ -1383,7 +1371,7 @@ int groups_search(struct group_info *group_info, gid_t grp)
left = 0;
right = group_info->ngroups;
while (left < right) {
- int mid = (left+right)/2;
+ unsigned int mid = (left+right)/2;
int cmp = grp - GROUP_AT(group_info, mid);
if (cmp > 0)
left = mid + 1;
@@ -1433,7 +1421,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
return -EINVAL;
/* no need to grab task_lock here; it cannot change */
- get_group_info(current->group_info);
i = current->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
@@ -1446,7 +1433,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
}
}
out:
- put_group_info(current->group_info);
return i;
}
@@ -1487,9 +1473,7 @@ int in_group_p(gid_t grp)
{
int retval = 1;
if (grp != current->fsgid) {
- get_group_info(current->group_info);
retval = groups_search(current->group_info, grp);
- put_group_info(current->group_info);
}
return retval;
}
@@ -1500,9 +1484,7 @@ int in_egroup_p(gid_t grp)
{
int retval = 1;
if (grp != current->egid) {
- get_group_info(current->group_info);
retval = groups_search(current->group_info, grp);
- put_group_info(current->group_info);
}
return retval;
}
@@ -1630,20 +1612,21 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
{
struct rlimit new_rlim, *old_rlim;
+ unsigned long it_prof_secs;
int retval;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+ if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
- if (new_rlim.rlim_cur > new_rlim.rlim_max)
- return -EINVAL;
+ if (new_rlim.rlim_cur > new_rlim.rlim_max)
+ return -EINVAL;
old_rlim = current->signal->rlim + resource;
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
- return -EPERM;
+ return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
if (retval)
@@ -1653,19 +1636,40 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
*old_rlim = new_rlim;
task_unlock(current->group_leader);
- if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY &&
- (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
- new_rlim.rlim_cur <= cputime_to_secs(
- current->signal->it_prof_expires))) {
- cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
+ if (resource != RLIMIT_CPU)
+ goto out;
+
+ /*
+ * RLIMIT_CPU handling. Note that the kernel fails to return an error
+ * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
+ * very long-standing error, and fixing it now risks breakage of
+ * applications, so we live with it
+ */
+ if (new_rlim.rlim_cur == RLIM_INFINITY)
+ goto out;
+
+ it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
+ if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
+ unsigned long rlim_cur = new_rlim.rlim_cur;
+ cputime_t cputime;
+
+ if (rlim_cur == 0) {
+ /*
+ * The caller is asking for an immediate RLIMIT_CPU
+ * expiry. But we use the zero value to mean "it was
+ * never set". So let's cheat and make it one second
+ * instead
+ */
+ rlim_cur = 1;
+ }
+ cputime = secs_to_cputime(rlim_cur);
read_lock(&tasklist_lock);
spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF,
- &cputime, NULL);
+ set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
read_unlock(&tasklist_lock);
}
-
+out:
return 0;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 32b48e8ee36..e82726faeef 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -742,18 +742,18 @@ static ctl_table vm_table[] = {
{
.ctl_name = VM_DIRTY_WB_CS,
.procname = "dirty_writeback_centisecs",
- .data = &dirty_writeback_centisecs,
- .maxlen = sizeof(dirty_writeback_centisecs),
+ .data = &dirty_writeback_interval,
+ .maxlen = sizeof(dirty_writeback_interval),
.mode = 0644,
.proc_handler = &dirty_writeback_centisecs_handler,
},
{
.ctl_name = VM_DIRTY_EXPIRE_CS,
.procname = "dirty_expire_centisecs",
- .data = &dirty_expire_centisecs,
- .maxlen = sizeof(dirty_expire_centisecs),
+ .data = &dirty_expire_interval,
+ .maxlen = sizeof(dirty_expire_interval),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_userhz_jiffies,
},
{
.ctl_name = VM_NR_PDFLUSH_THREADS,
@@ -848,9 +848,8 @@ static ctl_table vm_table[] = {
.data = &laptop_mode,
.maxlen = sizeof(laptop_mode),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
- .extra1 = &zero,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies,
},
{
.ctl_name = VM_BLOCK_DUMP,
@@ -2054,6 +2053,8 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
int write, void *data)
{
if (write) {
+ if (*lvalp > LONG_MAX / HZ)
+ return 1;
*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
} else {
int val = *valp;
@@ -2075,6 +2076,8 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
int write, void *data)
{
if (write) {
+ if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
+ return 1;
*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
} else {
int val = *valp;
diff --git a/kernel/time.c b/kernel/time.c
index 804539165d8..e00a97b7724 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -202,24 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv,
return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
}
-long pps_offset; /* pps time offset (us) */
-long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
-
-long pps_freq; /* frequency offset (scaled ppm) */
-long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
-
-long pps_valid = PPS_VALID; /* pps signal watchdog counter */
-
-int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
-
-long pps_jitcnt; /* jitter limit exceeded */
-long pps_calcnt; /* calibration intervals */
-long pps_errcnt; /* calibration errors */
-long pps_stbcnt; /* stability limit exceeded */
-
-/* hook for a loadable hardpps kernel module */
-void (*hardpps_ptr)(struct timeval *);
-
/* we call this to notify the arch when the clock is being
* controlled. If no such arch routine, do nothing.
*/
@@ -279,7 +261,7 @@ int do_adjtimex(struct timex *txc)
result = -EINVAL;
goto leave;
}
- time_freq = txc->freq - pps_freq;
+ time_freq = txc->freq;
}
if (txc->modes & ADJ_MAXERROR) {
@@ -312,10 +294,8 @@ int do_adjtimex(struct timex *txc)
if ((time_next_adjust = txc->offset) == 0)
time_adjust = 0;
}
- else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
- ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
- (STA_PPSTIME | STA_PPSSIGNAL) ?
- pps_offset : txc->offset;
+ else if (time_status & STA_PLL) {
+ ltemp = txc->offset;
/*
* Scale the phase adjustment and
@@ -356,23 +336,14 @@ int do_adjtimex(struct timex *txc)
}
time_freq = min(time_freq, time_tolerance);
time_freq = max(time_freq, -time_tolerance);
- } /* STA_PLL || STA_PPSTIME */
+ } /* STA_PLL */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK) {
tick_usec = txc->tick;
tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
}
} /* txc->modes */
-leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
- || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
- && (time_status & STA_PPSSIGNAL) == 0)
- /* p. 24, (b) */
- || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
- == (STA_PPSTIME|STA_PPSJITTER))
- /* p. 24, (c) */
- || ((time_status & STA_PPSFREQ) != 0
- && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
- /* p. 24, (d) */
+leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
result = TIME_ERROR;
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
@@ -380,7 +351,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
else {
txc->offset = shift_right(time_offset, SHIFT_UPDATE);
}
- txc->freq = time_freq + pps_freq;
+ txc->freq = time_freq;
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
@@ -388,14 +359,16 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
txc->precision = time_precision;
txc->tolerance = time_tolerance;
txc->tick = tick_usec;
- txc->ppsfreq = pps_freq;
- txc->jitter = pps_jitter >> PPS_AVG;
- txc->shift = pps_shift;
- txc->stabil = pps_stabil;
- txc->jitcnt = pps_jitcnt;
- txc->calcnt = pps_calcnt;
- txc->errcnt = pps_errcnt;
- txc->stbcnt = pps_stbcnt;
+
+ /* PPS is not implemented, so these are zero */
+ txc->ppsfreq = 0;
+ txc->jitter = 0;
+ txc->shift = 0;
+ txc->stabil = 0;
+ txc->jitcnt = 0;
+ txc->calcnt = 0;
+ txc->errcnt = 0;
+ txc->stbcnt = 0;
write_sequnlock_irq(&xtime_lock);
do_gettimeofday(&txc->time);
notify_arch_cmos_timer();
diff --git a/kernel/timer.c b/kernel/timer.c
index 2410c18dbeb..ab189dd187c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -86,7 +86,8 @@ struct tvec_t_base_s {
} ____cacheline_aligned_in_smp;
typedef struct tvec_t_base_s tvec_base_t;
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
+static tvec_base_t boot_tvec_bases;
static inline void set_running_timer(tvec_base_t *base,
struct timer_list *timer)
@@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base);
void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
- timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+ timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
}
EXPORT_SYMBOL(init_timer);
@@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
ret = 1;
}
- new_base = &__get_cpu_var(tvec_bases);
+ new_base = __get_cpu_var(tvec_bases);
if (base != &new_base->t_base) {
/*
@@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer);
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- tvec_base_t *base = &per_cpu(tvec_bases, cpu);
+ tvec_base_t *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
BUG_ON(timer_pending(timer) || !timer->function);
@@ -504,7 +505,7 @@ unsigned long next_timer_interrupt(void)
}
hr_expires += jiffies;
- base = &__get_cpu_var(tvec_bases);
+ base = __get_cpu_var(tvec_bases);
spin_lock(&base->t_base.lock);
expires = base->timer_jiffies + (LONG_MAX >> 1);
list = NULL;
@@ -696,18 +697,9 @@ static void second_overflow(void)
/*
* Compute the frequency estimate and additional phase adjustment due
- * to frequency error for the next second. When the PPS signal is
- * engaged, gnaw on the watchdog counter and update the frequency
- * computed by the pll and the PPS signal.
+ * to frequency error for the next second.
*/
- pps_valid++;
- if (pps_valid == PPS_VALID) { /* PPS signal lost */
- pps_jitter = MAXTIME;
- pps_stabil = MAXFREQ;
- time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
- STA_PPSWANDER | STA_PPSERROR);
- }
- ltemp = time_freq + pps_freq;
+ ltemp = time_freq;
time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE));
#if HZ == 100
@@ -901,7 +893,7 @@ EXPORT_SYMBOL(xtime_lock);
*/
static void run_timer_softirq(struct softirq_action *h)
{
- tvec_base_t *base = &__get_cpu_var(tvec_bases);
+ tvec_base_t *base = __get_cpu_var(tvec_bases);
hrtimer_run_queues();
if (time_after_eq(jiffies, base->timer_jiffies))
@@ -914,6 +906,7 @@ static void run_timer_softirq(struct softirq_action *h)
void run_local_timers(void)
{
raise_softirq(TIMER_SOFTIRQ);
+ softlockup_tick();
}
/*
@@ -944,7 +937,6 @@ void do_timer(struct pt_regs *regs)
/* prevent loading jiffies before storing new jiffies_64 value. */
barrier();
update_times();
- softlockup_tick(regs);
}
#ifdef __ARCH_WANT_SYS_ALARM
@@ -955,19 +947,7 @@ void do_timer(struct pt_regs *regs)
*/
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
- struct itimerval it_new, it_old;
- unsigned int oldalarm;
-
- it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
- it_new.it_value.tv_sec = seconds;
- it_new.it_value.tv_usec = 0;
- do_setitimer(ITIMER_REAL, &it_new, &it_old);
- oldalarm = it_old.it_value.tv_sec;
- /* ehhh.. We can't return 0 if we have an alarm pending.. */
- /* And we'd better return too much than too little anyway */
- if ((!oldalarm && it_old.it_value.tv_usec) || it_old.it_value.tv_usec >= 500000)
- oldalarm++;
- return oldalarm;
+ return alarm_setitimer(seconds);
}
#endif
@@ -1256,12 +1236,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
return 0;
}
-static void __devinit init_timers_cpu(int cpu)
+static int __devinit init_timers_cpu(int cpu)
{
int j;
tvec_base_t *base;
- base = &per_cpu(tvec_bases, cpu);
+ base = per_cpu(tvec_bases, cpu);
+ if (!base) {
+ static char boot_done;
+
+ /*
+ * Cannot do allocation in init_timers as that runs before the
+ * allocator initializes (and would waste memory if there are
+ * more possible CPUs than will ever be installed/brought up).
+ */
+ if (boot_done) {
+ base = kmalloc_node(sizeof(*base), GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!base)
+ return -ENOMEM;
+ memset(base, 0, sizeof(*base));
+ } else {
+ base = &boot_tvec_bases;
+ boot_done = 1;
+ }
+ per_cpu(tvec_bases, cpu) = base;
+ }
spin_lock_init(&base->t_base.lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1273,6 +1273,7 @@ static void __devinit init_timers_cpu(int cpu)
INIT_LIST_HEAD(base->tv1.vec + j);
base->timer_jiffies = jiffies;
+ return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1295,8 +1296,8 @@ static void __devinit migrate_timers(int cpu)
int i;
BUG_ON(cpu_online(cpu));
- old_base = &per_cpu(tvec_bases, cpu);
- new_base = &get_cpu_var(tvec_bases);
+ old_base = per_cpu(tvec_bases, cpu);
+ new_base = get_cpu_var(tvec_bases);
local_irq_disable();
spin_lock(&new_base->t_base.lock);
@@ -1326,7 +1327,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
long cpu = (long)hcpu;
switch(action) {
case CPU_UP_PREPARE:
- init_timers_cpu(cpu);
+ if (init_timers_cpu(cpu) < 0)
+ return NOTIFY_BAD;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
diff --git a/kernel/user.c b/kernel/user.c
index d9deae43a9a..2116642f42c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -105,15 +105,19 @@ void free_uid(struct user_struct *up)
{
unsigned long flags;
+ if (!up)
+ return;
+
local_irq_save(flags);
- if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+ if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
uid_hash_remove(up);
+ spin_unlock_irqrestore(&uidhash_lock, flags);
key_put(up->uid_keyring);
key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
- spin_unlock(&uidhash_lock);
+ } else {
+ local_irq_restore(flags);
}
- local_irq_restore(flags);
}
struct user_struct * alloc_uid(uid_t uid)