aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-04-08 10:35:30 +0200
committerIngo Molnar <mingo@elte.hu>2009-04-08 10:35:30 +0200
commit5ea472a77f8e4811ceee3f44a9deda6ad6e8b789 (patch)
treea9ec5019e2b666a19874fc344ffb0dd5da6bce94 /kernel
parent6c009ecef8cca28c7c09eb16d0802e37915a76e1 (diff)
parent577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff)
Merge commit 'v2.6.30-rc1' into perfcounters/core
Conflicts: arch/powerpc/include/asm/systbl.h arch/powerpc/include/asm/unistd.h include/linux/init_task.h Merge reason: the conflicts are non-trivial: PowerPC placement of sys_perf_counter_open has to be mixed with the new preadv/pwrite syscalls. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/hung_task.c217
-rw-r--r--kernel/irq/devres.c16
-rw-r--r--kernel/irq/handle.c50
-rw-r--r--kernel/irq/manage.c189
-rw-r--r--kernel/kprobes.c281
-rw-r--r--kernel/module.c3
-rw-r--r--kernel/softlockup.c100
-rw-r--r--kernel/sysctl.c38
-rw-r--r--kernel/trace/blktrace.c7
-rw-r--r--kernel/trace/trace.c21
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--kernel/trace/trace_sched_switch.c3
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
18 files changed, 729 insertions, 221 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 63c697529ca..e914ca992d7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/exit.c b/kernel/exit.c
index fbb5d94c8bb..4741376c8de 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -926,6 +926,8 @@ NORET_TYPE void do_exit(long code)
schedule();
}
+ exit_irq_thread();
+
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 381d7f9b70f..89c1efb3ccf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
tsk->mm = NULL;
tsk->active_mm = NULL;
@@ -1033,11 +1036,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->default_timer_slack_ns = current->timer_slack_ns;
-#ifdef CONFIG_DETECT_SOFTLOCKUP
- p->last_switch_count = 0;
- p->last_switch_timestamp = 0;
-#endif
-
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 00000000000..022a4927b78
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ * (limit on how many tasks one scan in
+ * check_hung_uninterruptible_tasks() walks over)
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bounded.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10; /* reports still allowed; check_hung_task()
+                                                              * decrements it per report and stays
+                                                              * silent once it reaches zero */
+
+static int __read_mostly did_panic; /* set by the panic notifier hung_task_panic() */
+
+static struct task_struct *watchdog_task; /* the khungtaskd thread started in hung_task_init() */
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+/*
+ * Handler for the "hung_task_panic=" boot parameter.
+ *
+ * @str: text following the '=' on the command line
+ *
+ * The number is parsed with simple_strtoul() (base 0) and stored in
+ * sysctl_hung_task_panic. Always returns 1.
+ */
+static int __init hung_task_panic_setup(char *str)
+{
+	unsigned long parsed = simple_strtoul(str, NULL, 0);
+
+	sysctl_hung_task_panic = parsed;
+	return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ did_panic = 1; /* Panic notifier callback: remember that a panic happened.
+                 * check_hung_uninterruptible_tasks() tests did_panic and
+                 * skips its scan once it is set. The arguments are unused. */
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+ .notifier_call = hung_task_panic, /* registered on panic_notifier_list by hung_task_init() */
+};
+
+/*
+ * Report task @t if it has not been scheduled since the previous scan.
+ *
+ * @t:       task found in TASK_UNINTERRUPTIBLE state by the caller
+ * @timeout: check interval in seconds; only used in the message
+ *
+ * The sum of voluntary and involuntary context switches is compared with
+ * the value stored in t->last_switch_count by the previous call. An
+ * unchanged sum means the task was not switched in between, and the task
+ * is reported. Each report consumes one of sysctl_hung_task_warnings;
+ * nothing is printed once that counter is zero. After a report the
+ * kernel panics if sysctl_hung_task_panic is set.
+ */
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	/*
+	 * Ensure the task is not frozen.
+	 * Also, when a freshly created task is scheduled once and changes
+	 * its state to TASK_UNINTERRUPTIBLE without ever having been
+	 * switched out, it mustn't be checked.
+	 */
+	if (unlikely(t->flags & PF_FROZEN || !switch_count))
+		return;
+
+	if (switch_count != t->last_switch_count) {
+		t->last_switch_count = switch_count;
+		return;
+	}
+	if (!sysctl_hung_task_warnings)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than @timeout
+	 * seconds, complain. @timeout is unsigned long, hence %lu.
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%lu seconds.\n", t->comm, t->pid, timeout);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	touch_nmi_watchdog();
+
+	if (sysctl_hung_task_panic)
+		panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ *
+ * A reference is taken on both @g and @t before the read-side critical
+ * section is left and dropped only after it has been re-entered, so both
+ * task structs remain allocated across the gap. The caller checks
+ * afterwards whether either task has become TASK_DEAD in the meantime.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+ get_task_struct(g);
+ get_task_struct(t);
+ rcu_read_unlock();
+ cond_resched(); /* the reschedule point mentioned above for classic RCU */
+ rcu_read_lock();
+ put_task_struct(t);
+ put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
+ * a really long time (@timeout seconds, 120 by default). If that
+ * happens, print out a warning.
+ *
+ * @timeout: check interval in seconds, passed through to check_hung_task()
+ *
+ * At most sysctl_hung_task_check_count tasks are examined per call, and
+ * the RCU read-side critical section is left and re-entered every
+ * HUNG_TASK_BATCHING tasks. Nothing is checked after an oops
+ * (TAINT_DIE) or a panic.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+	/* Same type as the sysctl so that a large limit is not truncated. */
+	unsigned long max_count = sysctl_hung_task_check_count;
+	int batch_count = HUNG_TASK_BATCHING;
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if (test_taint(TAINT_DIE) || did_panic)
+		return;
+
+	rcu_read_lock();
+	do_each_thread(g, t) {
+		/*
+		 * Post-decrement: a limit of N examines exactly N tasks
+		 * and a limit of 0 examines none.
+		 */
+		if (!max_count--)
+			goto unlock;
+		if (!--batch_count) {
+			batch_count = HUNG_TASK_BATCHING;
+			rcu_lock_break(g, t);
+			/* Exit if t or g was unhashed during refresh. */
+			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+				goto unlock;
+		}
+		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+		if (t->state == TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, timeout);
+	} while_each_thread(g, t);
+ unlock:
+	rcu_read_unlock();
+}
+
+/*
+ * Convert a timeout given in seconds into jiffies.
+ *
+ * A timeout of 0 yields MAX_SCHEDULE_TIMEOUT, which disables the
+ * watchdog; any other value is multiplied by HZ.
+ */
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+	if (!timeout)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	return timeout * HZ;
+}
+
+/*
+ * sysctl handler for the hung task timeout.
+ *
+ * The value itself is handled by proc_doulongvec_minmax(). After a
+ * successful write the watchdog thread is woken up.
+ *
+ * Returns the result of proc_doulongvec_minmax().
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+				  struct file *filp, void __user *buffer,
+				  size_t *lenp, loff_t *ppos)
+{
+	int err = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	/* Reads and failed writes leave the watchdog alone. */
+	if (!err && write)
+		wake_up_process(watchdog_task);
+
+	return err;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ *
+ * Loops forever: sleep for the configured timeout, then scan all tasks
+ * with check_hung_uninterruptible_tasks(). Whenever the sleep returns a
+ * non-zero value the timeout is re-read from
+ * sysctl_hung_task_timeout_secs and the sleep is started again, so a
+ * scan only follows a sleep that returned zero.
+ * NOTE(review): a non-zero return presumably means the thread was woken
+ * early, e.g. by proc_dohung_task_timeout_secs() - confirm against the
+ * schedule_timeout_interruptible() contract.
+ */
+static int watchdog(void *dummy)
+{
+ set_user_nice(current, 0);
+
+ for ( ; ; ) {
+ unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+ while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+ timeout = sysctl_hung_task_timeout_secs; /* pick up a changed timeout before sleeping again */
+
+ check_hung_uninterruptible_tasks(timeout);
+ }
+
+ return 0; /* not reached: the loop above has no exit */
+}
+
+static int __init hung_task_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &panic_block); /* so hung_task_panic() can set did_panic */
+ watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); /* NOTE(review): the result is stored unchecked and
+                                                             * later passed to wake_up_process() by
+                                                             * proc_dohung_task_timeout_secs() - confirm that
+                                                             * a failed kthread_run() cannot reach that call */
+
+ return 0;
+}
+
+module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bf..d06df9c41cb 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
}
/**
- * devm_request_irq - allocate an interrupt line for a managed device
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
+ * @thread_fn: function to be called in a threaded interrupt context. NULL
+ * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
* If an IRQ allocated with this function needs to be freed
* separately, dev_free_irq() must be used.
*/
-int devm_request_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id)
{
struct irq_devres *dr;
int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
if (!dr)
return -ENOMEM;
- rc = request_irq(irq, handler, irqflags, devname, dev_id);
+ rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+ dev_id);
if (rc) {
devres_free(dr);
return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
return 0;
}
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
/**
* devm_free_irq - free an interrupt
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae62..d82142be8dd 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
return IRQ_NONE;
}
+/*
+ * Warn, once per action, that the primary handler of @action returned
+ * IRQ_WAKE_THREAD although no thread function was set up.
+ *
+ * @irq:    interrupt number, used only in the message
+ * @action: the offending action; IRQTF_WARNED in its thread_flags records
+ *          that the warning has already been printed
+ */
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+	if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+		return;
+
+	/* Terminate the line so that the next printk starts on a fresh one. */
+	printk(KERN_WARNING "IRQ %u device %s returned IRQ_WAKE_THREAD "
+	       "but no thread function available.\n", irq, action->name);
+}
+
DEFINE_TRACE(irq_handler_entry);
DEFINE_TRACE(irq_handler_exit);
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, ret);
- if (ret == IRQ_HANDLED)
+
+ switch (ret) {
+ case IRQ_WAKE_THREAD:
+ /*
+ * Set result to handled so the spurious check
+ * does not trigger.
+ */
+ ret = IRQ_HANDLED;
+
+ /*
+ * Catch drivers which return WAKE_THREAD but
+ * did not set up a thread function
+ */
+ if (unlikely(!action->thread_fn)) {
+ warn_no_thread(irq, action);
+ break;
+ }
+
+ /*
+ * Wake up the handler thread for this
+ * action. In case the thread crashed and was
+ * killed we just pretend that we handled the
+ * interrupt. The hardirq handler above has
+ * disabled the device interrupt, so no irq
+ * storm is lurking.
+ */
+ if (likely(!test_bit(IRQTF_DIED,
+ &action->thread_flags))) {
+ set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ wake_up_process(action->thread);
+ }
+
+ /* Fall through to add to randomness */
+ case IRQ_HANDLED:
status |= action->flags;
+ break;
+
+ default:
+ break;
+ }
+
retval |= ret;
action = action->next;
} while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355..7e2e7dd4cd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
*/
#include <linux/irq.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include "internals.h"
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
/* Oops, that failed? */
} while (status & IRQ_INPROGRESS);
+
+ /*
+ * We made sure that no hardirq handler is running. Now verify
+ * that no threaded handlers are active.
+ */
+ wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
}
EXPORT_SYMBOL(synchronize_irq);
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
/**
* irq_can_set_affinity - Check if the affinity of a given irq can be set
* @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
return 1;
}
+/*
+ * Apply @cpumask to the handler thread of every action chained on @desc.
+ * Actions that have no thread are skipped.
+ *
+ * NOTE(review): both callers visible in this patch invoke this with
+ * desc->lock held and interrupts off - confirm that
+ * set_cpus_allowed_ptr() may be called in that context.
+ */
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+	struct irqaction *act;
+
+	for (act = desc->action; act; act = act->next) {
+		if (!act->thread)
+			continue;
+		set_cpus_allowed_ptr(act->thread, cpumask);
+	}
+}
+
/**
* irq_set_affinity - Set the irq affinity of a given irq
* @irq: Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
+ irq_set_thread_affinity(desc, cpumask);
desc->status |= IRQ_AFFINITY_SET;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
+ if (!ret)
+ irq_set_thread_affinity(desc, desc->affinity);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
return ret;
}
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+ while (!kthread_should_stop()) { /* sleep until IRQTF_RUNTHREAD is set or the thread is told to stop */
+ set_current_state(TASK_INTERRUPTIBLE); /* state is changed before the flag is tested; presumably so
+                                         * that a wakeup arriving in between is not lost */
+
+ if (test_and_clear_bit(IRQTF_RUNTHREAD,
+ &action->thread_flags)) {
+ __set_current_state(TASK_RUNNING);
+ return 0; /* work pending: handle_IRQ_event() set the flag */
+ }
+ schedule();
+ }
+ return -1; /* kthread_should_stop() became true */
+}
+
+/*
+ * Interrupt handler thread
+ *
+ * @data is the struct irqaction this thread serves. The thread switches
+ * itself to SCHED_FIFO at priority MAX_USER_RT_PRIO/2 and then, for each
+ * successful return of irq_wait_for_interrupt(), either calls
+ * action->thread_fn() or, if the interrupt is marked IRQ_DISABLED, only
+ * flags it IRQ_PENDING. desc->threads_active counts the handlers in
+ * flight; waiters on desc->wait_for_threads are woken when it drops to
+ * zero. Returns 0 once irq_wait_for_interrupt() reports a stop request.
+ */
+static int irq_thread(void *data)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+ struct irqaction *action = data;
+ struct irq_desc *desc = irq_to_desc(action->irq);
+ int wake;
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+ current->irqaction = action; /* inspected by exit_irq_thread() if this task exits unexpectedly */
+
+ while (!irq_wait_for_interrupt(action)) {
+
+ atomic_inc(&desc->threads_active);
+
+ spin_lock_irq(&desc->lock);
+ if (unlikely(desc->status & IRQ_DISABLED)) {
+ /*
+ * CHECKME: We might need a dedicated
+ * IRQ_THREAD_PENDING flag here, which
+ * retriggers the thread in check_irq_resend()
+ * but AFAICT IRQ_PENDING should be fine as it
+ * retriggers the interrupt itself --- tglx
+ */
+ desc->status |= IRQ_PENDING;
+ spin_unlock_irq(&desc->lock);
+ } else {
+ spin_unlock_irq(&desc->lock);
+
+ action->thread_fn(action->irq, action->dev_id); /* runs with desc->lock released */
+ }
+
+ wake = atomic_dec_and_test(&desc->threads_active);
+
+ if (wake && waitqueue_active(&desc->wait_for_threads))
+ wake_up(&desc->wait_for_threads); /* lets synchronize_irq() waiters proceed */
+ }
+
+ /*
+ * Clear irqaction. Otherwise exit_irq_thread() would make
+ * a fuss about an active irq thread going into nirvana.
+ */
+ current->irqaction = NULL;
+ return 0;
+}
+
+/*
+ * Called from do_exit()
+ *
+ * If the exiting task is still registered as an interrupt handler
+ * thread (irq_thread() clears current->irqaction only when it returns
+ * normally), complain and mark the thread of that action as dead.
+ * Does nothing for every other task.
+ */
+void exit_irq_thread(void)
+{
+	struct task_struct *tsk = current;
+
+	if (!tsk->irqaction)
+		return;
+
+	printk(KERN_ERR
+	       "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm, tsk->pid, tsk->irqaction->irq);
+
+	/*
+	 * Set the THREAD DIED flag to prevent further wakeups of the
+	 * soon to be gone threaded handler. The IRQTF_* bits live in
+	 * ->thread_flags, which is the word handle_IRQ_event(),
+	 * __setup_irq() and __free_irq() test; ->flags carries the
+	 * IRQF_* request flags.
+	 */
+	set_bit(IRQTF_DIED, &tsk->irqaction->thread_flags);
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
/*
+ * Threaded handler ?
+ */
+ if (new->thread_fn) {
+ struct task_struct *t;
+
+ t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+ new->name);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies to avoid that the interrupt code
+ * references an already freed task_struct.
+ */
+ get_task_struct(t);
+ new->thread = t;
+ wake_up_process(t);
+ }
+
+ /*
* The following block of code has to be executed atomically
*/
spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (!shared) {
irq_chip_set_defaults(desc->chip);
+ init_waitqueue_head(&desc->wait_for_threads);
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc, irq,
new->flags & IRQF_TRIGGER_MASK);
- if (ret) {
- spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
- }
+ if (ret)
+ goto out_thread;
} else
compat_irq_chip_set_default_handler(desc);
#if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
dump_stack();
}
#endif
+ ret = -EBUSY;
+
+out_thread:
spin_unlock_irqrestore(&desc->lock, flags);
- return -EBUSY;
+ if (new->thread) {
+ struct task_struct *t = new->thread;
+
+ new->thread = NULL;
+ if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+ kthread_stop(t);
+ put_task_struct(t);
+ }
+ return ret;
}
/**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
+ struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
else
desc->chip->disable(irq);
}
+
+ irqthread = action->thread;
+ action->thread = NULL;
+
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
+ if (irqthread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(irqthread);
+ put_task_struct(irqthread);
+ }
+
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
EXPORT_SYMBOL(free_irq);
/**
- * request_irq - allocate an interrupt line
+ * request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
+ * @handler: Function to be called when the IRQ occurs.
+ * Primary handler for threaded interrupts
+ * @thread_fn: Function called from the irq handler thread
+ * If NULL, no irq thread is created
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
* raises, you must take care both to initialise your hardware
* and to set up the interrupt handler in the right order.
*
+ * If you want to set up a threaded irq handler for your device
+ * then you need to supply @handler and @thread_fn. @handler is
+ * still called in hard interrupt context and has to check
+ * whether the interrupt originates from the device. If yes it
+ * needs to disable the interrupt on the device and return
+ * IRQ_WAKE_THREAD which will wake up the handler thread and run
+ * @thread_fn. This split handler design is necessary to support
+ * shared interrupts.
+ *
* Dev_id must be globally unique. Normally the address of the
* device data structure is used as the cookie. Since the handler
* receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
* IRQF_TRIGGER_* Specify active edge(s) or level
*
*/
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn, unsigned long irqflags,
+ const char *devname, void *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
return -ENOMEM;
action->handler = handler;
+ action->thread_fn = thread_fn;
action->flags = irqflags;
action->name = devname;
action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
#endif
return retval;
}
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5016bfb682b..a5e74ddee0e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
-static bool kprobe_enabled;
+static bool kprobes_all_disarmed;
static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
- if (kp->pre_handler && !kprobe_gone(kp)) {
+ if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
set_kprobe_instance(kp);
if (kp->pre_handler(kp, regs))
return 1;
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
- if (kp->post_handler && !kprobe_gone(kp)) {
+ if (kp->post_handler && likely(!kprobe_disabled(kp))) {
set_kprobe_instance(kp);
kp->post_handler(kp, regs, flags);
reset_kprobe_instance();
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
}
/*
-* Add the new probe to old_p->list. Fail if this is the
+* Add the new probe to ap->list. Fail if this is the
* second jprobe at the address - two jprobes can't coexist
*/
-static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
{
+ BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
if (p->break_handler) {
- if (old_p->break_handler)
+ if (ap->break_handler)
return -EEXIST;
- list_add_tail_rcu(&p->list, &old_p->list);
- old_p->break_handler = aggr_break_handler;
+ list_add_tail_rcu(&p->list, &ap->list);
+ ap->break_handler = aggr_break_handler;
} else
- list_add_rcu(&p->list, &old_p->list);
- if (p->post_handler && !old_p->post_handler)
- old_p->post_handler = aggr_post_handler;
+ list_add_rcu(&p->list, &ap->list);
+ if (p->post_handler && !ap->post_handler)
+ ap->post_handler = aggr_post_handler;
+
+ if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
+ ap->flags &= ~KPROBE_FLAG_DISABLED;
+ if (!kprobes_all_disarmed)
+ /* Arm the breakpoint again. */
+ arch_arm_kprobe(ap);
+ }
return 0;
}
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
copy_kprobe(p, ap);
flush_insn_slot(ap);
ap->addr = p->addr;
+ ap->flags = p->flags;
ap->pre_handler = aggr_pre_handler;
ap->fault_handler = aggr_fault_handler;
/* We don't care the kprobe which has gone. */
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
struct kprobe *p)
{
int ret = 0;
- struct kprobe *ap;
+ struct kprobe *ap = old_p;
- if (kprobe_gone(old_p)) {
+ if (old_p->pre_handler != aggr_pre_handler) {
+ /* If old_p is not an aggr_probe, create new aggr_kprobe. */
+ ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+ if (!ap)
+ return -ENOMEM;
+ add_aggr_kprobe(ap, old_p);
+ }
+
+ if (kprobe_gone(ap)) {
/*
* Attempting to insert new probe at the same location that
* had a probe in the module vaddr area which already
* freed. So, the instruction slot has already been
* released. We need a new slot for the new probe.
*/
- ret = arch_prepare_kprobe(old_p);
+ ret = arch_prepare_kprobe(ap);
if (ret)
+ /*
+ * Even if fail to allocate new slot, don't need to
+ * free aggr_probe. It will be used next time, or
+ * freed by unregister_kprobe.
+ */
return ret;
- }
- if (old_p->pre_handler == aggr_pre_handler) {
- copy_kprobe(old_p, p);
- ret = add_new_kprobe(old_p, p);
- ap = old_p;
- } else {
- ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
- if (!ap) {
- if (kprobe_gone(old_p))
- arch_remove_kprobe(old_p);
- return -ENOMEM;
- }
- add_aggr_kprobe(ap, old_p);
- copy_kprobe(ap, p);
- ret = add_new_kprobe(ap, p);
- }
- if (kprobe_gone(old_p)) {
+
/*
- * If the old_p has gone, its breakpoint has been disarmed.
- * We have to arm it again after preparing real kprobes.
+ * Clear gone flag to prevent allocating new slot again, and
+ * set disabled flag because it is not armed yet.
*/
- ap->flags &= ~KPROBE_FLAG_GONE;
- if (kprobe_enabled)
- arch_arm_kprobe(ap);
+ ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
+ | KPROBE_FLAG_DISABLED;
}
- return ret;
+
+ copy_kprobe(ap, p);
+ return add_new_kprobe(ap, p);
+}
+
+/*
+ * Mark the aggregate probe @p disabled if every probe on its list is
+ * disabled.
+ *
+ * Returns 1 when @p was marked disabled, 0 when at least one probe on
+ * the list is still enabled (@p is left untouched in that case).
+ */
+static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
+{
+	struct kprobe *child;
+	int all_disabled = 1;
+
+	list_for_each_entry_rcu(child, &p->list, list) {
+		if (kprobe_disabled(child))
+			continue;
+		/* An active probe remains, so the aggregate must stay enabled. */
+		all_disabled = 0;
+		break;
+	}
+
+	if (all_disabled)
+		p->flags |= KPROBE_FLAG_DISABLED;
+
+	return all_disabled;
+}
static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
return -EINVAL;
}
- p->flags = 0;
+ /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
+ p->flags &= KPROBE_FLAG_DISABLED;
+
/*
* Check if are we probing a module.
*/
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
- if (kprobe_enabled)
+ if (!kprobes_all_disarmed && !kprobe_disabled(p))
arch_arm_kprobe(p);
out_unlock_text:
@@ -722,26 +748,39 @@ out:
return ret;
}
+EXPORT_SYMBOL_GPL(register_kprobe);
-/*
- * Unregister a kprobe without a scheduler synchronization.
- */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
{
struct kprobe *old_p, *list_p;
old_p = get_kprobe(p->addr);
if (unlikely(!old_p))
- return -EINVAL;
+ return NULL;
if (p != old_p) {
list_for_each_entry_rcu(list_p, &old_p->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
- goto valid_p;
- return -EINVAL;
+ goto valid;
+ return NULL;
}
-valid_p:
+valid:
+ return old_p;
+}
+
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+{
+ struct kprobe *old_p, *list_p;
+
+ old_p = __get_valid_kprobe(p);
+ if (old_p == NULL)
+ return -EINVAL;
+
if (old_p == p ||
(old_p->pre_handler == aggr_pre_handler &&
list_is_singular(&old_p->list))) {
@@ -750,7 +789,7 @@ valid_p:
* enabled and not gone - otherwise, the breakpoint would
* already have been removed. We save on flushing icache.
*/
- if (kprobe_enabled && !kprobe_gone(old_p)) {
+ if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
mutex_lock(&text_mutex);
arch_disarm_kprobe(p);
mutex_unlock(&text_mutex);
@@ -768,6 +807,11 @@ valid_p:
}
noclean:
list_del_rcu(&p->list);
+ if (!kprobe_disabled(old_p)) {
+ try_to_disable_aggr_kprobe(old_p);
+ if (!kprobes_all_disarmed && kprobe_disabled(old_p))
+ arch_disarm_kprobe(old_p);
+ }
}
return 0;
}
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
}
return ret;
}
+EXPORT_SYMBOL_GPL(register_kprobes);
void __kprobes unregister_kprobe(struct kprobe *p)
{
unregister_kprobes(&p, 1);
}
+EXPORT_SYMBOL_GPL(unregister_kprobe);
void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
if (kps[i]->addr)
__unregister_kprobe_bottom(kps[i]);
}
+EXPORT_SYMBOL_GPL(unregister_kprobes);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
}
return ret;
}
+EXPORT_SYMBOL_GPL(register_jprobes);
int __kprobes register_jprobe(struct jprobe *jp)
{
return register_jprobes(&jp, 1);
}
+EXPORT_SYMBOL_GPL(register_jprobe);
void __kprobes unregister_jprobe(struct jprobe *jp)
{
unregister_jprobes(&jp, 1);
}
+EXPORT_SYMBOL_GPL(unregister_jprobe);
void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
__unregister_kprobe_bottom(&jps[i]->kp);
}
}
+EXPORT_SYMBOL_GPL(unregister_jprobes);
#ifdef CONFIG_KRETPROBES
/*
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
free_rp_inst(rp);
return ret;
}
+EXPORT_SYMBOL_GPL(register_kretprobe);
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
}
return ret;
}
+EXPORT_SYMBOL_GPL(register_kretprobes);
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
unregister_kretprobes(&rp, 1);
}
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
@@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
}
}
}
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(register_kretprobe);
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(register_kretprobes);
+
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
}
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
}
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
struct pt_regs *regs)
@@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
static void __kprobes kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;
+
p->flags |= KPROBE_FLAG_GONE;
if (p->pre_handler == aggr_pre_handler) {
/*
@@ -1173,8 +1234,8 @@ static int __init init_kprobes(void)
}
}
- /* By default, kprobes are enabled */
- kprobe_enabled = true;
+ /* By default, kprobes are armed */
+ kprobes_all_disarmed = false;
err = arch_init_kprobes();
if (!err)
@@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
else
kprobe_type = "k";
if (sym)
- seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
- sym, offset, (modname ? modname : " "),
- (kprobe_gone(p) ? "[GONE]" : ""));
+ seq_printf(pi, "%p %s %s+0x%x %s %s%s\n",
+ p->addr, kprobe_type, sym, offset,
+ (modname ? modname : " "),
+ (kprobe_gone(p) ? "[GONE]" : ""),
+ ((kprobe_disabled(p) && !kprobe_gone(p)) ?
+ "[DISABLED]" : ""));
else
- seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
- (kprobe_gone(p) ? "[GONE]" : ""));
+ seq_printf(pi, "%p %s %p %s%s\n",
+ p->addr, kprobe_type, p->addr,
+ (kprobe_gone(p) ? "[GONE]" : ""),
+ ((kprobe_disabled(p) && !kprobe_gone(p)) ?
+ "[DISABLED]" : ""));
}
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = {
.release = seq_release,
};
-static void __kprobes enable_all_kprobes(void)
+/* Disable one kprobe */
+int __kprobes disable_kprobe(struct kprobe *kp)
+{
+ int ret = 0;
+ struct kprobe *p;
+
+ mutex_lock(&kprobe_mutex);
+
+ /* Check whether specified probe is valid. */
+ p = __get_valid_kprobe(kp);
+ if (unlikely(p == NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* If the probe is already disabled (or gone), just return */
+ if (kprobe_disabled(kp))
+ goto out;
+
+ kp->flags |= KPROBE_FLAG_DISABLED;
+ if (p != kp)
+ /* When kp != p, p is always enabled. */
+ try_to_disable_aggr_kprobe(p);
+
+ if (!kprobes_all_disarmed && kprobe_disabled(p))
+ arch_disarm_kprobe(p);
+out:
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(disable_kprobe);
+
+/* Enable one kprobe */
+int __kprobes enable_kprobe(struct kprobe *kp)
+{
+ int ret = 0;
+ struct kprobe *p;
+
+ mutex_lock(&kprobe_mutex);
+
+ /* Check whether specified probe is valid. */
+ p = __get_valid_kprobe(kp);
+ if (unlikely(p == NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (kprobe_gone(kp)) {
+ /* This kprobe has gone, we couldn't enable it. */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!kprobes_all_disarmed && kprobe_disabled(p))
+ arch_arm_kprobe(p);
+
+ p->flags &= ~KPROBE_FLAG_DISABLED;
+ if (p != kp)
+ kp->flags &= ~KPROBE_FLAG_DISABLED;
+out:
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(enable_kprobe);
+
+static void __kprobes arm_all_kprobes(void)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void)
mutex_lock(&kprobe_mutex);
- /* If kprobes are already enabled, just return */
- if (kprobe_enabled)
+ /* If kprobes are armed, just return */
+ if (!kprobes_all_disarmed)
goto already_enabled;
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
- if (!kprobe_gone(p))
+ if (!kprobe_disabled(p))
arch_arm_kprobe(p);
}
mutex_unlock(&text_mutex);
- kprobe_enabled = true;
+ kprobes_all_disarmed = false;
printk(KERN_INFO "Kprobes globally enabled\n");
already_enabled:
@@ -1302,7 +1434,7 @@ already_enabled:
return;
}
-static void __kprobes disable_all_kprobes(void)
+static void __kprobes disarm_all_kprobes(void)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void)
mutex_lock(&kprobe_mutex);
- /* If kprobes are already disabled, just return */
- if (!kprobe_enabled)
+ /* If kprobes are already disarmed, just return */
+ if (kprobes_all_disarmed)
goto already_disabled;
- kprobe_enabled = false;
+ kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
- if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
+ if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
arch_disarm_kprobe(p);
}
}
@@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file,
{
char buf[3];
- if (kprobe_enabled)
+ if (!kprobes_all_disarmed)
buf[0] = '1';
else
buf[0] = '0';
@@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file,
case 'y':
case 'Y':
case '1':
- enable_all_kprobes();
+ arm_all_kprobes();
break;
case 'n':
case 'N':
case '0':
- disable_all_kprobes();
+ disarm_all_kprobes();
break;
}
@@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init);
module_init(init_kprobes);
-EXPORT_SYMBOL_GPL(register_kprobe);
-EXPORT_SYMBOL_GPL(unregister_kprobe);
-EXPORT_SYMBOL_GPL(register_kprobes);
-EXPORT_SYMBOL_GPL(unregister_kprobes);
-EXPORT_SYMBOL_GPL(register_jprobe);
-EXPORT_SYMBOL_GPL(unregister_jprobe);
-EXPORT_SYMBOL_GPL(register_jprobes);
-EXPORT_SYMBOL_GPL(unregister_jprobes);
+/* defined in arch/.../kernel/kprobes.c */
EXPORT_SYMBOL_GPL(jprobe_return);
-EXPORT_SYMBOL_GPL(register_kretprobe);
-EXPORT_SYMBOL_GPL(unregister_kretprobe);
-EXPORT_SYMBOL_GPL(register_kretprobes);
-EXPORT_SYMBOL_GPL(unregister_kretprobes);
diff --git a/kernel/module.c b/kernel/module.c
index c268a771595..05f014efa32 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod,
if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
- /* Don't keep __versions around; it's just for loading. */
- if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
- sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
}
modindex = find_sec(hdr, sechdrs, secstrings,
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a245510..88796c33083 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
}
/*
- * Have a reasonable limit on the number of tasks checked:
- */
-unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
-
-/*
- * Zero means infinite timeout - no checking done:
- */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
-
-unsigned long __read_mostly sysctl_hung_task_warnings = 10;
-
-/*
- * Only do the hung-tasks check on one CPU:
- */
-static int check_cpu __read_mostly = -1;
-
-static void check_hung_task(struct task_struct *t, unsigned long now)
-{
- unsigned long switch_count = t->nvcsw + t->nivcsw;
-
- if (t->flags & PF_FROZEN)
- return;
-
- if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
- t->last_switch_count = switch_count;
- t->last_switch_timestamp = now;
- return;
- }
- if ((long)(now - t->last_switch_timestamp) <
- sysctl_hung_task_timeout_secs)
- return;
- if (!sysctl_hung_task_warnings)
- return;
- sysctl_hung_task_warnings--;
-
- /*
- * Ok, the task did not get scheduled for more than 2 minutes,
- * complain:
- */
- printk(KERN_ERR "INFO: task %s:%d blocked for more than "
- "%ld seconds.\n", t->comm, t->pid,
- sysctl_hung_task_timeout_secs);
- printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
- " disables this message.\n");
- sched_show_task(t);
- __debug_show_held_locks(t);
-
- t->last_switch_timestamp = now;
- touch_nmi_watchdog();
-
- if (softlockup_panic)
- panic("softlockup: blocked tasks");
-}
-
-/*
- * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
- * a really long time (120 seconds). If that happens, print out
- * a warning.
- */
-static void check_hung_uninterruptible_tasks(int this_cpu)
-{
- int max_count = sysctl_hung_task_check_count;
- unsigned long now = get_timestamp(this_cpu);
- struct task_struct *g, *t;
-
- /*
- * If the system crashed already then all bets are off,
- * do not report extra hung tasks:
- */
- if (test_taint(TAINT_DIE) || did_panic)
- return;
-
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (!--max_count)
- goto unlock;
- /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
- if (t->state == TASK_UNINTERRUPTIBLE)
- check_hung_task(t, now);
- } while_each_thread(g, t);
- unlock:
- read_unlock(&tasklist_lock);
-}
-
-/*
* The watchdog thread - runs every second and touches the timestamp.
*/
static int watchdog(void *__bind_cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
- int this_cpu = (long)__bind_cpu;
sched_setscheduler(current, SCHED_FIFO, &param);
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
if (kthread_should_stop())
break;
- if (this_cpu == check_cpu) {
- if (sysctl_hung_task_timeout_secs)
- check_hung_uninterruptible_tasks(this_cpu);
- }
-
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- check_cpu = cpumask_any(cpu_online_mask);
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- if (hotcpu == check_cpu) {
- /* Pick any other online cpu. */
- check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
- }
- break;
-
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b125e338756..4286b62b34a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static unsigned long one_ul = 1;
static int one_hundred = 100;
+static int one_thousand = 1000;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = {
.extra1 = &neg_one,
.extra2 = &sixty,
},
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_check_count",
@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dohung_task_timeout_secs,
.strategy = &sysctl_intvec,
},
{
@@ -1027,6 +1041,28 @@ static struct ctl_table vm_table[] = {
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_pdflush_threads_min",
+ .data = &nr_pdflush_threads_min,
+ .maxlen = sizeof nr_pdflush_threads_min,
+ .mode = 0644 /* read-write */,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &one,
+ .extra2 = &nr_pdflush_threads_max,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_pdflush_threads_max",
+ .data = &nr_pdflush_threads_max,
+ .maxlen = sizeof nr_pdflush_threads_max,
+ .mode = 0644 /* read-write */,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &nr_pdflush_threads_min,
+ .extra2 = &one_thousand,
+ },
+ {
.ctl_name = VM_SWAPPINESS,
.procname = "swappiness",
.data = &vm_swappiness,
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3f90c..b32ff446c3f 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
char *msg;
struct blk_trace *bt;
- if (count > BLK_TN_MAX_MSG)
+ if (count >= BLK_TN_MAX_MSG)
return -EINVAL;
- msg = kmalloc(count, GFP_KERNEL);
+ msg = kmalloc(count + 1, GFP_KERNEL);
if (msg == NULL)
return -ENOMEM;
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return -EFAULT;
}
+ msg[count] = '\0';
bt = filp->private_data;
__trace_note_message(bt, "%s", msg);
kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
if (blk_pc_request(rq)) {
what |= BLK_TC_ACT(BLK_TC_PC);
__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
- sizeof(rq->cmd), rq->cmd);
+ rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a40c56..9d28476a985 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
+#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
{
nsec += 500;
do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
return;
cpumask_set_cpu(iter->cpu, iter->started);
- trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+ /* Don't print started cpu buffer for the first entry of the trace */
+ if (iter->idx > 1)
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+ iter->cpu);
}
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
if (current_trace)
*iter->trace = *current_trace;
+ if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+ goto fail;
+
+ cpumask_clear(iter->started);
+
if (current_trace && current_trace->print_max)
iter->tr = &max_tr;
else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
+ free_cpumask_var(iter->started);
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
seq_release(inode, file);
mutex_destroy(&iter->mutex);
+ free_cpumask_var(iter->started);
kfree(iter->trace);
kfree(iter);
return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
"# mkdir /debug\n"
"# mount -t debugfs nodev /debug\n\n"
"# cat /debug/tracing/available_tracers\n"
- "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+ "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
"# cat /debug/tracing/current_tracer\n"
- "none\n"
+ "nop\n"
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cbc168f1e43..e685ac2b2ba 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
extern int
trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
extern int
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d3df5..07a22c33ebf 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
#undef TRACE_FIELD_ZERO_CHAR
#define TRACE_FIELD_ZERO_CHAR(item) \
- ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \
+ ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
"offset:%u;\tsize:0;\n", \
(unsigned int)offsetof(typeof(field), item)); \
if (!ret) \
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a63b24..64b54a59c55 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+ ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
" %ld.%03ldms (+%ld.%03ldms): ", comm,
entry->pid, iter->cpu, entry->flags,
entry->preempt_count, iter->idx,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f200abd..9117cea6f1a 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
pc = preempt_count();
tracing_record_cmdline(current);
+ if (sched_stopped)
+ return;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b2ec8..5bc00e8f153 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
/*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
data = wakeup_trace->data[wakeup_cpu];
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+ /*
+ * We must be careful in using CALLER_ADDR2. But since wake_up
+ * is not called by an assembly function (where as schedule is)
+ * it should be safe to use it here.
+ */
trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked: