author     Linus Torvalds <torvalds@linux-foundation.org>   2008-01-25 13:42:32 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-01-25 13:42:32 -0800
commit     0008bf54408d4c0637c24d34642f1038c299be95 (patch)
tree       6a14cdacaa3057795381672339737a45e45effc7 /include/linux
parent     2d94dfc8c38edf63e91e48fd55c3a8822b6a9ced (diff)
parent     6d082592b62689fb91578d0338d04a9f50991990 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched: (96 commits)
sched: keep total / count stats in addition to the max for
sched, futex: detach sched.h and futex.h
sched: fix: don't take a mutex from interrupt context
sched: print backtrace of running tasks too
printk: use ktime_get()
softlockup: fix signedness
sched: latencytop support
sched: fix goto retry in pick_next_task_rt()
timers: don't #error on higher HZ values
sched: monitor clock underflows in /proc/sched_debug
sched: fix rq->clock warps on frequency changes
sched: fix, always create kernel threads with normal priority
debug: clean up kernel/profile.c
sched: remove the !PREEMPT_BKL code
sched: make PREEMPT_BKL the default
debug: track and print last unloaded module in the oops trace
debug: show being-loaded/being-unloaded indicator for modules
sched: rt-watchdog: fix .rlim_max = RLIM_INFINITY
sched: rt-group: reduce rescheduling
hrtimer: unlock hrtimer_wakeup
...
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/cpu.h                17
-rw-r--r--  include/linux/debug_locks.h         5
-rw-r--r--  include/linux/futex.h               6
-rw-r--r--  include/linux/hardirq.h             6
-rw-r--r--  include/linux/hrtimer.h            14
-rw-r--r--  include/linux/init_task.h           7
-rw-r--r--  include/linux/interrupt.h           1
-rw-r--r--  include/linux/jiffies.h             6
-rw-r--r--  include/linux/kernel.h              4
-rw-r--r--  include/linux/latencytop.h         44
-rw-r--r--  include/linux/notifier.h            4
-rw-r--r--  include/linux/rcuclassic.h        164
-rw-r--r--  include/linux/rcupdate.h          173
-rw-r--r--  include/linux/rcupreempt.h         86
-rw-r--r--  include/linux/rcupreempt_trace.h   99
-rw-r--r--  include/linux/sched.h              83
-rw-r--r--  include/linux/smp_lock.h           14
-rw-r--r--  include/linux/stacktrace.h          3
-rw-r--r--  include/linux/topology.h            5
19 files changed, 581 insertions, 160 deletions
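Among the changes below, include/linux/cpu.h renames the CPU-hotplug serialization API: lock_cpu_hotplug()/unlock_cpu_hotplug() become the refcounting get_online_cpus()/put_online_cpus(). A minimal sketch of how a caller adapts, assuming the post-merge header; the function and the work inside the critical section are illustrative only, not part of this merge:

/*
 * Hypothetical caller updated for the renamed hotplug API in this merge.
 * get_online_cpus() keeps CPUs from going up or down while we hold the
 * reference (it replaces lock_cpu_hotplug()); put_online_cpus() drops it.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>

static void example_walk_online_cpus(void)
{
	int cpu;

	get_online_cpus();		/* was: lock_cpu_hotplug() */
	for_each_online_cpu(cpu) {
		/* per-CPU work that must not race with CPU hotplug */
	}
	put_online_cpus();		/* was: unlock_cpu_hotplug() */
}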
diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 92f2029a34f..0be8d65bc3c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -71,18 +71,27 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) int cpu_up(unsigned int cpu); +extern void cpu_hotplug_init(void); + #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } + static inline void unregister_cpu_notifier(struct notifier_block *nb) { } +static inline void cpu_hotplug_init(void) +{ +} + #endif /* CONFIG_SMP */ extern struct sysdev_class cpu_sysdev_class; +extern void cpu_maps_update_begin(void); +extern void cpu_maps_update_done(void); #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ @@ -97,8 +106,8 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) mutex_unlock(cpu_hp_mutex); } -extern void lock_cpu_hotplug(void); -extern void unlock_cpu_hotplug(void); +extern void get_online_cpus(void); +extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ @@ -115,8 +124,8 @@ static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) { } -#define lock_cpu_hotplug() do { } while (0) -#define unlock_cpu_hotplug() do { } while (0) +#define get_online_cpus() do { } while (0) +#define put_online_cpus() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 1678a5de701..f4a5871767f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -47,6 +47,7 @@ struct task_struct; #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); +extern void __debug_show_held_locks(struct task_struct *task); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); extern void debug_check_no_locks_held(struct task_struct *task); @@ -55,6 +56,10 @@ static inline void debug_show_all_locks(void) { } +static inline void __debug_show_held_locks(struct task_struct *task) +{ +} + static inline void debug_show_held_locks(struct task_struct *task) { } diff --git a/include/linux/futex.h b/include/linux/futex.h index 92d420fe03f..1a15f8e237a 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -1,8 +1,12 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H -#include <linux/sched.h> +#include <linux/compiler.h> +#include <linux/types.h> +struct inode; +struct mm_struct; +struct task_struct; union ktime; /* Second argument to futex syscall */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8d302298a16..2961ec78804 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -72,11 +72,7 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) -#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) -# define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked()) -#else -# define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) -#endif +#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) #ifdef CONFIG_PREEMPT # define PREEMPT_CHECK_OFFSET 1 diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7a9398e1970..49067f14fac 100644 --- a/include/linux/hrtimer.h +++ 
b/include/linux/hrtimer.h @@ -115,10 +115,8 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; -#ifdef CONFIG_HIGH_RES_TIMERS enum hrtimer_cb_mode cb_mode; struct list_head cb_entry; -#endif #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -194,10 +192,10 @@ struct hrtimer_cpu_base { spinlock_t lock; struct lock_class_key lock_key; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + struct list_head cb_pending; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; - struct list_head cb_pending; unsigned long nr_events; #endif }; @@ -217,6 +215,11 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return timer->base->cpu_base->hres_active; +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -248,6 +251,10 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->softirq_time; } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return 0; +} #endif extern ktime_t ktime_get(void); @@ -310,6 +317,7 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); +extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index cae35b6b9ae..796019b22b6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -132,9 +132,12 @@ extern struct group_info init_groups; .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ .active_mm = &init_mm, \ - .run_list = LIST_HEAD_INIT(tsk.run_list), \ + .rt = { \ + .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ + .time_slice = HZ, \ + .nr_cpus_allowed = NR_CPUS, \ + }, \ .ioprio = 0, \ - .time_slice = HZ, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2306920fa38..c3db4a00f1f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -256,6 +256,7 @@ enum #ifdef CONFIG_HIGH_RES_TIMERS HRTIMER_SOFTIRQ, #endif + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ }; /* softirq mask and active fields moved to irq_cpustat_t in diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 8b080024bbc..7ba9e47bf06 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -29,6 +29,12 @@ # define SHIFT_HZ 9 #elif HZ >= 768 && HZ < 1536 # define SHIFT_HZ 10 +#elif HZ >= 1536 && HZ < 3072 +# define SHIFT_HZ 11 +#elif HZ >= 3072 && HZ < 6144 +# define SHIFT_HZ 12 +#elif HZ >= 6144 && HZ < 12288 +# define SHIFT_HZ 13 #else # error You lose. #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 94bc9965696..a7283c9bead 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -105,8 +105,8 @@ struct user; * supposed to. 
*/ #ifdef CONFIG_PREEMPT_VOLUNTARY -extern int cond_resched(void); -# define might_resched() cond_resched() +extern int _cond_resched(void); +# define might_resched() _cond_resched() #else # define might_resched() do { } while (0) #endif diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h new file mode 100644 index 00000000000..901c2d6377a --- /dev/null +++ b/include/linux/latencytop.h @@ -0,0 +1,44 @@ +/* + * latencytop.h: Infrastructure for displaying latency + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + */ + +#ifndef _INCLUDE_GUARD_LATENCYTOP_H_ +#define _INCLUDE_GUARD_LATENCYTOP_H_ + +#ifdef CONFIG_LATENCYTOP + +#define LT_SAVECOUNT 32 +#define LT_BACKTRACEDEPTH 12 + +struct latency_record { + unsigned long backtrace[LT_BACKTRACEDEPTH]; + unsigned int count; + unsigned long time; + unsigned long max; +}; + + +struct task_struct; + +void account_scheduler_latency(struct task_struct *task, int usecs, int inter); + +void clear_all_latency_tracing(struct task_struct *p); + +#else + +static inline void +account_scheduler_latency(struct task_struct *task, int usecs, int inter) +{ +} + +static inline void clear_all_latency_tracing(struct task_struct *p) +{ +} + +#endif + +#endif diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 0c40cc0b4a3..5dfbc684ce7 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -207,9 +207,7 @@ static inline int notifier_to_errno(int ret) #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ -#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ -#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ -#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task, +#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, * not handling interrupts, soon dead */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h new file mode 100644 index 00000000000..4d6624260b4 --- /dev/null +++ b/include/linux/rcuclassic.h @@ -0,0 +1,164 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (classic version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2001 + * + * Author: Dipankar Sarma <dipankar@in.ibm.com> + * + * Based on the original work by Paul McKenney <paulmck@us.ibm.com> + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 
+ * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + * + */ + +#ifndef __LINUX_RCUCLASSIC_H +#define __LINUX_RCUCLASSIC_H + +#ifdef __KERNEL__ + +#include <linux/cache.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/percpu.h> +#include <linux/cpumask.h> +#include <linux/seqlock.h> + + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + long cur; /* Current batch number. */ + long completed; /* Number of the last completed batch */ + int next_pending; /* Is the next batch already waiting? */ + + int signaled; + + spinlock_t lock ____cacheline_internodealigned_in_smp; + cpumask_t cpumask; /* CPUs that need to switch in order */ + /* for current batch to proceed. */ +} ____cacheline_internodealigned_in_smp; + +/* Is batch a before batch b ? */ +static inline int rcu_batch_before(long a, long b) +{ + return (a - b) < 0; +} + +/* Is batch a after batch b ? */ +static inline int rcu_batch_after(long a, long b) +{ + return (a - b) > 0; +} + +/* + * Per-CPU data for Read-Copy UPdate. + * nxtlist - new callbacks are added here + * curlist - current batch for which quiescent cycle started if any + */ +struct rcu_data { + /* 1) quiescent state handling : */ + long quiescbatch; /* Batch # for grace period */ + int passed_quiesc; /* User-mode/idle loop etc. */ + int qs_pending; /* core waits for quiesc state */ + + /* 2) batch handling */ + long batch; /* Batch # for current RCU batch */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail; + long qlen; /* # of queued callbacks */ + struct rcu_head *curlist; + struct rcu_head **curtail; + struct rcu_head *donelist; + struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ + int cpu; + struct rcu_head barrier; +}; + +DECLARE_PER_CPU(struct rcu_data, rcu_data); +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. 
+ */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + +#define __rcu_read_lock() \ + do { \ + preempt_disable(); \ + __acquire(RCU); \ + rcu_read_acquire(); \ + } while (0) +#define __rcu_read_unlock() \ + do { \ + rcu_read_release(); \ + __release(RCU); \ + preempt_enable(); \ + } while (0) +#define __rcu_read_lock_bh() \ + do { \ + local_bh_disable(); \ + __acquire(RCU_BH); \ + rcu_read_acquire(); \ + } while (0) +#define __rcu_read_unlock_bh() \ + do { \ + rcu_read_release(); \ + __release(RCU_BH); \ + local_bh_enable(); \ + } while (0) + +#define __synchronize_sched() synchronize_rcu() + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); + +extern long rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index cc24a01df94..d32c14de270 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation, 2001 + * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> * @@ -53,96 +53,18 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; +#ifdef CONFIG_CLASSIC_RCU +#include <linux/rcuclassic.h> +#else /* #ifdef CONFIG_CLASSIC_RCU */ +#include <linux/rcupreempt.h> +#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ + #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT #define INIT_RCU_HEAD(ptr) do { \ (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) - - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - long cur; /* Current batch number. */ - long completed; /* Number of the last completed batch */ - int next_pending; /* Is the next batch already waiting? */ - - int signaled; - - spinlock_t lock ____cacheline_internodealigned_in_smp; - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. */ -} ____cacheline_internodealigned_in_smp; - -/* Is batch a before batch b ? */ -static inline int rcu_batch_before(long a, long b) -{ - return (a - b) < 0; -} - -/* Is batch a after batch b ? */ -static inline int rcu_batch_after(long a, long b) -{ - return (a - b) > 0; -} - -/* - * Per-CPU data for Read-Copy UPdate. - * nxtlist - new callbacks are added here - * curlist - current batch for which quiescent cycle started if any - */ -struct rcu_data { - /* 1) quiescent state handling : */ - long quiescbatch; /* Batch # for grace period */ - int passed_quiesc; /* User-mode/idle loop etc. 
*/ - int qs_pending; /* core waits for quiesc state */ - - /* 2) batch handling */ - long batch; /* Batch # for current RCU batch */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail; - long qlen; /* # of queued callbacks */ - struct rcu_head *curlist; - struct rcu_head **curtail; - struct rcu_head *donelist; - struct rcu_head **donetail; - long blimit; /* Upper limit on a processed batch */ - int cpu; - struct rcu_head barrier; -}; - -DECLARE_PER_CPU(struct rcu_data, rcu_data); -DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. - */ -static inline void rcu_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - rdp->passed_quiesc = 1; -} -static inline void rcu_bh_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc = 1; -} - -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -172,24 +94,13 @@ extern struct lockdep_map rcu_lock_map; * * It is illegal to block while in an RCU read-side critical section. */ -#define rcu_read_lock() \ - do { \ - preempt_disable(); \ - __acquire(RCU); \ - rcu_read_acquire(); \ - } while(0) +#define rcu_read_lock() __rcu_read_lock() /** * rcu_read_unlock - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ -#define rcu_read_unlock() \ - do { \ - rcu_read_release(); \ - __release(RCU); \ - preempt_enable(); \ - } while(0) /* * So where is rcu_write_lock()? It does not exist, as there is no @@ -200,6 +111,7 @@ extern struct lockdep_map rcu_lock_map; * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ +#define rcu_read_unlock() __rcu_read_unlock() /** * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section @@ -212,24 +124,14 @@ extern struct lockdep_map rcu_lock_map; * can use just rcu_read_lock(). * */ -#define rcu_read_lock_bh() \ - do { \ - local_bh_disable(); \ - __acquire(RCU_BH); \ - rcu_read_acquire(); \ - } while(0) +#define rcu_read_lock_bh() __rcu_read_lock_bh() /* * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ -#define rcu_read_unlock_bh() \ - do { \ - rcu_read_release(); \ - __release(RCU_BH); \ - local_bh_enable(); \ - } while(0) +#define rcu_read_unlock_bh() __rcu_read_unlock_bh() /* * Prevent the compiler from merging or refetching accesses. The compiler @@ -293,21 +195,52 @@ extern struct lockdep_map rcu_lock_map; * In "classic RCU", these two guarantees happen to be one and * the same, but can differ in realtime RCU implementations. 
*/ -#define synchronize_sched() synchronize_rcu() +#define synchronize_sched() __synchronize_sched() -extern void rcu_init(void); -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); +/** + * call_rcu - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +extern void call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *head)); -/* Exported interfaces */ -extern void FASTCALL(call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head))); -extern void FASTCALL(call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head))); +/** + * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. + * OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * These may be nested. + */ +extern void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +/* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); +extern long rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +/* Internal to kernel */ +extern void rcu_init(void); +extern int rcu_needs_cpu(int cpu); #endif /* __KERNEL__ */ #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h new file mode 100644 index 00000000000..ece8eb3e415 --- /dev/null +++ b/include/linux/rcupreempt.h @@ -0,0 +1,86 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (RT implementation) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + * + */ + +#ifndef __LINUX_RCUPREEMPT_H +#define __LINUX_RCUPREEMPT_H + +#ifdef __KERNEL__ + +#include <linux/cache.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/percpu.h> +#include <linux/cpumask.h> +#include <linux/seqlock.h> + +#define rcu_qsctr_inc(cpu) +#define rcu_bh_qsctr_inc(cpu) +#define call_rcu_bh(head, rcu) call_rcu(head, rcu) + +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } +#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } + +extern void __synchronize_sched(void); + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); +extern long rcu_batches_completed(void); + +/* + * Return the number of RCU batches processed thus far. Useful for debug + * and statistic. The _bh variant is identifcal to straight RCU + */ +static inline long rcu_batches_completed_bh(void) +{ + return rcu_batches_completed(); +} + +#ifdef CONFIG_RCU_TRACE +struct rcupreempt_trace; +extern long *rcupreempt_flipctr(int cpu); +extern long rcupreempt_data_completed(void); +extern int rcupreempt_flip_flag(int cpu); +extern int rcupreempt_mb_flag(int cpu); +extern char *rcupreempt_try_flip_state_name(void); +extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); +#endif + +struct softirq_action; + +#endif /* __KERNEL__ */ +#endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h new file mode 100644 index 00000000000..21cd6b2a5c4 --- /dev/null +++ b/include/linux/rcupreempt_trace.h @@ -0,0 +1,99 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (RT implementation) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 
+ * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of the Preemptible Read-Copy Update mechanism see - + * http://lwn.net/Articles/253651/ + */ + +#ifndef __LINUX_RCUPREEMPT_TRACE_H +#define __LINUX_RCUPREEMPT_TRACE_H + +#ifdef __KERNEL__ +#include <linux/types.h> +#include <linux/kernel.h> + +#include <asm/atomic.h> + +/* + * PREEMPT_RCU data structures. + */ + +struct rcupreempt_trace { + long next_length; + long next_add; + long wait_length; + long wait_add; + long done_length; + long done_add; + long done_remove; + atomic_t done_invoked; + long rcu_check_callbacks; + atomic_t rcu_try_flip_1; + atomic_t rcu_try_flip_e1; + long rcu_try_flip_i1; + long rcu_try_flip_ie1; + long rcu_try_flip_g1; + long rcu_try_flip_a1; + long rcu_try_flip_ae1; + long rcu_try_flip_a2; + long rcu_try_flip_z1; + long rcu_try_flip_ze1; + long rcu_try_flip_z2; + long rcu_try_flip_m1; + long rcu_try_flip_me1; + long rcu_try_flip_m2; +}; + +#ifdef CONFIG_RCU_TRACE +#define RCU_TRACE(fn, arg) fn(arg); +#else +#define RCU_TRACE(fn, arg) +#endif + +extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); +extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_RCUPREEMPT_TRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6eacda765c..df5b24ee80b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -78,7 +78,6 @@ struct sched_param { #include <linux/proportions.h> #include <linux/seccomp.h> #include <linux/rcupdate.h> -#include <linux/futex.h> #include <linux/rtmutex.h> #include <linux/time.h> @@ -88,11 +87,13 @@ struct sched_param { #include <linux/hrtimer.h> #include <linux/task_io_accounting.h> #include <linux/kobject.h> +#include <linux/latencytop.h> #include <asm/processor.h> struct exec_domain; struct futex_pi_state; +struct robust_list_head; struct bio; /* @@ -230,6 +231,8 @@ static inline int select_nohz_load_balancer(int cpu) } #endif +extern unsigned long rt_needs_cpu(int cpu); + /* * Only dump TASK_* tasks. 
(0 for all tasks) */ @@ -257,13 +260,19 @@ extern void trap_init(void); extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); +extern void hrtick_resched(void); + +extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern int softlockup_thresh; +extern unsigned long softlockup_thresh; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; #else static inline void softlockup_tick(void) { @@ -822,6 +831,7 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq); + int (*select_task_rq)(struct task_struct *p, int sync); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); @@ -837,11 +847,25 @@ struct sched_class { int (*move_one_task) (struct rq *this_rq, int this_cpu, struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle); + void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); + void (*post_schedule) (struct rq *this_rq); + void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); #endif void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_new) (struct rq *rq, struct task_struct *p); + void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask); + + void (*join_domain)(struct rq *rq); + void (*leave_domain)(struct rq *rq); + + void (*switched_from) (struct rq *this_rq, struct task_struct *task, + int running); + void (*switched_to) (struct rq *this_rq, struct task_struct *task, + int running); + void (*prio_changed) (struct rq *this_rq, struct task_struct *task, + int oldprio, int running); }; struct load_weight { @@ -871,6 +895,8 @@ struct sched_entity { #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; + u64 wait_count; + u64 wait_sum; u64 sleep_start; u64 sleep_max; @@ -909,6 +935,21 @@ struct sched_entity { #endif }; +struct sched_rt_entity { + struct list_head run_list; + unsigned int time_slice; + unsigned long timeout; + int nr_cpus_allowed; + +#ifdef CONFIG_FAIR_GROUP_SCHED + struct sched_rt_entity *parent; + /* rq on which this entity is (to be) queued: */ + struct rt_rq *rt_rq; + /* rq "owned" by this entity/group: */ + struct rt_rq *my_q; +#endif +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -925,9 +966,9 @@ struct task_struct { #endif int prio, static_prio, normal_prio; - struct list_head run_list; const struct sched_class *sched_class; struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ @@ -951,7 +992,11 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; - unsigned int time_slice; + +#ifdef CONFIG_PREEMPT_RCU + int rcu_read_lock_nesting; + int rcu_flipctr_idx; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1041,6 +1086,11 @@ struct task_struct { /* ipc stuff */ struct sysv_sem sysvsem; #endif +#ifdef 
CONFIG_DETECT_SOFTLOCKUP +/* hung task detection */ + unsigned long last_switch_timestamp; + unsigned long last_switch_count; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* filesystem information */ @@ -1173,6 +1223,10 @@ struct task_struct { int make_it_fail; #endif struct prop_local_single dirties; +#ifdef CONFIG_LATENCYTOP + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; +#endif }; /* @@ -1453,6 +1507,12 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_rt_period; +extern unsigned int sysctl_sched_rt_ratio; +#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) +extern unsigned int sysctl_sched_min_bal_int_shares; +extern unsigned int sysctl_sched_max_bal_int_shares; +#endif int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, @@ -1845,7 +1905,18 @@ static inline int need_resched(void) * cond_resched_lock() will drop the spinlock before scheduling, * cond_resched_softirq() will enable bhs before scheduling. */ -extern int cond_resched(void); +#ifdef CONFIG_PREEMPT +static inline int cond_resched(void) +{ + return 0; +} +#else +extern int _cond_resched(void); +static inline int cond_resched(void) +{ + return _cond_resched(); +} +#endif extern int cond_resched_lock(spinlock_t * lock); extern int cond_resched_softirq(void); diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 58962c51dee..aab3a4cff4e 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -17,22 +17,10 @@ extern void __lockfunc __release_kernel_lock(void); __release_kernel_lock(); \ } while (0) -/* - * Non-SMP kernels will never block on the kernel lock, - * so we are better off returning a constant zero from - * reacquire_kernel_lock() so that the compiler can see - * it at compile-time. - */ -#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_BKL) -# define return_value_on_smp return -#else -# define return_value_on_smp -#endif - static inline int reacquire_kernel_lock(struct task_struct *task) { if (unlikely(task->lock_depth >= 0)) - return_value_on_smp __reacquire_kernel_lock(); + return __reacquire_kernel_lock(); return 0; } diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index e7fa657d0c4..5da9794b2d7 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -9,10 +9,13 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); +extern void save_stack_trace_tsk(struct task_struct *tsk, + struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else # define save_stack_trace(trace) do { } while (0) +# define save_stack_trace_tsk(tsk, trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index 47729f18bfd..2352f46160d 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -5,7 +5,7 @@ * * Copyright (C) 2002, IBM Corp. * - * All rights reserved. + * All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -103,6 +103,7 @@ .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ | SD_WAKE_IDLE \ @@ -134,6 +135,7 @@ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ | SD_WAKE_IDLE \ @@ -165,6 +167,7 @@ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ | BALANCE_FOR_PKG_POWER,\ |
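The rcupdate.h rework above turns rcu_read_lock(), rcu_read_unlock() and their _bh variants into wrappers around __rcu_read_lock()/__rcu_read_unlock() supplied by either rcuclassic.h or rcupreempt.h, and documents call_rcu()/call_rcu_bh() directly in the header. A minimal reader/updater sketch using only those documented interfaces; the example_node type, the global pointer and the spinlock are hypothetical scaffolding, not part of this merge:

/*
 * Sketch of the pattern the new rcupdate.h comments describe: readers are
 * delimited by rcu_read_lock()/rcu_read_unlock() and must not block;
 * updaters publish a new copy with rcu_assign_pointer() and defer freeing
 * the old one with call_rcu() until a grace period has elapsed.
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_node {
	int value;
	struct rcu_head rcu;
};

static struct example_node *example_ptr;
static DEFINE_SPINLOCK(example_lock);

static void example_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct example_node, rcu));
}

/* Reader side: cheap, nestable, no blocking inside the critical section. */
static int example_read(void)
{
	struct example_node *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(example_ptr);
	if (p)
		val = p->value;
	rcu_read_unlock();

	return val;
}

/* Updater side: publish the new node, free the old one after a grace period. */
static int example_update(int value)
{
	struct example_node *new, *old;

	new = kmalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	new->value = value;

	spin_lock(&example_lock);
	old = example_ptr;
	rcu_assign_pointer(example_ptr, new);
	spin_unlock(&example_lock);

	if (old)
		call_rcu(&old->rcu, example_free_rcu);

	return 0;
}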