From 554ec22f075d46e4363520a407d2b7eeb5dfdd43 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 12 May 2008 21:21:03 +0200
Subject: namespacecheck: more sched.c fixes

[ Stephen Rothwell <sfr@canb.auug.org.au>: build fix ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ae0be3c6237..dc36c3aea01 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
@@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void);
 
 #endif	/* CONFIG_SMP */
 
-/*
- * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
- * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
- * task of nice 0 or enough lower priority tasks to bring up the
- * weighted_cpuload
- */
-static inline int above_background_load(void)
-{
-	unsigned long cpu;
-
-	for_each_online_cpu(cpu) {
-		if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
-			return 1;
-	}
-	return 0;
-}
-
 struct io_context;			/* See blkdev.h */
 #define NGROUPS_SMALL		32
 #define NGROUPS_PER_BLOCK	((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
-- 
cgit v1.2.3


From c7aceaba042702538b23cf4e0de1b2891ad8e671 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Thu, 15 May 2008 12:09:15 +0100
Subject: sched: reorder task_struct to reduce padding on 64bit builds

This patch removes 24 bytes of padding and allows 1 extra object per
slab on my fedora based config.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc36c3aea01..ea2857b9959 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1021,6 +1021,7 @@ struct task_struct {
 #endif
 
 	int prio, static_prio, normal_prio;
+	unsigned int rt_priority;
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
@@ -1104,7 +1105,6 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	unsigned int rt_priority;
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
 	cputime_t prev_utime, prev_stime;
@@ -1123,12 +1123,12 @@ struct task_struct {
 	gid_t gid,egid,sgid,fsgid;
 	struct group_info *group_info;
 	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
-	unsigned securebits;
 	struct user_struct *user;
+	unsigned securebits;
 #ifdef CONFIG_KEYS
+	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 	struct key *request_key_auth;	/* assumed request_key authority */
 	struct key *thread_keyring;	/* keyring private to this thread */
-	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
@@ -1215,8 +1215,8 @@ struct task_struct {
 # define MAX_LOCK_DEPTH 48UL
 	u64 curr_chain_key;
 	int lockdep_depth;
-	struct held_lock held_locks[MAX_LOCK_DEPTH];
 	unsigned int lockdep_recursion;
+	struct held_lock held_locks[MAX_LOCK_DEPTH];
 #endif
 
 /* journalling filesystem info */
@@ -1244,10 +1244,6 @@ struct task_struct {
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
 	cputime_t acct_stimexpd;/* stime since last update */
 #endif
-#ifdef CONFIG_NUMA
-  	struct mempolicy *mempolicy;
-	short il_next;
-#endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
@@ -1266,6 +1262,10 @@ struct task_struct {
 #endif
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
+#endif
+#ifdef CONFIG_NUMA
+	struct mempolicy *mempolicy;
+	short il_next;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
-- 
cgit v1.2.3


From 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 Mon Sep 17 00:00:00 2001
From: Gregory Haskins <ghaskins@novell.com>
Date: Wed, 4 Jun 2008 15:04:05 -0400
Subject: sched: fix cpupri hotplug support

The RT folks over at RedHat found an issue w.r.t. hotplug support which
was traced to problems with the cpupri infrastructure in the scheduler:

https://bugzilla.redhat.com/show_bug.cgi?id=449676

This bug affects 23-rt12+, 24-rtX, 25-rtX, and sched-devel.  This patch
applies to 25.4-rt4, though it should trivially apply to most cpupri enabled
kernels mentioned above.

It turned out that the issue was that offline cpus could get inadvertently
registered with cpupri so that they were erroneously selected during
migration decisions.  The end result would be an OOPS as the offline cpu
had tasks routed to it.

This patch generalizes the old join/leave domain interface into an
online/offline interface, and adjusts the root-domain/hotplug code to
utilize it.

I was able to easily reproduce the issue prior to this patch, and am no
longer able to reproduce it after this patch.  I can offline cpus
indefinately and everything seems to be in working order.

Thanks to Arnaldo (acme), Thomas, and Peter for doing the legwork to point
me in the right direction.  Also thank you to Peter for reviewing the
early iterations of this patch.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea2857b9959..d25acf600a3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -903,8 +903,8 @@ struct sched_class {
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const cpumask_t *newmask);
 
-	void (*join_domain)(struct rq *rq);
-	void (*leave_domain)(struct rq *rq);
+	void (*rq_online)(struct rq *rq);
+	void (*rq_offline)(struct rq *rq);
 
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 			       int running);
-- 
cgit v1.2.3


From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 5 Jun 2008 12:57:11 -0700
Subject: sched: prevent bound kthreads from changing cpus_allowed

Kthreads that have called kthread_bind() are bound to specific cpus, so
other tasks should not be able to change their cpus_allowed from under
them.  Otherwise, it is possible to move kthreads, such as the migration
or software watchdog threads, so they are not allowed access to the cpu
they work on.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d25acf600a3..2db1485f865 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1486,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
+#define PF_THREAD_BOUND	0x04000000	/* Thread bound to specific cpu */
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezeable */
-- 
cgit v1.2.3


From c09595f63bb1909c5dc4dca288f4fe818561b5f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 27 Jun 2008 13:41:14 +0200
Subject: sched: revert revert of: fair-group: SMP-nice for group scheduling

Try again..

Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e
Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eaf821072db..97a58b622ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -765,6 +765,7 @@ struct sched_domain {
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
 	cpumask_t span;			/* span of all CPUs in this domain */
+	int first_cpu;			/* cache of the first cpu in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
-- 
cgit v1.2.3


From b6a86c746f5b708012809958462234d19e9c8177 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 27 Jun 2008 13:41:18 +0200
Subject: sched: fix sched_domain aggregation

Keeping the aggregate on the first cpu of the sched domain has two problems:
 - it could collide between different sched domains on different cpus
 - it could slow things down because of the remote accesses

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 97a58b622ee..eaf821072db 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -765,7 +765,6 @@ struct sched_domain {
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
 	cpumask_t span;			/* span of all CPUs in this domain */
-	int first_cpu;			/* cache of the first cpu in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
-- 
cgit v1.2.3


From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 27 Jun 2008 13:41:35 +0200
Subject: sched: update shares on wakeup

We found that the affine wakeup code needs rather accurate load figures
to be effective. The trouble is that updating the load figures is fairly
expensive with group scheduling. Therefore ratelimit the updating.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eaf821072db..835b6c6fcc5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -783,6 +783,8 @@ struct sched_domain {
 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
+	u64 last_update;
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_shares_ratelimit;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
-- 
cgit v1.2.3


From af52a90a14cdaa54ecbfb6e6982abb13466a4b56 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 7 Jul 2008 14:16:52 -0400
Subject: sched_clock: stop maximum check on NO HZ

Working with ftrace I would get large jumps of 11 millisecs or more with
the clock tracer. This killed the latencing timings of ftrace and also
caused the irqoff self tests to fail.

What was happening is with NO_HZ the idle would stop the jiffy counter and
before the jiffy counter was updated the sched_clock would have a bad
delta jiffies to compare with the gtod with the maximum.

The jiffies would stop and the last sched_tick would record the last gtod.
On wakeup, the sched clock update would compare the gtod + delta jiffies
(which would be zero) and compare it to the TSC. The TSC would have
correctly (with a stable TSC) moved forward several jiffies. But because the
jiffies has not been updated yet the clock would be prevented from moving
forward because it would appear that the TSC jumped too far ahead.

The clock would then virtually stop, until the jiffies are updated. Then
the next sched clock update would see that the clock was very much behind
since the delta jiffies is now correct. This would then jump the clock
forward by several jiffies.

This caused ftrace to report several milliseconds of interrupts off
latency at every resume from NO_HZ idle.

This patch adds hooks into the nohz code to disable the checking of the
maximum clock update when nohz is in effect. It resumes the max check
when nohz has updated the jiffies again.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d3f847ca8..33a8f42041f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void)
 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
-#else
+
+#ifdef CONFIG_NO_HZ
+static inline void sched_clock_tick_stop(int cpu)
+{
+}
+
+static inline void sched_clock_tick_start(int cpu)
+{
+}
+#endif
+
+#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 extern void sched_clock_init(void);
 extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+#ifdef CONFIG_NO_HZ
+extern void sched_clock_tick_stop(int cpu);
+extern void sched_clock_tick_start(int cpu);
 #endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
-- 
cgit v1.2.3