From 67182ae1c42206e516f7efb292b745e826497b24 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sun, 10 Aug 2008 18:35:38 -0700
Subject: rcu, debug: detect stalled grace periods

This is a diagnostic patch for Classic RCU.

The approach is to record a timestamp at the beginning of the grace
period (in rcu_start_batch()), then have rcu_check_callbacks() complain
if:

1. it is running on a CPU that has been holding up grace periods for a
   long time (say one second). This will identify the culprit assuming
   that the culprit has not disabled hardware irqs, instruction
   execution, or some such.

2. it is running on a CPU that is not holding up grace periods, but
   grace periods have been held up for an even longer time (say two
   seconds).

It is enabled via the default-off CONFIG_RCU_CPU_STALL kernel
configuration option. Rather than exponential backoff, it backs off to
once per 30 seconds. My feeling upon thinking on it was that if you
have stalled RCU grace periods for that long, a few extra printk()
messages are probably the least of your worries...

Signed-off-by: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Yinghai Lu
Cc: David Witbrodt
Signed-off-by: Ingo Molnar
---
 lib/Kconfig.debug | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e1d4764435e..2fb6d90bf1e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_CPU_STALL
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
-- cgit v1.2.3

From 1f7c14c62ce63805f9574664a6c6de3633d4a354 Mon Sep 17 00:00:00 2001
From: Mingming Cao
Date: Thu, 9 Oct 2008 12:50:59 -0400
Subject: percpu counter: clean up percpu_counter_sum_and_set()

percpu_counter_sum_and_set() and percpu_counter_sum() are the same
except that the former updates the global counter after accounting.
Since we are taking the fbc->lock to calculate the precise value of the
counter in percpu_counter_sum() anyway, it should simply set fbc->count
too, as percpu_counter_sum_and_set() does.

This patch merges these two interfaces into one.

Signed-off-by: Mingming Cao
Acked-by: Peter Zijlstra
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: "Theodore Ts'o"
---
 lib/percpu_counter.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 4a8ba4bf5f6..a8663890a88 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
 
 /*
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
 	s64 ret;
 	int cpu;
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
-		if (set)
-			*pcount = 0;
+		*pcount = 0;
 	}
-	if (set)
-		fbc->count = ret;
+	fbc->count = ret;
 	spin_unlock(&fbc->lock);
 	return ret;
-- cgit v1.2.3

From 673d62cc5ea6fca046650f17f77985b112c62322 Mon Sep 17 00:00:00 2001
From: Vegard Nossum
Date: Sun, 31 Aug 2008 23:39:21 +0200
Subject: debugobjects: fix lockdep warning

Daniel J. Blueman reported:

> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.27-rc4-224c #1
> -------------------------------------------------------
> hald/4680 is trying to acquire lock:
>  (&n->list_lock){++..}, at: [] add_partial+0x26/0x80
>
> but task is already holding lock:
>  (&obj_hash[i].lock){++..}, at: []
> debug_object_free+0x5c/0x120

We fix it by moving the actual freeing to outside the lock (the lock
now only protects the list).

The pool lock is also promoted to irq-safe (suggested by Dan). It's
necessary because free_pool is now called outside the irq-disabled
region, so we need to protect against an interrupt handler which calls
debug_object_init().

[tglx@linutronix.de: added hlist_move_list helper to avoid looping
 through the list twice]

Reported-by: Daniel J Blueman
Signed-off-by: Vegard Nossum
Signed-off-by: Thomas Gleixner
---
 lib/debugobjects.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 45a6bde762d..e3ab374e133 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -112,6 +112,7 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
 
 /*
  * Allocate a new object. If the pool is empty, switch off the debugger.
+ * Must be called with interrupts disabled.
  */
 static struct debug_obj *
 alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
@@ -148,17 +149,18 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 static void free_object(struct debug_obj *obj)
 {
 	unsigned long idx = (unsigned long)(obj - obj_static_pool);
+	unsigned long flags;
 
 	if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) {
-		spin_lock(&pool_lock);
+		spin_lock_irqsave(&pool_lock, flags);
 		hlist_add_head(&obj->node, &obj_pool);
 		obj_pool_free++;
 		obj_pool_used--;
-		spin_unlock(&pool_lock);
+		spin_unlock_irqrestore(&pool_lock, flags);
 	} else {
-		spin_lock(&pool_lock);
+		spin_lock_irqsave(&pool_lock, flags);
 		obj_pool_used--;
-		spin_unlock(&pool_lock);
+		spin_unlock_irqrestore(&pool_lock, flags);
 		kmem_cache_free(obj_cache, obj);
 	}
 }
@@ -171,6 +173,7 @@ static void debug_objects_oom(void)
 {
 	struct debug_bucket *db = obj_hash;
 	struct hlist_node *node, *tmp;
+	HLIST_HEAD(freelist);
 	struct debug_obj *obj;
 	unsigned long flags;
 	int i;
@@ -179,11 +182,14 @@ static void debug_objects_oom(void)
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
 		spin_lock_irqsave(&db->lock, flags);
-		hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
+		hlist_move_list(&db->list, &freelist);
+		spin_unlock_irqrestore(&db->lock, flags);
+
+		/* Now free them */
+		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
 			hlist_del(&obj->node);
 			free_object(obj);
 		}
-		spin_unlock_irqrestore(&db->lock, flags);
 	}
 }
@@ -498,8 +504,9 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
 		return;
 	default:
 		hlist_del(&obj->node);
+		spin_unlock_irqrestore(&db->lock, flags);
 		free_object(obj);
-		break;
+		return;
 	}
 out_unlock:
 	spin_unlock_irqrestore(&db->lock, flags);
@@ -510,6 +517,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
 	unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
 	struct hlist_node *node, *tmp;
+	HLIST_HEAD(freelist);
 	struct debug_obj_descr *descr;
 	enum debug_obj_state state;
 	struct debug_bucket *db;
@@ -545,11 +553,18 @@ repeat:
 			goto repeat;
 		default:
 			hlist_del(&obj->node);
-			free_object(obj);
+			hlist_add_head(&obj->node, &freelist);
 			break;
 		}
 	}
 	spin_unlock_irqrestore(&db->lock, flags);
+
+	/* Now free them */
+	hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+		hlist_del(&obj->node);
+		free_object(obj);
+	}
+
 	if (cnt > debug_objects_maxchain)
 		debug_objects_maxchain = cnt;
 }
-- cgit v1.2.3

From 7563dc64585324f443f5ac107eb6d89ee813a2d2 Mon Sep 17 00:00:00 2001
From: Tony Breeds
Date: Tue, 2 Sep 2008 16:50:38 +1000
Subject: powerpc: Work around gcc's -fno-omit-frame-pointer bug

This bug is causing random crashes
(http://bugzilla.kernel.org/show_bug.cgi?id=11414).

-fno-omit-frame-pointer is only needed on powerpc when -pg is also
supplied, and there is a gcc bug that causes incorrect code generation
on 32-bit powerpc when -fno-omit-frame-pointer is used: it uses stack
locations below the stack pointer, which is not allowed by the ABI
because those locations can and sometimes do get corrupted by an
interrupt.

This ensures that CONFIG_FRAME_POINTER is only selected by ftrace.
When CONFIG_FTRACE is enabled we also pass -mno-sched-epilog to work
around the gcc codegen bug.
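The powerpc Makefile hunk itself falls outside this 'lib'-limited view;
as a rough sketch (placement assumed from the description above), the
conditional would look something like:

    ifeq ($(CONFIG_FTRACE),y)
    # -pg forces the frame pointer, so also suppress the epilogue
    # scheduling that miscompiles with -fno-omit-frame-pointer
    KBUILD_CFLAGS += -mno-sched-epilog
    endif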
Patch based on work by:
  Andreas Schwab
  Segher Boessenkool

Signed-off-by: Tony Breeds
Signed-off-by: Paul Mackerras
---
 lib/Kconfig.debug | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8b5a7d304a5..0b504814e37 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -394,7 +394,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !X86 && !MIPS
+	select FRAME_POINTER if !X86 && !MIPS && !PPC
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -676,13 +676,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER
+	select FRAME_POINTER if !PPC
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
 config LATENCYTOP
 	bool "Latency measuring infrastructure"
-	select FRAME_POINTER if !MIPS
+	select FRAME_POINTER if !MIPS && !PPC
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
-- cgit v1.2.3

From 68e91d61346db4359464d06617500141cfd1442a Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 8 Sep 2008 18:10:14 +0900
Subject: swiotlb: remove GFP_DMA hack in swiotlb_alloc_coherent

The callers are supposed to set up the gfp flags appropriately.

Signed-off-by: FUJITA Tomonori
Acked-by: Joerg Roedel
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 977edbdbc1d..3066ffe1f9e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -467,13 +467,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	void *ret;
 	int order = get_order(size);
 
-	/*
-	 * XXX fix me: the DMA API should pass us an explicit DMA mask
-	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
-	 * bit range instead of a 16MB one).
-	 */
-	flags |= GFP_DMA;
-
 	ret = (void *)__get_free_pages(flags, order);
 	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
 		/*
-- cgit v1.2.3

From 9dfda12b8b769e224ca4efbc35ace4524b9c017b Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 8 Sep 2008 18:53:48 +0900
Subject: swiotlb: use map_single instead of swiotlb_map_single in swiotlb_alloc_coherent

We always need swiotlb memory here, so the address_needs_mapping and
swiotlb_force tests are irrelevant. map_single should be used here
instead of swiotlb_map_single.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3066ffe1f9e..2fb485d0e7e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -483,12 +483,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * swiotlb_map_single(), which will grab memory from
 		 * the lowest available address range.
 		 */
-		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(hwdev, handle))
+		ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+		if (!ret)
 			return NULL;
-
-		ret = bus_to_virt(handle);
 	}
 
 	memset(ret, 0, size);
-- cgit v1.2.3

From 21f6c4de4c25c4bdd88c75bc97a78e7fbeebac4d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 8 Sep 2008 18:53:49 +0900
Subject: swiotlb: use unmap_single instead of swiotlb_unmap_single in swiotlb_free_coherent

We don't need any of the checks in swiotlb_unmap_single here;
unmap_single is appropriate.
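For context, a sketch of the ownership check that the wrapper performs
and the direct call skips (mirroring the swiotlb_unmap_single_attrs
code visible later in this log); swiotlb_free_coherent() already knows
the buffer came from the swiotlb pool, so the check is dead weight
there:

    char *dma_addr = bus_to_virt(dev_addr);

    /* only bounce buffers need the swiotlb unmap path */
    if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
            unmap_single(hwdev, dma_addr, size, dir);
    else if (dir == DMA_FROM_DEVICE)
            dma_mark_clean(dma_addr, size);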
Signed-off-by: FUJITA Tomonori
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2fb485d0e7e..bf61c73a341 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -513,7 +513,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
+		unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 
 static void
-- cgit v1.2.3

From 640aebfe014554ced9c38d2564e38862e488d0eb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 8 Sep 2008 18:53:50 +0900
Subject: swiotlb: add is_swiotlb_buffer helper function

This adds an is_swiotlb_buffer() helper function to check whether a
buffer belongs to the swiotlb pool or not.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bf61c73a341..b5f5d113304 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -283,6 +283,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 	return (addr & ~mask) != 0;
 }
 
+static int is_swiotlb_buffer(char *addr)
+{
+	return addr >= io_tlb_start && addr < io_tlb_end;
+}
+
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
  */
@@ -508,8 +513,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		       dma_addr_t dma_handle)
 {
 	WARN_ON(irqs_disabled());
-	if (!(vaddr >= (void *)io_tlb_start
-	      && vaddr < (void *)io_tlb_end))
+	if (!is_swiotlb_buffer(vaddr))
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
@@ -602,7 +606,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		unmap_single(hwdev, dma_addr, size, dir);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -632,7 +636,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -663,7 +667,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
-- cgit v1.2.3

From deac93df26b20cf8438339b5935b5f5643bc30c9 Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Wed, 3 Sep 2008 20:43:36 -0500
Subject: lib: Correct printk %pF to work on all architectures

It was introduced by "vsprintf: add support for '%pS' and '%pF' pointer
formats" in commit 0fe1ef24f7bd0020f29ffe287dfdb9ead33ca0b2. However,
the way it is currently coded doesn't work on parisc64, for two
reasons: 1) parisc isn't in the #ifdef, and 2) parisc has a different
format for function descriptors.

Make dereference_function_descriptor() more accommodating by allowing
architecture overrides.
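One such override, sketched from the parisc64 case described here (the
arch hunks are not part of this lib-only log, and the descriptor field
names are assumed):

    /* parisc64: a function pointer really points to a descriptor */
    void *dereference_function_descriptor(void *ptr)
    {
            Elf64_Fdesc *desc = ptr;
            void *p;

            if (!probe_kernel_address(&desc->addr, p))
                    ptr = p;
            return ptr;
    }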
I put the three overrides (for parisc64, ppc64 and ia64) in
arch/kernel/module.c because that's where the kernel's internal linker,
which knows how to deal with function descriptors, sits.

Signed-off-by: James Bottomley
Acked-by: Benjamin Herrenschmidt
Acked-by: Tony Luck
Acked-by: Kyle McMartin
Signed-off-by: Linus Torvalds
---
 lib/vsprintf.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d8d1d114224..c399bc1093c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -27,6 +27,7 @@
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
+#include <asm/sections.h>	/* for dereference_function_descriptor() */
 
 /* Works only for digits and letters, but small and fast */
 #define TOLOWER(x) ((x) | 0x20)
@@ -513,16 +514,6 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static inline void *dereference_function_descriptor(void *ptr)
-{
-#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
-	void *p;
-	if (!probe_kernel_address(ptr, p))
-		ptr = p;
-#endif
-	return ptr;
-}
-
 static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
 	unsigned long value = (unsigned long) ptr;
-- cgit v1.2.3

From 2797982ed93c10d5585ee1842ab298cb11326ff5 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Wed, 10 Sep 2008 01:06:49 +0900
Subject: swiotlb: convert swiotlb to use is_buffer_dma_capable helper function

Signed-off-by: FUJITA Tomonori
Acked-by: Joerg Roedel
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b5f5d113304..240a67c2c97 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -274,13 +274,13 @@ cleanup1:
 }
 
 static int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
 	dma_addr_t mask = 0xffffffff;
 	/* If the device has a mask, use it, otherwise default to 32 bits */
 	if (hwdev && hwdev->dma_mask)
 		mask = *hwdev->dma_mask;
-	return (addr & ~mask) != 0;
+	return !is_buffer_dma_capable(mask, addr, size);
 }
 
 static int is_swiotlb_buffer(char *addr)
@@ -473,7 +473,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
+	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -497,7 +497,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	dev_addr = virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (address_needs_mapping(hwdev, dev_addr)) {
+	if (address_needs_mapping(hwdev, dev_addr, size)) {
 		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
 		       (unsigned long long)*hwdev->dma_mask,
 		       (unsigned long long)dev_addr);
@@ -561,7 +561,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
@@ -578,7 +578,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(hwdev, dev_addr))
+	if (address_needs_mapping(hwdev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
@@ -721,7 +721,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = virt_to_bus(addr);
-		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+		if (swiotlb_force ||
+		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
-- cgit v1.2.3

From 36223a399f639b13b7a454349565934e6d3e2db0 Mon Sep 17 00:00:00 2001
From: Daniel J Blueman
Date: Wed, 10 Sep 2008 21:07:55 +0100
Subject: swiotlb: fix back-off path when memory allocation fails

This fixes a SWIOTLB oops.

With SWIOTLB enabled and a straightforward page allocation failure [1],
the swiotlb_alloc_coherent fall-back path hits an issue [2], resulting
in my webcam failing to work.

At the time of the oops, RDI is clearly a pointer to a structure which
has arrived as NULL, leading to the typo in swiotlb_map_single's
callsite arguments. Correctly passing the device structure [3]
addresses the issue and gets my webcam working again (the allocation
failure still occurring).

---
[1]

skype: page allocation failure. order:3, mode:0x1
Pid: 5895, comm: skype Not tainted 2.6.27-rc6-235c-debug #1

Call Trace:
 [] __alloc_pages_internal+0x4a0/0x5d0
 [] alloc_pages_current+0xad/0x110
 [] __get_free_pages+0x1d/0x60
 [] swiotlb_alloc_coherent+0x49/0x180
 [] dma_alloc_coherent+0x281/0x310
 [] hcd_buffer_alloc+0x50/0x90
 [] usb_buffer_alloc+0x2d/0x40
 [] uvc_alloc_urb_buffers+0x53/0xf0 [uvcvideo]
 [] uvc_init_video+0x158/0x3e0 [uvcvideo]
 [] uvc_video_enable+0x37/0x80 [uvcvideo]
 [] uvc_v4l2_do_ioctl+0x723/0x1260 [uvcvideo]
 [] ? trace_hardirqs_off_caller+0x21/0xc0
 [] ? trace_hardirqs_off_caller+0x21/0xc0
 [] video_usercopy+0x19f/0x390 [videodev]
 [] ? uvc_v4l2_do_ioctl+0x0/0x1260 [uvcvideo]
 [] ? put_lock_stats+0xe/0x30
 [] uvc_v4l2_ioctl+0x4d/0x80 [uvcvideo]
 [] native_ioctl+0x83/0x90 [compat_ioctl32]
 [] v4l_compat_ioctl32+0x2be/0x1da4 [compat_ioctl32]
 [] ? do_page_fault+0x3d1/0xae0
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? trace_hardirqs_on_caller+0x149/0x1b0
 [] ? trace_hardirqs_on+0xd/0x10
 [] compat_sys_ioctl+0x8a/0x3c0
 [] ? trace_hardirqs_off_thunk+0x3a/0x3c
 [] sysenter_dispatch+0x7/0x2c
 [] ? trace_hardirqs_on_thunk+0x3a/0x3f
Mem-Info:
Node 0 DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
Node 0 DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:   3
CPU    1: hi:  186, btch:  31 usd:   0
Node 0 Normal per-cpu:
CPU    0: hi:  186, btch:  31 usd:  23
CPU    1: hi:  186, btch:  31 usd: 179
Active:78545 inactive:48683 dirty:31 writeback:0 unstable:2
 free:830202 slab:17516 mapped:17473 pagetables:3496 bounce:0
Node 0 DMA free:36kB min:28kB low:32kB high:40kB active:0kB inactive:0kB present:15156kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 3207 3956 3956
Node 0 DMA32 free:3197192kB min:6512kB low:8140kB high:9768kB active:0kB inactive:0kB present:3284896kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 748 748
Node 0 Normal free:123580kB min:1516kB low:1892kB high:2272kB active:314180kB inactive:194732kB present:766464kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
Node 0 DMA: 1*4kB 0*8kB 0*16kB 1*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 36kB
Node 0 DMA32: 4*4kB 3*8kB 2*16kB 3*32kB 4*64kB 5*128kB 3*256kB 5*512kB 4*1024kB 5*2048kB 776*4096kB = 3197224kB
Node 0 Normal: 14*4kB 14*8kB 8*16kB 6*32kB 1*64kB 3*128kB 3*256kB 2*512kB 4*1024kB 1*2048kB 28*4096kB = 123560kB
64847 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 502752kB
Total swap = 502752kB
1048576 pages RAM
52120 pages reserved
71967 pages shared
143004 pages non-shared

---
[2]

BUG: unable to handle kernel NULL pointer dereference at 00000000000002c8
IP: [] map_single+0x1c/0x280
PGD 10e54e067 PUD 10e595067 PMD 0
Oops: 0000 [1] PREEMPT SMP DEBUG_PAGEALLOC
CPU 0
Modules linked in: kvm_intel kvm microcode uvcvideo compat_ioctl32 videodev v4l1_compat shpchp pci_hotplug
Pid: 5895, comm: skype Not tainted 2.6.27-rc6-235c-debug #1
RIP: 0010:[] [] map_single+0x1c/0x280
RSP: 0018:ffff88010e78d988 EFLAGS: 00210296
RAX: 0000780000000000 RBX: 0000000000000000 RCX: 0000000000000002
RDX: 0000000000005000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff88010e78d9e8 R08: 0000000000000000 R09: 0000000000000001
R10: ffff88010e78d698 R11: 0000000000000001 R12: 0000000000000002
R13: 0000000000000000 R14: 0000000000005000 R15: ffff88012f1c9968
FS: 0000000000000000(0000) GS:ffffffff80a6cdc0(0063) knlGS:00000000f6355b90
CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 00000000000002c8 CR3: 000000010e57d000 CR4: 00000000000026e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process skype (pid: 5895, threadinfo ffff88010e78c000, task ffff88012b9cc460)
Stack:
 0000000200000000 0000000000005000 0000000000000000 0000000000000000
 00000000000017b8 0000000000000000 ffff88010e78d9c8 0000000000000000
 0000000000000002 0000000000000000 0000000000005000 ffff88012f1c9968
Call Trace:
 [] swiotlb_map_single_attrs+0x60/0xf0
 [] swiotlb_map_single+0xc/0x10
 [] swiotlb_alloc_coherent+0xfe/0x180
 [] dma_alloc_coherent+0x281/0x310
 [] hcd_buffer_alloc+0x50/0x90
 [] usb_buffer_alloc+0x2d/0x40
 [] uvc_alloc_urb_buffers+0x53/0xf0 [uvcvideo]
 [] uvc_init_video+0x158/0x3e0 [uvcvideo]
 [] uvc_video_enable+0x37/0x80 [uvcvideo]
 [] uvc_v4l2_do_ioctl+0x723/0x1260 [uvcvideo]
 [] ? trace_hardirqs_off_caller+0x21/0xc0
 [] ? trace_hardirqs_off_caller+0x21/0xc0
 [] video_usercopy+0x19f/0x390 [videodev]
 [] ? uvc_v4l2_do_ioctl+0x0/0x1260 [uvcvideo]
 [] ? put_lock_stats+0xe/0x30
 [] uvc_v4l2_ioctl+0x4d/0x80 [uvcvideo]
 [] native_ioctl+0x83/0x90 [compat_ioctl32]
 [] v4l_compat_ioctl32+0x2be/0x1da4 [compat_ioctl32]
 [] ? do_page_fault+0x3d1/0xae0
 [] ? trace_hardirqs_on+0xd/0x10
 [] ? trace_hardirqs_on_caller+0x149/0x1b0
 [] ? trace_hardirqs_on+0xd/0x10
 [] compat_sys_ioctl+0x8a/0x3c0
 [] ? trace_hardirqs_off_thunk+0x3a/0x3c
 [] sysenter_dispatch+0x7/0x2c
 [] ? trace_hardirqs_on_thunk+0x3a/0x3f
Code: 45 31 c0 48 89 e5 e8 a4 ff ff ff c9 c3 66 90 55 48 89 e5 41 57 41 56 41 55 41 54 53 48 83 ec 38 48 89 75 b0 48 89 55 a8 89 4d a4 <48> 8b 87 c8 02 00 00 48 85 c0 0f 84 1c 02 00 00 48 8b 58 08 48
RIP [] map_single+0x1c/0x280
 RSP
CR2: 00000000000002c8
---[ end trace 5d15baeeb7025a0e ]---

---
[3]

ffffffff8046c830 :
map_single():
/store/kernel/linux/lib/swiotlb.c:291
ffffffff8046c830:	55			push   %rbp
ffffffff8046c831:	48 89 e5		mov    %rsp,%rbp
ffffffff8046c834:	41 57			push   %r15
ffffffff8046c836:	41 56			push   %r14
ffffffff8046c838:	41 55			push   %r13
ffffffff8046c83a:	41 54			push   %r12
ffffffff8046c83c:	53			push   %rbx
ffffffff8046c83d:	48 83 ec 38		sub    $0x38,%rsp
ffffffff8046c841:	48 89 75 b0		mov    %rsi,-0x50(%rbp)
ffffffff8046c845:	48 89 55 a8		mov    %rdx,-0x58(%rbp)
ffffffff8046c849:	89 4d a4		mov    %ecx,-0x5c(%rbp)
dma_get_seg_boundary():
/store/kernel/linux/include/linux/dma-mapping.h:80
ffffffff8046c84c:	48 8b 87 c8 02 00 00	mov    0x2c8(%rdi),%rax <----

---
[4]

Signed-off-by: Daniel J Blueman
Signed-off-by: Linus Torvalds
---
 lib/swiotlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 977edbdbc1d..8826fdf0f18 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -491,7 +491,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * the lowest available address range.
 		 */
 		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
+		handle = swiotlb_map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
 		if (swiotlb_dma_mapping_error(hwdev, handle))
 			return NULL;
-- cgit v1.2.3

From 50bed2e2862a8f3a4f7d683d0d27292e71ef18b9 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Thu, 11 Sep 2008 18:35:39 +0200
Subject: sg: disable interrupts inside sg_copy_buffer

The callers of sg_copy_buffer must disable interrupts before calling it
(since it uses kmap_atomic). Some callers use it in interrupt-disabled
code, but others need to take the trouble to disable interrupts just
for this. No wonder they forget about it and we hit a bug like:

http://bugzilla.kernel.org/show_bug.cgi?id=11529

James said that it might be better to disable interrupts inside the
function rather than risk the callers getting it wrong.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe
---
 lib/scatterlist.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 876ba6d5b67..8d2688ff135 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -422,9 +422,12 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 {
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
+	unsigned long flags;
 
 	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
 
+	local_irq_save(flags);
+
 	while (sg_miter_next(&miter) && offset < buflen) {
 		unsigned int len;
 
@@ -442,6 +445,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 
 	sg_miter_stop(&miter);
 
+	local_irq_restore(flags);
 	return offset;
 }
-- cgit v1.2.3

From 07a2c01a0c2a0cb4581a67d50d4f17cb4d2457c4 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Fri, 19 Sep 2008 02:02:05 +0900
Subject: convert swiotlb to use dma_get_mask

swiotlb can use dma_get_mask() instead of the homegrown function.
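For reference, a sketch of the helper being adopted, as defined in
<linux/dma-mapping.h> around this time (reproduced from memory, so
treat the details as assumed); it preserves the old "default to 32 bits
when no mask is set" behaviour:

    static inline u64 dma_get_mask(struct device *dev)
    {
            if (dev && dev->dma_mask && *dev->dma_mask)
                    return *dev->dma_mask;
            return DMA_32BIT_MASK;
    }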
Signed-off-by: FUJITA Tomonori
Cc: tony.luck@intel.com
Signed-off-by: Ingo Molnar
---
 lib/swiotlb.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 240a67c2c97..f8eebd48914 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -276,11 +276,7 @@ cleanup1:
 static int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-	dma_addr_t mask = 0xffffffff;
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (hwdev && hwdev->dma_mask)
-		mask = *hwdev->dma_mask;
-	return !is_buffer_dma_capable(mask, addr, size);
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 static int is_swiotlb_buffer(char *addr)
-- cgit v1.2.3

From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 22 Sep 2008 22:35:07 +0900
Subject: iommu: export iommu_area_reserve helper function

x86 has set_bit_string(), which does the exact same thing as
set_bit_area() in lib/iommu-helper.c. This patch exports set_bit_area()
in lib/iommu-helper.c as iommu_area_reserve() and converts GART,
Calgary, and AMD IOMMU to use it.

Signed-off-by: FUJITA Tomonori
Acked-by: Joerg Roedel
Signed-off-by: Ingo Molnar
---
 lib/iommu-helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77..5d90074dca7 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -30,8 +30,7 @@ again:
 	return index;
 }
 
-static inline void set_bit_area(unsigned long *map, unsigned long i,
-				int len)
+void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
 {
 	unsigned long end = i + len;
 	while (i < end) {
@@ -64,7 +63,7 @@ again:
 			start = index + 1;
 			goto again;
 		}
-		set_bit_area(map, index, nr);
+		iommu_area_reserve(map, index, nr);
 	}
 	return index;
 }
-- cgit v1.2.3

From 2133b5d7ff531bc15a923db4a6a50bf96c561be9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 2 Oct 2008 16:06:39 -0700
Subject: rcu: RCU-based detection of stalled CPUs for Classic RCU

This patch adds stalled-CPU detection to Classic RCU. This capability
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
which defaults to disabled. This is a debugging feature to detect
infinite loops in kernel code, not something that non-kernel-hackers
would be expected to care about.

This feature can detect looping CPUs in !PREEMPT builds and looping
CPUs with preemption disabled in PREEMPT builds. This is essentially a
port of this functionality from the treercu patch, replacing the stall
debug patch that is already in tip/core/rcu (commit 67182ae1c4).

The changes from the patch in tip/core/rcu include making the config
variable name match that in treercu, changing from seconds to jiffies
to avoid spurious warnings, and printing a boot message when this
feature is enabled.

Signed-off-by: Paul E. McKenney
Signed-off-by: Ingo Molnar
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ccede1aeab3..9fee969dd60 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,7 +597,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
-config RCU_CPU_STALL
+config RCU_CPU_STALL_DETECTOR
 	bool "Check for stalled CPUs delaying RCU grace periods"
 	depends on CLASSIC_RCU
 	default n
-- cgit v1.2.3

From 3c9f3681d0b4af09c1cbf04f92fdfb72bd81ad7b Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Sun, 31 Aug 2008 10:13:54 -0500
Subject: [SCSI] lib: add generic helper to print sizes rounded to the correct SI range

This patch adds the ability to print sizes in either SI (powers of
1000) or binary (powers of 1024) units. It rounds up to three
significant figures and can be used for either memory or storage
capacities.

Oh, and I'm fully aware that 64 bits is only 16EiB ... the Zetta and
Yotta units are added for future proofing against the day we have 128
bit computers ...

[fujita.tomonori@lab.ntt.co.jp: fix missed unsigned long long cast]
Signed-off-by: James Bottomley
---
 lib/Makefile         |  3 ++-
 lib/string_helpers.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 lib/string_helpers.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 3b1f94bbe9d..44001af76a7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,8 @@ lib-$(CONFIG_SMP) += cpumask.o
 lib-y += kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
-	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
+	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+	 string_helpers.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
new file mode 100644
index 00000000000..8347925030f
--- /dev/null
+++ b/lib/string_helpers.c
@@ -0,0 +1,64 @@
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ */
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size: The size to be converted
+ * @units: units to use (powers of 1000 or 1024)
+ * @buf: buffer to format to
+ * @len: length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units. Returns 0 on success or
+ * error on failure. @buf is always zero terminated.
+ *
+ */
+int string_get_size(u64 size, const enum string_size_units units,
+		    char *buf, int len)
+{
+	const char *units_10[] = { "B", "KB", "MB", "GB", "TB", "PB",
+				   "EB", "ZB", "YB", NULL};
+	const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
+				 "EiB", "ZiB", "YiB", NULL };
+	const char **units_str[] = {
+		[STRING_UNITS_10] = units_10,
+		[STRING_UNITS_2] = units_2,
+	};
+	const int divisor[] = {
+		[STRING_UNITS_10] = 1000,
+		[STRING_UNITS_2] = 1024,
+	};
+	int i, j;
+	u64 remainder = 0, sf_cap;
+	char tmp[8];
+
+	tmp[0] = '\0';
+
+	for (i = 0; size > divisor[units] && units_str[units][i]; i++)
+		remainder = do_div(size, divisor[units]);
+
+	sf_cap = size;
+	for (j = 0; sf_cap*10 < 1000; j++)
+		sf_cap *= 10;
+
+	if (j) {
+		remainder *= 1000;
+		do_div(remainder, divisor[units]);
+		snprintf(tmp, sizeof(tmp), ".%03lld",
+			 (unsigned long long)remainder);
+		tmp[j+1] = '\0';
+	}
+
+	snprintf(buf, len, "%lld%s%s", (unsigned long long)size,
+		 tmp, units_str[units][i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(string_get_size);
-- cgit v1.2.3

From a1ed5b0cffe4b16a93a6a3390e8cee0fbef94f86 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 25 Aug 2008 19:50:16 +0200
Subject: klist: don't iterate over deleted entries

A klist entry is kept on the list till all its current iterations are
finished; however, a new iteration after deletion also iterates over
deleted entries as long as their reference count stays above zero.
This causes problems for cases where there are users which iterate
over the list while synchronized against list manipulations and
naturally expect already deleted entries to not show up during
iteration.

This patch implements a dead flag which gets set on deletion, so that
iteration can skip already deleted entries. The dead flag piggybacks
on the lowest bit of knode->n_klist and is visible only to the klist
implementation proper.

While at it, drop klist_iter->i_head as it's redundant and doesn't
offer anything in semantics or performance, as klist_iter->i_klist is
dereferenced on every iteration anyway.

Signed-off-by: Tejun Heo
Cc: Greg Kroah-Hartman
Cc: Alan Stern
Cc: Jens Axboe
Signed-off-by: Jens Axboe
---
 lib/klist.c | 96 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 70 insertions(+), 26 deletions(-)

(limited to 'lib')

diff --git a/lib/klist.c b/lib/klist.c
index cca37f96faa..bbdd3015c2c 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -37,6 +37,37 @@
 #include <linux/klist.h>
 #include <linux/module.h>
 
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD		1LU
+#define KNODE_KLIST_MASK	~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+	return (struct klist *)
+		((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+	return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+	knode->n_klist = klist;
+	/* no knode deserves to start its life dead */
+	WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+	/* and no knode should die twice ever either, see we're very humane */
+	WARN_ON(knode_dead(knode));
+	*(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
 
 /**
  * klist_init - Initialize a klist structure.
@@ -79,7 +110,7 @@ static void klist_node_init(struct klist *k, struct klist_node *n)
 	INIT_LIST_HEAD(&n->n_node);
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
-	n->n_klist = k;
+	knode_set_klist(n, k);
 	if (k->get)
 		k->get(n);
 }
@@ -115,7 +146,7 @@ EXPORT_SYMBOL_GPL(klist_add_tail);
  */
 void klist_add_after(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -131,7 +162,7 @@ EXPORT_SYMBOL_GPL(klist_add_after);
  */
 void klist_add_before(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -144,9 +175,10 @@ static void klist_release(struct kref *kref)
 {
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
+	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
 	complete(&n->n_removed);
-	n->n_klist = NULL;
+	knode_set_klist(n, NULL);
 }
 
 static int klist_dec_and_del(struct klist_node *n)
@@ -154,22 +186,29 @@ static int klist_dec_and_del(struct klist_node *n)
 	return kref_put(&n->n_ref, klist_release);
 }
 
-/**
- * klist_del - Decrement the reference count of node and try to remove.
- * @n: node we're deleting.
- */
-void klist_del(struct klist_node *n)
+static void klist_put(struct klist_node *n, bool kill)
 {
-	struct klist *k = n->n_klist;
+	struct klist *k = knode_klist(n);
 	void (*put)(struct klist_node *) = k->put;
 
 	spin_lock(&k->k_lock);
+	if (kill)
+		knode_kill(n);
 	if (!klist_dec_and_del(n))
 		put = NULL;
 	spin_unlock(&k->k_lock);
 	if (put)
 		put(n);
 }
+
+/**
+ * klist_del - Decrement the reference count of node and try to remove.
+ * @n: node we're deleting.
+ */
+void klist_del(struct klist_node *n)
+{
+	klist_put(n, true);
+}
 EXPORT_SYMBOL_GPL(klist_del);
 
 /**
@@ -206,7 +245,6 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i,
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_head = &k->k_list;
 	i->i_cur = n;
 	if (n)
 		kref_get(&n->n_ref);
@@ -237,7 +275,7 @@ EXPORT_SYMBOL_GPL(klist_iter_init);
 void klist_iter_exit(struct klist_iter *i)
 {
 	if (i->i_cur) {
-		klist_del(i->i_cur);
+		klist_put(i->i_cur, false);
 		i->i_cur = NULL;
 	}
 }
@@ -258,27 +296,33 @@ static struct klist_node *to_klist_node(struct list_head *n)
  */
 struct klist_node *klist_next(struct klist_iter *i)
 {
-	struct list_head *next;
-	struct klist_node *lnode = i->i_cur;
-	struct klist_node *knode = NULL;
 	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *next;
 
 	spin_lock(&i->i_klist->k_lock);
-	if (lnode) {
-		next = lnode->n_node.next;
-		if (!klist_dec_and_del(lnode))
+
+	if (last) {
+		next = to_klist_node(last->n_node.next);
+		if (!klist_dec_and_del(last))
 			put = NULL;
 	} else
-		next = i->i_head->next;
+		next = to_klist_node(i->i_klist->k_list.next);
 
-	if (next != i->i_head) {
-		knode = to_klist_node(next);
-		kref_get(&knode->n_ref);
+	i->i_cur = NULL;
+	while (next != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(next))) {
+			kref_get(&next->n_ref);
+			i->i_cur = next;
+			break;
+		}
+		next = to_klist_node(next->n_node.next);
 	}
-	i->i_cur = knode;
+
 	spin_unlock(&i->i_klist->k_lock);
-	if (put && lnode)
-		put(lnode);
-	return knode;
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);
-- cgit v1.2.3

From 870d6656126add8e383645732b03df2b7ccd4f94 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 25 Aug 2008 19:47:25 +0900
Subject: block: implement CONFIG_DEBUG_BLOCK_EXT_DEVT
Extended devt introduces non-contiguous device numbers. This patch
implements a debug option which forces most devt allocations to be
from the extended area and spreads them out. This is enabled by
default if DEBUG_KERNEL is set and achieves...

1. Detecting code paths in the kernel or userland which expect
   predetermined consecutive device numbers.

2. When something goes wrong, avoiding corruption, as adding to the
   minor of an earlier partition won't lead to a wrong but valid
   device.

Signed-off-by: Tejun Heo
Signed-off-by: Jens Axboe
---
 lib/Kconfig.debug | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b504814e37..5a536f703a8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -624,6 +624,22 @@ config BACKTRACE_SELF_TEST
 
 	  Say N if you are unsure.
 
+config DEBUG_BLOCK_EXT_DEVT
+	bool "Force extended block device numbers and spread them"
+	depends on DEBUG_KERNEL
+	depends on BLOCK
+	default y
+	help
+	  Conventionally, block device numbers are allocated from
+	  predetermined contiguous area. However, extended block area
+	  may introduce non-contiguous block device numbers. This
+	  option forces most block device numbers to be allocated from
+	  the extended space and spreads them to discover kernel or
+	  userland code paths which assume predetermined contiguous
+	  device number allocation.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL
-- cgit v1.2.3

From 759f8ca3048f7438aa3129268d7252552505d662 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 29 Aug 2008 09:06:29 +0200
Subject: Change default value of CONFIG_DEBUG_BLOCK_EXT_DEVT to 'n'

It's a debug option that you would explicitly enable to test this
feature; we should default it to 'n' to prevent accidental surprises
for now.

Signed-off-by: Jens Axboe
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5a536f703a8..4378d5e923c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -628,7 +628,7 @@ config DEBUG_BLOCK_EXT_DEVT
 	bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL
 	depends on BLOCK
-	default y
+	default n
 	help
 	  Conventionally, block device numbers are allocated from
 	  predetermined contiguous area. However, extended block area
-- cgit v1.2.3

From 55dc7db70a73a3809a2334063c9b5b0d8ccebdaa Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 1 Sep 2008 13:44:35 +0200
Subject: init: DEBUG_BLOCK_EXT_DEVT requires explicit root= param

DEBUG_BLOCK_EXT_DEVT shuffles SCSI and IDE device numbers, so a root
device number set using rdev becomes meaningless. Root devices should
be explicitly specified using textual names. Warn about it if root
can't be found and DEBUG_BLOCK_EXT_DEVT is enabled. Also, add a
warning to the help text.

Signed-off-by: Tejun Heo
Cc: Bartlomiej Zolnierkiewicz
Signed-off-by: Jens Axboe
---
 lib/Kconfig.debug | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4378d5e923c..c556896abe5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -638,6 +638,12 @@ config DEBUG_BLOCK_EXT_DEVT
 	  userland code paths which assume predetermined contiguous
 	  device number allocation.
 
+	  Note that turning on this debug option shuffles all the
+	  device numbers for all IDE and SCSI devices including libata
+	  ones, so root partition specified using device number
+	  directly (via rdev or root=MAJ:MIN) won't work anymore.
+	  Textual device names (root=/dev/sdXn) will continue to work.
+
 	  Say N if you are unsure.
 
 config LKDTM
-- cgit v1.2.3

From 581d4e28d9195aa8b2231383dbabc288988d615e Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sun, 14 Sep 2008 05:56:33 -0700
Subject: block: add fault injection mechanism for faking request timeouts

Only works for the generic request timer handling. Allows one to
sporadically ignore request completions, thus exercising the timeout
handling.

Signed-off-by: Jens Axboe
---
 lib/Kconfig.debug | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c556896abe5..7d7a31d0dde 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -683,10 +683,21 @@ config FAIL_PAGE_ALLOC
 
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION
+	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability for disk IO.
 
+config FAIL_IO_TIMEOUT
+	bool "Fault-injection capability for faking disk interrupts"
+	depends on FAULT_INJECTION && BLOCK
+	help
+	  Provide fault-injection capability on end IO handling. This
+	  will make the block layer "forget" an interrupt as configured,
+	  thus exercising the error handling.
+
+	  Only works with drivers that use the generic timeout handling;
+	  for others it won't do anything.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
-- cgit v1.2.3
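A sketch of how this capability would typically be exercised at
runtime, assuming the standard fault-injection debugfs knobs are wired
up for it (the directory name is assumed from the config symbol):

    # let roughly 1 in 10 eligible completions be "forgotten",
    # checking only every 100th request, with no total limit
    echo 10 > /sys/kernel/debug/fail_io_timeout/probability
    echo 100 > /sys/kernel/debug/fail_io_timeout/interval
    echo -1 > /sys/kernel/debug/fail_io_timeout/times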