| author | Tony Luck <tony.luck@intel.com> | 2005-11-03 09:09:08 -0800 |
|---|---|---|
| committer | Tony Luck <tony.luck@intel.com> | 2005-11-03 09:09:08 -0800 |
| commit | 55725495f2175d0be2a5ed4135c2ea298b470001 (patch) | |
| tree | 6005d0c7a95d994bf7ec8e6410dbd5a573e86fdf /arch | |
| parent | 067794a1170ef61e295aea1719cf4a1ce20200f8 (diff) | |
| parent | 06024f217d607369f0ee0071034ebb03071d5fb2 (diff) | |
Auto-update from upstream
Diffstat (limited to 'arch')
48 files changed, 2840 insertions, 1348 deletions
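Among the changes in the diff below, the new CONFIG_HOTPLUG_CPU option for ARM notes that CPUs "can be controlled through /sys/devices/system/cpu". As an illustration only (not part of this patch), a minimal user-space sketch of offlining and re-onlining a secondary CPU through the standard sysfs `online` attribute might look like the following; the CPU number and the error handling are assumptions made for the example.

```c
/* Illustration only: toggle cpu1 offline/online via sysfs, as described
 * in the CONFIG_HOTPLUG_CPU help text in the diff below. The path layout
 * and CPU number are assumptions, not part of this patch. */
#include <stdio.h>

static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", online);
	return fclose(f);
}

int main(void)
{
	if (set_cpu_online(1, 0))	/* take CPU1 down: exercises __cpu_disable()/cpu_die() */
		perror("offline cpu1");
	if (set_cpu_online(1, 1))	/* bring it back up: exercises the __cpu_up() path */
		perror("online cpu1");
	return 0;
}
```

Writing 0 to the attribute is what ultimately drives the new migrate_irqs(), __cpu_disable() and cpu_die() code added by this merge.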
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 682367bd0f6..6b12d71978d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -194,6 +194,13 @@ config ARCH_VERSATILE help This enables support for ARM Ltd Versatile board. +config ARCH_REALVIEW + bool "RealView" + select ARM_AMBA + select ICST307 + help + This enables support for ARM Ltd RealView boards. + config ARCH_IMX bool "IMX" @@ -244,6 +251,8 @@ source "arch/arm/mach-versatile/Kconfig" source "arch/arm/mach-aaec2000/Kconfig" +source "arch/arm/mach-realview/Kconfig" + # Definitions to make life easier config ARCH_ACORN bool @@ -340,6 +349,13 @@ config NR_CPUS depends on SMP default "4" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + config PREEMPT bool "Preemptible Kernel (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 64cf480b0b0..d80749ae2a7 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -99,6 +99,7 @@ textaddr-$(CONFIG_ARCH_FORTUNET) := 0xc0008000 machine-$(CONFIG_ARCH_IMX) := imx machine-$(CONFIG_ARCH_H720X) := h720x machine-$(CONFIG_ARCH_AAEC2000) := aaec2000 + machine-$(CONFIG_ARCH_REALVIEW) := realview ifeq ($(CONFIG_ARCH_EBSA110),y) # This is what happens if you forget the IOCS16 line. diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index c279e41ed10..f74c926beb4 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -104,7 +104,7 @@ CONFIG_ARCH_IXCDP1100=y CONFIG_ARCH_PRPMC1100=y CONFIG_ARCH_IXDP4XX=y CONFIG_CPU_IXP46X=y -CONFIG_MACH_GTWX5715=y +# CONFIG_MACH_GTWX5715 is not set # # IXP4xx Options diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 3284118f356..9def4404e1f 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -1050,3 +1050,34 @@ static int __init noirqdebug_setup(char *str) } __setup("noirqdebug", noirqdebug_setup); + +#ifdef CONFIG_HOTPLUG_CPU +/* + * The CPU has been marked offline. Migrate IRQs off this CPU. If + * the affinity settings do not allow other CPUs, force them onto any + * available CPU. 
+ */ +void migrate_irqs(void) +{ + unsigned int i, cpu = smp_processor_id(); + + for (i = 0; i < NR_IRQS; i++) { + struct irqdesc *desc = irq_desc + i; + + if (desc->cpu == cpu) { + unsigned int newcpu = any_online_cpu(desc->affinity); + + if (newcpu == NR_CPUS) { + if (printk_ratelimit()) + printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", + i, cpu); + + cpus_setall(desc->affinity); + newcpu = any_online_cpu(desc->affinity); + } + + route_irq(desc, i, newcpu); + } + } +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 409db6d5ec9..ba298277bec 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/init.h> +#include <linux/cpu.h> #include <asm/system.h> #include <asm/io.h> @@ -105,6 +106,14 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { void (*idle)(void) = pm_idle; + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_is_offline(smp_processor_id())) { + leds_event(led_idle_start); + cpu_die(); + } +#endif + if (!idle) idle = default_idle; preempt_disable(); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 82616494574..edb5a406922 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -80,19 +80,23 @@ static DEFINE_SPINLOCK(smp_call_function_lock); int __cpuinit __cpu_up(unsigned int cpu) { - struct task_struct *idle; + struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); + struct task_struct *idle = ci->idle; pgd_t *pgd; pmd_t *pmd; int ret; /* - * Spawn a new process manually. Grab a pointer to - * its task struct so we can mess with it + * Spawn a new process manually, if not already done. + * Grab a pointer to its task struct so we can mess with it */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) { - printk(KERN_ERR "CPU%u: fork() failed\n", cpu); - return PTR_ERR(idle); + if (!idle) { + idle = fork_idle(cpu); + if (IS_ERR(idle)) { + printk(KERN_ERR "CPU%u: fork() failed\n", cpu); + return PTR_ERR(idle); + } + ci->idle = idle; } /* @@ -155,6 +159,91 @@ int __cpuinit __cpu_up(unsigned int cpu) return ret; } +#ifdef CONFIG_HOTPLUG_CPU +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpuexit __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + struct task_struct *p; + int ret; + + ret = mach_cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + cpu_clear(cpu, cpu_online_map); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Flush user cache and TLB mappings, and then remove this CPU + * from the vm mask set of all processes. + */ + flush_cache_all(); + local_flush_tlb_all(); + + read_lock(&tasklist_lock); + for_each_process(p) { + if (p->mm) + cpu_clear(cpu, p->mm->cpu_vm_mask); + } + read_unlock(&tasklist_lock); + + return 0; +} + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpuexit __cpu_die(unsigned int cpu) +{ + if (!platform_cpu_kill(cpu)) + printk("CPU%u: unable to kill\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. 
+ */ +void __cpuexit cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + + /* + * actual CPU shutdown procedure is at least platform (if not + * CPU) specific + */ + platform_cpu_die(cpu); + + /* + * Do not return to the idle loop - jump back to the secondary + * cpu initialisation. There's some initialisation which needs + * to be repeated to undo the effects of taking the CPU offline. + */ + __asm__("mov sp, %0\n" + " b secondary_start_kernel" + : + : "r" ((void *)current->thread_info + THREAD_SIZE - 8)); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -236,6 +325,8 @@ void __init smp_prepare_boot_cpu(void) { unsigned int cpu = smp_processor_id(); + per_cpu(cpu_data, cpu).idle = current; + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); cpu_set(cpu, cpu_online_map); @@ -309,8 +400,8 @@ int smp_call_function_on_cpu(void (*func)(void *info), void *info, int retry, printk(KERN_CRIT "CPU%u: smp_call_function timeout for %p(%p)\n" " callmap %lx pending %lx, %swait\n", - smp_processor_id(), func, info, callmap, data.pending, - wait ? "" : "no "); + smp_processor_id(), func, info, *cpus_addr(callmap), + *cpus_addr(data.pending), wait ? "" : "no "); /* * TRACE diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 71e5b99e519..391f3ab3ff3 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -7,13 +7,27 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ copy_page.o delay.o findbit.o memchr.o memcpy.o \ - memset.o memzero.o setbit.o strncpy_from_user.o \ - strnlen_user.o strchr.o strrchr.o testchangebit.o \ - testclearbit.o testsetbit.o uaccess.o getuser.o \ - putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + memmove.o memset.o memzero.o setbit.o \ + strncpy_from_user.o strnlen_user.o \ + strchr.o strrchr.o \ + testchangebit.o testclearbit.o testsetbit.o \ + getuser.o putuser.o clear_user.o \ + ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ ucmpdi2.o lib1funcs.o div64.o sha1.o \ io-readsb.o io-writesb.o io-readsl.o io-writesl.o +# the code in uaccess.S is not preemption safe and +# probably faster on ARMv3 only +ifeq ($CONFIG_PREEMPT,y) + lib-y += copy_from_user.o copy_to_user.o +else +ifneq ($(CONFIG_CPU_32v3),y) + lib-y += copy_from_user.o copy_to_user.o +else + lib-y += uaccess.o +endif +endif + ifeq ($(CONFIG_CPU_32v3),y) lib-y += io-readsw-armv3.o io-writesw-armv3.o else diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S new file mode 100644 index 00000000000..7ff9f831b3f --- /dev/null +++ b/arch/arm/lib/clear_user.S @@ -0,0 +1,52 @@ +/* + * linux/arch/arm/lib/clear_user.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Prototype: int __arch_clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(__arch_clear_user) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 +USER( strbt r2, [r0], #1) +USER( strlebt r2, [r0], #1) +USER( strltbt r2, [r0], #1) + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 +USER( strplt r2, [r0], #4) +USER( strplt r2, [r0], #4) + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 +USER( strplt r2, [r0], #4) +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x +USER( strnebt r2, [r0], #1) +USER( strnebt r2, [r0], #1) + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 +USER( strnebt r2, [r0], #1) + mov r0, #0 + LOADREGS(fd,sp!, {r1, pc}) + + .section .fixup,"ax" + .align 0 +9001: LOADREGS(fd,sp!, {r0, pc}) + .previous + diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S new file mode 100644 index 00000000000..7497393a0e8 --- /dev/null +++ b/arch/arm/lib/copy_from_user.S @@ -0,0 +1,101 @@ +/* + * linux/arch/arm/lib/copy_from_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Prototype: + * + * size_t __arch_copy_from_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to kernel memory from user memory + * + * Params: + * + * to = kernel memory + * from = user memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. 
+ */ + + .macro ldr1w ptr reg abort +100: ldrt \reg, [\ptr], #4 + .section __ex_table, "a" + .long 100b, \abort + .previous + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldr1w \ptr, \reg1, \abort + ldr1w \ptr, \reg2, \abort + ldr1w \ptr, \reg3, \abort + ldr1w \ptr, \reg4, \abort + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort + ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort + .endm + + .macro ldr1b ptr reg cond=al abort +100: ldr\cond\()bt \reg, [\ptr], #1 + .section __ex_table, "a" + .long 100b, \abort + .previous + .endm + + .macro str1w ptr reg abort + str \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__arch_copy_from_user) + +#include "copy_template.S" + + .section .fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2} + sub r3, r0, r1 + rsb r1, r3, r2 + str r1, [sp] + bl __memzero + ldr r0, [sp], #4 + copy_abort_end + .previous + diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S new file mode 100644 index 00000000000..838e435e492 --- /dev/null +++ b/arch/arm/lib/copy_template.S @@ -0,0 +1,255 @@ +/* + * linux/arch/arm/lib/copy_template.s + * + * Code template for optimized memory copy functions + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This can be used to enable code to cacheline align the source pointer. + * Experiments on tested architectures (StrongARM and XScale) didn't show + * this a worthwhile thing to do. That might be different in the future. + */ +//#define CALGN(code...) code +#define CALGN(code...) + +/* + * Theory of operation + * ------------------- + * + * This file provides the core code for a forward memory copy used in + * the implementation of memcopy(), copy_to_user() and copy_from_user(). + * + * The including file must define the following accessor macros + * according to the need of the given function: + * + * ldr1w ptr reg abort + * + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * + * ldr4w ptr reg1 reg2 reg3 reg4 abort + * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. + * + * ldr1b ptr reg cond abort + * + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. + * + * str1w ptr reg abort + * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * str1b ptr reg cond abort + * + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. 
+ * + * enter reg1 reg2 + * + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. + * + * exit reg1 reg2 + * + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. + */ + + + enter r4, lr + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: nop + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f + + add pc, pc, ip + nop + nop + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f + + exit r4, pc + +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, pull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + 
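The forward_copy_shift macro defined above handles a source pointer that is not word-aligned: it loads aligned words and merges each consecutive pair using the `pull`/`push` shift amounts (on little-endian ARM these assembler aliases expand to `lsr` and `lsl`). A rough C model of that merge step, shown here purely as an illustration of the scheme instantiated below with pull/push pairs of 8/24, 16/16 and 24/8:

```c
/* Conceptual model of one forward_copy_shift merge (little-endian case).
 * 'prev' and 'next' are two consecutive aligned source words and
 * pull + push == 32. Illustration only - not part of the patch. */
#include <stdint.h>

static inline uint32_t merge_words(uint32_t prev, uint32_t next,
				   unsigned int pull, unsigned int push)
{
	return (prev >> pull) | (next << push);
}
```

Because the shift amounts must be assemble-time constants, the template emits three specialisations of the macro, one per possible source misalignment.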
forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + + +/* + * Abort preanble and completion macros. + * If a fixup handler is required then those macros must surround it. + * It is assumed that the fixup code will handle the private part of + * the exit macro. + */ + + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} +21: + .endm + + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm + diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S new file mode 100644 index 00000000000..4a6d8ea1402 --- /dev/null +++ b/arch/arm/lib/copy_to_user.S @@ -0,0 +1,101 @@ +/* + * linux/arch/arm/lib/copy_to_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Prototype: + * + * size_t __arch_copy_to_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to user memory from kernel memory + * + * Params: + * + * to = user memory + * from = kernel memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. + */ + + .macro ldr1w ptr reg abort + ldr \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort +100: strt \reg, [\ptr], #4 + .section __ex_table, "a" + .long 100b, \abort + .previous + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + str1w \ptr, \reg1, \abort + str1w \ptr, \reg2, \abort + str1w \ptr, \reg3, \abort + str1w \ptr, \reg4, \abort + str1w \ptr, \reg5, \abort + str1w \ptr, \reg6, \abort + str1w \ptr, \reg7, \abort + str1w \ptr, \reg8, \abort + .endm + + .macro str1b ptr reg cond=al abort +100: str\cond\()bt \reg, [\ptr], #1 + .section __ex_table, "a" + .long 100b, \abort + .previous + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__arch_copy_to_user) + +#include "copy_template.S" + + .section .fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2, r3} + sub r0, r0, r1 + rsb r0, r0, r2 + copy_abort_end + .previous + diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index f5a593ceb8c..7e71d6708a8 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -1,393 +1,59 @@ /* * linux/arch/arm/lib/memcpy.S * - * Copyright (C) 1995-1999 Russell King + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
*/ + #include <linux/linkage.h> #include <asm/assembler.h> - .text - -#define ENTER \ - mov ip,sp ;\ - stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\ - sub fp,ip,#4 - -#define EXIT \ - LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc}) - -#define EXITEQ \ - LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc}) - -/* - * Prototype: void memcpy(void *to,const void *from,unsigned long n); - */ -ENTRY(memcpy) -ENTRY(memmove) - ENTER - cmp r1, r0 - bcc 23f - subs r2, r2, #4 - blt 6f - PLD( pld [r1, #0] ) - ands ip, r0, #3 - bne 7f - ands ip, r1, #3 - bne 8f + .macro ldr1w ptr reg abort + ldr \reg, [\ptr], #4 + .endm -1: subs r2, r2, #8 - blt 5f - subs r2, r2, #20 - blt 4f - PLD( pld [r1, #28] ) - PLD( subs r2, r2, #64 ) - PLD( blt 3f ) -2: PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - ldmia r1!, {r3 - r9, ip} - subs r2, r2, #32 - stmgeia r0!, {r3 - r9, ip} - ldmgeia r1!, {r3 - r9, ip} - subges r2, r2, #32 - stmia r0!, {r3 - r9, ip} - bge 2b -3: PLD( ldmia r1!, {r3 - r9, ip} ) - PLD( adds r2, r2, #32 ) - PLD( stmgeia r0!, {r3 - r9, ip} ) - PLD( ldmgeia r1!, {r3 - r9, ip} ) - PLD( subges r2, r2, #32 ) - PLD( stmia r0!, {r3 - r9, ip} ) -4: cmn r2, #16 - ldmgeia r1!, {r3 - r6} - subge r2, r2, #16 - stmgeia r0!, {r3 - r6} - adds r2, r2, #20 - ldmgeia r1!, {r3 - r5} - subge r2, r2, #12 - stmgeia r0!, {r3 - r5} -5: adds r2, r2, #8 - blt 6f - subs r2, r2, #4 - ldrlt r3, [r1], #4 - ldmgeia r1!, {r4, r5} - subge r2, r2, #4 - strlt r3, [r0], #4 - stmgeia r0!, {r4, r5} + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm -6: adds r2, r2, #4 - EXITEQ - cmp r2, #2 - ldrb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrgtb r5, [r1], #1 - strb r3, [r0], #1 - strgeb r4, [r0], #1 - strgtb r5, [r0], #1 - EXIT + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm -7: rsb ip, ip, #4 - cmp ip, #2 - ldrb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrgtb r5, [r1], #1 - strb r3, [r0], #1 - strgeb r4, [r0], #1 - strgtb r5, [r0], #1 - subs r2, r2, ip - blt 6b - ands ip, r1, #3 - beq 1b + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm -8: bic r1, r1, #3 - ldr r7, [r1], #4 - cmp ip, #2 - bgt 18f - beq 13f - cmp r2, #12 - blt 11f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 10f ) - PLD( pld [r1, #28] ) -9: PLD( pld [r1, #44] ) -10: mov r3, r7, pull #8 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - stmia r0!, {r3 - r6} - bge 9b - PLD( cmn r2, #32 ) - PLD( bge 10b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 12f -11: mov r3, r7, pull #8 - ldr r7, [r1], #4 - subs r2, r2, #4 - orr r3, r3, r7, push #24 - str r3, [r0], #4 - bge 11b -12: sub r1, r1, #3 - b 6b + .macro str1w ptr reg abort + str \reg, [\ptr], #4 + .endm -13: cmp r2, #12 - blt 16f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 15f ) - PLD( pld [r1, #28] ) -14: PLD( pld [r1, #44] ) -15: mov r3, r7, pull #16 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - stmia r0!, {r3 - r6} - bge 14b - PLD( cmn r2, #32 ) - PLD( bge 15b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 17f -16: mov r3, r7, pull #16 - ldr r7, [r1], #4 - subs r2, r2, #4 - 
orr r3, r3, r7, push #16 - str r3, [r0], #4 - bge 16b -17: sub r1, r1, #2 - b 6b + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm -18: cmp r2, #12 - blt 21f - PLD( pld [r1, #12] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 20f ) - PLD( pld [r1, #28] ) -19: PLD( pld [r1, #44] ) -20: mov r3, r7, pull #24 - ldmia r1!, {r4 - r7} - subs r2, r2, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - stmia r0!, {r3 - r6} - bge 19b - PLD( cmn r2, #32 ) - PLD( bge 20b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 22f -21: mov r3, r7, pull #24 - ldr r7, [r1], #4 - subs r2, r2, #4 - orr r3, r3, r7, push #8 - str r3, [r0], #4 - bge 21b -22: sub r1, r1, #1 - b 6b + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm -23: add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 29f - PLD( pld [r1, #-4] ) - ands ip, r0, #3 - bne 30f - ands ip, r1, #3 - bne 31f + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm -24: subs r2, r2, #8 - blt 28f - subs r2, r2, #20 - blt 27f - PLD( pld [r1, #-32] ) - PLD( subs r2, r2, #64 ) - PLD( blt 26f ) -25: PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - ldmdb r1!, {r3 - r9, ip} - subs r2, r2, #32 - stmgedb r0!, {r3 - r9, ip} - ldmgedb r1!, {r3 - r9, ip} - subges r2, r2, #32 - stmdb r0!, {r3 - r9, ip} - bge 25b -26: PLD( ldmdb r1!, {r3 - r9, ip} ) - PLD( adds r2, r2, #32 ) - PLD( stmgedb r0!, {r3 - r9, ip} ) - PLD( ldmgedb r1!, {r3 - r9, ip} ) - PLD( subges r2, r2, #32 ) - PLD( stmdb r0!, {r3 - r9, ip} ) -27: cmn r2, #16 - ldmgedb r1!, {r3 - r6} - subge r2, r2, #16 - stmgedb r0!, {r3 - r6} - adds r2, r2, #20 - ldmgedb r1!, {r3 - r5} - subge r2, r2, #12 - stmgedb r0!, {r3 - r5} -28: adds r2, r2, #8 - blt 29f - subs r2, r2, #4 - ldrlt r3, [r1, #-4]! - ldmgedb r1!, {r4, r5} - subge r2, r2, #4 - strlt r3, [r0, #-4]! - stmgedb r0!, {r4, r5} + .text -29: adds r2, r2, #4 - EXITEQ - cmp r2, #2 - ldrb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrgtb r5, [r1, #-1]! - strb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - strgtb r5, [r0, #-1]! - EXIT +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -30: cmp ip, #2 - ldrb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrgtb r5, [r1, #-1]! - strb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - strgtb r5, [r0, #-1]! - subs r2, r2, ip - blt 29b - ands ip, r1, #3 - beq 24b - -31: bic r1, r1, #3 - ldr r3, [r1], #0 - cmp ip, #2 - blt 41f - beq 36f - cmp r2, #12 - blt 34f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 33f ) - PLD( pld [r1, #-32] ) -32: PLD( pld [r1, #-48] ) -33: mov r7, r3, push #8 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #24 - mov r6, r6, push #8 - orr r6, r6, r5, pull #24 - mov r5, r5, push #8 - orr r5, r5, r4, pull #24 - mov r4, r4, push #8 - orr r4, r4, r3, pull #24 - stmdb r0!, {r4, r5, r6, r7} - bge 32b - PLD( cmn r2, #32 ) - PLD( bge 33b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 35f -34: mov ip, r3, push #8 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #24 - str ip, [r0, #-4]! 
- bge 34b -35: add r1, r1, #3 - b 29b - -36: cmp r2, #12 - blt 39f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 38f ) - PLD( pld [r1, #-32] ) -37: PLD( pld [r1, #-48] ) -38: mov r7, r3, push #16 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #16 - mov r6, r6, push #16 - orr r6, r6, r5, pull #16 - mov r5, r5, push #16 - orr r5, r5, r4, pull #16 - mov r4, r4, push #16 - orr r4, r4, r3, pull #16 - stmdb r0!, {r4, r5, r6, r7} - bge 37b - PLD( cmn r2, #32 ) - PLD( bge 38b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 40f -39: mov ip, r3, push #16 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #16 - str ip, [r0, #-4]! - bge 39b -40: add r1, r1, #2 - b 29b +ENTRY(memcpy) -41: cmp r2, #12 - blt 44f - PLD( pld [r1, #-16] ) - sub r2, r2, #12 - PLD( subs r2, r2, #32 ) - PLD( blt 43f ) - PLD( pld [r1, #-32] ) -42: PLD( pld [r1, #-48] ) -43: mov r7, r3, push #24 - ldmdb r1!, {r3, r4, r5, r6} - subs r2, r2, #16 - orr r7, r7, r6, pull #8 - mov r6, r6, push #24 - orr r6, r6, r5, pull #8 - mov r5, r5, push #24 - orr r5, r5, r4, pull #8 - mov r4, r4, push #24 - orr r4, r4, r3, pull #8 - stmdb r0!, {r4, r5, r6, r7} - bge 42b - PLD( cmn r2, #32 ) - PLD( bge 43b ) - PLD( add r2, r2, #32 ) - adds r2, r2, #12 - blt 45f -44: mov ip, r3, push #24 - ldr r3, [r1, #-4]! - subs r2, r2, #4 - orr ip, ip, r3, pull #8 - str ip, [r0, #-4]! - bge 44b -45: add r1, r1, #1 - b 29b +#include "copy_template.S" diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S new file mode 100644 index 00000000000..ef7fddc14ac --- /dev/null +++ b/arch/arm/lib/memmove.S @@ -0,0 +1,206 @@ +/* + * linux/arch/arm/lib/memmove.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * This can be used to enable code to cacheline align the source pointer. + * Experiments on tested architectures (StrongARM and XScale) didn't show + * this a worthwhile thing to do. That might be different in the future. + */ +//#define CALGN(code...) code +#define CALGN(code...) + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. This + * is a transposition of the code from copy_template.S but with the copy + * occurring in the opposite direction. 
+ */ + +ENTRY(memmove) + + subs ip, r0, r1 + cmphi r2, ip + bls memcpy + + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r1, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: nop + ldr r3, [r1, #-4]! + ldr r4, [r1, #-4]! + ldr r5, [r1, #-4]! + ldr r6, [r1, #-4]! + ldr r7, [r1, #-4]! + ldr r8, [r1, #-4]! + ldr lr, [r1, #-4]! + + add pc, pc, ip + nop + nop + str r3, [r0, #-4]! + str r4, [r0, #-4]! + str r5, [r0, #-4]! + str r6, [r0, #-4]! + str r7, [r0, #-4]! + str r8, [r0, #-4]! + str lr, [r0, #-4]! + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! + strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} + +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + + + .macro backward_copy_shift push pull + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! 
+ bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: add r1, r1, #(\pull / 8) + b 8b + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index d3ed0636c00..c28449157be 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -657,41 +657,3 @@ USER( ldrgtbt r3, [r1], #1) @ May fault LOADREGS(fd,sp!, {r4 - r7, pc}) .previous -/* Prototype: int __arch_clear_user(void *addr, size_t sz) - * Purpose : clear some user memory - * Params : addr - user memory address to clear - * : sz - number of bytes to clear - * Returns : number of bytes NOT cleared - */ -ENTRY(__arch_clear_user) - stmfd sp!, {r1, lr} - mov r2, #0 - cmp r1, #4 - blt 2f - ands ip, r0, #3 - beq 1f - cmp ip, #2 -USER( strbt r2, [r0], #1) -USER( strlebt r2, [r0], #1) -USER( strltbt r2, [r0], #1) - rsb ip, ip, #4 - sub r1, r1, ip @ 7 6 5 4 3 2 1 -1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 -USER( strplt r2, [r0], #4) -USER( strplt r2, [r0], #4) - bpl 1b - adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 -USER( strplt r2, [r0], #4) -2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x -USER( strnebt r2, [r0], #1) -USER( strnebt r2, [r0], #1) - tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 -USER( strnebt r2, [r0], #1) - mov r0, #0 - LOADREGS(fd,sp!, {r1, pc}) - - .section .fixup,"ax" - .align 0 -9001: LOADREGS(fd,sp!, {r0, pc}) - .previous - diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index 15261646dcd..ed4614983ad 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -251,9 +251,33 @@ static struct platform_device serial_device = { }, }; +static struct resource am79c961_resources[] = { + { + .start = 0x220, + .end = 0x238, + .flags = IORESOURCE_IO, + }, { + .start = IRQ_EBSA110_ETHERNET, + .end = IRQ_EBSA110_ETHERNET, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device am79c961_device = { + .name = "am79c961", + .id = -1, + .num_resources = ARRAY_SIZE(am79c961_resources), + .resource = am79c961_resources, +}; + +static struct platform_device *ebsa110_devices[] = { + &serial_device, + &am79c961_device, +}; + static int __init ebsa110_init(void) { - return platform_device_register(&serial_device); + return platform_add_devices(ebsa110_devices, ARRAY_SIZE(ebsa110_devices)); } arch_initcall(ebsa110_init); diff --git a/arch/arm/mach-ixp2000/Makefile b/arch/arm/mach-ixp2000/Makefile index 1e6139d42a9..9621aeb61f4 100644 --- a/arch/arm/mach-ixp2000/Makefile +++ b/arch/arm/mach-ixp2000/Makefile @@ -1,7 +1,7 @@ # # Makefile for the linux kernel. # -obj-y := core.o pci.o +obj-y := core.o pci.o uengine.o obj-m := obj-n := obj- := diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c index 01c393c504d..c93a98b2a32 100644 --- a/arch/arm/mach-ixp2000/core.c +++ b/arch/arm/mach-ixp2000/core.c @@ -1,5 +1,5 @@ /* - * arch/arm/mach-ixp2000/common.c + * arch/arm/mach-ixp2000/core.c * * Common routines used by all IXP2400/2800 based platforms. 
* @@ -49,7 +49,6 @@ static unsigned long ixp2000_slowport_irq_flags; *************************************************************************/ void ixp2000_acquire_slowport(struct slowport_cfg *new_cfg, struct slowport_cfg *old_cfg) { - spin_lock_irqsave(&ixp2000_slowport_lock, ixp2000_slowport_irq_flags); old_cfg->CCR = *IXP2000_SLOWPORT_CCR; @@ -62,7 +61,7 @@ void ixp2000_acquire_slowport(struct slowport_cfg *new_cfg, struct slowport_cfg ixp2000_reg_write(IXP2000_SLOWPORT_WTC2, new_cfg->WTC); ixp2000_reg_write(IXP2000_SLOWPORT_RTC2, new_cfg->RTC); ixp2000_reg_write(IXP2000_SLOWPORT_PCR, new_cfg->PCR); - ixp2000_reg_write(IXP2000_SLOWPORT_ADC, new_cfg->ADC); + ixp2000_reg_wrb(IXP2000_SLOWPORT_ADC, new_cfg->ADC); } void ixp2000_release_slowport(struct slowport_cfg *old_cfg) @@ -71,7 +70,7 @@ void ixp2000_release_slowport(struct slowport_cfg *old_cfg) ixp2000_reg_write(IXP2000_SLOWPORT_WTC2, old_cfg->WTC); ixp2000_reg_write(IXP2000_SLOWPORT_RTC2, old_cfg->RTC); ixp2000_reg_write(IXP2000_SLOWPORT_PCR, old_cfg->PCR); - ixp2000_reg_write(IXP2000_SLOWPORT_ADC, old_cfg->ADC); + ixp2000_reg_wrb(IXP2000_SLOWPORT_ADC, old_cfg->ADC); spin_unlock_irqrestore(&ixp2000_slowport_lock, ixp2000_slowport_irq_flags); @@ -145,7 +144,7 @@ void __init ixp2000_map_io(void) iotable_init(ixp2000_io_desc, ARRAY_SIZE(ixp2000_io_desc)); /* Set slowport to 8-bit mode. */ - ixp2000_reg_write(IXP2000_SLOWPORT_FRM, 1); + ixp2000_reg_wrb(IXP2000_SLOWPORT_FRM, 1); } @@ -209,7 +208,7 @@ static int ixp2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_seqlock(&xtime_lock); /* clear timer 1 */ - ixp2000_reg_write(IXP2000_T1_CLR, 1); + ixp2000_reg_wrb(IXP2000_T1_CLR, 1); while ((next_jiffy_time - *missing_jiffy_timer_csr) > ticks_per_jiffy) { timer_tick(regs); @@ -252,12 +251,12 @@ void __init ixp2000_init_time(unsigned long tick_rate) ixp2000_reg_write(IXP2000_T4_CLR, 0); ixp2000_reg_write(IXP2000_T4_CLD, -1); - ixp2000_reg_write(IXP2000_T4_CTL, (1 << 7)); + ixp2000_reg_wrb(IXP2000_T4_CTL, (1 << 7)); missing_jiffy_timer_csr = IXP2000_T4_CSR; } else { ixp2000_reg_write(IXP2000_T2_CLR, 0); ixp2000_reg_write(IXP2000_T2_CLD, -1); - ixp2000_reg_write(IXP2000_T2_CTL, (1 << 7)); + ixp2000_reg_wrb(IXP2000_T2_CTL, (1 << 7)); missing_jiffy_timer_csr = IXP2000_T2_CSR; } next_jiffy_time = 0xffffffff; @@ -279,7 +278,7 @@ static void update_gpio_int_csrs(void) ixp2000_reg_write(IXP2000_GPIO_FEDR, GPIO_IRQ_falling_edge); ixp2000_reg_write(IXP2000_GPIO_REDR, GPIO_IRQ_rising_edge); ixp2000_reg_write(IXP2000_GPIO_LSLR, GPIO_IRQ_level_low); - ixp2000_reg_write(IXP2000_GPIO_LSHR, GPIO_IRQ_level_high); + ixp2000_reg_wrb(IXP2000_GPIO_LSHR, GPIO_IRQ_level_high); } void gpio_line_config(int line, int direction) @@ -297,9 +296,9 @@ void gpio_line_config(int line, int direction) GPIO_IRQ_level_high &= ~(1 << line); update_gpio_int_csrs(); - ixp2000_reg_write(IXP2000_GPIO_PDSR, 1 << line); + ixp2000_reg_wrb(IXP2000_GPIO_PDSR, 1 << line); } else if (direction == GPIO_IN) { - ixp2000_reg_write(IXP2000_GPIO_PDCR, 1 << line); + ixp2000_reg_wrb(IXP2000_GPIO_PDCR, 1 << line); } local_irq_restore(flags); } @@ -365,12 +364,12 @@ static void ixp2000_GPIO_irq_mask_ack(unsigned int irq) ixp2000_reg_write(IXP2000_GPIO_EDSR, (1 << (irq - IRQ_IXP2000_GPIO0))); ixp2000_reg_write(IXP2000_GPIO_LDSR, (1 << (irq - IRQ_IXP2000_GPIO0))); - ixp2000_reg_write(IXP2000_GPIO_INST, (1 << (irq - IRQ_IXP2000_GPIO0))); + ixp2000_reg_wrb(IXP2000_GPIO_INST, (1 << (irq - IRQ_IXP2000_GPIO0))); } static void ixp2000_GPIO_irq_mask(unsigned int irq) { - 
ixp2000_reg_write(IXP2000_GPIO_INCR, (1 << (irq - IRQ_IXP2000_GPIO0))); + ixp2000_reg_wrb(IXP2000_GPIO_INCR, (1 << (irq - IRQ_IXP2000_GPIO0))); } static void ixp2000_GPIO_irq_unmask(unsigned int irq) @@ -389,9 +388,9 @@ static void ixp2000_pci_irq_mask(unsigned int irq) { unsigned long temp = *IXP2000_PCI_XSCALE_INT_ENABLE; if (irq == IRQ_IXP2000_PCIA) - ixp2000_reg_write(IXP2000_PCI_XSCALE_INT_ENABLE, (temp & ~(1 << 26))); + ixp2000_reg_wrb(IXP2000_PCI_XSCALE_INT_ENABLE, (temp & ~(1 << 26))); else if (irq == IRQ_IXP2000_PCIB) - ixp2000_reg_write(IXP2000_PCI_XSCALE_INT_ENABLE, (temp & ~(1 << 27))); + ixp2000_reg_wrb(IXP2000_PCI_XSCALE_INT_ENABLE, (temp & ~(1 << 27))); } static void ixp2000_pci_irq_unmask(unsigned int irq) @@ -411,7 +410,7 @@ static struct irqchip ixp2000_pci_irq_chip = { static void ixp2000_irq_mask(unsigned int irq) { - ixp2000_reg_write(IXP2000_IRQ_ENABLE_CLR, (1 << irq)); + ixp2000_reg_wrb(IXP2000_IRQ_ENABLE_CLR, (1 << irq)); } static void ixp2000_irq_unmask(unsigned int irq) @@ -443,7 +442,7 @@ void __init ixp2000_init_irq(void) ixp2000_reg_write(IXP2000_GPIO_INCR, -1); /* clear PCI interrupt sources */ - ixp2000_reg_write(IXP2000_PCI_XSCALE_INT_ENABLE, 0); + ixp2000_reg_wrb(IXP2000_PCI_XSCALE_INT_ENABLE, 0); /* * Certain bits in the IRQ status register of the diff --git a/arch/arm/mach-ixp2000/enp2611.c b/arch/arm/mach-ixp2000/enp2611.c index 643f5e1c3d9..7719c478aa8 100644 --- a/arch/arm/mach-ixp2000/enp2611.c +++ b/arch/arm/mach-ixp2000/enp2611.c @@ -64,6 +64,35 @@ static struct sys_timer enp2611_timer = { /************************************************************************* + * ENP-2611 I/O + *************************************************************************/ +static struct map_desc enp2611_io_desc[] __initdata = { + { + .virtual = ENP2611_CALEB_VIRT_BASE, + .physical = ENP2611_CALEB_PHYS_BASE, + .length = ENP2611_CALEB_SIZE, + .type = MT_IXP2000_DEVICE + }, { + .virtual = ENP2611_PM3386_0_VIRT_BASE, + .physical = ENP2611_PM3386_0_PHYS_BASE, + .length = ENP2611_PM3386_0_SIZE, + .type = MT_IXP2000_DEVICE + }, { + .virtual = ENP2611_PM3386_1_VIRT_BASE, + .physical = ENP2611_PM3386_1_PHYS_BASE, + .length = ENP2611_PM3386_1_SIZE, + .type = MT_IXP2000_DEVICE + } +}; + +void __init enp2611_map_io(void) +{ + ixp2000_map_io(); + iotable_init(enp2611_io_desc, ARRAY_SIZE(enp2611_io_desc)); +} + + +/************************************************************************* * ENP-2611 PCI *************************************************************************/ static int enp2611_pci_setup(int nr, struct pci_sys_data *sys) @@ -229,7 +258,7 @@ MACHINE_START(ENP2611, "Radisys ENP-2611 PCI network processor board") .phys_io = IXP2000_UART_PHYS_BASE, .io_pg_offst = ((IXP2000_UART_VIRT_BASE) >> 18) & 0xfffc, .boot_params = 0x00000100, - .map_io = ixp2000_map_io, + .map_io = enp2611_map_io, .init_irq = ixp2000_init_irq, .timer = &enp2611_timer, .init_machine = enp2611_init_machine, diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c index 05dfcb48c2b..d628da56b4b 100644 --- a/arch/arm/mach-ixp2000/ixdp2x00.c +++ b/arch/arm/mach-ixp2000/ixdp2x00.c @@ -81,7 +81,7 @@ static void ixdp2x00_irq_mask(unsigned int irq) dummy = *board_irq_mask; dummy |= IXP2000_BOARD_IRQ_MASK(irq); - ixp2000_reg_write(board_irq_mask, dummy); + ixp2000_reg_wrb(board_irq_mask, dummy); #ifdef CONFIG_ARCH_IXDP2400 if (machine_is_ixdp2400()) @@ -101,7 +101,7 @@ static void ixdp2x00_irq_unmask(unsigned int irq) dummy = *board_irq_mask; dummy &= ~IXP2000_BOARD_IRQ_MASK(irq); 
- ixp2000_reg_write(board_irq_mask, dummy); + ixp2000_reg_wrb(board_irq_mask, dummy); if (machine_is_ixdp2400()) ixp2000_release_slowport(&old_cfg); diff --git a/arch/arm/mach-ixp2000/ixdp2x01.c b/arch/arm/mach-ixp2000/ixdp2x01.c index b21249908ae..e6a882f35da 100644 --- a/arch/arm/mach-ixp2000/ixdp2x01.c +++ b/arch/arm/mach-ixp2000/ixdp2x01.c @@ -51,7 +51,7 @@ *************************************************************************/ static void ixdp2x01_irq_mask(unsigned int irq) { - ixp2000_reg_write(IXDP2X01_INT_MASK_SET_REG, + ixp2000_reg_wrb(IXDP2X01_INT_MASK_SET_REG, IXP2000_BOARD_IRQ_MASK(irq)); } @@ -114,7 +114,7 @@ void __init ixdp2x01_init_irq(void) /* Mask all interrupts from CPLD, disable simulation */ ixp2000_reg_write(IXDP2X01_INT_MASK_SET_REG, 0xffffffff); - ixp2000_reg_write(IXDP2X01_INT_SIM_REG, 0); + ixp2000_reg_wrb(IXDP2X01_INT_SIM_REG, 0); for (irq = NR_IXP2000_IRQS; irq < NR_IXDP2X01_IRQS; irq++) { if (irq & valid_irq_mask) { @@ -299,7 +299,6 @@ struct hw_pci ixdp2x01_pci __initdata = { int __init ixdp2x01_pci_init(void) { - pci_common_init(&ixdp2x01_pci); return 0; } @@ -316,7 +315,7 @@ static struct flash_platform_data ixdp2x01_flash_platform_data = { static unsigned long ixdp2x01_flash_bank_setup(unsigned long ofs) { - ixp2000_reg_write(IXDP2X01_CPLD_FLASH_REG, + ixp2000_reg_wrb(IXDP2X01_CPLD_FLASH_REG, ((ofs >> IXDP2X01_FLASH_WINDOW_BITS) | IXDP2X01_CPLD_FLASH_INTERN)); return (ofs & IXDP2X01_FLASH_WINDOW_MASK); } @@ -363,7 +362,7 @@ static struct platform_device *ixdp2x01_devices[] __initdata = { static void __init ixdp2x01_init_machine(void) { - ixp2000_reg_write(IXDP2X01_CPLD_FLASH_REG, + ixp2000_reg_wrb(IXDP2X01_CPLD_FLASH_REG, (IXDP2X01_CPLD_FLASH_BANK_MASK | IXDP2X01_CPLD_FLASH_INTERN)); ixdp2x01_flash_data.nr_banks = diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 522205acb31..d4bf1e1c003 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -148,7 +148,7 @@ int ixp2000_pci_abort_handler(unsigned long addr, unsigned int fsr, struct pt_re local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { - ixp2000_reg_write(IXP2000_PCI_CONTROL, temp); + ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); } temp = *(IXP2000_PCI_CMDSTAT); @@ -178,8 +178,8 @@ clear_master_aborts(void) local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); - if (temp & ((1 << 8) | (1 << 5))) { - ixp2000_reg_write(IXP2000_PCI_CONTROL, temp); + if (temp & ((1 << 8) | (1 << 5))) { + ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); } temp = *(IXP2000_PCI_CMDSTAT); diff --git a/arch/arm/mach-ixp2000/uengine.c b/arch/arm/mach-ixp2000/uengine.c new file mode 100644 index 00000000000..43e234349d4 --- /dev/null +++ b/arch/arm/mach-ixp2000/uengine.c @@ -0,0 +1,474 @@ +/* + * Generic library functions for the microengines found on the Intel + * IXP2000 series of network processors. + * + * Copyright (C) 2004, 2005 Lennert Buytenhek <buytenh@wantstofly.org> + * Dedicated to Marija Kulikova. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/string.h> +#include <asm/hardware.h> +#include <asm/arch/ixp2000-regs.h> +#include <asm/arch/uengine.h> +#include <asm/io.h> + +#define USTORE_ADDRESS 0x000 +#define USTORE_DATA_LOWER 0x004 +#define USTORE_DATA_UPPER 0x008 +#define CTX_ENABLES 0x018 +#define CC_ENABLE 0x01c +#define CSR_CTX_POINTER 0x020 +#define INDIRECT_CTX_STS 0x040 +#define ACTIVE_CTX_STS 0x044 +#define INDIRECT_CTX_SIG_EVENTS 0x048 +#define INDIRECT_CTX_WAKEUP_EVENTS 0x050 +#define NN_PUT 0x080 +#define NN_GET 0x084 +#define TIMESTAMP_LOW 0x0c0 +#define TIMESTAMP_HIGH 0x0c4 +#define T_INDEX_BYTE_INDEX 0x0f4 +#define LOCAL_CSR_STATUS 0x180 + +u32 ixp2000_uengine_mask; + +static void *ixp2000_uengine_csr_area(int uengine) +{ + return ((void *)IXP2000_UENGINE_CSR_VIRT_BASE) + (uengine << 10); +} + +/* + * LOCAL_CSR_STATUS=1 after a read or write to a microengine's CSR + * space means that the microengine we tried to access was also trying + * to access its own CSR space on the same clock cycle as we did. When + * this happens, we lose the arbitration process by default, and the + * read or write we tried to do was not actually performed, so we try + * again until it succeeds. + */ +u32 ixp2000_uengine_csr_read(int uengine, int offset) +{ + void *uebase; + u32 *local_csr_status; + u32 *reg; + u32 value; + + uebase = ixp2000_uengine_csr_area(uengine); + + local_csr_status = (u32 *)(uebase + LOCAL_CSR_STATUS); + reg = (u32 *)(uebase + offset); + do { + value = ixp2000_reg_read(reg); + } while (ixp2000_reg_read(local_csr_status) & 1); + + return value; +} +EXPORT_SYMBOL(ixp2000_uengine_csr_read); + +void ixp2000_uengine_csr_write(int uengine, int offset, u32 value) +{ + void *uebase; + u32 *local_csr_status; + u32 *reg; + + uebase = ixp2000_uengine_csr_area(uengine); + + local_csr_status = (u32 *)(uebase + LOCAL_CSR_STATUS); + reg = (u32 *)(uebase + offset); + do { + ixp2000_reg_write(reg, value); + } while (ixp2000_reg_read(local_csr_status) & 1); +} +EXPORT_SYMBOL(ixp2000_uengine_csr_write); + +void ixp2000_uengine_reset(u32 uengine_mask) +{ + ixp2000_reg_write(IXP2000_RESET1, uengine_mask & ixp2000_uengine_mask); + ixp2000_reg_write(IXP2000_RESET1, 0); +} +EXPORT_SYMBOL(ixp2000_uengine_reset); + +void ixp2000_uengine_set_mode(int uengine, u32 mode) +{ + /* + * CTL_STR_PAR_EN: unconditionally enable parity checking on + * control store. + */ + mode |= 0x10000000; + ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mode); + + /* + * Enable updating of condition codes. + */ + ixp2000_uengine_csr_write(uengine, CC_ENABLE, 0x00002000); + + /* + * Initialise other per-microengine registers. + */ + ixp2000_uengine_csr_write(uengine, NN_PUT, 0x00); + ixp2000_uengine_csr_write(uengine, NN_GET, 0x00); + ixp2000_uengine_csr_write(uengine, T_INDEX_BYTE_INDEX, 0); +} +EXPORT_SYMBOL(ixp2000_uengine_set_mode); + +static int make_even_parity(u32 x) +{ + return hweight32(x) & 1; +} + +static void ustore_write(int uengine, u64 insn) +{ + /* + * Generate even parity for top and bottom 20 bits. + */ + insn |= (u64)make_even_parity((insn >> 20) & 0x000fffff) << 41; + insn |= (u64)make_even_parity(insn & 0x000fffff) << 40; + + /* + * Write to microstore. The second write auto-increments + * the USTORE_ADDRESS index register. 
+ */ + ixp2000_uengine_csr_write(uengine, USTORE_DATA_LOWER, (u32)insn); + ixp2000_uengine_csr_write(uengine, USTORE_DATA_UPPER, (u32)(insn >> 32)); +} + +void ixp2000_uengine_load_microcode(int uengine, u8 *ucode, int insns) +{ + int i; + + /* + * Start writing to microstore at address 0. + */ + ixp2000_uengine_csr_write(uengine, USTORE_ADDRESS, 0x80000000); + for (i = 0; i < insns; i++) { + u64 insn; + + insn = (((u64)ucode[0]) << 32) | + (((u64)ucode[1]) << 24) | + (((u64)ucode[2]) << 16) | + (((u64)ucode[3]) << 8) | + ((u64)ucode[4]); + ucode += 5; + + ustore_write(uengine, insn); + } + + /* + * Pad with a few NOPs at the end (to avoid the microengine + * aborting as it prefetches beyond the last instruction), unless + * we run off the end of the instruction store first, at which + * point the address register will wrap back to zero. + */ + for (i = 0; i < 4; i++) { + u32 addr; + + addr = ixp2000_uengine_csr_read(uengine, USTORE_ADDRESS); + if (addr == 0x80000000) + break; + ustore_write(uengine, 0xf0000c0300ULL); + } + + /* + * End programming. + */ + ixp2000_uengine_csr_write(uengine, USTORE_ADDRESS, 0x00000000); +} +EXPORT_SYMBOL(ixp2000_uengine_load_microcode); + +void ixp2000_uengine_init_context(int uengine, int context, int pc) +{ + /* + * Select the right context for indirect access. + */ + ixp2000_uengine_csr_write(uengine, CSR_CTX_POINTER, context); + + /* + * Initialise signal masks to immediately go to Ready state. + */ + ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_SIG_EVENTS, 1); + ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_WAKEUP_EVENTS, 1); + + /* + * Set program counter. + */ + ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_STS, pc); +} +EXPORT_SYMBOL(ixp2000_uengine_init_context); + +void ixp2000_uengine_start_contexts(int uengine, u8 ctx_mask) +{ + u32 mask; + + /* + * Enable the specified context to go to Executing state. + */ + mask = ixp2000_uengine_csr_read(uengine, CTX_ENABLES); + mask |= ctx_mask << 8; + ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mask); +} +EXPORT_SYMBOL(ixp2000_uengine_start_contexts); + +void ixp2000_uengine_stop_contexts(int uengine, u8 ctx_mask) +{ + u32 mask; + + /* + * Disable the Ready->Executing transition. Note that this + * does not stop the context until it voluntarily yields. 
+ */ + mask = ixp2000_uengine_csr_read(uengine, CTX_ENABLES); + mask &= ~(ctx_mask << 8); + ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mask); +} +EXPORT_SYMBOL(ixp2000_uengine_stop_contexts); + +static int check_ixp_type(struct ixp2000_uengine_code *c) +{ + u32 product_id; + u32 rev; + + product_id = ixp2000_reg_read(IXP2000_PRODUCT_ID); + if (((product_id >> 16) & 0x1f) != 0) + return 0; + + switch ((product_id >> 8) & 0xff) { + case 0: /* IXP2800 */ + if (!(c->cpu_model_bitmask & 4)) + return 0; + break; + + case 1: /* IXP2850 */ + if (!(c->cpu_model_bitmask & 8)) + return 0; + break; + + case 2: /* IXP2400 */ + if (!(c->cpu_model_bitmask & 2)) + return 0; + break; + + default: + return 0; + } + + rev = product_id & 0xff; + if (rev < c->cpu_min_revision || rev > c->cpu_max_revision) + return 0; + + return 1; +} + +static void generate_ucode(u8 *ucode, u32 *gpr_a, u32 *gpr_b) +{ + int offset; + int i; + + offset = 0; + + for (i = 0; i < 128; i++) { + u8 b3; + u8 b2; + u8 b1; + u8 b0; + + b3 = (gpr_a[i] >> 24) & 0xff; + b2 = (gpr_a[i] >> 16) & 0xff; + b1 = (gpr_a[i] >> 8) & 0xff; + b0 = gpr_a[i] & 0xff; + + // immed[@ai, (b1 << 8) | b0] + // 11110000 0000VVVV VVVV11VV VVVVVV00 1IIIIIII + ucode[offset++] = 0xf0; + ucode[offset++] = (b1 >> 4); + ucode[offset++] = (b1 << 4) | 0x0c | (b0 >> 6); + ucode[offset++] = (b0 << 2); + ucode[offset++] = 0x80 | i; + + // immed_w1[@ai, (b3 << 8) | b2] + // 11110100 0100VVVV VVVV11VV VVVVVV00 1IIIIIII + ucode[offset++] = 0xf4; + ucode[offset++] = 0x40 | (b3 >> 4); + ucode[offset++] = (b3 << 4) | 0x0c | (b2 >> 6); + ucode[offset++] = (b2 << 2); + ucode[offset++] = 0x80 | i; + } + + for (i = 0; i < 128; i++) { + u8 b3; + u8 b2; + u8 b1; + u8 b0; + + b3 = (gpr_b[i] >> 24) & 0xff; + b2 = (gpr_b[i] >> 16) & 0xff; + b1 = (gpr_b[i] >> 8) & 0xff; + b0 = gpr_b[i] & 0xff; + + // immed[@bi, (b1 << 8) | b0] + // 11110000 0000VVVV VVVV001I IIIIII11 VVVVVVVV + ucode[offset++] = 0xf0; + ucode[offset++] = (b1 >> 4); + ucode[offset++] = (b1 << 4) | 0x02 | (i >> 6); + ucode[offset++] = (i << 2) | 0x03; + ucode[offset++] = b0; + + // immed_w1[@bi, (b3 << 8) | b2] + // 11110100 0100VVVV VVVV001I IIIIII11 VVVVVVVV + ucode[offset++] = 0xf4; + ucode[offset++] = 0x40 | (b3 >> 4); + ucode[offset++] = (b3 << 4) | 0x02 | (i >> 6); + ucode[offset++] = (i << 2) | 0x03; + ucode[offset++] = b2; + } + + // ctx_arb[kill] + ucode[offset++] = 0xe0; + ucode[offset++] = 0x00; + ucode[offset++] = 0x01; + ucode[offset++] = 0x00; + ucode[offset++] = 0x00; +} + +static int set_initial_registers(int uengine, struct ixp2000_uengine_code *c) +{ + int per_ctx_regs; + u32 *gpr_a; + u32 *gpr_b; + u8 *ucode; + int i; + + gpr_a = kmalloc(128 * sizeof(u32), GFP_KERNEL); + gpr_b = kmalloc(128 * sizeof(u32), GFP_KERNEL); + ucode = kmalloc(513 * 5, GFP_KERNEL); + if (gpr_a == NULL || gpr_b == NULL || ucode == NULL) { + kfree(ucode); + kfree(gpr_b); + kfree(gpr_a); + return 1; + } + + per_ctx_regs = 16; + if (c->uengine_parameters & IXP2000_UENGINE_4_CONTEXTS) + per_ctx_regs = 32; + + memset(gpr_a, 0, sizeof(gpr_a)); + memset(gpr_b, 0, sizeof(gpr_b)); + for (i = 0; i < 256; i++) { + struct ixp2000_reg_value *r = c->initial_reg_values + i; + u32 *bank; + int inc; + int j; + + if (r->reg == -1) + break; + + bank = (r->reg & 0x400) ? gpr_b : gpr_a; + inc = (r->reg & 0x80) ? 
128 : per_ctx_regs; + + j = r->reg & 0x7f; + while (j < 128) { + bank[j] = r->value; + j += inc; + } + } + + generate_ucode(ucode, gpr_a, gpr_b); + ixp2000_uengine_load_microcode(uengine, ucode, 513); + ixp2000_uengine_init_context(uengine, 0, 0); + ixp2000_uengine_start_contexts(uengine, 0x01); + for (i = 0; i < 100; i++) { + u32 status; + + status = ixp2000_uengine_csr_read(uengine, ACTIVE_CTX_STS); + if (!(status & 0x80000000)) + break; + } + ixp2000_uengine_stop_contexts(uengine, 0x01); + + kfree(ucode); + kfree(gpr_b); + kfree(gpr_a); + + return !!(i == 100); +} + +int ixp2000_uengine_load(int uengine, struct ixp2000_uengine_code *c) +{ + int ctx; + + if (!check_ixp_type(c)) + return 1; + + if (!(ixp2000_uengine_mask & (1 << uengine))) + return 1; + + ixp2000_uengine_reset(1 << uengine); + ixp2000_uengine_set_mode(uengine, c->uengine_parameters); + if (set_initial_registers(uengine, c)) + return 1; + ixp2000_uengine_load_microcode(uengine, c->insns, c->num_insns); + + for (ctx = 0; ctx < 8; ctx++) + ixp2000_uengine_init_context(uengine, ctx, 0); + + return 0; +} +EXPORT_SYMBOL(ixp2000_uengine_load); + + +static int __init ixp2000_uengine_init(void) +{ + int uengine; + u32 value; + + /* + * Determine number of microengines present. + */ + switch ((ixp2000_reg_read(IXP2000_PRODUCT_ID) >> 8) & 0x1fff) { + case 0: /* IXP2800 */ + case 1: /* IXP2850 */ + ixp2000_uengine_mask = 0x00ff00ff; + break; + + case 2: /* IXP2400 */ + ixp2000_uengine_mask = 0x000f000f; + break; + + default: + printk(KERN_INFO "Detected unknown IXP2000 model (%.8x)\n", + (unsigned int)ixp2000_reg_read(IXP2000_PRODUCT_ID)); + ixp2000_uengine_mask = 0x00000000; + break; + } + + /* + * Reset microengines. + */ + ixp2000_reg_write(IXP2000_RESET1, ixp2000_uengine_mask); + ixp2000_reg_write(IXP2000_RESET1, 0); + + /* + * Synchronise timestamp counters across all microengines. + */ + value = ixp2000_reg_read(IXP2000_MISC_CONTROL); + ixp2000_reg_write(IXP2000_MISC_CONTROL, value & ~0x80); + for (uengine = 0; uengine < 32; uengine++) { + if (ixp2000_uengine_mask & (1 << uengine)) { + ixp2000_uengine_csr_write(uengine, TIMESTAMP_LOW, 0); + ixp2000_uengine_csr_write(uengine, TIMESTAMP_HIGH, 0); + } + } + ixp2000_reg_write(IXP2000_MISC_CONTROL, value | 0x80); + + return 0; +} + +subsys_initcall(ixp2000_uengine_init); diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 1f6857d7747..9c6e77faec5 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -175,7 +175,7 @@ static struct platform_device sa1111_device = { static struct resource smc91x_resources[] = { [0] = { .name = "smc91x-regs", - .start = 0x0c000000, + .start = 0x0c000c00, .end = 0x0c0fffff, .flags = IORESOURCE_MEM, }, @@ -224,18 +224,75 @@ static struct pxafb_mach_info sharp_lm8v31 __initdata = { .lccr3 = LCCR3_PCP | LCCR3_Acb(255), }; -static int lubbock_mci_init(struct device *dev, irqreturn_t (*lubbock_detect_int)(int, void *, struct pt_regs *), void *data) +#define MMC_POLL_RATE msecs_to_jiffies(1000) + +static void lubbock_mmc_poll(unsigned long); +static irqreturn_t (*mmc_detect_int)(int, void *, struct pt_regs *); + +static struct timer_list mmc_timer = { + .function = lubbock_mmc_poll, +}; + +static void lubbock_mmc_poll(unsigned long data) +{ + unsigned long flags; + + /* clear any previous irq state, then ... 
*/ + local_irq_save(flags); + LUB_IRQ_SET_CLR &= ~(1 << 0); + local_irq_restore(flags); + + /* poll until mmc/sd card is removed */ + if (LUB_IRQ_SET_CLR & (1 << 0)) + mod_timer(&mmc_timer, jiffies + MMC_POLL_RATE); + else { + (void) mmc_detect_int(LUBBOCK_SD_IRQ, (void *)data, NULL); + enable_irq(LUBBOCK_SD_IRQ); + } +} + +static irqreturn_t lubbock_detect_int(int irq, void *data, struct pt_regs *regs) +{ + /* IRQ is level triggered; disable, and poll for removal */ + disable_irq(irq); + mod_timer(&mmc_timer, jiffies + MMC_POLL_RATE); + + return mmc_detect_int(irq, data, regs); +} + +static int lubbock_mci_init(struct device *dev, + irqreturn_t (*detect_int)(int, void *, struct pt_regs *), + void *data) { /* setup GPIO for PXA25x MMC controller */ pxa_gpio_mode(GPIO6_MMCCLK_MD); pxa_gpio_mode(GPIO8_MMCCS0_MD); - return 0; + /* detect card insert/eject */ + mmc_detect_int = detect_int; + init_timer(&mmc_timer); + mmc_timer.data = (unsigned long) data; + return request_irq(LUBBOCK_SD_IRQ, lubbock_detect_int, + SA_SAMPLE_RANDOM, "lubbock-sd-detect", data); +} + +static int lubbock_mci_get_ro(struct device *dev) +{ + return (LUB_MISC_RD & (1 << 2)) != 0; +} + +static void lubbock_mci_exit(struct device *dev, void *data) +{ + free_irq(LUBBOCK_SD_IRQ, data); + del_timer_sync(&mmc_timer); } static struct pxamci_platform_data lubbock_mci_platform_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .detect_delay = 1, .init = lubbock_mci_init, + .get_ro = lubbock_mci_get_ro, + .exit = lubbock_mci_exit, }; static void lubbock_irda_transceiver_mode(struct device *dev, int mode) diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig new file mode 100644 index 00000000000..4b63dc9eabf --- /dev/null +++ b/arch/arm/mach-realview/Kconfig @@ -0,0 +1,11 @@ +menu "RealView platform type" + depends on ARCH_REALVIEW + +config MACH_REALVIEW_EB + bool "Support RealView/EB platform" + default n + select ARM_GIC + help + Include support for the ARM(R) RealView Emulation Baseboard platform. + +endmenu diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile new file mode 100644 index 00000000000..8d37ea1605f --- /dev/null +++ b/arch/arm/mach-realview/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the linux kernel. +# + +obj-y := core.o clock.o +obj-$(CONFIG_MACH_REALVIEW_EB) += realview_eb.o diff --git a/arch/arm/mach-realview/Makefile.boot b/arch/arm/mach-realview/Makefile.boot new file mode 100644 index 00000000000..c7e75acfe6c --- /dev/null +++ b/arch/arm/mach-realview/Makefile.boot @@ -0,0 +1,4 @@ + zreladdr-y := 0x00008000 +params_phys-y := 0x00000100 +initrd_phys-y := 0x00800000 + diff --git a/arch/arm/mach-realview/clock.c b/arch/arm/mach-realview/clock.c new file mode 100644 index 00000000000..002635c97bb --- /dev/null +++ b/arch/arm/mach-realview/clock.c @@ -0,0 +1,145 @@ +/* + * linux/arch/arm/mach-realview/clock.c + * + * Copyright (C) 2004 ARM Limited. + * Written by Deep Blue Solutions Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/err.h> + +#include <asm/semaphore.h> +#include <asm/hardware/clock.h> +#include <asm/hardware/icst307.h> + +#include "clock.h" + +static LIST_HEAD(clocks); +static DECLARE_MUTEX(clocks_sem); + +struct clk *clk_get(struct device *dev, const char *id) +{ + struct clk *p, *clk = ERR_PTR(-ENOENT); + + down(&clocks_sem); + list_for_each_entry(p, &clocks, node) { + if (strcmp(id, p->name) == 0 && try_module_get(p->owner)) { + clk = p; + break; + } + } + up(&clocks_sem); + + return clk; +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + module_put(clk->owner); +} +EXPORT_SYMBOL(clk_put); + +int clk_enable(struct clk *clk) +{ + return 0; +} +EXPORT_SYMBOL(clk_enable); + +void clk_disable(struct clk *clk) +{ +} +EXPORT_SYMBOL(clk_disable); + +int clk_use(struct clk *clk) +{ + return 0; +} +EXPORT_SYMBOL(clk_use); + +void clk_unuse(struct clk *clk) +{ +} +EXPORT_SYMBOL(clk_unuse); + +unsigned long clk_get_rate(struct clk *clk) +{ + return clk->rate; +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + return rate; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + int ret = -EIO; + + if (clk->setvco) { + struct icst307_vco vco; + + vco = icst307_khz_to_vco(clk->params, rate / 1000); + clk->rate = icst307_khz(clk->params, vco) * 1000; + + printk("Clock %s: setting VCO reg params: S=%d R=%d V=%d\n", + clk->name, vco.s, vco.r, vco.v); + + clk->setvco(clk, vco); + ret = 0; + } + return ret; +} +EXPORT_SYMBOL(clk_set_rate); + +/* + * These are fixed clocks. + */ +static struct clk kmi_clk = { + .name = "KMIREFCLK", + .rate = 24000000, +}; + +static struct clk uart_clk = { + .name = "UARTCLK", + .rate = 24000000, +}; + +static struct clk mmci_clk = { + .name = "MCLK", + .rate = 33000000, +}; + +int clk_register(struct clk *clk) +{ + down(&clocks_sem); + list_add(&clk->node, &clocks); + up(&clocks_sem); + return 0; +} +EXPORT_SYMBOL(clk_register); + +void clk_unregister(struct clk *clk) +{ + down(&clocks_sem); + list_del(&clk->node); + up(&clocks_sem); +} +EXPORT_SYMBOL(clk_unregister); + +static int __init clk_init(void) +{ + clk_register(&kmi_clk); + clk_register(&uart_clk); + clk_register(&mmci_clk); + return 0; +} +arch_initcall(clk_init); diff --git a/arch/arm/mach-realview/clock.h b/arch/arm/mach-realview/clock.h new file mode 100644 index 00000000000..dadba695e18 --- /dev/null +++ b/arch/arm/mach-realview/clock.h @@ -0,0 +1,25 @@ +/* + * linux/arch/arm/mach-realview/clock.h + * + * Copyright (C) 2004 ARM Limited. + * Written by Deep Blue Solutions Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +struct module; +struct icst307_params; + +struct clk { + struct list_head node; + unsigned long rate; + struct module *owner; + const char *name; + const struct icst307_params *params; + void *data; + void (*setvco)(struct clk *, struct icst307_vco vco); +}; + +int clk_register(struct clk *clk); +void clk_unregister(struct clk *clk); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c new file mode 100644 index 00000000000..482eb512ebe --- /dev/null +++ b/arch/arm/mach-realview/core.c @@ -0,0 +1,605 @@ +/* + * linux/arch/arm/mach-realview/core.c + * + * Copyright (C) 1999 - 2003 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/sysdev.h> +#include <linux/interrupt.h> + +#include <asm/system.h> +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/leds.h> +#include <asm/mach-types.h> +#include <asm/hardware/amba.h> +#include <asm/hardware/amba_clcd.h> +#include <asm/hardware/arm_timer.h> +#include <asm/hardware/icst307.h> + +#include <asm/mach/arch.h> +#include <asm/mach/flash.h> +#include <asm/mach/irq.h> +#include <asm/mach/time.h> +#include <asm/mach/map.h> +#include <asm/mach/mmc.h> + +#include <asm/hardware/gic.h> + +#include "core.h" +#include "clock.h" + +#define REALVIEW_REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET) + +/* + * This is the RealView sched_clock implementation. This has + * a resolution of 41.7ns, and a maximum value of about 179s. 
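+ * The 24MHz reference counter advances once every 1/24MHz, i.e. ~41.7ns;
+ * sched_clock() converts the count to nanoseconds by multiplying by 125
+ * and dividing by 3 (125/3 == 1000/24), so a 32-bit count wraps after
+ * roughly 2^32 / 24000000 ~= 179 seconds.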
+ */ +unsigned long long sched_clock(void) +{ + unsigned long long v; + + v = (unsigned long long)readl(REALVIEW_REFCOUNTER) * 125; + do_div(v, 3); + + return v; +} + + +#define REALVIEW_FLASHCTRL (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_FLASH_OFFSET) + +static int realview_flash_init(void) +{ + u32 val; + + val = __raw_readl(REALVIEW_FLASHCTRL); + val &= ~REALVIEW_FLASHPROG_FLVPPEN; + __raw_writel(val, REALVIEW_FLASHCTRL); + + return 0; +} + +static void realview_flash_exit(void) +{ + u32 val; + + val = __raw_readl(REALVIEW_FLASHCTRL); + val &= ~REALVIEW_FLASHPROG_FLVPPEN; + __raw_writel(val, REALVIEW_FLASHCTRL); +} + +static void realview_flash_set_vpp(int on) +{ + u32 val; + + val = __raw_readl(REALVIEW_FLASHCTRL); + if (on) + val |= REALVIEW_FLASHPROG_FLVPPEN; + else + val &= ~REALVIEW_FLASHPROG_FLVPPEN; + __raw_writel(val, REALVIEW_FLASHCTRL); +} + +static struct flash_platform_data realview_flash_data = { + .map_name = "cfi_probe", + .width = 4, + .init = realview_flash_init, + .exit = realview_flash_exit, + .set_vpp = realview_flash_set_vpp, +}; + +static struct resource realview_flash_resource = { + .start = REALVIEW_FLASH_BASE, + .end = REALVIEW_FLASH_BASE + REALVIEW_FLASH_SIZE, + .flags = IORESOURCE_MEM, +}; + +struct platform_device realview_flash_device = { + .name = "armflash", + .id = 0, + .dev = { + .platform_data = &realview_flash_data, + }, + .num_resources = 1, + .resource = &realview_flash_resource, +}; + +static struct resource realview_smc91x_resources[] = { + [0] = { + .start = REALVIEW_ETH_BASE, + .end = REALVIEW_ETH_BASE + SZ_64K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_ETH, + .end = IRQ_ETH, + .flags = IORESOURCE_IRQ, + }, +}; + +struct platform_device realview_smc91x_device = { + .name = "smc91x", + .id = 0, + .num_resources = ARRAY_SIZE(realview_smc91x_resources), + .resource = realview_smc91x_resources, +}; + +#define REALVIEW_SYSMCI (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_MCI_OFFSET) + +static unsigned int realview_mmc_status(struct device *dev) +{ + struct amba_device *adev = container_of(dev, struct amba_device, dev); + u32 mask; + + if (adev->res.start == REALVIEW_MMCI0_BASE) + mask = 1; + else + mask = 2; + + return readl(REALVIEW_SYSMCI) & mask; +} + +struct mmc_platform_data realview_mmc0_plat_data = { + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .status = realview_mmc_status, +}; + +struct mmc_platform_data realview_mmc1_plat_data = { + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .status = realview_mmc_status, +}; + +/* + * Clock handling + */ +static const struct icst307_params realview_oscvco_params = { + .ref = 24000, + .vco_max = 200000, + .vd_min = 4 + 8, + .vd_max = 511 + 8, + .rd_min = 1 + 2, + .rd_max = 127 + 2, +}; + +static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco) +{ + void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET; + void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC1_OFFSET; + u32 val; + + val = readl(sys_osc) & ~0x7ffff; + val |= vco.v | (vco.r << 9) | (vco.s << 16); + + writel(0xa05f, sys_lock); + writel(val, sys_osc); + writel(0, sys_lock); +} + +struct clk realview_clcd_clk = { + .name = "CLCDCLK", + .params = &realview_oscvco_params, + .setvco = realview_oscvco_set, +}; + +/* + * CLCD support. 
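+ * The SYS_CLCD register in the system controller both selects the pixel
+ * format mux (SYS_CLCD_MODE_*) and reports which panel is fitted; the ID
+ * field is decoded in realview_clcd_panel() to pick a matching timing set.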
+ */ +#define SYS_CLCD_MODE_MASK (3 << 0) +#define SYS_CLCD_MODE_888 (0 << 0) +#define SYS_CLCD_MODE_5551 (1 << 0) +#define SYS_CLCD_MODE_565_RLSB (2 << 0) +#define SYS_CLCD_MODE_565_BLSB (3 << 0) +#define SYS_CLCD_NLCDIOON (1 << 2) +#define SYS_CLCD_VDDPOSSWITCH (1 << 3) +#define SYS_CLCD_PWR3V5SWITCH (1 << 4) +#define SYS_CLCD_ID_MASK (0x1f << 8) +#define SYS_CLCD_ID_SANYO_3_8 (0x00 << 8) +#define SYS_CLCD_ID_UNKNOWN_8_4 (0x01 << 8) +#define SYS_CLCD_ID_EPSON_2_2 (0x02 << 8) +#define SYS_CLCD_ID_SANYO_2_5 (0x07 << 8) +#define SYS_CLCD_ID_VGA (0x1f << 8) + +static struct clcd_panel vga = { + .mode = { + .name = "VGA", + .refresh = 60, + .xres = 640, + .yres = 480, + .pixclock = 39721, + .left_margin = 40, + .right_margin = 24, + .upper_margin = 32, + .lower_margin = 11, + .hsync_len = 96, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .bpp = 16, +}; + +static struct clcd_panel sanyo_3_8_in = { + .mode = { + .name = "Sanyo QVGA", + .refresh = 116, + .xres = 320, + .yres = 240, + .pixclock = 100000, + .left_margin = 6, + .right_margin = 6, + .upper_margin = 5, + .lower_margin = 5, + .hsync_len = 6, + .vsync_len = 6, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD, + .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .bpp = 16, +}; + +static struct clcd_panel sanyo_2_5_in = { + .mode = { + .name = "Sanyo QVGA Portrait", + .refresh = 116, + .xres = 240, + .yres = 320, + .pixclock = 100000, + .left_margin = 20, + .right_margin = 10, + .upper_margin = 2, + .lower_margin = 2, + .hsync_len = 10, + .vsync_len = 2, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .bpp = 16, +}; + +static struct clcd_panel epson_2_2_in = { + .mode = { + .name = "Epson QCIF", + .refresh = 390, + .xres = 176, + .yres = 220, + .pixclock = 62500, + .left_margin = 3, + .right_margin = 2, + .upper_margin = 1, + .lower_margin = 0, + .hsync_len = 3, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .bpp = 16, +}; + +/* + * Detect which LCD panel is connected, and return the appropriate + * clcd_panel structure. Note: we do not have any information on + * the required timings for the 8.4in panel, so we presently assume + * VGA timings. + */ +static struct clcd_panel *realview_clcd_panel(void) +{ + void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET; + struct clcd_panel *panel = &vga; + u32 val; + + val = readl(sys_clcd) & SYS_CLCD_ID_MASK; + if (val == SYS_CLCD_ID_SANYO_3_8) + panel = &sanyo_3_8_in; + else if (val == SYS_CLCD_ID_SANYO_2_5) + panel = &sanyo_2_5_in; + else if (val == SYS_CLCD_ID_EPSON_2_2) + panel = &epson_2_2_in; + else if (val == SYS_CLCD_ID_VGA) + panel = &vga; + else { + printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n", + val); + panel = &vga; + } + + return panel; +} + +/* + * Disable all display connectors on the interface module. 
+ */ +static void realview_clcd_disable(struct clcd_fb *fb) +{ + void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET; + u32 val; + + val = readl(sys_clcd); + val &= ~SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH; + writel(val, sys_clcd); +} + +/* + * Enable the relevant connector on the interface module. + */ +static void realview_clcd_enable(struct clcd_fb *fb) +{ + void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET; + u32 val; + + val = readl(sys_clcd); + val &= ~SYS_CLCD_MODE_MASK; + + switch (fb->fb.var.green.length) { + case 5: + val |= SYS_CLCD_MODE_5551; + break; + case 6: + val |= SYS_CLCD_MODE_565_RLSB; + break; + case 8: + val |= SYS_CLCD_MODE_888; + break; + } + + /* + * Set the MUX + */ + writel(val, sys_clcd); + + /* + * And now enable the PSUs + */ + val |= SYS_CLCD_NLCDIOON | SYS_CLCD_PWR3V5SWITCH; + writel(val, sys_clcd); +} + +static unsigned long framesize = SZ_1M; + +static int realview_clcd_setup(struct clcd_fb *fb) +{ + dma_addr_t dma; + + fb->panel = realview_clcd_panel(); + + fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + printk(KERN_ERR "CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->fb.fix.smem_len = framesize; + + return 0; +} + +static int realview_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return dma_mmap_writecombine(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +static void realview_clcd_remove(struct clcd_fb *fb) +{ + dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} + +struct clcd_board clcd_plat_data = { + .name = "RealView", + .check = clcdfb_check, + .decode = clcdfb_decode, + .disable = realview_clcd_disable, + .enable = realview_clcd_enable, + .setup = realview_clcd_setup, + .mmap = realview_clcd_mmap, + .remove = realview_clcd_remove, +}; + +#ifdef CONFIG_LEDS +#define VA_LEDS_BASE (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LED_OFFSET) + +void realview_leds_event(led_event_t ledevt) +{ + unsigned long flags; + u32 val; + + local_irq_save(flags); + val = readl(VA_LEDS_BASE); + + switch (ledevt) { + case led_idle_start: + val = val & ~REALVIEW_SYS_LED0; + break; + + case led_idle_end: + val = val | REALVIEW_SYS_LED0; + break; + + case led_timer: + val = val ^ REALVIEW_SYS_LED1; + break; + + case led_halted: + val = 0; + break; + + default: + break; + } + + writel(val, VA_LEDS_BASE); + local_irq_restore(flags); +} +#endif /* CONFIG_LEDS */ + +/* + * Where is the timer (VA)? + */ +#define TIMER0_VA_BASE __io_address(REALVIEW_TIMER0_1_BASE) +#define TIMER1_VA_BASE (__io_address(REALVIEW_TIMER0_1_BASE) + 0x20) +#define TIMER2_VA_BASE __io_address(REALVIEW_TIMER2_3_BASE) +#define TIMER3_VA_BASE (__io_address(REALVIEW_TIMER2_3_BASE) + 0x20) + +/* + * How long is the timer interval? 
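+ * TIMER_INTERVAL is the number of timer ticks in one 10ms period. The
+ * preprocessor chain below selects a prescaler (divide by 256, 16 or 1)
+ * so that the reload value fits the 16-bit count read back in
+ * realview_gettimeoffset(), and TICKS2USECS() undoes the same scaling
+ * when converting a tick count back to microseconds.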
+ */ +#define TIMER_INTERVAL (TICKS_PER_uSEC * mSEC_10) +#if TIMER_INTERVAL >= 0x100000 +#define TIMER_RELOAD (TIMER_INTERVAL >> 8) +#define TIMER_DIVISOR (TIMER_CTRL_DIV256) +#define TICKS2USECS(x) (256 * (x) / TICKS_PER_uSEC) +#elif TIMER_INTERVAL >= 0x10000 +#define TIMER_RELOAD (TIMER_INTERVAL >> 4) /* Divide by 16 */ +#define TIMER_DIVISOR (TIMER_CTRL_DIV16) +#define TICKS2USECS(x) (16 * (x) / TICKS_PER_uSEC) +#else +#define TIMER_RELOAD (TIMER_INTERVAL) +#define TIMER_DIVISOR (TIMER_CTRL_DIV1) +#define TICKS2USECS(x) ((x) / TICKS_PER_uSEC) +#endif + +/* + * Returns number of ms since last clock interrupt. Note that interrupts + * will have been disabled by do_gettimeoffset() + */ +static unsigned long realview_gettimeoffset(void) +{ + unsigned long ticks1, ticks2, status; + + /* + * Get the current number of ticks. Note that there is a race + * condition between us reading the timer and checking for + * an interrupt. We get around this by ensuring that the + * counter has not reloaded between our two reads. + */ + ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff; + do { + ticks1 = ticks2; + status = __raw_readl(__io_address(REALVIEW_GIC_DIST_BASE + GIC_DIST_PENDING_SET) + + ((IRQ_TIMERINT0_1 >> 5) << 2)); + ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff; + } while (ticks2 > ticks1); + + /* + * Number of ticks since last interrupt. + */ + ticks1 = TIMER_RELOAD - ticks2; + + /* + * Interrupt pending? If so, we've reloaded once already. + * + * FIXME: Need to check this is effectively timer 0 that expires + */ + if (status & IRQMASK_TIMERINT0_1) + ticks1 += TIMER_RELOAD; + + /* + * Convert the ticks to usecs + */ + return TICKS2USECS(ticks1); +} + +/* + * IRQ handler for the timer + */ +static irqreturn_t realview_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + write_seqlock(&xtime_lock); + + // ...clear the interrupt + writel(1, TIMER0_VA_BASE + TIMER_INTCLR); + + timer_tick(regs); + + write_sequnlock(&xtime_lock); + + return IRQ_HANDLED; +} + +static struct irqaction realview_timer_irq = { + .name = "RealView Timer Tick", + .flags = SA_INTERRUPT | SA_TIMER, + .handler = realview_timer_interrupt, +}; + +/* + * Set up timer interrupt, and return the current time in seconds. 
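+ * Timer 0 is clocked from the 1MHz TIMCLK and programmed as a periodic
+ * source with its interrupt enabled; timers 1-3 are switched to the same
+ * clock but left disabled here.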
+ */ +static void __init realview_timer_init(void) +{ + u32 val; + + /* + * set clock frequency: + * REALVIEW_REFCLK is 32KHz + * REALVIEW_TIMCLK is 1MHz + */ + val = readl(__io_address(REALVIEW_SCTL_BASE)); + writel((REALVIEW_TIMCLK << REALVIEW_TIMER1_EnSel) | + (REALVIEW_TIMCLK << REALVIEW_TIMER2_EnSel) | + (REALVIEW_TIMCLK << REALVIEW_TIMER3_EnSel) | + (REALVIEW_TIMCLK << REALVIEW_TIMER4_EnSel) | val, + __io_address(REALVIEW_SCTL_BASE)); + + /* + * Initialise to a known state (all timers off) + */ + writel(0, TIMER0_VA_BASE + TIMER_CTRL); + writel(0, TIMER1_VA_BASE + TIMER_CTRL); + writel(0, TIMER2_VA_BASE + TIMER_CTRL); + writel(0, TIMER3_VA_BASE + TIMER_CTRL); + + writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD); + writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_VALUE); + writel(TIMER_DIVISOR | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC | + TIMER_CTRL_IE, TIMER0_VA_BASE + TIMER_CTRL); + + /* + * Make irqs happen for the system timer + */ + setup_irq(IRQ_TIMERINT0_1, &realview_timer_irq); +} + +struct sys_timer realview_timer = { + .init = realview_timer_init, + .offset = realview_gettimeoffset, +}; diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h new file mode 100644 index 00000000000..575599db74d --- /dev/null +++ b/arch/arm/mach-realview/core.h @@ -0,0 +1,118 @@ +/* + * linux/arch/arm/mach-realview/core.h + * + * Copyright (C) 2004 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARCH_REALVIEW_H +#define __ASM_ARCH_REALVIEW_H + +#include <asm/hardware/amba.h> +#include <asm/io.h> + +#define __io_address(n) __io(IO_ADDRESS(n)) + +extern struct sys_timer realview_timer; + +#define AMBA_DEVICE(name,busid,base,plat) \ +static struct amba_device name##_device = { \ + .dev = { \ + .coherent_dma_mask = ~0, \ + .bus_id = busid, \ + .platform_data = plat, \ + }, \ + .res = { \ + .start = REALVIEW_##base##_BASE, \ + .end = (REALVIEW_##base##_BASE) + SZ_4K - 1,\ + .flags = IORESOURCE_MEM, \ + }, \ + .dma_mask = ~0, \ + .irq = base##_IRQ, \ + /* .dma = base##_DMA,*/ \ +} + +/* + * These devices are connected via the core APB bridge + */ +#define GPIO2_IRQ { IRQ_GPIOINT2, NO_IRQ } +#define GPIO2_DMA { 0, 0 } +#define GPIO3_IRQ { IRQ_GPIOINT3, NO_IRQ } +#define GPIO3_DMA { 0, 0 } + +#define AACI_IRQ { IRQ_AACI, NO_IRQ } +#define AACI_DMA { 0x80, 0x81 } +#define MMCI0_IRQ { IRQ_MMCI0A,IRQ_MMCI0B } +#define MMCI0_DMA { 0x84, 0 } +#define KMI0_IRQ { IRQ_KMI0, NO_IRQ } +#define KMI0_DMA { 0, 0 } +#define KMI1_IRQ { IRQ_KMI1, NO_IRQ } +#define KMI1_DMA { 0, 0 } + +/* + * These devices are connected directly to the multi-layer AHB switch + */ +#define SMC_IRQ { NO_IRQ, NO_IRQ } +#define SMC_DMA { 0, 0 } +#define MPMC_IRQ { NO_IRQ, NO_IRQ } +#define MPMC_DMA { 0, 0 } +#define CLCD_IRQ { IRQ_CLCDINT, NO_IRQ } +#define CLCD_DMA { 0, 0 } +#define DMAC_IRQ { IRQ_DMAINT, NO_IRQ } +#define DMAC_DMA { 0, 0 } + +/* + * These devices are connected via the core APB bridge + */ +#define SCTL_IRQ { NO_IRQ, NO_IRQ } +#define SCTL_DMA { 0, 0 } +#define WATCHDOG_IRQ { IRQ_WDOGINT, NO_IRQ } +#define WATCHDOG_DMA { 0, 0 } +#define GPIO0_IRQ { IRQ_GPIOINT0, NO_IRQ } +#define GPIO0_DMA { 0, 0 } +#define GPIO1_IRQ { IRQ_GPIOINT1, NO_IRQ } +#define GPIO1_DMA { 0, 0 } +#define RTC_IRQ { IRQ_RTCINT, NO_IRQ } +#define RTC_DMA { 0, 0 } + +/* + * These devices are connected via the DMA APB bridge + */ +#define SCI_IRQ { IRQ_SCIINT, NO_IRQ } +#define SCI_DMA { 7, 6 } +#define UART0_IRQ { IRQ_UARTINT0, NO_IRQ } +#define UART0_DMA { 15, 14 } +#define UART1_IRQ { IRQ_UARTINT1, NO_IRQ } +#define UART1_DMA { 13, 12 } +#define UART2_IRQ { IRQ_UARTINT2, NO_IRQ } +#define UART2_DMA { 11, 10 } +#define UART3_IRQ { IRQ_UART3, NO_IRQ } +#define UART3_DMA { 0x86, 0x87 } +#define SSP_IRQ { IRQ_SSPINT, NO_IRQ } +#define SSP_DMA { 9, 8 } + + +extern struct platform_device realview_flash_device; +extern struct platform_device realview_smc91x_device; +extern struct mmc_platform_data realview_mmc0_plat_data; +extern struct mmc_platform_data realview_mmc1_plat_data; +extern struct clk realview_clcd_clk; +extern struct clcd_board clcd_plat_data; + +extern void realview_leds_event(led_event_t ledevt); + +#endif diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c new file mode 100644 index 00000000000..267bb07e39b --- /dev/null +++ b/arch/arm/mach-realview/realview_eb.c @@ -0,0 +1,172 @@ +/* + * linux/arch/arm/mach-realview/realview_eb.c + * + * Copyright (C) 2004 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any 
later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/sysdev.h> + +#include <asm/hardware.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/leds.h> +#include <asm/mach-types.h> +#include <asm/hardware/gic.h> +#include <asm/hardware/amba.h> +#include <asm/hardware/icst307.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> +#include <asm/mach/mmc.h> + +#include <asm/arch/irqs.h> + +#include "core.h" +#include "clock.h" + +static struct map_desc realview_eb_io_desc[] __initdata = { + { + .virtual = IO_ADDRESS(REALVIEW_SYS_BASE), + .pfn = __phys_to_pfn(REALVIEW_SYS_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, { + .virtual = IO_ADDRESS(REALVIEW_GIC_CPU_BASE), + .pfn = __phys_to_pfn(REALVIEW_GIC_CPU_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, { + .virtual = IO_ADDRESS(REALVIEW_GIC_DIST_BASE), + .pfn = __phys_to_pfn(REALVIEW_GIC_DIST_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, { + .virtual = IO_ADDRESS(REALVIEW_SCTL_BASE), + .pfn = __phys_to_pfn(REALVIEW_SCTL_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, { + .virtual = IO_ADDRESS(REALVIEW_TIMER0_1_BASE), + .pfn = __phys_to_pfn(REALVIEW_TIMER0_1_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, { + .virtual = IO_ADDRESS(REALVIEW_TIMER2_3_BASE), + .pfn = __phys_to_pfn(REALVIEW_TIMER2_3_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + }, +#ifdef CONFIG_DEBUG_LL + { + .virtual = IO_ADDRESS(REALVIEW_UART0_BASE), + .pfn = __phys_to_pfn(REALVIEW_UART0_BASE), + .length = SZ_4K, + .type = MT_DEVICE, + } +#endif +}; + +static void __init realview_eb_map_io(void) +{ + iotable_init(realview_eb_io_desc, ARRAY_SIZE(realview_eb_io_desc)); +} + +/* FPGA Primecells */ +AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:09", UART3, NULL); + +/* DevChip Primecells */ +AMBA_DEVICE(smc, "dev:00", SMC, NULL); +AMBA_DEVICE(clcd, "dev:20", CLCD, &clcd_plat_data); +AMBA_DEVICE(dmac, "dev:30", DMAC, NULL); +AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:e1", WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:e4", GPIO0, NULL); +AMBA_DEVICE(gpio1, "dev:e5", GPIO1, NULL); +AMBA_DEVICE(gpio2, "dev:e6", GPIO2, NULL); +AMBA_DEVICE(rtc, "dev:e8", RTC, NULL); +AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:f1", UART0, NULL); +AMBA_DEVICE(uart1, "dev:f2", UART1, NULL); +AMBA_DEVICE(uart2, "dev:f3", UART2, NULL); +AMBA_DEVICE(ssp0, "dev:f4", SSP, NULL); + +static struct amba_device *amba_devs[] __initdata = { + &dmac_device, + &uart0_device, + &uart1_device, + &uart2_device, + &uart3_device, + &smc_device, + &clcd_device, + &sctl_device, + &wdog_device, + &gpio0_device, + &gpio1_device, + &gpio2_device, + &rtc_device, + &sci0_device, + &ssp0_device, + &aaci_device, + &mmc0_device, + &kmi0_device, + &kmi1_device, +}; + +static void __init gic_init_irq(void) +{ + 
gic_dist_init(__io_address(REALVIEW_GIC_DIST_BASE)); + gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE)); +} + +static void __init realview_eb_init(void) +{ + int i; + + clk_register(&realview_clcd_clk); + + platform_device_register(&realview_flash_device); + platform_device_register(&realview_smc91x_device); + + for (i = 0; i < ARRAY_SIZE(amba_devs); i++) { + struct amba_device *d = amba_devs[i]; + amba_device_register(d, &iomem_resource); + } + +#ifdef CONFIG_LEDS + leds_event = realview_leds_event; +#endif +} + +MACHINE_START(REALVIEW_EB, "ARM-RealView EB") + /* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */ + .phys_ram = 0x00000000, + .phys_io = REALVIEW_UART0_BASE, + .io_pg_offst = (IO_ADDRESS(REALVIEW_UART0_BASE) >> 18) & 0xfffc, + .boot_params = 0x00000100, + .map_io = realview_eb_map_io, + .init_irq = gic_init_irq, + .timer = &realview_timer, + .init_machine = realview_eb_init, +MACHINE_END diff --git a/arch/arm/mach-s3c2410/mach-rx3715.c b/arch/arm/mach-s3c2410/mach-rx3715.c index 8f2a90bf940..24d69019a84 100644 --- a/arch/arm/mach-s3c2410/mach-rx3715.c +++ b/arch/arm/mach-s3c2410/mach-rx3715.c @@ -17,6 +17,7 @@ * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA * 14-Mar-2005 BJD Fixed __iomem warnings * 20-Sep-2005 BJD Added static to non-exported items + * 31-Oct-2005 BJD Added LCD setup for framebuffer */ #include <linux/kernel.h> @@ -43,6 +44,9 @@ #include <asm/arch/regs-serial.h> #include <asm/arch/regs-gpio.h> +#include <asm/arch/regs-lcd.h> + +#include <asm/arch/fb.h> #include "clock.h" #include "devs.h" @@ -97,6 +101,66 @@ static struct s3c2410_uartcfg rx3715_uartcfgs[] = { } }; +/* framebuffer lcd controller information */ + +static struct s3c2410fb_mach_info rx3715_lcdcfg __initdata = { + .regs = { + .lcdcon1 = S3C2410_LCDCON1_TFT16BPP | \ + S3C2410_LCDCON1_TFT | \ + S3C2410_LCDCON1_CLKVAL(0x0C), + + .lcdcon2 = S3C2410_LCDCON2_VBPD(5) | \ + S3C2410_LCDCON2_LINEVAL(319) | \ + S3C2410_LCDCON2_VFPD(6) | \ + S3C2410_LCDCON2_VSPW(2), + + .lcdcon3 = S3C2410_LCDCON3_HBPD(35) | \ + S3C2410_LCDCON3_HOZVAL(239) | \ + S3C2410_LCDCON3_HFPD(35), + + .lcdcon4 = S3C2410_LCDCON4_MVAL(0) | \ + S3C2410_LCDCON4_HSPW(7), + + .lcdcon5 = S3C2410_LCDCON5_INVVLINE | + S3C2410_LCDCON5_FRM565 | + S3C2410_LCDCON5_HWSWP, + }, + + .lpcsel = 0xf82, + + .gpccon = 0xaa955699, + .gpccon_mask = 0xffc003cc, + .gpcup = 0x0000ffff, + .gpcup_mask = 0xffffffff, + + .gpdcon = 0xaa95aaa1, + .gpdcon_mask = 0xffc0fff0, + .gpdup = 0x0000faff, + .gpdup_mask = 0xffffffff, + + .fixed_syncs = 1, + .width = 240, + .height = 320, + + .xres = { + .min = 240, + .max = 240, + .defval = 240, + }, + + .yres = { + .max = 320, + .min = 320, + .defval = 320, + }, + + .bpp = { + .min = 16, + .max = 16, + .defval = 16, + }, +}; + static struct platform_device *rx3715_devices[] __initdata = { &s3c_device_usb, &s3c_device_lcd, @@ -123,14 +187,12 @@ static void __init rx3715_init_irq(void) s3c24xx_init_irq(); } -#ifdef CONFIG_PM static void __init rx3715_init_machine(void) { s3c2410_pm_init(); + s3c24xx_fb_set_platdata(&rx3715_lcdcfg); } -#else -#define rx3715_init_machine NULL -#endif + MACHINE_START(RX3715, "IPAQ-RX3715") /* Maintainer: Ben Dooks <ben@fluff.org> */ diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c54e04c995e..5568403e984 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -120,8 +120,8 @@ config CPU_ARM925T # ARM926T config CPU_ARM926T - bool "Support ARM926T processor" if ARCH_INTEGRATOR - depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX 
+ bool "Support ARM926T processor" + depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX select CPU_32v5 select CPU_ABRT_EV5TJ @@ -242,7 +242,7 @@ config CPU_XSCALE # ARMv6 config CPU_V6 bool "Support ARM V6 processor" - depends on ARCH_INTEGRATOR + depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB select CPU_32v6 select CPU_ABRT_EV6 select CPU_CACHE_V6 diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index fd079ff1fc5..c168f322ef8 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -486,10 +486,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc) /* * Ask the machine support to map in the statically mapped devices. - * After this point, we can start to touch devices again. */ if (mdesc->map_io) mdesc->map_io(); + + /* + * Finally flush the tlb again - this ensures that we're in a + * consistent state wrt the writebuffer if the writebuffer needs + * draining. After this point, we can start to touch devices + * again. + */ + local_flush_tlb_all(); } /* diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 9204be6eedb..7c724ffa08b 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -803,7 +803,6 @@ no_apic: void __init init_apic_mappings(void) { - unsigned int orig_apicid; unsigned long apic_phys; /* @@ -825,11 +824,8 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - orig_apicid = boot_cpu_physical_apicid; - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - if ((orig_apicid != -1U) && (orig_apicid != boot_cpu_physical_apicid)) - printk(KERN_WARNING "Boot APIC ID in local APIC unexpected (%d vs %d)", - orig_apicid, boot_cpu_physical_apicid); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { @@ -1259,81 +1255,40 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) } /* - * This initializes the IO-APIC and APIC hardware. + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. */ -int __init APIC_init(void) +int __init APIC_init_uniprocessor (void) { - if (enable_local_apic < 0) { - printk(KERN_INFO "APIC disabled\n"); - return -1; - } + if (enable_local_apic < 0) + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - /* See if we have a SMP configuration or have forced enabled - * the local apic. - */ - if (!smp_found_config && !acpi_lapic && !cpu_has_apic) { - enable_local_apic = -1; + if (!smp_found_config && !cpu_has_apic) return -1; - } /* - * Complain if the BIOS pretends there is an apic. - * Then get out because we don't have an a local apic. + * Complain if the BIOS pretends there is one. */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - enable_local_apic = -1; return -1; } verify_local_APIC(); - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. 
- * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(boot_cpu_physical_apicid, phys_cpu_present_map); - } - - /* - * Switch from PIC to APIC mode. - */ connect_bsp_APIC(); - setup_local_APIC(); -#ifdef CONFIG_X86_IO_APIC - /* - * Now start the IO-APICs - */ - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif - return 0; -} + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); -void __init APIC_late_time_init(void) -{ - /* Improve our loops per jiffy estimate */ - loops_per_jiffy = ((1000 + HZ - 1)/HZ)*cpu_khz; - boot_cpu_data.loops_per_jiffy = loops_per_jiffy; - cpu_data[0].loops_per_jiffy = loops_per_jiffy; - - /* setup_apic_nmi_watchdog doesn't work properly before cpu_khz is - * initialized. So redo it here to ensure the boot cpu is setup - * properly. - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); + setup_local_APIC(); #ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - IO_APIC_late_time_init(); + if (smp_found_config) + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); #endif setup_boot_APIC_clock(); + + return 0; } diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d86f2490928..323ef8ab324 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -435,8 +435,4 @@ void __init init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); - -#ifdef CONFIG_X86_LOCAL_APIC - APIC_init(); -#endif } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 5a77c52b20a..cc5d7ac5b2e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2387,15 +2387,11 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); + check_timer(); if (!acpi_ioapic) print_IO_APIC(); } -void __init IO_APIC_late_time_init(void) -{ - check_timer(); -} - /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 5a2bbe0c4ff..01b618e73ec 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1078,16 +1078,6 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -/* - * Fall back to non SMP mode after errors. - * - */ -static __init void disable_smp(void) -{ - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); -} - static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1100,6 +1090,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk("CPU%d: ", 0); print_cpu_info(&cpu_data[0]); + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; @@ -1111,27 +1102,68 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[0]); cpu_set(0, cpu_core_map[0]); - map_cpu_to_logical_apicid(); - /* * If we couldn't find an SMP configuration at boot time, * get out of here now! */ if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + if (APIC_init_uniprocessor()) + printk(KERN_NOTICE "Local APIC not detected." 
+ " Using dummy APIC emulation.\n"); + map_cpu_to_logical_apicid(); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + return; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); return; } + verify_local_APIC(); + /* * If SMP should be disabled, then really disable it! */ - if (!max_cpus || (enable_local_apic < 0)) { - printk(KERN_INFO "SMP mode deactivated.\n"); - disable_smp(); + if (!max_cpus) { + smp_found_config = 0; + printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); return; } + connect_bsp_APIC(); + setup_local_APIC(); + map_cpu_to_logical_apicid(); + + setup_portio_remap(); /* @@ -1212,6 +1244,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(0, cpu_sibling_map[0]); cpu_set(0, cpu_core_map[0]); + smpboot_setup_io_apic(); + + setup_boot_APIC_clock(); + /* * Synchronize the TSC with the AP */ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 07471bba2dc..41c5b2dc620 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -440,8 +440,8 @@ static int time_init_device(void) device_initcall(time_init_device); -extern void (*late_time_init)(void); #ifdef CONFIG_HPET_TIMER +extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { @@ -458,11 +458,6 @@ static void __init hpet_time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); - -#ifdef CONFIG_X86_LOCAL_APIC - if (enable_local_apic >= 0) - APIC_late_time_init(); -#endif } #endif @@ -487,9 +482,4 @@ void __init time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); - -#ifdef CONFIG_X86_LOCAL_APIC - if (enable_local_apic >= 0) - late_time_init = APIC_late_time_init; -#endif } diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 330fd2b6807..3984226a8b9 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -398,7 +398,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); */ static u16 toshiba_line_size; -static struct dmi_system_id __devinit toshiba_ohci1394_dmi_table[] = { +static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { { .ident = "Toshiba PS5 based laptop", .matches = { diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 1642375fb14..3b4248cff9a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -26,6 +26,10 @@ config MMU bool default y +config SWIOTLB + bool + default y + 
config RWSEM_XCHGADD_ALGORITHM bool default y diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index cb1af597370..ac64664a180 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -9,7 +9,7 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ bitop.o checksum.o clear_page.o csum_partial_copy.o \ clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ flush.o ip_fast_csum.o do_csum.o \ - memset.o strlen.o swiotlb.o + memset.o strlen.o lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c deleted file mode 100644 index 96edcc0fdcd..00000000000 --- a/arch/ia64/lib/swiotlb.c +++ /dev/null @@ -1,759 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is for IA-64 platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - */ - -#include <linux/cache.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ctype.h> - -#include <asm/io.h> -#include <asm/pci.h> -#include <asm/dma.h> - -#include <linux/init.h> -#include <linux/bootmem.h> - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) -#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) - -/* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? - * The complexity of {map,unmap}_single is linearly dependent on this value. - */ -#define IO_TLB_SEGSIZE 128 - -/* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. - */ -#define IO_TLB_SHIFT 11 - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -int swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_unmap_single and - * swiotlb_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static char *io_tlb_start, *io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -void *io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. 
- */ -static unsigned char **io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) - swiotlb_force = 1; - return 1; -} -__setup("swiotlb=", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the PCI DMA API. - */ -void -swiotlb_init_with_default_size (size_t default_size) -{ - unsigned long i; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs * - (1 << IO_TLB_SHIFT), 0x100000000); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); - - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); - printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); -} - -void -swiotlb_init (void) -{ - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. - */ -int -swiotlb_late_init_with_default_size (size_t default_size) -{ - unsigned long i, req_nslabs = io_tlb_nslabs; - unsigned int order; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - io_tlb_nslabs = SLABS_PER_PAGE << order; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (io_tlb_start) - break; - order--; - } - - if (!io_tlb_start) - goto cleanup1; - - if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. 
- */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup2; - - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - - io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(char *))); - if (!io_tlb_orig_addr) - goto cleanup3; - - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *)); - - /* - * Get the overflow emergency buffer - */ - io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!io_tlb_overflow_buffer) - goto cleanup4; - - printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - " - "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20, - virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end)); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * - sizeof(char *))); - io_tlb_orig_addr = NULL; -cleanup3: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; - io_tlb_end = NULL; -cleanup2: - free_pages((unsigned long)io_tlb_start, order); - io_tlb_start = NULL; -cleanup1: - io_tlb_nslabs = req_nslabs; - return -ENOMEM; -} - -static inline int -address_needs_mapping(struct device *hwdev, dma_addr_t addr) -{ - dma_addr_t mask = 0xffffffff; - /* If the device has a mask, use it, otherwise default to 32 bits */ - if (hwdev && hwdev->dma_mask) - mask = *hwdev->dma_mask; - return (addr & ~mask) != 0; -} - -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, char *buffer, size_t size, int dir) -{ - unsigned long flags; - char *dma_addr; - unsigned int nslots, stride, index, wrap; - int i; - - /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - if (!nslots) - BUG(); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - wrap = index = ALIGN(io_tlb_index, stride); - - if (index >= io_tlb_nslabs) - wrap = index = 0; - - do { - /* - * If we find a slot that indicates we have 'nslots' - * number of contiguous buffers, we allocate the - * buffers from that slot and mark the entries as '0' - * indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in - * the next round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - - spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; - } - found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. 
- */ - io_tlb_orig_addr[index] = buffer; - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - memcpy(dma_addr, buffer, size); - - return dma_addr; -} - -/* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. - */ -static void -unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - /* - * bounce... copy the data back into the original buffer * and - * delete the bounce buffer. - */ - memcpy(buffer, dma_addr, size); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) - io_tlb_list[i] = ++count; - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -{ - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - /* - * bounce... copy the data back into/from the original buffer - * XXX How do you handle DMA_BIDIRECTIONAL here ? - */ - if (dir == DMA_FROM_DEVICE) - memcpy(buffer, dma_addr, size); - else if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, buffer, size); - else - BUG(); -} - -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -{ - unsigned long dev_addr; - void *ret; - int order = get_order(size); - - /* - * XXX fix me: the DMA API should pass us an explicit DMA mask - * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32 - * bit range instead of a 16MB one). - */ - flags |= GFP_DMA; - - ret = (void *)__get_free_pages(flags, order); - if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) { - /* - * The allocated memory isn't reachable by the device. - * Fall back on swiotlb_map_single(). - */ - free_pages((unsigned long) ret, order); - ret = NULL; - } - if (!ret) { - /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on - * swiotlb_map_single(), which will grab memory from - * the lowest available address range. 
- */ - dma_addr_t handle; - handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); - if (dma_mapping_error(handle)) - return NULL; - - ret = phys_to_virt(handle); - } - - memset(ret, 0, size); - dev_addr = virt_to_phys(ret); - - /* Confirm address can be DMA'd by device */ - if (address_needs_mapping(hwdev, dev_addr)) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n", - (unsigned long long)*hwdev->dma_mask, dev_addr); - panic("swiotlb_alloc_coherent: allocated memory is out of " - "range for device"); - } - *dma_handle = dev_addr; - return ret; -} - -void -swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - if (!(vaddr >= (void *)io_tlb_start - && vaddr < (void *)io_tlb_end)) - free_pages((unsigned long) vaddr, get_order(size)); - else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE); -} - -static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) -{ - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for pci_dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " - "device %s\n", size, dev ? dev->bus_id : "?"); - - if (size > io_tlb_overflow && do_panic) { - if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Random memory would be DMAed\n"); - } -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * PCI address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. - */ -dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) -{ - unsigned long dev_addr = virt_to_phys(ptr); - void *map; - - if (dir == DMA_NONE) - BUG(); - /* - * If the pointer passed in happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force) - return dev_addr; - - /* - * Oh well, have to allocate and map a bounce buffer. - */ - map = map_single(hwdev, ptr, size, dir); - if (!map) { - swiotlb_full(hwdev, size, dir, 1); - map = io_tlb_overflow_buffer; - } - - dev_addr = virt_to_phys(map); - - /* - * Ensure that the address returned is DMA'ble - */ - if (address_needs_mapping(hwdev, dev_addr)) - panic("map_single: bounce buffer is not DMA'ble"); - - return dev_addr; -} - -/* - * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -static void -mark_clean(void *addr, size_t size) -{ - unsigned long pg_addr, end; - - pg_addr = PAGE_ALIGN((unsigned long) addr); - end = (unsigned long) addr + size; - while (pg_addr + PAGE_SIZE <= end) { - struct page *page = virt_to_page(pg_addr); - set_bit(PG_arch_1, &page->flags); - pg_addr += PAGE_SIZE; - } -} - -/* - * Unmap a single streaming mode DMA translation. 
The dma_addr and size must - * match what was provided for in a previous swiotlb_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) -{ - char *dma_addr = phys_to_virt(dev_addr); - - if (dir == DMA_NONE) - BUG(); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - unmap_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_single() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the PCI dma mapping, you must - * call this function before doing so. At the next point you give the PCI dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - char *dma_addr = phys_to_virt(dev_addr); - - if (dir == DMA_NONE) - BUG(); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - sync_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) -{ - char *dma_addr = phys_to_virt(dev_addr); - - if (dir == DMA_NONE) - BUG(); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - sync_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(dma_addr, size); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_single - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_single are the - * same here. - */ -int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - void *addr; - unsigned long dev_addr; - int i; - - if (dir == DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) { - addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = virt_to_phys(addr); - if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { - sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir)); - if (!sg->dma_address) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; - return 0; - } - } else - sg->dma_address = dev_addr; - sg->dma_length = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_single() above. 
- */ -void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, - int dir) -{ - int i; - - if (dir == DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir); - else if (dir == DMA_FROM_DEVICE) - mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - int i; - - if (dir == DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - sync_single(hwdev, (void *) sg->dma_address, - sg->dma_length, dir); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int dir) -{ - int i; - - if (dir == DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) - if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - sync_single(hwdev, (void *) sg->dma_address, - sg->dma_length, dir); -} - -int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) -{ - return (dma_addr == virt_to_phys(io_tlb_overflow_buffer)); -} - -/* - * Return whether the given PCI device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during PCI bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported (struct device *hwdev, u64 mask) -{ - return (virt_to_phys (io_tlb_end) - 1) <= mask; -} - -EXPORT_SYMBOL(swiotlb_init); -EXPORT_SYMBOL(swiotlb_map_single); -EXPORT_SYMBOL(swiotlb_unmap_single); -EXPORT_SYMBOL(swiotlb_map_sg); -EXPORT_SYMBOL(swiotlb_unmap_sg); -EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); -EXPORT_SYMBOL(swiotlb_sync_single_for_device); -EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); -EXPORT_SYMBOL(swiotlb_sync_sg_for_device); -EXPORT_SYMBOL(swiotlb_dma_mapping_error); -EXPORT_SYMBOL(swiotlb_alloc_coherent); -EXPORT_SYMBOL(swiotlb_free_coherent); -EXPORT_SYMBOL(swiotlb_dma_supported); diff --git a/arch/m68knommu/defconfig b/arch/m68knommu/defconfig index 87f2d6587c5..2d59ba1a79b 100644 --- a/arch/m68knommu/defconfig +++ b/arch/m68knommu/defconfig @@ -99,7 +99,7 @@ CONFIG_M5272C3=y # CONFIG_NETtel is not set # CONFIG_CPU16B is not set # CONFIG_MOD5272 is not set -CONFIG_MOTOROLA=y +CONFIG_FREESCALE=y # CONFIG_LARGE_ALLOCS is not set CONFIG_4KSTACKS=y CONFIG_RAMAUTO=y @@ -554,7 +554,6 @@ CONFIG_EXT2_FS=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set CONFIG_ROMFS_FS=y -CONFIG_MAGIC_ROM_PTR=y # CONFIG_INOTIFY is not set # CONFIG_QUOTA is not set # CONFIG_DNOTIFY is not set diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 984a1063071..2d22bf03484 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -279,7 +279,7 @@ _GLOBAL(ppc32_rt_sigsuspend) bne syscall_exit /* If sigsuspend() returns zero, we are going into a signal handler. 
We may need to call audit_syscall_exit() to mark the exit from sigsuspend() */ -#ifdef CONFIG_AUDIT +#ifdef CONFIG_AUDITSYSCALL ld r3,PACACURRENT(r13) ld r4,AUDITCONTEXT(r3) cmpdi 0,r4,0 diff --git a/arch/um/include/net_kern.h b/arch/um/include/net_kern.h index 1c07949a13d..f7de6df60dd 100644 --- a/arch/um/include/net_kern.h +++ b/arch/um/include/net_kern.h @@ -6,10 +6,11 @@ #ifndef __UM_NET_KERN_H #define __UM_NET_KERN_H -#include "linux/netdevice.h" -#include "linux/skbuff.h" -#include "linux/socket.h" -#include "linux/list.h" +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/list.h> struct uml_net { struct list_head list; diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index bcdd0a805fe..14328cab5d3 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o -obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o @@ -41,7 +40,6 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0 bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o topology-y += ../../i386/mach-default/topology.o -swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c index a29fb75b33a..95b6d9639fb 100644 --- a/arch/x86_64/lib/bitops.c +++ b/arch/x86_64/lib/bitops.c @@ -5,19 +5,23 @@ #undef find_first_bit #undef find_next_bit -/** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. - */ -inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) +static inline long +__find_first_zero_bit(const unsigned long * addr, unsigned long size) { long d0, d1, d2; long res; + /* + * We must test the size in words, not in bits, because + * otherwise incoming sizes in the range -63..-1 will not run + * any scasq instructions, and then the flags used by the je + * instruction will have whatever random value was in place + * before. Nobody should call us like that, but + * find_next_zero_bit() does when offset and size are at the + * same word and it fails to find a zero itself. + */ + size += 63; + size >>= 6; if (!size) return 0; asm volatile( @@ -30,12 +34,30 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) " shlq $3,%%rdi\n" " addq %%rdi,%%rdx" :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), - [addr] "r" (addr) : "memory"); + :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), + [addr] "S" (addr) : "memory"); + /* + * Any register would do for [addr] above, but GCC tends to + * prefer rbx over rsi, even though rsi is readily available + * and doesn't have to be saved. 
+ */ return res; } /** + * find_first_zero_bit - find the first zero bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first zero bit, not the number of the byte + * containing a bit. + */ +long find_first_zero_bit(const unsigned long * addr, unsigned long size) +{ + return __find_first_zero_bit (addr, size); +} + +/** * find_next_zero_bit - find the first zero bit in a memory region * @addr: The address to base the search on * @offset: The bitnumber to start searching at @@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) */ long find_next_zero_bit (const unsigned long * addr, long size, long offset) { - unsigned long * p = ((unsigned long *) addr) + (offset >> 6); + const unsigned long * p = addr + (offset >> 6); unsigned long set = 0; unsigned long res, bit = offset&63; @@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset) /* * No zero yet, search remaining full words for a zero */ - res = find_first_zero_bit ((const unsigned long *)p, - size - 64 * (p - (unsigned long *) addr)); + res = __find_first_zero_bit (p, size - 64 * (p - addr)); + return (offset + set + res); } @@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size) long d0, d1; long res; + /* + * We must test the size in words, not in bits, because + * otherwise incoming sizes in the range -63..-1 will not run + * any scasq instructions, and then the flags used by the jz + * instruction will have whatever random value was in place + * before. Nobody should call us like that, but + * find_next_bit() does when offset and size are at the same + * word and it fails to find a one itself. + */ + size += 63; + size >>= 6; + if (!size) + return 0; asm volatile( " repe; scasq\n" " jz 1f\n" @@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size) " shlq $3,%%rdi\n" " addq %%rdi,%%rax" :"=a" (res), "=&c" (d0), "=&D" (d1) - :"0" (0ULL), - "1" ((size + 63) >> 6), "2" (addr), + :"0" (0ULL), "1" (size), "2" (addr), [addr] "r" (addr) : "memory"); return res; } |
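
The final bitops.c hunk converts the incoming bit count to a whole-64-bit-word count *before* testing it for zero, so that the stray sizes in the range -63..-1 (which find_next_zero_bit can pass once it has consumed the last partial word) scan nothing at all instead of leaving the repe/scasq flags in whatever state they happened to be. Below is a minimal portable-C sketch of that same guard; the name sketch_find_first_zero_bit and the local SKETCH_BITS_PER_LONG definition are illustrative only and are not the kernel's asm-optimised find_first_zero_bit.

#include <limits.h>

#define SKETCH_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/*
 * Portable sketch of the guarded scan: convert the bit count to a
 * whole-word count first, then bail out if there is nothing to scan.
 * A size in the range -63..-1, arriving as a huge unsigned long,
 * wraps to a small value when the rounding constant is added, so
 * "words" becomes 0 and we return 0 without touching memory --
 * mirroring the "size += 63; size >>= 6; if (!size)" fix above.
 */
static long sketch_find_first_zero_bit(const unsigned long *addr,
				       unsigned long size)
{
	unsigned long words = (size + SKETCH_BITS_PER_LONG - 1) /
			      SKETCH_BITS_PER_LONG;
	unsigned long i, bit;

	if (!words)				/* size == 0 or the -63..-1 case */
		return 0;

	for (i = 0; i < words; i++) {
		if (addr[i] == ~0UL)		/* word is all ones, no zero bit here */
			continue;
		for (bit = 0; bit < SKETCH_BITS_PER_LONG; bit++)
			if (!(addr[i] & (1UL << bit)))
				return i * SKETCH_BITS_PER_LONG + bit;
	}
	return words * SKETCH_BITS_PER_LONG;	/* no zero bit found */
}

With the guard in place, a caller in the position of find_next_zero_bit -- offset and size falling in the same word, no zero found there -- simply gets 0 back for the remaining "negative" span, and its offset arithmetic still yields a sane result rather than depending on stale CPU flags.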