From 48adcf148c83faa41999fb0b3524299c4e160fd9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 20 May 2008 01:03:16 +0300
Subject: [CPUFREQ] cpufreq: remove CVS keywords

This patch removes CVS keywords that weren't updated for a long time
from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k7.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
index f8a63b3664e..35fb4eaf6e1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
@@ -1,5 +1,4 @@
 /*
- *  $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
  *  (C) 2003 Dave Jones.
  *
  *  Licensed under the terms of the GNU GPL License version 2.
-- 
cgit v1.2.3


From 444933c6c6e82362ba8e0da26f41a53c433d11ef Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:28 +0900
Subject: [IA64] pvops: preparation: remove extern in irq_ia64.c

Remove the extern declaration of handle_IPI() from irq_ia64.c and
declare it in asm-ia64/smp.h instead.
handle_IPI() will later be referenced from another file.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/irq_ia64.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5538471e8d6..c48171bc796 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -600,7 +600,6 @@ static irqreturn_t dummy_handler (int irq, void *dev_id)
 {
 	BUG();
 }
-extern irqreturn_t handle_IPI (int irq, void *dev_id);
 
 static struct irqaction ipi_irqaction = {
 	.handler =	handle_IPI,
-- 
cgit v1.2.3


From 8311d21c35092aa4c4a12e0140e1ef3443489d77 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:29 +0900
Subject: [IA64] pvops: preparation: move the constants, LOAD_OFFSET, to a
 header file.

Move the LOAD_OFFSET definition from vmlinux.lds.S into system.h.
On paravirtualized environments, it is necessary to detect the
execution environment. One of the solutions is the multi entry point.
The multi entry point allows a boot loader to start the kernel execution
from the entry point which is different from the ELF entry point.
The non-standard entry point will be defined as a specialized ELF note
which contains the LMA of the entry point symbol.
The constant, LOAD_OFFSET, is necessary to calculate the symbol's LMA.
Move the definition into the public header file to make it available
to the multi entry point support.

Cc: "He, Qing" <qing.he@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/vmlinux.lds.S | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5929ab10a28..5a77206c249 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -4,7 +4,6 @@
 #include <asm/system.h>
 #include <asm/pgtable.h>
 
-#define LOAD_OFFSET	(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 #include <asm-generic/vmlinux.lds.h>
 
 #define IVT_TEXT							\
-- 
cgit v1.2.3


From 90aeb169c03a96e22674741f08054023c33d595b Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:32 +0900
Subject: [IA64] pvops: introduce pv_info which describes some random info.

Introduce pv_info, which describes some random info about the
underlying execution environment.

Cc: Jes Sorensen <jes@sgi.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/Makefile   |  2 ++
 arch/ia64/kernel/paravirt.c | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 arch/ia64/kernel/paravirt.c

(limited to 'arch')

diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 13fd10e8699..10a4ddb5b27 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -36,6 +36,8 @@ obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
 
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+
 obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
new file mode 100644
index 00000000000..d295ea5e59c
--- /dev/null
+++ b/arch/ia64/kernel/paravirt.c
@@ -0,0 +1,41 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *     Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+
+#include <linux/compiler.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+
+#include <asm/iosapic.h>
+#include <asm/paravirt.h>
+
+/***************************************************************************
+ * general info
+ */
+struct pv_info pv_info = {
+	.kernel_rpl = 0,
+	.paravirt_enabled = 0,
+	.name = "bare hardware"
+};
-- 
cgit v1.2.3


From 3e0879deb700f322f6c81ab34f056fc72d15ec02 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:33 +0900
Subject: [IA64] pvops: add an early setup hook for pv_ops.

This patch adds a setup hook in the very early boot sequence
before start_kernel() to initialize paravirtualization stuff.
The hook will be set by each pv loader code or by using multi entry point.

Signed-off-by: Qing He <qing.he@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/head.S | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index ddeab4e36fd..db540e58c78 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -26,11 +26,14 @@
 #include <asm/mmu_context.h>
 #include <asm/asm-offsets.h>
 #include <asm/pal.h>
+#include <asm/paravirt.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/mca_asm.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET				\
@@ -367,6 +370,44 @@ start_ap:
 	;;
 (isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
 
+#ifdef CONFIG_PARAVIRT
+
+	movl r14=hypervisor_setup_hooks
+	movl r15=hypervisor_type
+	mov r16=num_hypervisor_hooks
+	;;
+	ld8 r2=[r15]
+	;;
+	cmp.ltu p7,p0=r2,r16	// array size check
+	shladd r8=r2,3,r14
+	;;
+(p7)	ld8 r9=[r8]
+	;;
+(p7)	mov b1=r9
+(p7)	cmp.ne.unc p7,p0=r9,r0	// no actual branch to NULL
+	;;
+(p7)	br.call.sptk.many rp=b1
+
+	__INITDATA
+
+default_setup_hook = 0		// Currently nothing needs to be done.
+
+	.weak xen_setup_hook
+
+	.global hypervisor_type
+hypervisor_type:
+	data8		PARAVIRT_HYPERVISOR_TYPE_DEFAULT
+
+	// must be in the same order as PARAVIRT_HYPERVISOR_TYPE_xxx
+
+hypervisor_setup_hooks:
+	data8		default_setup_hook
+	data8		xen_setup_hook
+num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8
+	.previous
+
+#endif
+
 #ifdef CONFIG_SMP
 (isAP)	br.call.sptk.many rp=start_secondary
 .ret0:
-- 
cgit v1.2.3


From 1ff730b52f0c3e4e3846c3ff345c5526b2633ba9 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:34 +0900
Subject: [IA64] pvops: introduce pv_cpu_ops to paravirtualize privileged
 instructions.

Introduce pv_cpu_ops to paravirtualize privileged instructions
which are defined by ia64 intrinsics.
Make them indirect C function calls by introducing a function
table, pv_cpu_ops.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/paravirt.c | 247 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 247 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index d295ea5e59c..e5482bb6841 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -26,6 +26,7 @@
 #include <linux/compiler.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/module.h>
 #include <linux/types.h>
 
 #include <asm/iosapic.h>
@@ -39,3 +40,249 @@ struct pv_info pv_info = {
 	.paravirt_enabled = 0,
 	.name = "bare hardware"
 };
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+/* ia64_native_xxx are macros, so we have to wrap them in real functions */
+
+#define DEFINE_VOID_FUNC1(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg)	\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}							\
+
+#define DEFINE_VOID_FUNC2(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1)	\
+	{							\
+		ia64_native_ ## name(arg0, arg1);		\
+	}							\
+
+#define DEFINE_FUNC0(name)			\
+	static unsigned long			\
+	ia64_native_ ## name ## _func(void)	\
+	{					\
+		return ia64_native_ ## name();	\
+	}
+
+#define DEFINE_FUNC1(name, type)			\
+	static unsigned long				\
+	ia64_native_ ## name ## _func(type arg)		\
+	{						\
+		return ia64_native_ ## name(arg);	\
+	}						\
+
+DEFINE_VOID_FUNC1(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+static void
+ia64_native_ssm_i_func(void)
+{
+	ia64_native_ssm(IA64_PSR_I);
+}
+
+static void
+ia64_native_rsm_i_func(void)
+{
+	ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4)
+{
+	ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+#define CASE_GET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	res = ia64_native_getreg(_IA64_REG_ ## id);	\
+	break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+	unsigned long res = -1;
+	switch (regnum) {
+	CASE_GET_REG(GP);
+	CASE_GET_REG(IP);
+	CASE_GET_REG(PSR);
+	CASE_GET_REG(TP);
+	CASE_GET_REG(SP);
+
+	CASE_GET_AR(KR0);
+	CASE_GET_AR(KR1);
+	CASE_GET_AR(KR2);
+	CASE_GET_AR(KR3);
+	CASE_GET_AR(KR4);
+	CASE_GET_AR(KR5);
+	CASE_GET_AR(KR6);
+	CASE_GET_AR(KR7);
+	CASE_GET_AR(RSC);
+	CASE_GET_AR(BSP);
+	CASE_GET_AR(BSPSTORE);
+	CASE_GET_AR(RNAT);
+	CASE_GET_AR(FCR);
+	CASE_GET_AR(EFLAG);
+	CASE_GET_AR(CSD);
+	CASE_GET_AR(SSD);
+	CASE_GET_AR(CFLAG);
+	CASE_GET_AR(FSR);
+	CASE_GET_AR(FIR);
+	CASE_GET_AR(FDR);
+	CASE_GET_AR(CCV);
+	CASE_GET_AR(UNAT);
+	CASE_GET_AR(FPSR);
+	CASE_GET_AR(ITC);
+	CASE_GET_AR(PFS);
+	CASE_GET_AR(LC);
+	CASE_GET_AR(EC);
+
+	CASE_GET_CR(DCR);
+	CASE_GET_CR(ITM);
+	CASE_GET_CR(IVA);
+	CASE_GET_CR(PTA);
+	CASE_GET_CR(IPSR);
+	CASE_GET_CR(ISR);
+	CASE_GET_CR(IIP);
+	CASE_GET_CR(IFA);
+	CASE_GET_CR(ITIR);
+	CASE_GET_CR(IIPA);
+	CASE_GET_CR(IFS);
+	CASE_GET_CR(IIM);
+	CASE_GET_CR(IHA);
+	CASE_GET_CR(LID);
+	CASE_GET_CR(IVR);
+	CASE_GET_CR(TPR);
+	CASE_GET_CR(EOI);
+	CASE_GET_CR(IRR0);
+	CASE_GET_CR(IRR1);
+	CASE_GET_CR(IRR2);
+	CASE_GET_CR(IRR3);
+	CASE_GET_CR(ITV);
+	CASE_GET_CR(PMV);
+	CASE_GET_CR(CMCV);
+	CASE_GET_CR(LRR0);
+	CASE_GET_CR(LRR1);
+
+	default:
+		printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+		break;
+	}
+	return res;
+}
+
+#define CASE_SET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	ia64_native_setreg(_IA64_REG_ ## id, val);	\
+	break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_PSR_L:
+		ia64_native_setreg(_IA64_REG_PSR_L, val);
+		ia64_dv_serialize_data();
+		break;
+	CASE_SET_REG(SP);
+	CASE_SET_REG(GP);
+
+	CASE_SET_AR(KR0);
+	CASE_SET_AR(KR1);
+	CASE_SET_AR(KR2);
+	CASE_SET_AR(KR3);
+	CASE_SET_AR(KR4);
+	CASE_SET_AR(KR5);
+	CASE_SET_AR(KR6);
+	CASE_SET_AR(KR7);
+	CASE_SET_AR(RSC);
+	CASE_SET_AR(BSP);
+	CASE_SET_AR(BSPSTORE);
+	CASE_SET_AR(RNAT);
+	CASE_SET_AR(FCR);
+	CASE_SET_AR(EFLAG);
+	CASE_SET_AR(CSD);
+	CASE_SET_AR(SSD);
+	CASE_SET_AR(CFLAG);
+	CASE_SET_AR(FSR);
+	CASE_SET_AR(FIR);
+	CASE_SET_AR(FDR);
+	CASE_SET_AR(CCV);
+	CASE_SET_AR(UNAT);
+	CASE_SET_AR(FPSR);
+	CASE_SET_AR(ITC);
+	CASE_SET_AR(PFS);
+	CASE_SET_AR(LC);
+	CASE_SET_AR(EC);
+
+	CASE_SET_CR(DCR);
+	CASE_SET_CR(ITM);
+	CASE_SET_CR(IVA);
+	CASE_SET_CR(PTA);
+	CASE_SET_CR(IPSR);
+	CASE_SET_CR(ISR);
+	CASE_SET_CR(IIP);
+	CASE_SET_CR(IFA);
+	CASE_SET_CR(ITIR);
+	CASE_SET_CR(IIPA);
+	CASE_SET_CR(IFS);
+	CASE_SET_CR(IIM);
+	CASE_SET_CR(IHA);
+	CASE_SET_CR(LID);
+	CASE_SET_CR(IVR);
+	CASE_SET_CR(TPR);
+	CASE_SET_CR(EOI);
+	CASE_SET_CR(IRR0);
+	CASE_SET_CR(IRR1);
+	CASE_SET_CR(IRR2);
+	CASE_SET_CR(IRR3);
+	CASE_SET_CR(ITV);
+	CASE_SET_CR(PMV);
+	CASE_SET_CR(CMCV);
+	CASE_SET_CR(LRR0);
+	CASE_SET_CR(LRR1);
+	default:
+		printk(KERN_CRIT "wrong setreg %d\n", regnum);
+		break;
+	}
+}
+
+struct pv_cpu_ops pv_cpu_ops = {
+	.fc		= ia64_native_fc_func,
+	.thash		= ia64_native_thash_func,
+	.get_cpuid	= ia64_native_get_cpuid_func,
+	.get_pmd	= ia64_native_get_pmd_func,
+	.ptcga		= ia64_native_ptcga_func,
+	.get_rr		= ia64_native_get_rr_func,
+	.set_rr		= ia64_native_set_rr_func,
+	.set_rr0_to_rr4	= ia64_native_set_rr0_to_rr4_func,
+	.ssm_i		= ia64_native_ssm_i_func,
+	.getreg		= ia64_native_getreg_func,
+	.setreg		= ia64_native_setreg_func,
+	.rsm_i		= ia64_native_rsm_i_func,
+	.get_psr_i	= ia64_native_get_psr_i_func,
+	.intrin_local_irq_restore
+			= ia64_native_intrin_local_irq_restore_func,
+};
+EXPORT_SYMBOL(pv_cpu_ops);
-- 
cgit v1.2.3


From 1e39d80a5957eab9dfdd7490d5c5cee272c34aa7 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:35 +0900
Subject: [IA64] pvops: preparation for paravirtualization of hand written
 assembly code.

Preparation for paravirtualization of hand written assembly code.
The files are paravirtualized from a single source and compiled multiple
times, once per target. To tell those files which target they are being
built for (including native), add a define.

Cc: "Dong, Eddie" <eddie.dong@intel.com>
Cc: Keith Owens <kaos@ocs.com.au>
Cc: tgingold@free.fr
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/Makefile | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 10a4ddb5b27..8b2524293eb 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -72,3 +72,12 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
 # We must build gate.so before we can assemble it.
 # Note: kbuild does not track this dependency due to usage of .incbin
 $(obj)/gate-data.o: $(obj)/gate.so
+
+#
+# native ivt.S and entry.S
+#
+ASM_PARAVIRT_OBJS = ivt.o entry.o
+define paravirtualized_native
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+endef
+$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
-- 
cgit v1.2.3


From 02e32e36f42f8ea7ee6060d02f2d69ad5bad6d50 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:37 +0900
Subject: [IA64] pvops: paravirtualize minstate.h.

Paravirtualize minstate.h, which is hand written assembly code.
It includes sensitive or performance critical privileged
instructions, so it is appropriate for paravirtualization.

Cc: Keith Owens <kaos@ocs.com.au>
Cc: Akio Takebe <takebe_akio@jp.fujitsu.com>
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/minstate.h      | 13 +++++++------
 arch/ia64/kernel/paravirt_inst.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 6 deletions(-)
 create mode 100644 arch/ia64/kernel/paravirt_inst.h

(limited to 'arch')

diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 74b6d670aae..292e214a3b8 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -2,6 +2,7 @@
 #include <asm/cache.h>
 
 #include "entry.h"
+#include "paravirt_inst.h"
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /* read ar.itc in advance, and use it before leaving bank 0 */
@@ -43,16 +44,16 @@
  * Note that psr.ic is NOT turned on by this macro.  This is so that
  * we can pass interruption state as arguments to a handler.
  */
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND)						\
+#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND)				\
 	mov r16=IA64_KR(CURRENT);	/* M */							\
 	mov r27=ar.rsc;			/* M */							\
 	mov r20=r1;			/* A */							\
 	mov r25=ar.unat;		/* M */							\
-	mov r29=cr.ipsr;		/* M */							\
+	MOV_FROM_IPSR(p0,r29);		/* M */							\
 	mov r26=ar.pfs;			/* I */							\
-	mov r28=cr.iip;			/* M */							\
+	MOV_FROM_IIP(r28);			/* M */						\
 	mov r21=ar.fpsr;		/* M */							\
-	COVER;				/* B;; (or nothing) */					\
+	__COVER;				/* B;; (or nothing) */				\
 	;;											\
 	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
 	;;											\
@@ -244,6 +245,6 @@
 1:						\
 	.pred.rel "mutex", pKStk, pUStk
 
-#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND)
-#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
 #define SAVE_MIN			DO_SAVE_MIN(     , mov r30=r0, , )
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
new file mode 100644
index 00000000000..5cad6fb2ed1
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirt_inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN
+#include <asm/xen/inst.h>
+#include <asm/xen/minstate.h>
+#else
+#include <asm/native/inst.h>
+#endif
+
-- 
cgit v1.2.3


From 498c5170472ff0c03a29d22dbd33225a0be038f4 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:38 +0900
Subject: [IA64] pvops: paravirtualize ivt.S

Paravirtualize ivt.S, which implements the fault handlers in hand written
assembly code.
It includes sensitive or performance critical privileged instructions,
so it needs paravirtualization.

Cc: Keith Owens <kaos@ocs.com.au>
Cc: tgingold@free.fr
Cc: Akio Takebe <takebe_akio@jp.fujitsu.com>
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/ivt.S | 249 ++++++++++++++++++++++++-------------------------
 1 file changed, 122 insertions(+), 127 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 80b44ea052d..23749ed3cf0 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -12,6 +12,14 @@
  *
  * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
  * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *      Xen paravirtualization
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ *      Yaozu (Eddie) Dong <eddie.dong@intel.com>
  */
 /*
  * This file defines the interruption vector table used by the CPU.
@@ -102,13 +110,13 @@ ENTRY(vhpt_miss)
 	 *	- the faulting virtual address uses unimplemented address bits
 	 *	- the faulting virtual address has no valid page table mapping
 	 */
-	mov r16=cr.ifa				// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)			// get address that caused the TLB miss
 #ifdef CONFIG_HUGETLB_PAGE
 	movl r18=PAGE_SHIFT
-	mov r25=cr.itir
+	MOV_FROM_ITIR(r25)
 #endif
 	;;
-	rsm psr.dt				// use physical addressing for data
+	RSM_PSR_DT				// use physical addressing for data
 	mov r31=pr				// save the predicate registers
 	mov r19=IA64_KR(PT_BASE)		// get page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
@@ -168,21 +176,21 @@ ENTRY(vhpt_miss)
 	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
 	;;
 (p7)	ld8 r18=[r21]				// read *pte
-	mov r19=cr.isr				// cr.isr bit 32 tells us if this is an insn miss
+	MOV_FROM_ISR(r19)			// cr.isr bit 32 tells us if this is an insn miss
 	;;
 (p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
-	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
+	MOV_FROM_IHA(r22)			// get the VHPT address that caused the TLB miss
 	;;					// avoid RAW on p7
 (p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
 	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
 	;;
-(p10)	itc.i r18				// insert the instruction TLB entry
-(p11)	itc.d r18				// insert the data TLB entry
+	ITC_I_AND_D(p10, p11, r18, r24)		// insert the instruction TLB entry and
+						// insert the data TLB entry
 (p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
-	mov cr.ifa=r22
+	MOV_TO_IFA(r22, r24)
 
 #ifdef CONFIG_HUGETLB_PAGE
-(p8)	mov cr.itir=r25				// change to default page-size for VHPT
+	MOV_TO_ITIR(p8, r25, r24)		// change to default page-size for VHPT
 #endif
 
 	/*
@@ -192,7 +200,7 @@ ENTRY(vhpt_miss)
 	 */
 	adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
 	;;
-(p7)	itc.d r24
+	ITC_D(p7, r24, r25)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -234,7 +242,7 @@ ENTRY(vhpt_miss)
 #endif
 
 	mov pr=r31,-1				// restore predicate registers
-	rfi
+	RFI
 END(vhpt_miss)
 
 	.org ia64_ivt+0x400
@@ -248,11 +256,11 @@ ENTRY(itlb_miss)
 	 * mode, walk the page table, and then re-execute the PTE read and
 	 * go on normally after that.
 	 */
-	mov r16=cr.ifa				// get virtual address
+	MOV_FROM_IFA(r16)			// get virtual address
 	mov r29=b0				// save b0
 	mov r31=pr				// save predicates
 .itlb_fault:
-	mov r17=cr.iha				// get virtual address of PTE
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
 	movl r30=1f				// load nested fault continuation point
 	;;
 1:	ld8 r18=[r17]				// read *pte
@@ -261,7 +269,7 @@ ENTRY(itlb_miss)
 	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
 (p6)	br.cond.spnt page_fault
 	;;
-	itc.i r18
+	ITC_I(p0, r18, r19)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -278,7 +286,7 @@ ENTRY(itlb_miss)
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
-	rfi
+	RFI
 END(itlb_miss)
 
 	.org ia64_ivt+0x0800
@@ -292,11 +300,11 @@ ENTRY(dtlb_miss)
 	 * mode, walk the page table, and then re-execute the PTE read and
 	 * go on normally after that.
 	 */
-	mov r16=cr.ifa				// get virtual address
+	MOV_FROM_IFA(r16)			// get virtual address
 	mov r29=b0				// save b0
 	mov r31=pr				// save predicates
 dtlb_fault:
-	mov r17=cr.iha				// get virtual address of PTE
+	MOV_FROM_IHA(r17)			// get virtual address of PTE
 	movl r30=1f				// load nested fault continuation point
 	;;
 1:	ld8 r18=[r17]				// read *pte
@@ -305,7 +313,7 @@ dtlb_fault:
 	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
 (p6)	br.cond.spnt page_fault
 	;;
-	itc.d r18
+	ITC_D(p0, r18, r19)
 	;;
 #ifdef CONFIG_SMP
 	/*
@@ -322,7 +330,7 @@ dtlb_fault:
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
-	rfi
+	RFI
 END(dtlb_miss)
 
 	.org ia64_ivt+0x0c00
@@ -330,9 +338,9 @@ END(dtlb_miss)
 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
 ENTRY(alt_itlb_miss)
 	DBG_FAULT(3)
-	mov r16=cr.ifa		// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
 	movl r17=PAGE_KERNEL
-	mov r21=cr.ipsr
+	MOV_FROM_IPSR(p0, r21)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
 	mov r31=pr
 	;;
@@ -341,9 +349,9 @@ ENTRY(alt_itlb_miss)
 	;;
 	cmp.gt p8,p0=6,r22			// user mode
 	;;
-(p8)	thash r17=r16
+	THASH(p8, r17, r16, r23)
 	;;
-(p8)	mov cr.iha=r17
+	MOV_TO_IHA(p8, r17, r23)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk .itlb_fault
 #endif
@@ -358,9 +366,9 @@ ENTRY(alt_itlb_miss)
 	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
 (p8)	br.cond.spnt page_fault
 	;;
-	itc.i r19		// insert the TLB entry
+	ITC_I(p0, r19, r18)	// insert the TLB entry
 	mov pr=r31,-1
-	rfi
+	RFI
 END(alt_itlb_miss)
 
 	.org ia64_ivt+0x1000
@@ -368,11 +376,11 @@ END(alt_itlb_miss)
 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
 ENTRY(alt_dtlb_miss)
 	DBG_FAULT(4)
-	mov r16=cr.ifa		// get address that caused the TLB miss
+	MOV_FROM_IFA(r16)	// get address that caused the TLB miss
 	movl r17=PAGE_KERNEL
-	mov r20=cr.isr
+	MOV_FROM_ISR(r20)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r21=cr.ipsr
+	MOV_FROM_IPSR(p0, r21)
 	mov r31=pr
 	mov r24=PERCPU_ADDR
 	;;
@@ -381,9 +389,9 @@ ENTRY(alt_dtlb_miss)
 	;;
 	cmp.gt p8,p0=6,r22			// access to region 0-5
 	;;
-(p8)	thash r17=r16
+	THASH(p8, r17, r16, r25)
 	;;
-(p8)	mov cr.iha=r17
+	MOV_TO_IHA(p8, r17, r25)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk dtlb_fault
 #endif
@@ -402,7 +410,7 @@ ENTRY(alt_dtlb_miss)
 	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
 	;;
 (p10)	sub r19=r19,r26
-(p10)	mov cr.itir=r25
+	MOV_TO_ITIR(p10, r25, r24)
 	cmp.ne p8,p0=r0,r23
 (p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
 (p12)	dep r17=-1,r17,4,1			// set ma=UC for region 6 addr
@@ -411,11 +419,11 @@ ENTRY(alt_dtlb_miss)
 	dep r21=-1,r21,IA64_PSR_ED_BIT,1
 	;;
 	or r19=r19,r17		// insert PTE control bits into r19
-(p6)	mov cr.ipsr=r21
+	MOV_TO_IPSR(p6, r21, r24)
 	;;
-(p7)	itc.d r19		// insert the TLB entry
+	ITC_D(p7, r19, r18)	// insert the TLB entry
 	mov pr=r31,-1
-	rfi
+	RFI
 END(alt_dtlb_miss)
 
 	.org ia64_ivt+0x1400
@@ -444,10 +452,10 @@ ENTRY(nested_dtlb_miss)
 	 *
 	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
 	 */
-	rsm psr.dt				// switch to using physical data addressing
+	RSM_PSR_DT				// switch to using physical data addressing
 	mov r19=IA64_KR(PT_BASE)		// get the page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
-	mov r18=cr.itir
+	MOV_FROM_ITIR(r18)
 	;;
 	shr.u r17=r16,61			// get the region number into r17
 	extr.u r18=r18,2,6			// get the faulting page size
@@ -510,21 +518,15 @@ END(ikey_miss)
 	//-----------------------------------------------------------------------------------
 	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
 ENTRY(page_fault)
-	ssm psr.dt
-	;;
-	srlz.i
+	SSM_PSR_DT_AND_SRLZ_I
 	;;
 	SAVE_MIN_WITH_COVER
 	alloc r15=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=cr.isr
+	MOV_FROM_IFA(out0)
+	MOV_FROM_ISR(out1)
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
 	adds r3=8,r2				// set up second base pointer
-	;;
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collectin is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
+	SSM_PSR_I(p15, p15, r14)		// restore psr.i
 	movl r14=ia64_leave_kernel
 	;;
 	SAVE_REST
@@ -556,10 +558,10 @@ ENTRY(dirty_bit)
 	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
 	 * up the physical address of the L3 PTE and then continue at label 1 below.
 	 */
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r29=b0				// save b0 in case of nested fault
 	mov r31=pr				// save pr
 #ifdef CONFIG_SMP
@@ -576,7 +578,7 @@ ENTRY(dirty_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
 	;;
-(p6)	itc.d r25				// install updated PTE
+	ITC_D(p6, r25, r18)			// install updated PTE
 	;;
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -602,7 +604,7 @@ ENTRY(dirty_bit)
 	itc.d r18				// install updated PTE
 #endif
 	mov pr=r31,-1				// restore pr
-	rfi
+	RFI
 END(dirty_bit)
 
 	.org ia64_ivt+0x2400
@@ -611,22 +613,22 @@ END(dirty_bit)
 ENTRY(iaccess_bit)
 	DBG_FAULT(9)
 	// Like Entry 8, except for instruction access
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	mov r31=pr				// save predicates
 #ifdef CONFIG_ITANIUM
 	/*
 	 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
 	 */
-	mov r17=cr.ipsr
+	MOV_FROM_IPSR(p0, r17)
 	;;
-	mov r18=cr.iip
+	MOV_FROM_IIP(r18)
 	tbit.z p6,p0=r17,IA64_PSR_IS_BIT	// IA64 instruction set?
 	;;
 (p6)	mov r16=r18				// if so, use cr.iip instead of cr.ifa
 #endif /* CONFIG_ITANIUM */
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r29=b0				// save b0 in case of nested fault)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
@@ -642,7 +644,7 @@ ENTRY(iaccess_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only if page present
 	;;
-(p6)	itc.i r25				// install updated PTE
+	ITC_I(p6, r25, r26)			// install updated PTE
 	;;
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -668,7 +670,7 @@ ENTRY(iaccess_bit)
 	itc.i r18				// install updated PTE
 #endif /* !CONFIG_SMP */
 	mov pr=r31,-1
-	rfi
+	RFI
 END(iaccess_bit)
 
 	.org ia64_ivt+0x2800
@@ -677,10 +679,10 @@ END(iaccess_bit)
 ENTRY(daccess_bit)
 	DBG_FAULT(10)
 	// Like Entry 8, except for data access
-	mov r16=cr.ifa				// get the address that caused the fault
+	MOV_FROM_IFA(r16)			// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
-	thash r17=r16				// compute virtual address of L3 PTE
+	THASH(p0, r17, r16, r18)		// compute virtual address of L3 PTE
 	mov r31=pr
 	mov r29=b0				// save b0 in case of nested fault)
 #ifdef CONFIG_SMP
@@ -697,7 +699,7 @@ ENTRY(daccess_bit)
 	;;
 (p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
 	;;
-(p6)	itc.d r25				// install updated PTE
+	ITC_D(p6, r25, r26)			// install updated PTE
 	/*
 	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
 	 * cannot possibly affect the following loads:
@@ -721,7 +723,7 @@ ENTRY(daccess_bit)
 #endif
 	mov b0=r29				// restore b0
 	mov pr=r31,-1
-	rfi
+	RFI
 END(daccess_bit)
 
 	.org ia64_ivt+0x2c00
@@ -745,10 +747,10 @@ ENTRY(break_fault)
 	 */
 	DBG_FAULT(11)
 	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
-	mov r29=cr.ipsr				// M2 (12 cyc)
+	MOV_FROM_IPSR(p0, r29)			// M2 (12 cyc)
 	mov r31=pr				// I0 (2 cyc)
 
-	mov r17=cr.iim				// M2 (2 cyc)
+	MOV_FROM_IIM(r17)			// M2 (2 cyc)
 	mov.m r27=ar.rsc			// M2 (12 cyc)
 	mov r18=__IA64_BREAK_SYSCALL		// A
 
@@ -767,7 +769,7 @@ ENTRY(break_fault)
 	nop.m 0
 	movl r30=sys_call_table			// X
 
-	mov r28=cr.iip				// M2 (2 cyc)
+	MOV_FROM_IIP(r28)			// M2 (2 cyc)
 	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
 (p7)	br.cond.spnt non_syscall		// B  no ->
 	//
@@ -864,18 +866,17 @@ ENTRY(break_fault)
 #endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
-	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
+	BSW_1(r2, r14)				// B (6 cyc) regs are saved, switch to bank 1
 	;;
 
-	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16)	// M2	now it's safe to re-enable intr.-collection
+						// M0   ensure interruption collection is on
 	movl r3=ia64_ret_from_syscall		// X
 	;;
-
-	srlz.i					// M0   ensure interruption collection is on
 	mov rp=r3				// I0   set the real return addr
 (p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
 
-(p15)	ssm psr.i				// M2   restore psr.i
+	SSM_PSR_I(p15, p15, r16)		// M2   restore psr.i
 (p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handker (ignore return addr)
 	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
 	// NOT REACHED
@@ -899,16 +900,15 @@ ENTRY(interrupt)
 	mov r31=pr		// prepare to save predicates
 	;;
 	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
+				// ensure everybody knows psr.ic is back on
 	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
 	;;
 	SAVE_REST
 	;;
 	MCA_RECOVER_RANGE(interrupt)
 	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-	mov out0=cr.ivr		// pass cr.ivr as first arg
+	MOV_FROM_IVR(out0, r8)	// pass cr.ivr as first arg
 	add out1=16,sp		// pass pointer to pt_regs as second arg
 	;;
 	srlz.d			// make sure we see the effect of cr.ivr
@@ -978,6 +978,7 @@ END(interrupt)
 	 *	- ar.fpsr: set to kernel settings
 	 *	-  b6: preserved (same as on entry)
 	 */
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 GLOBAL_ENTRY(ia64_syscall_setup)
 #if PT(B6) != 0
 # error This code assumes that b6 is the first field in pt_regs.
@@ -1069,6 +1070,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
 (p10)	mov r8=-EINVAL
 	br.ret.sptk.many b7
 END(ia64_syscall_setup)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
 
 	.org ia64_ivt+0x3c00
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -1082,7 +1084,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.
@@ -1092,7 +1094,7 @@ END(ia64_syscall_setup)
 	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
 	 * enabled and if the macro is entered from user mode.
 	 */
-ENTRY(account_sys_enter)
+GLOBAL_ENTRY(account_sys_enter)
 	// mov.m r20=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
@@ -1134,15 +1136,13 @@ ENTRY(non_syscall)
 	// suitable spot...
 
 	alloc r14=ar.pfs,0,0,2,0
-	mov out0=cr.iim
+	MOV_FROM_IIM(out0)
 	add out1=16,sp
 	adds r3=8,r2			// set up second base pointer for SAVE_REST
 
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i				// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i			// restore psr.i
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
+					// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r15)	// restore psr.i
 	movl r15=ia64_leave_kernel
 	;;
 	SAVE_REST
@@ -1168,14 +1168,12 @@ ENTRY(dispatch_unaligned_handler)
 	SAVE_MIN_WITH_COVER
 	;;
 	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
-	mov out0=cr.ifa
+	MOV_FROM_IFA(out0)
 	adds out1=16,sp
 
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+						// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
 	adds r3=8,r2				// set up second base pointer
 	;;
 	SAVE_REST
@@ -1207,17 +1205,16 @@ ENTRY(dispatch_to_fault_handler)
 	 */
 	SAVE_MIN_WITH_COVER_R19
 	alloc r14=ar.pfs,0,0,5,0
-	mov out0=r15
-	mov out1=cr.isr
-	mov out2=cr.ifa
-	mov out3=cr.iim
-	mov out4=cr.itir
+	MOV_FROM_ISR(out1)
+	MOV_FROM_IFA(out2)
+	MOV_FROM_IIM(out3)
+	MOV_FROM_ITIR(out4)
 	;;
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
+						// guarantee that interruption collection is on
+	mov out0=r15
 	;;
-(p15)	ssm psr.i				// restore psr.i
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
 	adds r3=8,r2				// set up second base pointer for SAVE_REST
 	;;
 	SAVE_REST
@@ -1236,8 +1233,8 @@ END(dispatch_to_fault_handler)
 // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
 ENTRY(page_not_present)
 	DBG_FAULT(20)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	/*
 	 * The Linux page fault handler doesn't expect non-present pages to be in
 	 * the TLB.  Flush the existing entry now, so we meet that expectation.
@@ -1256,8 +1253,8 @@ END(page_not_present)
 // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
 ENTRY(key_permission)
 	DBG_FAULT(21)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1269,8 +1266,8 @@ END(key_permission)
 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
 ENTRY(iaccess_rights)
 	DBG_FAULT(22)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1282,8 +1279,8 @@ END(iaccess_rights)
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 	DBG_FAULT(23)
-	mov r16=cr.ifa
-	rsm psr.dt
+	MOV_FROM_IFA(r16)
+	RSM_PSR_DT
 	mov r31=pr
 	;;
 	srlz.d
@@ -1295,7 +1292,7 @@ END(daccess_rights)
 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
 ENTRY(general_exception)
 	DBG_FAULT(24)
-	mov r16=cr.isr
+	MOV_FROM_ISR(r16)
 	mov r31=pr
 	;;
 	cmp4.eq p6,p0=0,r16
@@ -1324,8 +1321,8 @@ END(disabled_fp_reg)
 ENTRY(nat_consumption)
 	DBG_FAULT(26)
 
-	mov r16=cr.ipsr
-	mov r17=cr.isr
+	MOV_FROM_IPSR(p0, r16)
+	MOV_FROM_ISR(r17)
 	mov r31=pr				// save PR
 	;;
 	and r18=0xf,r17				// r18 = cr.ipsr.code{3:0}
@@ -1335,10 +1332,10 @@ ENTRY(nat_consumption)
 	dep r16=-1,r16,IA64_PSR_ED_BIT,1
 (p6)	br.cond.spnt 1f		// branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
 	;;
-	mov cr.ipsr=r16		// set cr.ipsr.na
+	MOV_TO_IPSR(p0, r16, r18)
 	mov pr=r31,-1
 	;;
-	rfi
+	RFI
 
 1:	mov pr=r31,-1
 	;;
@@ -1360,26 +1357,26 @@ ENTRY(speculation_vector)
 	 *
 	 * cr.imm contains zero_ext(imm21)
 	 */
-	mov r18=cr.iim
+	MOV_FROM_IIM(r18)
 	;;
-	mov r17=cr.iip
+	MOV_FROM_IIP(r17)
 	shl r18=r18,43			// put sign bit in position (43=64-21)
 	;;
 
-	mov r16=cr.ipsr
+	MOV_FROM_IPSR(p0, r16)
 	shr r18=r18,39			// sign extend (39=43-4)
 	;;
 
 	add r17=r17,r18			// now add the offset
 	;;
-	mov cr.iip=r17
+	MOV_TO_IIP(r17, r19)		// write adjusted address back to cr.iip
 	dep r16=0,r16,41,2		// clear EI
 	;;
 
-	mov cr.ipsr=r16
+	MOV_TO_IPSR(p0, r16, r19)	// write cr.ipsr back with EI cleared
 	;;
 
-	rfi				// and go back
+	RFI
 END(speculation_vector)
 
 	.org ia64_ivt+0x5800
@@ -1517,11 +1514,11 @@ ENTRY(ia32_intercept)
 	DBG_FAULT(46)
 #ifdef	CONFIG_IA32_SUPPORT
 	mov r31=pr
-	mov r16=cr.isr
+	MOV_FROM_ISR(r16)
 	;;
 	extr.u r17=r16,16,8	// get ISR.code
 	mov r18=ar.eflag
-	mov r19=cr.iim		// old eflag value
+	MOV_FROM_IIM(r19)	// old eflag value
 	;;
 	cmp.ne p6,p0=2,r17
 (p6)	br.cond.spnt 1f		// not a system flag fault
@@ -1533,7 +1530,7 @@ ENTRY(ia32_intercept)
 (p6)	br.cond.spnt 1f		// eflags.ac bit didn't change
 	;;
 	mov pr=r31,-1		// restore predicate registers
-	rfi
+	RFI
 
 1:
 #endif	// CONFIG_IA32_SUPPORT
@@ -1686,11 +1683,10 @@ ENTRY(dispatch_illegal_op_fault)
 	.prologue
 	.body
 	SAVE_MIN_WITH_COVER
-	ssm psr.ic | PSR_DEFAULT_BITS
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+				// guarantee that interruption collection is on
 	;;
-	srlz.i		// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i	// restore psr.i
+	SSM_PSR_I(p15, p15, r3)	// restore psr.i
 	adds r3=8,r2	// set up second base pointer for SAVE_REST
 	;;
 	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
@@ -1729,12 +1725,11 @@ END(dispatch_illegal_op_fault)
 ENTRY(dispatch_to_ia32_handler)
 	SAVE_MIN
 	;;
-	mov r14=cr.isr
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
+	MOV_FROM_ISR(r14)
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+				// guarantee that interruption collection is on
 	;;
-(p15)	ssm psr.i
+	SSM_PSR_I(p15, p15, r3)
 	adds r3=8,r2		// Base pointer for SAVE_REST
 	;;
 	SAVE_REST
-- 
cgit v1.2.3


From 4df8d22bbbb16ccfa4e10cc068135183c9e5e006 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Tue, 27 May 2008 15:08:01 -0700
Subject: [IA64] pvops: paravirtualize entry.S

Paravirtualize ia64_switch_to, ia64_leave_syscall and ia64_leave_kernel.
They contain sensitive or performance-critical privileged instructions,
so they need paravirtualization.
To paravirtualize them from a single source that is compiled multiple times,
they are converted into indirect jumps, and each pv instance defines its own.

Cc: Keith Owens <kaos@ocs.com.au>
Cc: "Dong, Eddie" <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/Makefile        |   2 +-
 arch/ia64/kernel/entry.S         | 115 ++++++++++++++++++++++++---------------
 arch/ia64/kernel/paravirt.c      |  19 +++++++
 arch/ia64/kernel/paravirtentry.S |  60 ++++++++++++++++++++
 4 files changed, 152 insertions(+), 44 deletions(-)
 create mode 100644 arch/ia64/kernel/paravirtentry.S

(limited to 'arch')

diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 8b2524293eb..cea91f17d44 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
 
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o
 
 obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index ca2bb95726d..56ab156c48a 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -22,6 +22,11 @@
  * Patrick O'Rourke	<orourke@missioncriticallinux.com>
  * 11/07/2000
  */
+/*
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *                    pv_ops.
+ */
 /*
  * Global (preserved) predicate usage on syscall entry/exit path:
  *
@@ -45,6 +50,7 @@
 
 #include "minstate.h"
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 	/*
 	 * execve() is special because in case of success, we need to
 	 * setup a null register window frame.
@@ -173,6 +179,7 @@ GLOBAL_ENTRY(sys_clone)
 	mov rp=loc0
 	br.ret.sptk.many rp
 END(sys_clone)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
 
 /*
  * prev_task <- ia64_switch_to(struct task_struct *next)
@@ -180,7 +187,7 @@ END(sys_clone)
  *	called.  The code starting at .map relies on this.  The rest of the code
  *	doesn't care about the interrupt masking status.
  */
-GLOBAL_ENTRY(ia64_switch_to)
+GLOBAL_ENTRY(__paravirt_switch_to)
 	.prologue
 	alloc r16=ar.pfs,1,0,0,0
 	DO_SAVE_SWITCH_STACK
@@ -204,7 +211,7 @@ GLOBAL_ENTRY(ia64_switch_to)
 	;;
 .done:
 	ld8 sp=[r21]			// load kernel stack pointer of new task
-	mov IA64_KR(CURRENT)=in0	// update "current" application register
+	MOV_TO_KR(CURRENT, in0, r8, r9)		// update "current" application register
 	mov r8=r13			// return pointer to previously running task
 	mov r13=in0			// set "current" pointer
 	;;
@@ -216,26 +223,25 @@ GLOBAL_ENTRY(ia64_switch_to)
 	br.ret.sptk.many rp		// boogie on out in new context
 
 .map:
-	rsm psr.ic			// interrupts (psr.i) are already disabled here
+	RSM_PSR_IC(r25)			// interrupts (psr.i) are already disabled here
 	movl r25=PAGE_KERNEL
 	;;
 	srlz.d
 	or r23=r25,r20			// construct PA | page properties
 	mov r25=IA64_GRANULE_SHIFT<<2
 	;;
-	mov cr.itir=r25
-	mov cr.ifa=in0			// VA of next task...
+	MOV_TO_ITIR(p0, r25, r8)
+	MOV_TO_IFA(in0, r8)		// VA of next task...
 	;;
 	mov r25=IA64_TR_CURRENT_STACK
-	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
+	MOV_TO_KR(CURRENT_STACK, r26, r8, r9)	// remember last page we mapped...
 	;;
 	itr.d dtr[r25]=r23		// wire in new mapping...
-	ssm psr.ic			// reenable the psr.ic bit
-	;;
-	srlz.d
+	SSM_PSR_IC_AND_SRLZ_D(r8, r9)	// reenable the psr.ic bit
 	br.cond.sptk .done
-END(ia64_switch_to)
+END(__paravirt_switch_to)
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 /*
  * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
  * means that we may get an interrupt with "sp" pointing to the new kernel stack while
@@ -375,7 +381,7 @@ END(save_switch_stack)
  *	- b7 holds address to return to
  *	- must not touch r8-r11
  */
-ENTRY(load_switch_stack)
+GLOBAL_ENTRY(load_switch_stack)
 	.prologue
 	.altrp b7
 
@@ -571,7 +577,7 @@ GLOBAL_ENTRY(ia64_trace_syscall)
 .ret3:
 (pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
 (pUStk)	rsm psr.i				// disable interrupts
-	br.cond.sptk .work_pending_syscall_end
+	br.cond.sptk ia64_work_pending_syscall_end
 
 strace_error:
 	ld8 r3=[r2]				// load pt_regs.r8
@@ -636,8 +642,17 @@ GLOBAL_ENTRY(ia64_ret_from_syscall)
 	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
 	mov r10=r0				// clear error indication in r10
 (p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
+#ifdef CONFIG_PARAVIRT
+	;;
+	br.cond.sptk.few ia64_leave_syscall
+	;;
+#endif /* CONFIG_PARAVIRT */
 END(ia64_ret_from_syscall)
+#ifndef CONFIG_PARAVIRT
 	// fall through
+#endif
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
 /*
  * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
  *	need to switch to bank 0 and doesn't restore the scratch registers.
@@ -682,7 +697,7 @@ END(ia64_ret_from_syscall)
  *	      ar.csd: cleared
  *	      ar.ssd: cleared
  */
-ENTRY(ia64_leave_syscall)
+GLOBAL_ENTRY(__paravirt_leave_syscall)
 	PT_REGS_UNWIND_INFO(0)
 	/*
 	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -692,11 +707,11 @@ ENTRY(ia64_leave_syscall)
 	 * extra work.  We always check for extra work when returning to user-level.
 	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
 	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
 	 * needs to be redone.
 	 */
 #ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
+	RSM_PSR_I(p0, r2, r18)			// disable interrupts
 	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
 (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 	;;
@@ -706,11 +721,12 @@ ENTRY(ia64_leave_syscall)
 	;;
 	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 #else /* !CONFIG_PREEMPT */
-(pUStk)	rsm psr.i
+	RSM_PSR_I(pUStk, r2, r18)
 	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
-.work_processed_syscall:
+.global __paravirt_work_processed_syscall;
+__paravirt_work_processed_syscall:
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	adds r2=PT(LOADRS)+16,r12
 (pUStk)	mov.m r22=ar.itc			// fetch time at leave
@@ -744,7 +760,7 @@ ENTRY(ia64_leave_syscall)
 (pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
 	;;
 	invala			// M0|1 invalidate ALAT
-	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
+	RSM_PSR_I_IC(r28, r29, r30)	// M2   turn off interrupts and interruption collection
 	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
@@ -765,7 +781,7 @@ ENTRY(ia64_leave_syscall)
 	;;
 #endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
+	MOV_FROM_PSR(pKStk, r22, r21)	// M2   read PSR now that interrupts are disabled
 	nop 0
 	;;
 	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
@@ -798,7 +814,7 @@ ENTRY(ia64_leave_syscall)
 
 	srlz.d				// M0   ensure interruption collection is off (for cover)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
-	cover				// B    add current frame into dirty partition & set cr.ifs
+	COVER				// B    add current frame into dirty partition & set cr.ifs
 	;;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	mov r19=ar.bsp			// M2   get new backing store pointer
@@ -823,8 +839,9 @@ ENTRY(ia64_leave_syscall)
 	mov.m ar.ssd=r0			// M2   clear ar.ssd
 	mov f11=f0			// F    clear f11
 	br.cond.sptk.many rbs_switch	// B
-END(ia64_leave_syscall)
+END(__paravirt_leave_syscall)
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 #ifdef CONFIG_IA32_SUPPORT
 GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
 	PT_REGS_UNWIND_INFO(0)
@@ -835,10 +852,20 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
 	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
 	.mem.offset 8,0
 	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
+#ifdef CONFIG_PARAVIRT
+	;;
+	// don't fall through, ia64_leave_kernel may be #define'd
+	br.cond.sptk.few ia64_leave_kernel
+	;;
+#endif /* CONFIG_PARAVIRT */
 END(ia64_ret_from_ia32_execve)
+#ifndef CONFIG_PARAVIRT
 	// fall through
+#endif
 #endif /* CONFIG_IA32_SUPPORT */
-GLOBAL_ENTRY(ia64_leave_kernel)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
+GLOBAL_ENTRY(__paravirt_leave_kernel)
 	PT_REGS_UNWIND_INFO(0)
 	/*
 	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -852,7 +879,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	 * needs to be redone.
 	 */
 #ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
+	RSM_PSR_I(p0, r17, r31)			// disable interrupts
 	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
 (pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 	;;
@@ -862,7 +889,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
 #else
-(pUStk)	rsm psr.i
+	RSM_PSR_I(pUStk, r17, r31)
 	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
@@ -910,7 +937,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov ar.csd=r30
 	mov ar.ssd=r31
 	;;
-	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	RSM_PSR_I_IC(r23, r22, r25)	// initiate turning off of interrupt and interruption collection
 	invala			// invalidate ALAT
 	;;
 	ld8.fill r22=[r2],24
@@ -942,7 +969,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov ar.ccv=r15
 	;;
 	ldf.fill f11=[r2]
-	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
+	BSW_0(r2, r3, r15)	// switch back to bank 0 (no stop bit required beforehand...)
 	;;
 (pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
 	adds r16=PT(CR_IPSR)+16,r12
@@ -950,12 +977,12 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	.pred.rel.mutex pUStk,pKStk
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 (pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
 	nop.i 0
 	;;
 #else
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
@@ -1027,7 +1054,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	 * NOTE: alloc, loadrs, and cover can't be predicated.
 	 */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
-	cover				// add current frame into dirty partition and set cr.ifs
+	COVER				// add current frame into dirty partition and set cr.ifs
 	;;
 	mov r19=ar.bsp			// get new backing store pointer
 rbs_switch:
@@ -1130,16 +1157,16 @@ skip_rbs_switch:
 (pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
 (pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
 	;;
-	mov cr.ipsr=r29		// M2
+	MOV_TO_IPSR(p0, r29, r25)	// M2
 	mov ar.pfs=r26		// I0
 (pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
 
-(p9)	mov cr.ifs=r30		// M2
+	MOV_TO_IFS(p9, r30, r25)// M2
 	mov b0=r21		// I0
 (pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
 
 	mov ar.fpsr=r20		// M2
-	mov cr.iip=r28		// M2
+	MOV_TO_IIP(r28, r25)	// M2
 	nop 0
 	;;
 (pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
@@ -1148,7 +1175,7 @@ skip_rbs_switch:
 
 	mov ar.rsc=r27		// M2
 	mov pr=r31,-1		// I0
-	rfi			// B
+	RFI			// B
 
 	/*
 	 * On entry:
@@ -1174,35 +1201,36 @@ skip_rbs_switch:
 	;;
 (pKStk) st4 [r20]=r21
 #endif
-	ssm psr.i		// enable interrupts
+	SSM_PSR_I(p0, p6, r2)	// enable interrupts
 	br.call.spnt.many rp=schedule
 .ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
-	rsm psr.i		// disable interrupts
+	RSM_PSR_I(p0, r2, r20)	// disable interrupts
 	;;
 #ifdef CONFIG_PREEMPT
 (pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
 	;;
 (pKStk)	st4 [r20]=r0		// preempt_count() <- 0
 #endif
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel
 
 .notify:
 (pUStk)	br.call.spnt.many rp=notify_resume_user
 .ret10:	cmp.ne p6,p0=r0,r0	// p6 <- 0 (don't re-check)
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel
 
-.work_pending_syscall_end:
+.global __paravirt_pending_syscall_end;
+__paravirt_pending_syscall_end:
 	adds r2=PT(R8)+16,r12
 	adds r3=PT(R10)+16,r12
 	;;
 	ld8 r8=[r2]
 	ld8 r10=[r3]
-	br.cond.sptk.many .work_processed_syscall
-
-END(ia64_leave_kernel)
+	br.cond.sptk.many __paravirt_work_processed_syscall_target
+END(__paravirt_leave_kernel)
 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
 ENTRY(handle_syscall_error)
 	/*
 	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
@@ -1244,7 +1272,7 @@ END(ia64_invoke_schedule_tail)
 	 * We declare 8 input registers so the system call args get preserved,
 	 * in case we need to restart a system call.
 	 */
-ENTRY(notify_resume_user)
+GLOBAL_ENTRY(notify_resume_user)
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
 	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
 	mov r9=ar.unat
@@ -1306,7 +1334,7 @@ ENTRY(sys_rt_sigreturn)
 	adds sp=16,sp
 	;;
 	ld8 r9=[sp]				// load new ar.unat
-	mov.sptk b7=r8,ia64_leave_kernel
+	mov.sptk b7=r8,ia64_native_leave_kernel
 	;;
 	mov ar.unat=r9
 	br.many b7
@@ -1665,3 +1693,4 @@ sys_call_table:
 	data8 sys_timerfd_gettime
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index e5482bb6841..7126ea8f7ec 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -286,3 +286,22 @@ struct pv_cpu_ops pv_cpu_ops = {
 			= ia64_native_intrin_local_irq_restore_func,
 };
 EXPORT_SYMBOL(pv_cpu_ops);
+
+/******************************************************************************
+ * Replacement of hand-written assembly code: retarget the paravirt_* stubs.
+ */
+
+void
+paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
+{
+	extern unsigned long paravirt_switch_to_targ;	/* *_targ: data8 words emitted by DATA8() in paravirtentry.S */
+	extern unsigned long paravirt_leave_syscall_targ;
+	extern unsigned long paravirt_work_processed_syscall_targ;
+	extern unsigned long paravirt_leave_kernel_targ;
+
+	paravirt_switch_to_targ = cpu_asm_switch->switch_to;	/* each paravirt_<sym> stub loads its *_targ word and branches to it */
+	paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall;
+	paravirt_work_processed_syscall_targ =
+		cpu_asm_switch->work_processed_syscall;
+	paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
+}
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
new file mode 100644
index 00000000000..2f42fcb9776
--- /dev/null
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -0,0 +1,60 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirtentry.S
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/asm-offsets.h>
+#include "entry.h"
+
+#define DATA8(sym, init_value)			\
+	.pushsection .data.read_mostly ;	\
+	.align 8 ;				\
+	.global sym ;				\
+	sym: ;					\
+	data8 init_value ;			\
+	.popsection
+
+#define BRANCH(targ, reg, breg)		\
+	movl reg=targ ;			\
+	;;				\
+	ld8 reg=[reg] ;			\
+	;;				\
+	mov breg=reg ;			\
+	br.cond.sptk.many breg
+
+#define BRANCH_PROC(sym, reg, breg)				\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+	GLOBAL_ENTRY(paravirt_ ## sym) ;			\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg) ;	\
+	END(paravirt_ ## sym)
+
+#define BRANCH_PROC_UNWINFO(sym, reg, breg)			\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+	GLOBAL_ENTRY(paravirt_ ## sym) ;			\
+		PT_REGS_UNWIND_INFO(0) ;			\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg) ;	\
+	END(paravirt_ ## sym)
+
+
+BRANCH_PROC(switch_to, r22, b7)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7)
+BRANCH_PROC(work_processed_syscall, r2, b7)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7)
-- 
cgit v1.2.3


From 213060a4d6991a95d0b9344406d195be3464accf Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:40 +0900
Subject: [IA64] pvops: paravirtualize NR_IRQS

Make NR_IRQS overridable by each pv instance.
Each pv instance may need its own number of irqs, so
NR_IRQS should be the maximum of the irq counts that the
pv instances need.

Cc: Jes Sorensen <jes@sgi.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Makefile         |  6 ++++++
 arch/ia64/kernel/Makefile  | 33 +++++++++++++++++++++++++++++++++
 arch/ia64/kernel/nr-irqs.c | 24 ++++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 arch/ia64/kernel/nr-irqs.c

(limited to 'arch')

diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 88f1a55c6c9..3b9c8cadfd3 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -99,3 +99,9 @@ define archhelp
   echo '  boot		- Build vmlinux and bootloader for Ski simulator'
   echo '* unwcheck	- Check vmlinux for invalid unwind info'
 endef
+
+archprepare: make_nr_irqs_h FORCE
+PHONY += make_nr_irqs_h FORCE
+
+make_nr_irqs_h: FORCE
+	$(Q)$(MAKE) $(build)=arch/ia64/kernel include/asm-ia64/nr-irqs.h
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index cea91f17d44..87fea11aecb 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -73,6 +73,39 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
 # Note: kbuild does not track this dependency due to usage of .incbin
 $(obj)/gate-data.o: $(obj)/gate.so
 
+# Calculate NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+quiet_cmd_nr_irqs = GEN     $@
+define cmd_nr_irqs
+	(set -e; \
+	 echo "#ifndef __ASM_NR_IRQS_H__"; \
+	 echo "#define __ASM_NR_IRQS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal kbuild rules to avoid the "is up to date" message from make
+arch/$(SRCARCH)/kernel/nr-irqs.s: $(srctree)/arch/$(SRCARCH)/kernel/nr-irqs.c \
+				$(wildcard $(srctree)/include/asm-ia64/*/irq.h)
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,cc_s_c)
+
+include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+	$(Q)mkdir -p $(dir $@)
+	$(call cmd,nr_irqs)
+
+clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
+
 #
 # native ivt.S and entry.S
 #
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
new file mode 100644
index 00000000000..1ae049181e8
--- /dev/null
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -0,0 +1,24 @@
+/*
+ * calculate
+ * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
+ * depending on config.
+ * This must be calculated before processing asm-offset.c.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/kbuild.h>
+#include <linux/threads.h>
+#include <asm-ia64/native/irq.h>
+
+void foo(void)
+{
+	union paravirt_nr_irqs_max {	/* char-array members only: sizeof(union) is the largest *_NR_IRQS */
+		char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
+#ifdef CONFIG_XEN
+		char xen_nr_irqs[XEN_NR_IRQS];
+#endif
+	};
+
+	DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));	/* NOTE(review): presumably emits the "->" line that the Makefile's sed-y rule turns into "#define NR_IRQS" - confirm in linux/kbuild.h */
+}
-- 
cgit v1.2.3


From e51835d58a5abdf82211f36f500f666ca7ef9aee Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:41 +0900
Subject: [IA64] pvops: define initialization hooks, pv_init_ops, for
 paravirtualized environment.

Define pv_init_ops, which holds the various initialization hooks
for a paravirtualized environment, and add calls to those hooks.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/paravirt.c |  7 +++++++
 arch/ia64/kernel/setup.c    | 10 ++++++++++
 arch/ia64/kernel/smpboot.c  |  2 ++
 3 files changed, 19 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 7126ea8f7ec..5daf659ff29 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -41,6 +41,13 @@ struct pv_info pv_info = {
 	.name = "bare hardware"
 };
 
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+struct pv_init_ops pv_init_ops;
+
 /***************************************************************************
  * pv_cpu_ops
  * intrinsics hooks.
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f48a809c686..750749551e8 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -51,6 +51,7 @@
 #include <asm/mca.h>
 #include <asm/meminit.h>
 #include <asm/page.h>
+#include <asm/paravirt.h>
 #include <asm/patch.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -341,6 +342,8 @@ reserve_memory (void)
 	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
 	n++;
 
+	n += paravirt_reserve_memory(&rsvd_region[n]);
+
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (ia64_boot_param->initrd_start) {
 		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
@@ -519,6 +522,8 @@ setup_arch (char **cmdline_p)
 {
 	unw_init();
 
+	paravirt_arch_setup_early();
+
 	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
 
 	*cmdline_p = __va(ia64_boot_param->command_line);
@@ -584,6 +589,9 @@ setup_arch (char **cmdline_p)
 	acpi_boot_init();
 #endif
 
+	paravirt_banner();
+	paravirt_arch_setup_console(cmdline_p);
+
 #ifdef CONFIG_VT
 	if (!conswitchp) {
 # if defined(CONFIG_DUMMY_CONSOLE)
@@ -603,6 +611,8 @@ setup_arch (char **cmdline_p)
 #endif
 
 	/* enable IA-64 Machine Check Abort Handling unless disabled */
+	if (paravirt_arch_setup_nomca())
+		nomca = 1;
 	if (!nomca)
 		ia64_mca_init();
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index d7ad42b77d4..933f3881152 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -50,6 +50,7 @@
 #include <asm/machvec.h>
 #include <asm/mca.h>
 #include <asm/page.h>
+#include <asm/paravirt.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -642,6 +643,7 @@ void __devinit smp_prepare_boot_cpu(void)
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callin_map);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	paravirt_post_smp_prepare_boot_cpu();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v1.2.3


From 33b39e84209b0308b572dce017df7ee9b63f086c Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:42 +0900
Subject: [IA64] pvops: add hooks, pv_iosapic_ops, to paravirtualize iosapic.

Add hooks to paravirtualize the iosapic, which is a real hardware resource.
In a virtualized environment it may be replaced by something
virtualization-friendly.
Define pv_iosapic_ops and add the hooks.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/iosapic.c  | 45 +++++++++++++++++++++++++++++----------------
 arch/ia64/kernel/paravirt.c | 25 +++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 082c31dcfd9..587196dd84f 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -587,6 +587,15 @@ static inline int irq_is_shared (int irq)
 	return (iosapic_intr_info[irq].count > 1);
 }
 
+struct irq_chip*
+ia64_native_iosapic_get_irq_chip(unsigned long trigger)
+{
+	if (trigger == IOSAPIC_EDGE)
+		return &irq_type_iosapic_edge;
+	else
+		return &irq_type_iosapic_level;
+}
+
 static int
 register_intr (unsigned int gsi, int irq, unsigned char delivery,
 	       unsigned long polarity, unsigned long trigger)
@@ -637,13 +646,10 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
 	iosapic_intr_info[irq].dmode    = delivery;
 	iosapic_intr_info[irq].trigger  = trigger;
 
-	if (trigger == IOSAPIC_EDGE)
-		irq_type = &irq_type_iosapic_edge;
-	else
-		irq_type = &irq_type_iosapic_level;
+	irq_type = iosapic_get_irq_chip(trigger);
 
 	idesc = irq_desc + irq;
-	if (idesc->chip != irq_type) {
+	if (irq_type != NULL && idesc->chip != irq_type) {
 		if (idesc->chip != &no_irq_type)
 			printk(KERN_WARNING
 			       "%s: changing vector %d from %s to %s\n",
@@ -975,6 +981,22 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
 	set_rte(gsi, irq, dest, 1);
 }
 
+void __init
+ia64_native_iosapic_pcat_compat_init(void)
+{
+	if (pcat_compat) {
+		/*
+		 * Disable the compatibility mode interrupts (8259 style),
+		 * needs IN/OUT support enabled.
+		 */
+		printk(KERN_INFO
+		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
+		       __func__);
+		outb(0xff, 0xA1);
+		outb(0xff, 0x21);
+	}
+}
+
 void __init
 iosapic_system_init (int system_pcat_compat)
 {
@@ -989,17 +1011,8 @@ iosapic_system_init (int system_pcat_compat)
 	}
 
 	pcat_compat = system_pcat_compat;
-	if (pcat_compat) {
-		/*
-		 * Disable the compatibility mode interrupts (8259 style),
-		 * needs IN/OUT support enabled.
-		 */
-		printk(KERN_INFO
-		       "%s: Disabling PC-AT compatible 8259 interrupts\n",
-		       __func__);
-		outb(0xff, 0xA1);
-		outb(0xff, 0x21);
-	}
+	if (pcat_compat)
+		iosapic_pcat_compat_init();
 }
 
 static inline int
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 5daf659ff29..65c211b2f98 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -312,3 +312,28 @@ paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
 		cpu_asm_switch->work_processed_syscall;
 	paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
 }
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+
+static unsigned int
+ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+	return __ia64_native_iosapic_read(iosapic, reg);
+}
+
+static void
+ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+	__ia64_native_iosapic_write(iosapic, reg, val);
+}
+
+struct pv_iosapic_ops pv_iosapic_ops = {
+	.pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
+	.get_irq_chip = ia64_native_iosapic_get_irq_chip,
+
+	.__read = ia64_native_iosapic_read,
+	.__write = ia64_native_iosapic_write,
+};
-- 
cgit v1.2.3


From 85cbc503787d577c215f9540c57294e1ec799144 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:43 +0900
Subject: [IA64] pvops: add hooks, pv_irq_ops, to paravirtualized irq related
 operations.

Introduce pv_irq_ops, which adds hooks to paravirtualize irq related
operations.
In a virtualized environment, interruptions may be replaced by something
virtualization-friendly, so the irq related operations may also need
paravirtualization.
This patch adds the necessary hooks to paravirtualize irq related operations.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/irq_ia64.c | 18 +++++++++++++-----
 arch/ia64/kernel/paravirt.c | 15 +++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index c48171bc796..28d3d483db9 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -196,7 +196,7 @@ static void clear_irq_vector(int irq)
 }
 
 int
-assign_irq_vector (int irq)
+ia64_native_assign_irq_vector (int irq)
 {
 	unsigned long flags;
 	int vector, cpu;
@@ -222,7 +222,7 @@ assign_irq_vector (int irq)
 }
 
 void
-free_irq_vector (int vector)
+ia64_native_free_irq_vector (int vector)
 {
 	if (vector < IA64_FIRST_DEVICE_VECTOR ||
 	    vector > IA64_LAST_DEVICE_VECTOR)
@@ -622,7 +622,7 @@ static struct irqaction tlb_irqaction = {
 #endif
 
 void
-register_percpu_irq (ia64_vector vec, struct irqaction *action)
+ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
 	irq_desc_t *desc;
 	unsigned int irq;
@@ -637,13 +637,21 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action)
 }
 
 void __init
-init_IRQ (void)
+ia64_native_register_ipi(void)
 {
-	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
 #ifdef CONFIG_SMP
 	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
 	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
 	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
+#endif
+}
+
+void __init
+init_IRQ (void)
+{
+	ia64_register_ipi();
+	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
+#ifdef CONFIG_SMP
 #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
 	if (vector_domain_type != VECTOR_DOMAIN_NONE) {
 		BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR);
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 65c211b2f98..ba5383be03c 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -337,3 +337,18 @@ struct pv_iosapic_ops pv_iosapic_ops = {
 	.__read = ia64_native_iosapic_read,
 	.__write = ia64_native_iosapic_write,
 };
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+struct pv_irq_ops pv_irq_ops = {
+	.register_ipi = ia64_native_register_ipi,
+
+	.assign_irq_vector = ia64_native_assign_irq_vector,
+	.free_irq_vector = ia64_native_free_irq_vector,
+	.register_percpu_irq = ia64_native_register_percpu_irq,
+
+	.resend_irq = ia64_native_resend_irq,
+};
-- 
cgit v1.2.3


From 00d21d82b8a9e290286e09d8eedc20bfc33b0eee Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon, 19 May 2008 22:13:44 +0900
Subject: [IA64] pvops: add to hooks, pv_time_ops, for steal time accounting.

Introduce pv_time_ops, which adds a hook for steal time accounting.
In a virtualized environment, cpus are shared by many guests, and
steal time is the time that is used by other guests.
In a virtualized environment, steal time should be accounted.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/paravirt.c | 15 +++++++++++++++
 arch/ia64/kernel/time.c     | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index ba5383be03c..afaf5b9a2cf 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -352,3 +352,18 @@ struct pv_irq_ops pv_irq_ops = {
 
 	.resend_irq = ia64_native_resend_irq,
 };
+
+/***************************************************************************
+ * pv_time_ops
+ * time operations
+ */
+
+static int
+ia64_native_do_steal_accounting(unsigned long *new_itm)
+{
+	return 0;
+}
+
+struct pv_time_ops pv_time_ops = {
+	.do_steal_accounting = ia64_native_do_steal_accounting,
+};
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 8c73643f2d6..046ca89efc0 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -24,6 +24,7 @@
 #include <asm/machvec.h>
 #include <asm/delay.h>
 #include <asm/hw_irq.h>
+#include <asm/paravirt.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
 #include <asm/sections.h>
@@ -48,6 +49,15 @@ EXPORT_SYMBOL(last_cli_ip);
 
 #endif
 
+#ifdef CONFIG_PARAVIRT
+static void
+paravirt_clocksource_resume(void)
+{
+	if (pv_time_ops.clocksource_resume)
+		pv_time_ops.clocksource_resume();
+}
+#endif
+
 static struct clocksource clocksource_itc = {
 	.name           = "itc",
 	.rating         = 350,
@@ -56,6 +66,9 @@ static struct clocksource clocksource_itc = {
 	.mult           = 0, /*to be calculated*/
 	.shift          = 16,
 	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+#ifdef CONFIG_PARAVIRT
+	.resume		= paravirt_clocksource_resume,
+#endif
 };
 static struct clocksource *itc_clocksource;
 
@@ -156,6 +169,9 @@ timer_interrupt (int irq, void *dev_id)
 
 	profile_tick(CPU_PROFILING);
 
+	if (paravirt_do_steal_accounting(&new_itm))
+		goto skip_process_time_accounting;
+
 	while (1) {
 		update_process_times(user_mode(get_irq_regs()));
 
@@ -185,6 +201,8 @@ timer_interrupt (int irq, void *dev_id)
 		local_irq_disable();
 	}
 
+skip_process_time_accounting:
+
 	do {
 		/*
 		 * If we're too close to the next clock tick for
@@ -334,6 +352,11 @@ ia64_init_itm (void)
 		 */
 		clocksource_itc.rating = 50;
 
+	paravirt_init_missing_ticks_accounting(smp_processor_id());
+
+	/* avoid softlock up message when cpu is unplug and plugged again. */
+	touch_softlockup_watchdog();
+
 	/* Setup the CPU local timer tick */
 	ia64_cpu_local_tick();
 
-- 
cgit v1.2.3


From 4d58bbcc89e267d52b4df572acbf209a60a8a497 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Wed, 28 May 2008 09:41:58 -0700
Subject: [IA64] pv_ops: move some functions in ivt.S to avoid lack of space.

Move interrupt, page_fault, non_syscall, dispatch_unaligned_handler and
dispatch_to_fault_handler to avoid running out of instruction space.
Change set 4dcc29e1574d88f4465ba865ed82800032f76418 bloated
SAVE_MIN_WITH_COVER and SAVE_MIN_WITH_COVER_R19, which in turn bloated the
functions that use those macros.
In the native case, only dispatch_illegal_op_fault had to be moved.
In the paravirtualized case, all the functions which use the macros need
to be moved to avoid running out of space.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/ivt.S | 261 +++++++++++++++++++++++++------------------------
 1 file changed, 133 insertions(+), 128 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 23749ed3cf0..c39627df3cd 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -515,27 +515,6 @@ ENTRY(ikey_miss)
 	FAULT(6)
 END(ikey_miss)
 
-	//-----------------------------------------------------------------------------------
-	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
-	SSM_PSR_DT_AND_SRLZ_I
-	;;
-	SAVE_MIN_WITH_COVER
-	alloc r15=ar.pfs,0,0,3,0
-	MOV_FROM_IFA(out0)
-	MOV_FROM_ISR(out1)
-	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
-	adds r3=8,r2				// set up second base pointer
-	SSM_PSR_I(p15, p15, r14)		// restore psr.i
-	movl r14=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r14
-	;;
-	adds out2=16,r12			// out2 = pointer to pt_regs
-	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
-END(page_fault)
-
 	.org ia64_ivt+0x1c00
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
@@ -896,26 +875,8 @@ END(break_fault)
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
 ENTRY(interrupt)
-	DBG_FAULT(12)
-	mov r31=pr		// prepare to save predicates
-	;;
-	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
-				// ensure everybody knows psr.ic is back on
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	;;
-	SAVE_REST
-	;;
-	MCA_RECOVER_RANGE(interrupt)
-	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-	MOV_FROM_IVR(out0, r8)	// pass cr.ivr as first arg
-	add out1=16,sp		// pass pointer to pt_regs as second arg
-	;;
-	srlz.d			// make sure we see the effect of cr.ivr
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=ia64_handle_irq
+	/* interrupt handler has become too big to fit this area. */
+	br.sptk.many __interrupt
 END(interrupt)
 
 	.org ia64_ivt+0x3400
@@ -1125,105 +1086,18 @@ END(account_sys_enter)
 	DBG_FAULT(17)
 	FAULT(17)
 
-ENTRY(non_syscall)
-	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
-	;;
-	SAVE_MIN_WITH_COVER
-
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply moved the following code to a more
-	// suitable spot...
-
-	alloc r14=ar.pfs,0,0,2,0
-	MOV_FROM_IIM(out0)
-	add out1=16,sp
-	adds r3=8,r2			// set up second base pointer for SAVE_REST
-
-	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
-					// guarantee that interruption collection is on
-	SSM_PSR_I(p15, p15, r15)	// restore psr.i
-	movl r15=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r15
-	;;
-	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
 	.org ia64_ivt+0x4800
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4800 Entry 18 (size 64 bundles) Reserved
 	DBG_FAULT(18)
 	FAULT(18)
 
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply moved the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_unaligned_handler)
-	SAVE_MIN_WITH_COVER
-	;;
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
-	MOV_FROM_IFA(out0)
-	adds out1=16,sp
-
-	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
-						// guarantee that interruption collection is on
-	SSM_PSR_I(p15, p15, r3)			// restore psr.i
-	adds r3=8,r2				// set up second base pointer
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.sptk.many ia64_prepare_handle_unaligned
-END(dispatch_unaligned_handler)
-
 	.org ia64_ivt+0x4c00
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4c00 Entry 19 (size 64 bundles) Reserved
 	DBG_FAULT(19)
 	FAULT(19)
 
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply moved the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_to_fault_handler)
-	/*
-	 * Input:
-	 *	psr.ic:	off
-	 *	r19:	fault vector number (e.g., 24 for General Exception)
-	 *	r31:	contains saved predicates (pr)
-	 */
-	SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0
-	MOV_FROM_ISR(out1)
-	MOV_FROM_IFA(out2)
-	MOV_FROM_IIM(out3)
-	MOV_FROM_ITIR(out4)
-	;;
-	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
-						// guarantee that interruption collection is on
-	mov out0=r15
-	;;
-	SSM_PSR_I(p15, p15, r3)			// restore psr.i
-	adds r3=8,r2				// set up second base pointer for SAVE_REST
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=ia64_fault
-END(dispatch_to_fault_handler)
-
 //
 // --- End of long entries, Beginning of short entries
 //
@@ -1670,6 +1544,137 @@ END(ia32_interrupt)
 	DBG_FAULT(67)
 	FAULT(67)
 
+	//-----------------------------------------------------------------------------------
+	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+	SSM_PSR_DT_AND_SRLZ_I
+	;;
+	SAVE_MIN_WITH_COVER
+	alloc r15=ar.pfs,0,0,3,0
+	MOV_FROM_IFA(out0)
+	MOV_FROM_ISR(out1)
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
+	adds r3=8,r2				// set up second base pointer
+	SSM_PSR_I(p15, p15, r14)		// restore psr.i
+	movl r14=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12			// out2 = pointer to pt_regs
+	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
+END(page_fault)
+
+ENTRY(non_syscall)
+	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
+	;;
+	SAVE_MIN_WITH_COVER
+
+	// There is no particular reason for this code to be here, other than that
+	// there happens to be space here that would go unused otherwise.  If this
+	// fault ever gets "unreserved", simply moved the following code to a more
+	// suitable spot...
+
+	alloc r14=ar.pfs,0,0,2,0
+	MOV_FROM_IIM(out0)
+	add out1=16,sp
+	adds r3=8,r2			// set up second base pointer for SAVE_REST
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
+					// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r15)	// restore psr.i
+	movl r15=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r15
+	;;
+	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ENTRY(__interrupt)
+	DBG_FAULT(12)
+	mov r31=pr		// prepare to save predicates
+	;;
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
+				// ensure everybody knows psr.ic is back on
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	;;
+	MCA_RECOVER_RANGE(interrupt)
+	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+	MOV_FROM_IVR(out0, r8)	// pass cr.ivr as first arg
+	add out1=16,sp		// pass pointer to pt_regs as second arg
+	;;
+	srlz.d			// make sure we see the effect of cr.ivr
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_handle_irq
+END(__interrupt)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_unaligned_handler)
+	SAVE_MIN_WITH_COVER
+	;;
+	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	MOV_FROM_IFA(out0)
+	adds out1=16,sp
+
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+						// guarantee that interruption collection is on
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_to_fault_handler)
+	/*
+	 * Input:
+	 *	psr.ic:	off
+	 *	r19:	fault vector number (e.g., 24 for General Exception)
+	 *	r31:	contains saved predicates (pr)
+	 */
+	SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	MOV_FROM_ISR(out1)
+	MOV_FROM_IFA(out2)
+	MOV_FROM_IIM(out3)
+	MOV_FROM_ITIR(out4)
+	;;
+	SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
+						// guarantee that interruption collection is on
+	mov out0=r15
+	;;
+	SSM_PSR_I(p15, p15, r3)			// restore psr.i
+	adds r3=8,r2				// set up second base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
 	/*
 	 * Squatting in this space ...
 	 *
-- 
cgit v1.2.3


From da3854fc9f80c0240ba7cadd2aebf036683ff21b Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurentp@cse-semaphore.com>
Date: Tue, 24 Jun 2008 22:15:58 +0100
Subject: DM9000: Fixup blackfin after removing 2 resource usage

The dm9000 driver accepts either 2 or 3 resources to describe the platform
devices. The 2 resources case abuses the ioresource mechanism by passing
ioremap()ed memory through the platform device resources. This patch
converts boards that were using it to the 3 resources scheme.

CC: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Laurent Pinchart <laurentp@cse-semaphore.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 arch/blackfin/mach-bf527/boards/ezkit.c         | 7 ++++++-
 arch/blackfin/mach-bf533/boards/H8606.c         | 7 ++++++-
 arch/blackfin/mach-bf537/boards/generic_board.c | 7 ++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index 5958eecefcf..689b69c98ee 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -323,10 +323,15 @@ static struct platform_device smc91x_device = {
 static struct resource dm9000_resources[] = {
 	[0] = {
 		.start	= 0x203FB800,
-		.end	= 0x203FB800 + 8,
+		.end	= 0x203FB800 + 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.start	= 0x203FB800 + 4,
+		.end	= 0x203FB800 + 5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
 		.start	= IRQ_PF9,
 		.end	= IRQ_PF9,
 		.flags	= (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE),
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index 7cc4864f6aa..4103a97c1a7 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -65,10 +65,15 @@ static struct platform_device rtc_device = {
 static struct resource dm9000_resources[] = {
 	[0] = {
 		.start	= 0x20300000,
-		.end	= 0x20300000 + 8,
+		.end	= 0x20300000 + 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.start	= 0x20300000 + 4,
+		.end	= 0x20300000 + 5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
 		.start	= IRQ_PF10,
 		.end	= IRQ_PF10,
 		.flags	= (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE),
diff --git a/arch/blackfin/mach-bf537/boards/generic_board.c b/arch/blackfin/mach-bf537/boards/generic_board.c
index 7d250828dad..01b63e2ec18 100644
--- a/arch/blackfin/mach-bf537/boards/generic_board.c
+++ b/arch/blackfin/mach-bf537/boards/generic_board.c
@@ -166,10 +166,15 @@ static struct platform_device smc91x_device = {
 static struct resource dm9000_resources[] = {
 	[0] = {
 		.start	= 0x203FB800,
-		.end	= 0x203FB800 + 8,
+		.end	= 0x203FB800 + 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.start	= 0x203FB800 + 4,
+		.end	= 0x203FB800 + 5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
 		.start	= IRQ_PF9,
 		.end	= IRQ_PF9,
 		.flags	= (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE),
-- 
cgit v1.2.3


From 3aa30df3d0d78f568cff9d6a98ae01ae55494f10 Mon Sep 17 00:00:00 2001
From: Hinko Kocevar <hinko.kocevar@cetrtapot.si>
Date: Fri, 6 Jun 2008 14:12:26 +0200
Subject: cris: compile fixes for 2.6.26-rc5

Add dummy ops for serial debug port.
Add setting of c_ispeed/c_ospeed as suggested by Alan Cox.

Signed-off-by: Hinko Kocevar <hinko.kocevar@cetrtapot.si>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/kernel/debugport.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/kernel/debugport.c b/arch/cris/arch-v10/kernel/debugport.c
index 04d5eee2c90..162730eb887 100644
--- a/arch/cris/arch-v10/kernel/debugport.c
+++ b/arch/cris/arch-v10/kernel/debugport.c
@@ -426,12 +426,18 @@ static int dummy_write(struct tty_struct * tty,
 	return count;
 }
 
-static int
-dummy_write_room(struct tty_struct *tty)
+static int dummy_write_room(struct tty_struct *tty)
 {
 	return 8192;
 }
 
+static const struct tty_operations dummy_ops = {
+        .open = dummy_open,
+        .close = dummy_close,
+        .write = dummy_write,
+        .write_room = dummy_write_room,
+};
+
 void __init
 init_dummy_console(void)
 {
@@ -444,14 +450,14 @@ init_dummy_console(void)
 	dummy_driver.type = TTY_DRIVER_TYPE_SERIAL;
 	dummy_driver.subtype = SERIAL_TYPE_NORMAL;
 	dummy_driver.init_termios = tty_std_termios;
+	/* Normally B9600 default... */
 	dummy_driver.init_termios.c_cflag =
-		B115200 | CS8 | CREAD | HUPCL | CLOCAL; /* is normally B9600 default... */
+		B115200 | CS8 | CREAD | HUPCL | CLOCAL;
 	dummy_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
+	dummy_driver.init_termios.c_ispeed = 115200;
+	dummy_driver.init_termios.c_ospeed = 115200;
 
-	dummy_driver.open = dummy_open;
-	dummy_driver.close = dummy_close;
-	dummy_driver.write = dummy_write;
-	dummy_driver.write_room = dummy_write_room;
+	dummy_driver.ops = &dummy_ops;
 	if (tty_register_driver(&dummy_driver))
 		panic("Couldn't register dummy serial driver\n");
 }
-- 
cgit v1.2.3


From 9be48a94b8ae8c944dc918ad65f2f27e9df3ed00 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sun, 29 Jun 2008 22:50:56 +0200
Subject: It looks at least odd to apply spin_unlock to a mutex.

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@def@
declarer DEFINE_MUTEX;
identifier m;
@@

DEFINE_MUTEX(m);

@@
identifier def.m;
@@

(
- spin_lock(&m)
+ mutex_lock(&m)
|
- spin_unlock(&m)
+ mutex_unlock(&m)
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/drivers/pcf8563.c | 2 +-
 arch/cris/arch-v32/drivers/pcf8563.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c
index 52103d16dc6..8769dc91407 100644
--- a/arch/cris/arch-v10/drivers/pcf8563.c
+++ b/arch/cris/arch-v10/drivers/pcf8563.c
@@ -233,7 +233,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 
 		if (copy_to_user((struct rtc_time *) arg, &tm,
 				 sizeof tm)) {
-			spin_unlock(&rtc_lock);
+			mutex_unlock(&rtc_lock);
 			return -EFAULT;
 		}
 
diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c
index 53db3870ba0..f263ab57122 100644
--- a/arch/cris/arch-v32/drivers/pcf8563.c
+++ b/arch/cris/arch-v32/drivers/pcf8563.c
@@ -229,7 +229,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 
 		if (copy_to_user((struct rtc_time *) arg, &tm,
 				 sizeof tm)) {
-			spin_unlock(&rtc_lock);
+			mutex_unlock(&rtc_lock);
 			return -EFAULT;
 		}
 
-- 
cgit v1.2.3


From bdb144b67a7660ce5d044ae9a2fd1a8030f12523 Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper@jni.nu>
Date: Sun, 29 Jun 2008 23:15:19 +0200
Subject: [CRIS] Build fixes for compressed and rescue images for v10 and v32:

- Use the normal cross gcc instead of using an elf specific cris toolchain.
  This removes the dependency of this second toolchain.

- Use the normal cross objcopy instead of overriding it to use elf-toolchain.
  This allows compiling using "CROSS_COMPILE=$CRIS_GCC/cris-axis-linux-gnu-"
  instead of just "CROSS_COMPILE=$CRIS_GCC/cris-axis-linux-gnu/bin/"

- Remove redundant rules for compiling, the implicit rules are sufficient.

- Convert the arch/cris/arch-v10/boot/compressed/head.S to format
  accepted by the cris-axis-linux-gnu-gcc (registers must be prefixed
  with '$', remove explicit underscore on exported symbols)

- Remove a number of unused (and duplicated) prototypes from
  arch/cris/arch-v10/boot/compressed/misc.c.

- Correct memcpy and memset return values (actually return them!)

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/boot/Makefile                 |  1 -
 arch/cris/arch-v10/boot/compressed/Makefile      | 12 +--
 arch/cris/arch-v10/boot/compressed/decompress.ld |  3 +-
 arch/cris/arch-v10/boot/compressed/head.S        | 98 ++++++++++++------------
 arch/cris/arch-v10/boot/compressed/misc.c        | 23 +++---
 arch/cris/arch-v10/boot/rescue/Makefile          |  7 +-
 arch/cris/arch-v32/boot/Makefile                 |  1 -
 arch/cris/arch-v32/boot/compressed/Makefile      |  4 -
 arch/cris/arch-v32/boot/rescue/Makefile          |  1 -
 9 files changed, 65 insertions(+), 85 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/boot/Makefile b/arch/cris/arch-v10/boot/Makefile
index 20c83a53caf..21720301443 100644
--- a/arch/cris/arch-v10/boot/Makefile
+++ b/arch/cris/arch-v10/boot/Makefile
@@ -2,7 +2,6 @@
 # arch/cris/arch-v10/boot/Makefile
 #
 
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 
 subdir- := compressed rescue
diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile
index 4a031cb27eb..9ec5f87d515 100644
--- a/arch/cris/arch-v10/boot/compressed/Makefile
+++ b/arch/cris/arch-v10/boot/compressed/Makefile
@@ -2,12 +2,10 @@
 # arch/cris/arch-v10/boot/compressed/Makefile
 #
 
-CC = gcc-cris -melf $(LINUXINCLUDE)
-ccflags-y += -O2
-LD = ld-cris
+asflags-y += $(LINUXINCLUDE)
+ccflags-y += -O2 $(LINUXINCLUDE)
 ldflags-y += -T $(obj)/decompress.ld
 OBJECTS = $(obj)/head.o $(obj)/misc.o
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 
 quiet_cmd_image = BUILD   $@
@@ -21,12 +19,6 @@ $(obj)/decompress.o: $(OBJECTS) FORCE
 $(obj)/decompress.bin: $(obj)/decompress.o FORCE
 	$(call if_changed,objcopy)
 
-$(obj)/head.o: $(obj)/head.S .config
-	@$(CC) -D__ASSEMBLY__ -traditional -c $< -o $@
-
-$(obj)/misc.o: $(obj)/misc.c .config
-	@$(CC) -D__KERNEL__ -c $< -o $@
-
 $(obj)/vmlinux: $(obj)/piggy.gz $(obj)/decompress.bin FORCE
 	$(call if_changed,image)
 
diff --git a/arch/cris/arch-v10/boot/compressed/decompress.ld b/arch/cris/arch-v10/boot/compressed/decompress.ld
index 0b0a14fe617..e80f4594d54 100644
--- a/arch/cris/arch-v10/boot/compressed/decompress.ld
+++ b/arch/cris/arch-v10/boot/compressed/decompress.ld
@@ -1,4 +1,5 @@
-OUTPUT_FORMAT(elf32-us-cris)
+/* OUTPUT_FORMAT(elf32-us-cris) */
+OUTPUT_FORMAT(elf32-cris)
 
 MEMORY 
 	{
diff --git a/arch/cris/arch-v10/boot/compressed/head.S b/arch/cris/arch-v10/boot/compressed/head.S
index 610bdb23755..981fbae8495 100644
--- a/arch/cris/arch-v10/boot/compressed/head.S
+++ b/arch/cris/arch-v10/boot/compressed/head.S
@@ -15,77 +15,77 @@
 #define COMMAND_LINE_MAGIC 0x87109563
 
 	;; Exported symbols
-	
-	.globl	_input_data
 
-	
+	.globl	input_data
+
+
 	.text
 
 	nop
 	di
 
 ;; We need to initialze DRAM registers before we start using the DRAM
-	
-	cmp.d	RAM_INIT_MAGIC, r8	; Already initialized?
+
+	cmp.d	RAM_INIT_MAGIC, $r8	; Already initialized?
 	beq	dram_init_finished
 	nop
-	
+
 #include "../../lib/dram_init.S"
-	
-dram_init_finished:	
-		
+
+dram_init_finished:
+
 	;; Initiate the PA and PB ports
 
-	move.b   CONFIG_ETRAX_DEF_R_PORT_PA_DATA, r0
-	move.b   r0, [R_PORT_PA_DATA]
+	move.b   CONFIG_ETRAX_DEF_R_PORT_PA_DATA, $r0
+	move.b   $r0, [R_PORT_PA_DATA]
 
-	move.b   CONFIG_ETRAX_DEF_R_PORT_PA_DIR, r0
-	move.b   r0, [R_PORT_PA_DIR]
+	move.b   CONFIG_ETRAX_DEF_R_PORT_PA_DIR, $r0
+	move.b   $r0, [R_PORT_PA_DIR]
 
-	move.b   CONFIG_ETRAX_DEF_R_PORT_PB_DATA, r0
-	move.b   r0, [R_PORT_PB_DATA]
+	move.b   CONFIG_ETRAX_DEF_R_PORT_PB_DATA, $r0
+	move.b   $r0, [R_PORT_PB_DATA]
 
-	move.b   CONFIG_ETRAX_DEF_R_PORT_PB_DIR, r0
-	move.b   r0, [R_PORT_PB_DIR]
+	move.b   CONFIG_ETRAX_DEF_R_PORT_PB_DIR, $r0
+	move.b   $r0, [R_PORT_PB_DIR]
 
 	;; Setup the stack to a suitably high address.
 	;; We assume 8 MB is the minimum DRAM in an eLinux
 	;; product and put the sp at the top for now.
 
-	move.d	0x40800000, sp
+	move.d	0x40800000, $sp
 
 	;; Figure out where the compressed piggyback image is
 	;; in the flash (since we wont try to copy it to DRAM
 	;; before unpacking). It is at _edata, but in flash.
 	;; Use (_edata - basse) as offset to the current PC.
-	
-basse:	move.d	pc, r5
-	and.d	0x7fffffff, r5	; strip any non-cache bit
-	subq	2, r5		; compensate for the move.d pc instr
-	move.d	r5, r0		; save for later - flash address of 'basse'
-	add.d	_edata, r5
-	sub.d	basse, r5	; r5 = flash address of '_edata'
-	
+
+basse:	move.d	$pc, $r5
+	and.d	0x7fffffff, $r5	; strip any non-cache bit
+	subq	2, $r5		; compensate for the move.d $pc instr
+	move.d	$r5, $r0		; save for later - flash address of 'basse'
+	add.d	_edata, $r5
+	sub.d	basse, $r5	; $r5 = flash address of '_edata'
+
 	;; Copy text+data to DRAM
-	
-	move.d	basse, r1	; destination
-	move.d	_edata, r2	; end destination
-1:	move.w	[r0+], r3
-	move.w	r3, [r1+]
-	cmp.d	r2, r1
+
+	move.d	basse, $r1	; destination
+	move.d	_edata, $r2	; end destination
+1:	move.w	[$r0+], $r3
+	move.w	$r3, [$r1+]
+	cmp.d	$r2, $r1
 	bcs	1b
 	nop
 
-	move.d	r5, [_input_data] ; for the decompressor
+	move.d	$r5, [input_data] ; for the decompressor
 
 
 	;; Clear the decompressors BSS (between _edata and _end)
-	
-	moveq	0, r0
-	move.d	_edata, r1
-	move.d	_end, r2
-1:	move.w	r0, [r1+]
-	cmp.d	r2, r1
+
+	moveq	0, $r0
+	move.d	_edata, $r1
+	move.d	_end, $r2
+1:	move.w	$r0, [$r1+]
+	cmp.d	$r2, $r1
 	bcs	1b
 	nop
 
@@ -94,16 +94,16 @@ basse:	move.d	pc, r5
 	move.d  $r10, [$r12]
 	move.d	_cmd_line_addr, $r12
 	move.d  $r11, [$r12]
-	
-	;; Do the decompression and save compressed size in _inptr
 
-	jsr	_decompress_kernel
-	
-	;; Put start address of root partition in r9 so the kernel can use it
+	;; Do the decompression and save compressed size in inptr
+
+	jsr	decompress_kernel
+
+	;; Put start address of root partition in $r9 so the kernel can use it
 	;; when mounting from flash
 
-	move.d	[_input_data], r9	; flash address of compressed kernel
-	add.d	[_inptr], r9		; size of compressed kernel
+	move.d	[input_data], $r9	; flash address of compressed kernel
+	add.d	[inptr], $r9		; size of compressed kernel
 
 	;; Restore command line magic and address.
 	move.d  _cmd_line_magic, $r10
@@ -112,12 +112,12 @@ basse:	move.d	pc, r5
 	move.d  [$r11], $r11
 
 	;; Enter the decompressed kernel
-	move.d	RAM_INIT_MAGIC, r8	; Tell kernel that DRAM is initialized
+	move.d	RAM_INIT_MAGIC, $r8	; Tell kernel that DRAM is initialized
 	jump	0x40004000	; kernel is linked to this address
-	
+
 	.data
 
-_input_data:
+input_data:
 	.dword	0		; used by the decompressor
 _cmd_line_magic:
 	.dword 0
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 9a43ab19391..59961f20fab 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -30,8 +30,7 @@
 #define STATIC static
 
 void* memset(void* s, int c, size_t n);
-void* memcpy(void* __dest, __const void* __src,
-	     size_t __n);
+void* memcpy(void* __dest, __const void* __src, size_t __n);
 
 #define memzero(s, n)     memset ((s), 0, (n))
 
@@ -81,11 +80,8 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 #  define Tracecv(c,x)
 #endif
 
-static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char *input_data;  /* lives in head.S */
 
@@ -95,7 +91,6 @@ static unsigned long output_ptr = 0;
  
 static void *malloc(int size);
 static void free(void *where);
-static void error(char *m);
 static void gzip_mark(void **);
 static void gzip_release(void **);
  
@@ -103,8 +98,8 @@ static void puts(const char *);
 
 /* the "heap" is put directly after the BSS ends, at end */
   
-extern int end;
-static long free_mem_ptr = (long)&end;
+extern int _end;
+static long free_mem_ptr = (long)&_end;
  
 #include "../../../../../lib/inflate.c"
 
@@ -170,6 +165,8 @@ memset(void* s, int c, size_t n)
 	char *ss = (char*)s;
 
 	for (i=0;i<n;i++) ss[i] = c;
+
+   return s;
 }
 
 void*
@@ -180,6 +177,8 @@ memcpy(void* __dest, __const void* __src,
 	char *d = (char *)__dest, *s = (char *)__src;
 
 	for (i=0;i<__n;i++) d[i] = s[i];
+
+   return __dest;
 }
 
 /* ===========================================================================
@@ -216,14 +215,12 @@ error(char *x)
 	while(1);	/* Halt */
 }
 
-void
-setup_normal_output_buffer()
+void setup_normal_output_buffer(void)
 {
 	output_data = (char *)KERNEL_LOAD_ADR;
 }
 
-void
-decompress_kernel()
+void decompress_kernel(void)
 {
 	char revision;
 	
@@ -257,7 +254,7 @@ decompress_kernel()
 
 	makecrc();
 
-	__asm__ volatile ("move vr,%0" : "=rm" (revision));
+	__asm__ volatile ("move $vr,%0" : "=rm" (revision));
 	if (revision < 10)
 	{
 		puts("You need an ETRAX 100LX to run linux 2.6\n");
diff --git a/arch/cris/arch-v10/boot/rescue/Makefile b/arch/cris/arch-v10/boot/rescue/Makefile
index 2e5045b9e19..bea8b9c2a7c 100644
--- a/arch/cris/arch-v10/boot/rescue/Makefile
+++ b/arch/cris/arch-v10/boot/rescue/Makefile
@@ -2,12 +2,9 @@
 # Makefile for rescue (bootstrap) code
 #
 
-CC = gcc-cris -mlinux $(LINUXINCLUDE)
-ccflags-y += -O2
-asflags-y += -traditional
-LD = gcc-cris -mlinux -nostdlib
+ccflags-y += -O2 $(LINUXINCLUDE)
+asflags-y += $(LINUXINCLUDE)
 ldflags-y += -T $(obj)/rescue.ld
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o
 OBJECT := $(obj)/head.o
diff --git a/arch/cris/arch-v32/boot/Makefile b/arch/cris/arch-v32/boot/Makefile
index 3f91349c5f1..99896ad60b3 100644
--- a/arch/cris/arch-v32/boot/Makefile
+++ b/arch/cris/arch-v32/boot/Makefile
@@ -2,7 +2,6 @@
 # arch/cris/arch-v32/boot/Makefile
 #
 
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary -R .note -R .comment
 
 subdir- := compressed rescue
diff --git a/arch/cris/arch-v32/boot/compressed/Makefile b/arch/cris/arch-v32/boot/compressed/Makefile
index 2c8c2c3039c..9138938eec3 100644
--- a/arch/cris/arch-v32/boot/compressed/Makefile
+++ b/arch/cris/arch-v32/boot/compressed/Makefile
@@ -2,14 +2,10 @@
 # arch/cris/arch-v32/boot/compressed/Makefile
 #
 
-CC = gcc-cris -mlinux -march=v32 $(LINUXINCLUDE)
 asflags-y += -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch
 ccflags-y += -O2 -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch
-LD = gcc-cris -mlinux -march=v32 -nostdlib
 ldflags-y += -T $(obj)/decompress.ld
-obj-y = head.o misc.o
 OBJECTS = $(obj)/head.o $(obj)/misc.o
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 
 quiet_cmd_image = BUILD   $@
diff --git a/arch/cris/arch-v32/boot/rescue/Makefile b/arch/cris/arch-v32/boot/rescue/Makefile
index c0987795dcb..b548bde185d 100644
--- a/arch/cris/arch-v32/boot/rescue/Makefile
+++ b/arch/cris/arch-v32/boot/rescue/Makefile
@@ -9,7 +9,6 @@ asflags-y += -I $(srctree)/include/asm/arch/mach/ -I $(srctree)/include/asm/arch
 LD = gcc-cris -mlinux -march=v32 -nostdlib
 ldflags-y += -T $(obj)/rescue.ld
 LDPOSTFLAGS = -lgcc
-OBJCOPY = objcopy-cris
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o
 OBJECT := $(obj)/head.o
-- 
cgit v1.2.3


From 5e5a29bf2624a5984e1c36c3a2481ee91249ec9c Mon Sep 17 00:00:00 2001
From: Anders H Kaseorg <andersk@MIT.EDU>
Date: Sat, 28 Jun 2008 18:25:41 -0400
Subject: x86, 64-bit: patch paravirt inline replacements when loading modules

small speedup.

Paravirt replacements were added to the i386 module loader by commit
139ec7c416248b9ea227d21839235344edfee1e0.  This adds the same code to
the x86_64 module loader.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/module_64.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index a888e67f587..0e867676b5a 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr,
                     const Elf_Shdr *sechdrs,
                     struct module *me)
 {
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 			alt = s;
 		if (!strcmp(".smp_locks", secstrings + s->sh_name))
 			locks= s;
+		if (!strcmp(".parainstructions", secstrings + s->sh_name))
+			para = s;
 	}
 
 	if (alt) {
@@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr,
 					    tseg, tseg + text->sh_size);
 	}
 
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
+
 	return module_bug_finalize(hdr, sechdrs, me);
 }
 
-- 
cgit v1.2.3


From 7dd071058f70d517f2d24e80cbb9d9885d565f0a Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper@jni.nu>
Date: Mon, 30 Jun 2008 20:38:06 +0200
Subject: [CRIS] Correct image makefiles to allow using a separate
 OBJ-directory.

Make the build succeed when building with O= (srctree != objtree).

Signed-off-by: Hinko Kocevar <hinko.kocevar@cetrtapot.si>
Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/boot/compressed/Makefile | 2 +-
 arch/cris/arch-v10/boot/rescue/Makefile     | 2 +-
 arch/cris/arch-v32/boot/compressed/Makefile | 2 +-
 arch/cris/arch-v32/boot/rescue/Makefile     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile
index 9ec5f87d515..08d943ce4be 100644
--- a/arch/cris/arch-v10/boot/compressed/Makefile
+++ b/arch/cris/arch-v10/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 
 asflags-y += $(LINUXINCLUDE)
 ccflags-y += -O2 $(LINUXINCLUDE)
-ldflags-y += -T $(obj)/decompress.ld
+ldflags-y += -T $(srctree)/$(obj)/decompress.ld
 OBJECTS = $(obj)/head.o $(obj)/misc.o
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 
diff --git a/arch/cris/arch-v10/boot/rescue/Makefile b/arch/cris/arch-v10/boot/rescue/Makefile
index bea8b9c2a7c..07688da9270 100644
--- a/arch/cris/arch-v10/boot/rescue/Makefile
+++ b/arch/cris/arch-v10/boot/rescue/Makefile
@@ -4,7 +4,7 @@
 
 ccflags-y += -O2 $(LINUXINCLUDE)
 asflags-y += $(LINUXINCLUDE)
-ldflags-y += -T $(obj)/rescue.ld
+ldflags-y += -T $(srctree)/$(obj)/rescue.ld
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o
 OBJECT := $(obj)/head.o
diff --git a/arch/cris/arch-v32/boot/compressed/Makefile b/arch/cris/arch-v32/boot/compressed/Makefile
index 9138938eec3..d6335f26083 100644
--- a/arch/cris/arch-v32/boot/compressed/Makefile
+++ b/arch/cris/arch-v32/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 
 asflags-y += -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch
 ccflags-y += -O2 -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch
-ldflags-y += -T $(obj)/decompress.ld
+ldflags-y += -T $(srctree)/$(obj)/decompress.ld
 OBJECTS = $(obj)/head.o $(obj)/misc.o
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 
diff --git a/arch/cris/arch-v32/boot/rescue/Makefile b/arch/cris/arch-v32/boot/rescue/Makefile
index b548bde185d..44ae0ad61f9 100644
--- a/arch/cris/arch-v32/boot/rescue/Makefile
+++ b/arch/cris/arch-v32/boot/rescue/Makefile
@@ -7,7 +7,7 @@ ccflags-y += -O2 -I $(srctree)/include/asm/arch/mach/ \
 		-I $(srctree)/include/asm/arch
 asflags-y += -I $(srctree)/include/asm/arch/mach/ -I $(srctree)/include/asm/arch
 LD = gcc-cris -mlinux -march=v32 -nostdlib
-ldflags-y += -T $(obj)/rescue.ld
+ldflags-y += -T $(srctree)/$(obj)/rescue.ld
 LDPOSTFLAGS = -lgcc
 OBJCOPYFLAGS = -O binary --remove-section=.bss
 obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o
-- 
cgit v1.2.3


From bd451d5ed206cda4ed0e03fac4e5dece2fd7767f Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper@jni.nu>
Date: Mon, 30 Jun 2008 23:22:51 +0200
Subject: [CRISv10] Correct whitespace damage.

The previous patch was whitespace-damaged; correct it to indent
using tabs.

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/kernel/debugport.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/kernel/debugport.c b/arch/cris/arch-v10/kernel/debugport.c
index 162730eb887..3dc6e91ba39 100644
--- a/arch/cris/arch-v10/kernel/debugport.c
+++ b/arch/cris/arch-v10/kernel/debugport.c
@@ -432,10 +432,10 @@ static int dummy_write_room(struct tty_struct *tty)
 }
 
 static const struct tty_operations dummy_ops = {
-        .open = dummy_open,
-        .close = dummy_close,
-        .write = dummy_write,
-        .write_room = dummy_write_room,
+	.open = dummy_open,
+	.close = dummy_close,
+	.write = dummy_write,
+	.write_room = dummy_write_room,
 };
 
 void __init
-- 
cgit v1.2.3


From f3c4b53d5ec6bd2ae0f284c1e6371bff545f0f80 Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper@jni.nu>
Date: Mon, 30 Jun 2008 21:20:23 +0200
Subject: [CRISv10] Clean up compressed/misc.c

Many minor fixes in whitespace and formatting.

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
---
 arch/cris/arch-v10/boot/compressed/misc.c | 127 ++++++++++++++++--------------
 1 file changed, 70 insertions(+), 57 deletions(-)

(limited to 'arch')

diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 59961f20fab..18e13bce140 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -29,11 +29,10 @@
 #define OF(args)  args
 #define STATIC static
 
-void* memset(void* s, int c, size_t n);
-void* memcpy(void* __dest, __const void* __src, size_t __n);
-
-#define memzero(s, n)     memset ((s), 0, (n))
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *__dest, __const void *__src, size_t __n);
 
+#define memzero(s, n)     memset((s), 0, (n))
 
 typedef unsigned char  uch;
 typedef unsigned short ush;
@@ -61,23 +60,38 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
 #define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
 #define RESERVED     0xC0 /* bit 6,7:   reserved */
 
-#define get_byte() inbuf[inptr++]	
-	
+#define get_byte() (inbuf[inptr++])
+
 /* Diagnostic functions */
 #ifdef DEBUG
-#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Assert(cond, msg) do { \
+		if (!(cond)) \
+			error(msg); \
+	} while (0)
 #  define Trace(x) fprintf x
-#  define Tracev(x) {if (verbose) fprintf x ;}
-#  define Tracevv(x) {if (verbose>1) fprintf x ;}
-#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#  define Tracev(x) do { \
+		if (verbose) \
+			fprintf x; \
+	} while (0)
+#  define Tracevv(x) do { \
+		if (verbose > 1) \
+			fprintf x; \
+	} while (0)
+#  define Tracec(c, x) do { \
+		if (verbose && (c)) \
+			fprintf x; \
+	} while (0)
+#  define Tracecv(c, x) do { \
+		if (verbose > 1 && (c)) \
+			fprintf x; \
+	} while (0)
 #else
-#  define Assert(cond,msg)
+#  define Assert(cond, msg)
 #  define Trace(x)
 #  define Tracev(x)
 #  define Tracevv(x)
-#  define Tracec(c,x)
-#  define Tracecv(c,x)
+#  define Tracec(c, x)
+#  define Tracecv(c, x)
 #endif
 
 static void flush_window(void);
@@ -88,26 +102,27 @@ extern char *input_data;  /* lives in head.S */
 static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
- 
+
 static void *malloc(int size);
 static void free(void *where);
 static void gzip_mark(void **);
 static void gzip_release(void **);
- 
+
 static void puts(const char *);
 
 /* the "heap" is put directly after the BSS ends, at end */
-  
+
 extern int _end;
 static long free_mem_ptr = (long)&_end;
- 
+
 #include "../../../../../lib/inflate.c"
 
 static void *malloc(int size)
 {
 	void *p;
 
-	if (size <0) error("Malloc error");
+	if (size < 0)
+		error("Malloc error");
 
 	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
 
@@ -137,48 +152,47 @@ static void
 puts(const char *s)
 {
 #ifndef CONFIG_ETRAX_DEBUG_PORT_NULL
-	while(*s) {
+	while (*s) {
 #ifdef CONFIG_ETRAX_DEBUG_PORT0
-		while(!(*R_SERIAL0_STATUS & (1 << 5))) ;
+		while (!(*R_SERIAL0_STATUS & (1 << 5))) ;
 		*R_SERIAL0_TR_DATA = *s++;
 #endif
 #ifdef CONFIG_ETRAX_DEBUG_PORT1
-		while(!(*R_SERIAL1_STATUS & (1 << 5))) ;
+		while (!(*R_SERIAL1_STATUS & (1 << 5))) ;
 		*R_SERIAL1_TR_DATA = *s++;
 #endif
 #ifdef CONFIG_ETRAX_DEBUG_PORT2
-		while(!(*R_SERIAL2_STATUS & (1 << 5))) ;
+		while (!(*R_SERIAL2_STATUS & (1 << 5))) ;
 		*R_SERIAL2_TR_DATA = *s++;
 #endif
 #ifdef CONFIG_ETRAX_DEBUG_PORT3
-		while(!(*R_SERIAL3_STATUS & (1 << 5))) ;
+		while (!(*R_SERIAL3_STATUS & (1 << 5))) ;
 		*R_SERIAL3_TR_DATA = *s++;
 #endif
 	}
 #endif
 }
 
-void*
-memset(void* s, int c, size_t n)
+void *memset(void *s, int c, size_t n)
 {
 	int i;
-	char *ss = (char*)s;
+	char *ss = (char *)s;
 
-	for (i=0;i<n;i++) ss[i] = c;
+	for (i = 0; i < n; i++)
+		ss[i] = c;
 
-   return s;
+	return s;
 }
 
-void*
-memcpy(void* __dest, __const void* __src,
-			    size_t __n)
+void *memcpy(void *__dest, __const void *__src, size_t __n)
 {
 	int i;
 	char *d = (char *)__dest, *s = (char *)__src;
 
-	for (i=0;i<__n;i++) d[i] = s[i];
+	for (i = 0; i < __n; i++)
+		d[i] = s[i];
 
-   return __dest;
+	return __dest;
 }
 
 /* ===========================================================================
@@ -186,33 +200,33 @@ memcpy(void* __dest, __const void* __src,
  * (Used for the decompressed data only.)
  */
 
-static void
-flush_window()
+static void flush_window(void)
 {
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in, *out, ch;
-    
-    in = window;
-    out = &output_data[output_ptr]; 
-    for (n = 0; n < outcnt; n++) {
-	    ch = *out++ = *in++;
-	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    output_ptr += (ulg)outcnt;
-    outcnt = 0;
+	ulg c = crc;         /* temporary variable */
+	unsigned n;
+	uch *in, *out, ch;
+
+	in = window;
+	out = &output_data[output_ptr];
+	for (n = 0; n < outcnt; n++) {
+		ch = *out = *in;
+		out++;
+		in++;
+		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg)outcnt;
+	output_ptr += (ulg)outcnt;
+	outcnt = 0;
 }
 
-static void
-error(char *x)
+static void error(char *x)
 {
 	puts("\n\n");
 	puts(x);
 	puts("\n\n -- System halted\n");
 
-	while(1);	/* Halt */
+	while (1);	/* Halt */
 }
 
 void setup_normal_output_buffer(void)
@@ -223,7 +237,7 @@ void setup_normal_output_buffer(void)
 void decompress_kernel(void)
 {
 	char revision;
-	
+
 	/* input_data is set in head.S */
 	inbuf = input_data;
 
@@ -255,10 +269,9 @@ void decompress_kernel(void)
 	makecrc();
 
 	__asm__ volatile ("move $vr,%0" : "=rm" (revision));
-	if (revision < 10)
-	{
+	if (revision < 10) {
 		puts("You need an ETRAX 100LX to run linux 2.6\n");
-		while(1);
+		while (1);
 	}
 
 	puts("Uncompressing Linux...\n");
-- 
cgit v1.2.3


From 299a140dacaa514be5e567b5851c187c42ec38c4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 8 Jul 2008 14:47:16 +0200
Subject: x86, AMD IOMMU: ignore detection of GART IOMMU

One of the last IOMMU updates covered a bug in the AMD IOMMU code. The early
detection code does not succeed if the GART is already detected. This patch
fixes this.

Cc: Robert Richter <robert.richter@amd.com>
Cc: Bhavna Sarathy <Bhavna.Sarathy@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Bhavna Sarathy <Bhavna.Sarathy@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 2a13e430437..bb0280077a3 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -828,7 +828,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
 
 void __init amd_iommu_detect(void)
 {
-	if (swiotlb || no_iommu || iommu_detected)
+	if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
 		return;
 
 	if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
-- 
cgit v1.2.3


From ab6bc3e343fbe3be4a0f67225e849d0db6b4b7ac Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 5 Jul 2008 15:53:36 +0400
Subject: x86: idle process - add checking for NULL early param

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: akpm@linux-foundation.org
Cc: andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba370dc8685..58325a6604a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -164,6 +164,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 
 static int __init idle_setup(char *str)
 {
+	if (!str)
+		return -EINVAL;
+
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
 		pm_idle = poll_idle;
-- 
cgit v1.2.3


From d6cd7effcc5e0047faf15ab0a54c980f1a616a07 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 5 Jul 2008 15:53:37 +0400
Subject: x86: io delay - add checking for NULL early param

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: akpm@linux-foundation.org
Cc: andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_delay.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 5921e5f0a64..1c3a66a67f8 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -103,6 +103,9 @@ void __init io_delay_init(void)
 
 static int __init io_delay_param(char *s)
 {
+	if (!s)
+		return -EINVAL;
+
 	if (!strcmp(s, "0x80"))
 		io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
 	else if (!strcmp(s, "0xed"))
-- 
cgit v1.2.3


From 4d8cc874d7ed43eda72765e9c0e141e170fee4f3 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 5 Jul 2008 15:53:38 +0400
Subject: x86: smpboot maxcpus - add checking for NULL early param

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: akpm@linux-foundation.org
Cc: andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fd933b5465b..e47bfac70c3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1452,7 +1452,8 @@ static int __init parse_maxcpus(char *arg)
 {
 	extern unsigned int maxcpus;
 
-	maxcpus = simple_strtoul(arg, NULL, 0);
+	if (arg)
+		maxcpus = simple_strtoul(arg, NULL, 0);
 	return 0;
 }
 early_param("maxcpus", parse_maxcpus);
-- 
cgit v1.2.3


From 46a7fa270afbe5fddc6042a598cfe22977b0e989 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:42 +0900
Subject: x86: make only GART code include gart.h

gart.h has only GART-specific stuff. Only GART code needs it. Other
IOMMU stuff should include iommu.h instead of gart.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c      | 2 +-
 arch/x86/kernel/amd_iommu_init.c | 2 +-
 arch/x86/kernel/aperture_64.c    | 1 +
 arch/x86/kernel/early-quirks.c   | 5 +----
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 arch/x86/kernel/pci-dma.c        | 2 +-
 arch/x86/kernel/pci-gart_64.c    | 1 +
 arch/x86/kernel/pci-nommu.c      | 2 +-
 arch/x86/kernel/pci-swiotlb_64.c | 2 +-
 arch/x86/kernel/setup.c          | 2 +-
 10 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2766d84c7a..cf2f74bcde5 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -23,7 +23,7 @@
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #include <asm/proto.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
 
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 2a13e430437..66438284c69 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -25,7 +25,7 @@
 #include <asm/pci-direct.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 
 /*
  * definitions for the ACPI scanning code
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9f907806c1a..44e21826db1 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -21,6 +21,7 @@
 #include <linux/suspend.h>
 #include <asm/e820.h>
 #include <asm/io.h>
+#include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f37cfc..510b8e36773 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -16,10 +16,7 @@
 #include <asm/dma.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
-
-#ifdef CONFIG_GART_IOMMU
-#include <asm/gart.h>
-#endif
+#include <asm/iommu.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 6959b5c45df..151f2d171f7 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -36,7 +36,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
 #include <asm/pci-direct.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8467ec2320f..f581a4b63b4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -5,7 +5,7 @@
 
 #include <asm/proto.h>
 #include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
 
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db5d2a..949ca985deb 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -32,6 +32,7 @@
 #include <asm/mtrr.h>
 #include <asm/pgtable.h>
 #include <asm/proto.h>
+#include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/cacheflush.h>
 #include <asm/swiotlb.h>
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index aec43d56f49..792b9179eff 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -7,7 +7,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/processor.h>
 #include <asm/dma.h>
 
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82299cd1d04..20df839b9c2 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -5,7 +5,7 @@
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 86fc2d62427..e5d208934bf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -96,7 +96,7 @@
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/dma.h>
-#include <asm/gart.h>
+#include <asm/iommu.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 
-- 
cgit v1.2.3


From ac7ded2adb2e43152fe7385ddd53bf45f5c92285 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:43 +0900
Subject: x86: remove ifdef CONFIG_GART_IOMMU in pci-dma.c

Our way to handle gart_* functions for CONFIG_GART_IOMMU and
!CONFIG_GART_IOMMU cases is inconsistent.

We have some dummy gart_* functions for the !CONFIG_GART_IOMMU case, but
we also use ifdef CONFIG_GART_IOMMU tricks in pci-dma.c to call some
gart_* functions only in the CONFIG_GART_IOMMU case.

This patch removes the ifdef CONFIG_GART_IOMMU blocks in pci-dma.c and
always uses the dummy gart_* functions in iommu.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f581a4b63b4..dd57c5bbe2d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -114,9 +114,7 @@ void __init pci_iommu_alloc(void)
 	 * The order of these functions is important for
 	 * fall-back/fail-over reasons
 	 */
-#ifdef CONFIG_GART_IOMMU
 	gart_iommu_hole_init();
-#endif
 
 #ifdef CONFIG_CALGARY_IOMMU
 	detect_calgary();
@@ -184,9 +182,7 @@ static __init int iommu_setup(char *p)
 			swiotlb = 1;
 #endif
 
-#ifdef CONFIG_GART_IOMMU
 		gart_parse_options(p);
-#endif
 
 #ifdef CONFIG_CALGARY_IOMMU
 		if (!strncmp(p, "calgary", 7))
@@ -508,9 +504,7 @@ static int __init pci_iommu_init(void)
 
 	amd_iommu_init();
 
-#ifdef CONFIG_GART_IOMMU
 	gart_iommu_init();
-#endif
 
 	no_iommu_init();
 	return 0;
-- 
cgit v1.2.3


From b8b48326f312026af12799917383c54c25d05482 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:44 +0900
Subject: x86: remove ifdef CONFIG_CALGARY_IOMMU in pci-dma.c

asm-x86/calgary.h has dummy calgary_iommu_init() and detect_calgary()
in !CONFIG_CALGARY_IOMMU case. So we don't need ifdef
CONFIG_CALGARY_IOMMU in pci-dma.c.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Alexis Bruemmer <alexisb@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dd57c5bbe2d..f16cbbe424a 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -116,9 +116,7 @@ void __init pci_iommu_alloc(void)
 	 */
 	gart_iommu_hole_init();
 
-#ifdef CONFIG_CALGARY_IOMMU
 	detect_calgary();
-#endif
 
 	detect_intel_iommu();
 
@@ -496,9 +494,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 
 static int __init pci_iommu_init(void)
 {
-#ifdef CONFIG_CALGARY_IOMMU
 	calgary_iommu_init();
-#endif
 
 	intel_iommu_init();
 
-- 
cgit v1.2.3


From be54f9d1c8df93c4998e134a306652caaa58f67f Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 11 Jul 2008 10:23:45 +0900
Subject: x86: remove ifdef CONFIG_SWIOTLB in pci-dma.c

As other IOMMUs do, this puts a dummy pci_swiotlb_init() in swiotlb.h
and removes ifdef CONFIG_SWIOTLB in pci-dma.c.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f16cbbe424a..d12945de056 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -122,9 +122,7 @@ void __init pci_iommu_alloc(void)
 
 	amd_iommu_detect();
 
-#ifdef CONFIG_SWIOTLB
 	pci_swiotlb_init();
-#endif
 }
 #endif
 
-- 
cgit v1.2.3


From b65233a9c1da587bf19ee161982f4f0ec59941c0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:21 +0200
Subject: x86, AMD IOMMU: add comments to the initialization code

This patch adds some comments to the AMD IOMMU initialization code to increase
its readability.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 214 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 206 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index bb0280077a3..9ddb46d7c52 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -71,6 +71,17 @@
 #define ACPI_DEVFLAG_LINT1              0x80
 #define ACPI_DEVFLAG_ATSDIS             0x10000000
 
+/*
+ * ACPI table definitions
+ *
+ * These data structures are laid over the table to parse the important values
+ * out of it.
+ */
+
+/*
+ * structure describing one IOMMU in the ACPI table. Typically followed by one
+ * or more ivhd_entrys.
+ */
 struct ivhd_header {
 	u8 type;
 	u8 flags;
@@ -83,6 +94,10 @@ struct ivhd_header {
 	u32 reserved;
 } __attribute__((packed));
 
+/*
+ * A device entry describing which devices a specific IOMMU translates and
+ * which requestor ids they use.
+ */
 struct ivhd_entry {
 	u8 type;
 	u16 devid;
@@ -90,6 +105,10 @@ struct ivhd_entry {
 	u32 ext;
 } __attribute__((packed));
 
+/*
+ * An AMD IOMMU memory definition structure. It defines things like exclusion
+ * ranges for devices and regions that should be unity mapped.
+ */
 struct ivmd_header {
 	u8 type;
 	u8 flags;
@@ -103,22 +122,66 @@ struct ivmd_header {
 
 static int __initdata amd_iommu_detected;
 
-u16 amd_iommu_last_bdf;
-struct list_head amd_iommu_unity_map;
-unsigned amd_iommu_aperture_order = 26;
-int amd_iommu_isolate;
+u16 amd_iommu_last_bdf;			/* largest PCI device id we have
+					   to handle */
+struct list_head amd_iommu_unity_map;	/* a list of required unity mappings
+					   we find in ACPI */
+unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+int amd_iommu_isolate;			/* if 1, device isolation is enabled */
 
-struct list_head amd_iommu_list;
+struct list_head amd_iommu_list;	/* list of all AMD IOMMUs in the
+					   system */
+
+/*
+ * Pointer to the device table which is shared by all AMD IOMMUs
+ * it is indexed by the PCI device id or the HT unit id and contains
+ * information about the domain the device belongs to as well as the
+ * page table root pointer.
+ */
 struct dev_table_entry *amd_iommu_dev_table;
+
+/*
+ * The alias table is a driver specific data structure which contains the
+ * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
+ * More than one device can share the same requestor id.
+ */
 u16 *amd_iommu_alias_table;
+
+/*
+ * The rlookup table is used to find the IOMMU which is responsible
+ * for a specific device. It is also indexed by the PCI device id.
+ */
 struct amd_iommu **amd_iommu_rlookup_table;
+
+/*
+ * The pd table (protection domain table) is used to find the protection domain
+ * data structure a device belongs to. Indexed with the PCI device id too.
+ */
 struct protection_domain **amd_iommu_pd_table;
+
+/*
+ * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
+ * to know which ones are already in use.
+ */
 unsigned long *amd_iommu_pd_alloc_bitmap;
 
-static u32 dev_table_size;
-static u32 alias_table_size;
-static u32 rlookup_table_size;
+static u32 dev_table_size;	/* size of the device table */
+static u32 alias_table_size;	/* size of the alias table */
+static u32 rlookup_table_size;	/* size of the rlookup table */
+
+/****************************************************************************
+ *
+ * AMD IOMMU MMIO register space handling functions
+ *
+ * These functions are used to program the IOMMU device registers in
+ * MMIO space required for that driver.
+ *
+ ****************************************************************************/
 
+/*
+ * This function sets the exclusion range in the IOMMU. DMA accesses to the
+ * exclusion range are passed through untranslated
+ */
 static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
 	u64 start = iommu->exclusion_start & PAGE_MASK;
@@ -137,6 +200,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
 			&entry, sizeof(entry));
 }
 
+/* Programs the physical address of the device table into the IOMMU hardware */
 static void __init iommu_set_device_table(struct amd_iommu *iommu)
 {
 	u32 entry;
@@ -149,6 +213,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
 			&entry, sizeof(entry));
 }
 
+/* Generic functions to enable/disable certain features of the IOMMU. */
 static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 {
 	u32 ctrl;
@@ -167,6 +232,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 	writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 }
 
+/* Function to enable the hardware */
 void __init iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
@@ -176,6 +242,10 @@ void __init iommu_enable(struct amd_iommu *iommu)
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
+/*
+ * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
+ * the system has one.
+ */
 static u8 * __init iommu_map_mmio_space(u64 address)
 {
 	u8 *ret;
@@ -199,6 +269,19 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 	release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
 }
 
+/****************************************************************************
+ *
+ * The functions below belong to the first pass of AMD IOMMU ACPI table
+ * parsing. In this pass we try to find out the highest device id this
+ * code has to handle. Upon this information the size of the shared data
+ * structures is determined later.
+ *
+ ****************************************************************************/
+
+/*
+ * This function reads the last device id the IOMMU has to handle from the PCI
+ * capability header for this IOMMU
+ */
 static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
 {
 	u32 cap;
@@ -209,6 +292,10 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
 	return 0;
 }
 
+/*
+ * After reading the highest device id from the IOMMU PCI capability header
+ * this function looks if there is a higher device id defined in the ACPI table
+ */
 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 {
 	u8 *p = (void *)h, *end = (void *)h;
@@ -229,6 +316,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 		case IVHD_DEV_RANGE_END:
 		case IVHD_DEV_ALIAS:
 		case IVHD_DEV_EXT_SELECT:
+			/* all the above subfield types refer to device ids */
 			UPDATE_LAST_BDF(dev->devid);
 			break;
 		default:
@@ -242,6 +330,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 	return 0;
 }
 
+/*
+ * Iterate over all IVHD entries in the ACPI table and find the highest device
+ * id which we need to handle. This is the first of three functions which parse
+ * the ACPI table. So we check the checksum here.
+ */
 static int __init find_last_devid_acpi(struct acpi_table_header *table)
 {
 	int i;
@@ -277,6 +370,20 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The following functions belong to the code path which parses the ACPI table
+ * the second time. In this ACPI parsing iteration we allocate IOMMU specific
+ * data structures, initialize the device/alias/rlookup table and also
+ * basically initialize the hardware.
+ *
+ ****************************************************************************/
+
+/*
+ * Allocates the command buffer. This buffer is per AMD IOMMU. We can
+ * write commands to that buffer later and the IOMMU will execute them
+ * asynchronously
+ */
 static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 {
 	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
@@ -307,6 +414,7 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
 				get_order(CMD_BUFFER_SIZE));
 }
 
+/* sets a specific bit in the device table entry. */
 static void set_dev_entry_bit(u16 devid, u8 bit)
 {
 	int i = (bit >> 5) & 0x07;
@@ -315,6 +423,10 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
 	amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
 }
 
+/*
+ * This function takes the device specific flags read from the ACPI
+ * table and sets up the device table entry with that information
+ */
 static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
 {
 	if (flags & ACPI_DEVFLAG_INITPASS)
@@ -333,11 +445,16 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
 		set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
 }
 
+/* Writes the specific IOMMU for a device into the rlookup table */
 static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
 {
 	amd_iommu_rlookup_table[devid] = iommu;
 }
 
+/*
+ * Reads the device exclusion range from ACPI and initialize IOMMU with
+ * it
+ */
 static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 {
 	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
@@ -346,12 +463,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 		return;
 
 	if (iommu) {
+		/*
+		 * We only can configure exclusion ranges per IOMMU, not
+		 * per device. But we can enable the exclusion range per
+		 * device. This is done here
+		 */
 		set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
 		iommu->exclusion_start = m->range_start;
 		iommu->exclusion_length = m->range_length;
 	}
 }
 
+/*
+ * This function reads some important data from the IOMMU PCI space and
+ * initializes the driver data structure with it. It reads the hardware
+ * capabilities and the first/last device entries
+ */
 static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
 	int bus = PCI_BUS(iommu->devid);
@@ -367,6 +494,10 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 	iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
 }
 
+/*
+ * Takes a pointer to an AMD IOMMU entry in the ACPI table and
+ * initializes the hardware and our data structures with it.
+ */
 static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 					struct ivhd_header *h)
 {
@@ -467,6 +598,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 	}
 }
 
+/* Initializes the device->iommu mapping for the driver */
 static int __init init_iommu_devices(struct amd_iommu *iommu)
 {
 	u16 i;
@@ -494,6 +626,11 @@ static void __init free_iommu_all(void)
 	}
 }
 
+/*
+ * This function glues the initialization function for one IOMMU
+ * together and also allocates the command buffer and programs the
+ * hardware. It does NOT enable the IOMMU. This is done afterwards.
+ */
 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 {
 	spin_lock_init(&iommu->lock);
@@ -521,6 +658,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	return 0;
 }
 
+/*
+ * Iterates over all IOMMU entries in the ACPI table, allocates the
+ * IOMMU structure and initializes it with init_iommu_one()
+ */
 static int __init init_iommu_all(struct acpi_table_header *table)
 {
 	u8 *p = (u8 *)table, *end = (u8 *)table;
@@ -555,6 +696,14 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The next functions belong to the third pass of parsing the ACPI
+ * table. In this last pass the memory mapping requirements are
+ * gathered (like exclusion and unity mapping ranges).
+ *
+ ****************************************************************************/
+
 static void __init free_unity_maps(void)
 {
 	struct unity_map_entry *entry, *next;
@@ -565,6 +714,7 @@ static void __init free_unity_maps(void)
 	}
 }
 
+/* called when we find an exclusion range definition in ACPI */
 static int __init init_exclusion_range(struct ivmd_header *m)
 {
 	int i;
@@ -588,6 +738,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 	return 0;
 }
 
+/* called for unity map ACPI definition */
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
 	struct unity_map_entry *e = 0;
@@ -619,6 +770,7 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	return 0;
 }
 
+/* iterates over all memory definitions we find in the ACPI table */
 static int __init init_memory_definitions(struct acpi_table_header *table)
 {
 	u8 *p = (u8 *)table, *end = (u8 *)table;
@@ -642,6 +794,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
 	return 0;
 }
 
+/*
+ * This function finally enables all IOMMUs found in the system after
+ * they have been initialized
+ */
 static void __init enable_iommus(void)
 {
 	struct amd_iommu *iommu;
@@ -678,6 +834,34 @@ static struct sys_device device_amd_iommu = {
 	.cls = &amd_iommu_sysdev_class,
 };
 
+/*
+ * This is the core init function for AMD IOMMU hardware in the system.
+ * This function is called from the generic x86 DMA layer initialization
+ * code.
+ *
+ * This function basically parses the ACPI table for AMD IOMMU (IVRS)
+ * three times:
+ *
+ *	1 pass) Find the highest PCI device id the driver has to handle.
+ *		Upon this information the size of the data structures is
+ *		determined that needs to be allocated.
+ *
+ *	2 pass) Initialize the data structures just allocated with the
+ *		information in the ACPI table about available AMD IOMMUs
+ *		in the system. It also maps the PCI devices in the
+ *		system to specific IOMMUs
+ *
+ *	3 pass) After the basic data structures are allocated and
+ *		initialized we update them with information about memory
+ *		remapping requirements parsed out of the ACPI table in
+ *		this last pass.
+ *
+ * After that the hardware is initialized and ready to go. In the last
+ * step we do some Linux specific things like registering the driver in
+ * the dma_ops interface and initializing the suspend/resume support
+ * functions. Finally it prints some information about AMD IOMMUs and
+ * the driver state and enables the hardware.
+ */
 int __init amd_iommu_init(void)
 {
 	int i, ret = 0;
@@ -821,6 +1005,13 @@ free:
 	goto out;
 }
 
+/****************************************************************************
+ *
+ * Early detect code. This code runs at IOMMU detection time in the DMA
+ * layer. It just looks if there is an IVRS ACPI table to detect AMD
+ * IOMMUs
+ *
+ ****************************************************************************/
 static int __init early_amd_iommu_detect(struct acpi_table_header *table)
 {
 	return 0;
@@ -841,6 +1032,13 @@ void __init amd_iommu_detect(void)
 	}
 }
 
+/****************************************************************************
+ *
+ * Parsing functions for the AMD IOMMU specific kernel command line
+ * options.
+ *
+ ****************************************************************************/
+
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
-- 
cgit v1.2.3


From 431b2a2015337533f1a9e39a840266a8a2c93144 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:22 +0200
Subject: x86, AMD IOMMU: add comments to core code

This patch adds comments about how the AMD IOMMU core code works for the DMA
remapping functionality.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 201 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 199 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2766d84c7a..4bae96ca7c1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -34,6 +34,9 @@
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
+/*
+ * general struct to manage commands sent to an IOMMU
+ */
 struct command {
 	u32 data[4];
 };
@@ -41,11 +44,22 @@ struct command {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 
+/* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
 	return iommu->cap & IOMMU_CAP_NPCACHE;
 }
 
+/****************************************************************************
+ *
+ * IOMMU command queuing functions
+ *
+ ****************************************************************************/
+
+/*
+ * Writes the command to the IOMMUs command buffer and informs the
+ * hardware about the new command. Must be called with iommu->lock held.
+ */
 static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 {
 	u32 tail, head;
@@ -63,6 +77,10 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 	return 0;
 }
 
+/*
+ * General queuing function for commands. Takes iommu->lock and calls
+ * __iommu_queue_command().
+ */
 static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 {
 	unsigned long flags;
@@ -75,6 +93,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 	return ret;
 }
 
+/*
+ * This function is called whenever we need to ensure that the IOMMU has
+ * completed execution of all commands we sent. It sends a
+ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
+ * us about that by writing a value to a physical address we pass with
+ * the command.
+ */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	int ret;
@@ -101,6 +126,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	return 0;
 }
 
+/*
+ * Command send function for invalidating a device table entry
+ */
 static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 {
 	struct command cmd;
@@ -116,6 +144,9 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return iommu_queue_command(iommu, &cmd);
 }
 
+/*
+ * Generic command send function for invalidating TLB entries
+ */
 static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 		u64 address, u16 domid, int pde, int s)
 {
@@ -127,9 +158,9 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	cmd.data[1] |= domid;
 	cmd.data[2] = LOW_U32(address);
 	cmd.data[3] = HIGH_U32(address);
-	if (s)
+	if (s) /* size bit - we flush more than one 4kb page */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde)
+	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 
 	iommu->need_sync = 1;
@@ -137,6 +168,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	return iommu_queue_command(iommu, &cmd);
 }
 
+/*
+ * TLB invalidation function which is called from the mapping functions.
+ * It invalidates a single PTE if the range to flush is within a single
+ * page. Otherwise it flushes the whole TLB of the IOMMU.
+ */
 static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 		u64 address, size_t size)
 {
@@ -159,6 +195,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The functions below are used to create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+/*
+ * Generic mapping function. It maps a physical address into a DMA
+ * address space. It allocates the page table pages if necessary.
+ * In the future it can be extended to a generic mapping function
+ * supporting all features of AMD IOMMU page tables like level skipping
+ * and full 64 bit address spaces.
+ */
 static int iommu_map(struct protection_domain *dom,
 		     unsigned long bus_addr,
 		     unsigned long phys_addr,
@@ -209,6 +259,10 @@ static int iommu_map(struct protection_domain *dom,
 	return 0;
 }
 
+/*
+ * This function checks if a specific unity mapping entry is needed for
+ * this specific IOMMU.
+ */
 static int iommu_for_unity_map(struct amd_iommu *iommu,
 			       struct unity_map_entry *entry)
 {
@@ -223,6 +277,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
 	return 0;
 }
 
+/*
+ * Init the unity mappings for a specific IOMMU in the system
+ *
+ * Basically iterates over all unity mapping entries and applies them to
+ * the default domain DMA of that IOMMU if necessary.
+ */
 static int iommu_init_unity_mappings(struct amd_iommu *iommu)
 {
 	struct unity_map_entry *entry;
@@ -239,6 +299,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu)
 	return 0;
 }
 
+/*
+ * This function actually applies the mapping to the page table of the
+ * dma_ops domain.
+ */
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e)
 {
@@ -261,6 +325,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 	return 0;
 }
 
+/*
+ * Inits the unity mappings required for a specific device
+ */
 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 					  u16 devid)
 {
@@ -278,12 +345,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The next functions belong to the address allocator for the dma_ops
+ * interface functions. They work like the allocators in the other IOMMU
+ * drivers. It's basically a bitmap which marks the allocated pages in
+ * the aperture. Maybe it could be enhanced in the future to a more
+ * efficient allocator.
+ *
+ ****************************************************************************/
 static unsigned long dma_mask_to_pages(unsigned long mask)
 {
 	return (mask >> PAGE_SHIFT) +
 		(PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
 }
 
+/*
+ * The address allocator core function.
+ *
+ * called with domain->lock held
+ */
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     struct dma_ops_domain *dom,
 					     unsigned int pages)
@@ -317,6 +398,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 	return address;
 }
 
+/*
+ * The address free function.
+ *
+ * called with domain->lock held
+ */
 static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 				   unsigned long address,
 				   unsigned int pages)
@@ -325,6 +411,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 	iommu_area_free(dom->bitmap, address, pages);
 }
 
+/****************************************************************************
+ *
+ * The next functions belong to the domain allocation. A domain is
+ * allocated for every IOMMU as the default domain. If device isolation
+ * is enabled, every device gets its own domain. The most important thing
+ * about domains is the page table mapping the DMA address space they
+ * contain.
+ *
+ ****************************************************************************/
+
 static u16 domain_id_alloc(void)
 {
 	unsigned long flags;
@@ -342,6 +438,10 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+/*
+ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ * ranges).
+ */
 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 				      unsigned long start_page,
 				      unsigned int pages)
@@ -382,6 +482,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	free_page((unsigned long)p1);
 }
 
+/*
+ * Free a domain, only used if something went wrong in the
+ * allocation path and we need to free an already allocated page table
+ */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
 	if (!dom)
@@ -396,6 +500,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	kfree(dom);
 }
 
+/*
+ * Allocates a new protection domain usable for the dma_ops functions.
+ * It also initializes the page table and the address allocator data
+ * structures required for the dma_ops interface
+ */
 static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 						   unsigned order)
 {
@@ -436,6 +545,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
+	/* Initialize the exclusion range if necessary */
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
 		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
@@ -444,6 +554,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
+	/*
+	 * At the last step, build the page tables so we don't need to
+	 * allocate page table pages in the dma_ops mapping/unmapping
+	 * path.
+	 */
 	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
 	dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
 			GFP_KERNEL);
@@ -472,6 +587,10 @@ free_dma_dom:
 	return NULL;
 }
 
+/*
+ * Find out the protection domain structure for a given PCI device. This
+ * will give us the pointer to the page table root for example.
+ */
 static struct protection_domain *domain_for_device(u16 devid)
 {
 	struct protection_domain *dom;
@@ -484,6 +603,10 @@ static struct protection_domain *domain_for_device(u16 devid)
 	return dom;
 }
 
+/*
+ * If a device is not yet associated with a domain, this function
+ * assigns it and makes the assignment visible to the hardware
+ */
 static void set_device_domain(struct amd_iommu *iommu,
 			      struct protection_domain *domain,
 			      u16 devid)
@@ -508,6 +631,19 @@ static void set_device_domain(struct amd_iommu *iommu,
 	iommu->need_sync = 1;
 }
 
+/*****************************************************************************
+ *
+ * The next functions belong to the dma_ops mapping/unmapping code.
+ *
+ *****************************************************************************/
+
+/*
+ * In the dma_ops path we only have the struct device. This function
+ * finds the corresponding IOMMU, the protection domain and the
+ * requestor id for a given device.
+ * If the device is not yet associated with a domain this is also done
+ * in this function.
+ */
 static int get_device_resources(struct device *dev,
 				struct amd_iommu **iommu,
 				struct protection_domain **domain,
@@ -522,6 +658,7 @@ static int get_device_resources(struct device *dev,
 	pcidev = to_pci_dev(dev);
 	_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
 
+	/* device not translated by any IOMMU in the system? */
 	if (_bdf >= amd_iommu_last_bdf) {
 		*iommu = NULL;
 		*domain = NULL;
@@ -547,6 +684,10 @@ static int get_device_resources(struct device *dev,
 	return 1;
 }
 
+/*
+ * This is the generic map function. It maps one 4kb page at paddr to
+ * the given address in the DMA address space for the domain.
+ */
 static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 				     struct dma_ops_domain *dom,
 				     unsigned long address,
@@ -578,6 +719,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 	return (dma_addr_t)address;
 }
 
+/*
+ * The generic unmapping function for one page in the DMA address space.
+ */
 static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 				 struct dma_ops_domain *dom,
 				 unsigned long address)
@@ -597,6 +741,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 	*pte = 0ULL;
 }
 
+/*
+ * This function contains common code for mapping of a physically
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided by this IOMMU driver.
+ * Must be called with the domain lock held.
+ */
 static dma_addr_t __map_single(struct device *dev,
 			       struct amd_iommu *iommu,
 			       struct dma_ops_domain *dma_dom,
@@ -628,6 +778,10 @@ out:
 	return address;
 }
 
+/*
+ * Does the reverse of the __map_single function. Must be called with
+ * the domain lock held too
+ */
 static void __unmap_single(struct amd_iommu *iommu,
 			   struct dma_ops_domain *dma_dom,
 			   dma_addr_t dma_addr,
@@ -652,6 +806,9 @@ static void __unmap_single(struct amd_iommu *iommu,
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 }
 
+/*
+ * The exported map_single function for dma_ops.
+ */
 static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 			     size_t size, int dir)
 {
@@ -664,6 +821,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	get_device_resources(dev, &iommu, &domain, &devid);
 
 	if (iommu == NULL || domain == NULL)
+		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -683,6 +841,9 @@ out:
 	return addr;
 }
 
+/*
+ * The exported unmap_single function for dma_ops.
+ */
 static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 			 size_t size, int dir)
 {
@@ -692,6 +853,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	u16 devid;
 
 	if (!get_device_resources(dev, &iommu, &domain, &devid))
+		/* device not handled by any AMD IOMMU */
 		return;
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -706,6 +868,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
+/*
+ * This is a special map_sg function which is used if we should map a
+ * device which is not handled by an AMD IOMMU in the system.
+ */
 static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
 			   int nelems, int dir)
 {
@@ -720,6 +886,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
 	return nelems;
 }
 
+/*
+ * The exported map_sg function for dma_ops (handles scatter-gather
+ * lists).
+ */
 static int map_sg(struct device *dev, struct scatterlist *sglist,
 		  int nelems, int dir)
 {
@@ -775,6 +945,10 @@ unmap:
 	goto out;
 }
 
+/*
+ * The exported unmap_sg function for dma_ops (handles scatter-gather
+ * lists).
+ */
 static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		     int nelems, int dir)
 {
@@ -804,6 +978,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
+/*
+ * The exported alloc_coherent function for dma_ops.
+ */
 static void *alloc_coherent(struct device *dev, size_t size,
 			    dma_addr_t *dma_addr, gfp_t flag)
 {
@@ -851,6 +1028,11 @@ out:
 	return virt_addr;
 }
 
+/*
+ * The exported free_coherent function for dma_ops.
+ * FIXME: fix the generic x86 DMA layer so that it actually calls that
+ *        function.
+ */
 static void free_coherent(struct device *dev, size_t size,
 			  void *virt_addr, dma_addr_t dma_addr)
 {
@@ -879,6 +1061,8 @@ free_mem:
 }
 
 /*
+ * The function for pre-allocating protection domains.
+ *
  * If the driver core informs the DMA layer if a driver grabs a device
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
@@ -921,12 +1105,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = {
 	.unmap_sg = unmap_sg,
 };
 
+/*
+ * The function which glues the AMD IOMMU driver into dma_ops.
+ */
 int __init amd_iommu_init_dma_ops(void)
 {
 	struct amd_iommu *iommu;
 	int order = amd_iommu_aperture_order;
 	int ret;
 
+	/*
+	 * first allocate a default protection domain for every IOMMU we
+	 * found in the system. Devices not assigned to any other
+	 * protection domain will be assigned to the default one.
+	 */
 	list_for_each_entry(iommu, &amd_iommu_list, list) {
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
@@ -936,6 +1128,10 @@ int __init amd_iommu_init_dma_ops(void)
 			goto free_domains;
 	}
 
+	/*
+	 * If device isolation is enabled, pre-allocate the protection
+	 * domains for each device.
+	 */
 	if (amd_iommu_isolate)
 		prealloc_protection_domains();
 
@@ -947,6 +1143,7 @@ int __init amd_iommu_init_dma_ops(void)
 	gart_iommu_aperture = 0;
 #endif
 
+	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
 	return 0;
-- 
cgit v1.2.3


From 8ea80d783efd0c50577ec8d69757ae54c408eacd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:23 +0200
Subject: x86, AMD IOMMU: replace HIGH_U32 macro with upper_32_bits function

Removes a driver specific macro and replaces it with a generic function already
available in Linux.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 4bae96ca7c1..9098f047c1a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -109,7 +109,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
-	cmd.data[1] = HIGH_U32(ready_phys);
+	cmd.data[1] = upper_32_bits(ready_phys);
 	cmd.data[2] = 1; /* value written to 'ready' */
 	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
 
@@ -157,7 +157,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
 	cmd.data[1] |= domid;
 	cmd.data[2] = LOW_U32(address);
-	cmd.data[3] = HIGH_U32(address);
+	cmd.data[3] = upper_32_bits(address);
 	if (s) /* size bit - we flush more than one 4kb page */
 		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-- 
cgit v1.2.3


From 208ec8c94d818a3def0b424958493728871716d1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:24 +0200
Subject: x86, AMD IOMMU: replace UPDATE_LAST_BDF macro with a function

This patch replaces the UPDATE_LAST_BDF macro in the init code with the
update_last_devid function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9ddb46d7c52..6e1c8ffc0c5 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -30,11 +30,6 @@
 /*
  * definitions for the ACPI scanning code
  */
-#define UPDATE_LAST_BDF(x) do {\
-	if ((x) > amd_iommu_last_bdf) \
-		amd_iommu_last_bdf = (x); \
-	} while (0);
-
 #define DEVID(bus, devfn) (((bus) << 8) | (devfn))
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 #define IVRS_HEADER_LENGTH 48
@@ -169,6 +164,12 @@ static u32 dev_table_size;	/* size of the device table */
 static u32 alias_table_size;	/* size of the alias table */
 static u32 rlookup_table_size;	/* size if the rlookup table */
 
+static inline void update_last_devid(u16 devid)
+{
+	if (devid > amd_iommu_last_bdf)
+		amd_iommu_last_bdf = devid;
+}
+
 /****************************************************************************
  *
  * AMD IOMMU MMIO register space handling functions
@@ -287,7 +288,7 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
 	u32 cap;
 
 	cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
-	UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+	update_last_devid(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
 
 	return 0;
 }
@@ -317,7 +318,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 		case IVHD_DEV_ALIAS:
 		case IVHD_DEV_EXT_SELECT:
 			/* all the above subfield types refer to device ids */
-			UPDATE_LAST_BDF(dev->devid);
+			update_last_devid(dev->devid);
 			break;
 		default:
 			break;
-- 
cgit v1.2.3


From c571484e53f3e1d90bc5374528580c7419d28d4c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:25 +0200
Subject: x86, AMD IOMMU: replace TBL_SIZE macro with a function

This patch converts the TBL_SIZE macro in the init code to a function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6e1c8ffc0c5..1f148393cf7 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -33,7 +33,6 @@
 #define DEVID(bus, devfn) (((bus) << 8) | (devfn))
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 #define IVRS_HEADER_LENGTH 48
-#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
 
 #define ACPI_IVHD_TYPE                  0x10
 #define ACPI_IVMD_TYPE_ALL              0x20
@@ -170,6 +169,14 @@ static inline void update_last_devid(u16 devid)
 		amd_iommu_last_bdf = devid;
 }
 
+static inline unsigned long tbl_size(int entry_size)
+{
+	unsigned shift = PAGE_SHIFT +
+			 get_order(amd_iommu_last_bdf * entry_size);
+
+	return 1UL << shift;
+}
+
 /****************************************************************************
  *
  * AMD IOMMU MMIO register space handling functions
@@ -884,9 +891,9 @@ int __init amd_iommu_init(void)
 	if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
 		return -ENODEV;
 
-	dev_table_size     = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
-	alias_table_size   = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
-	rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
+	dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
+	alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
+	rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
 
 	ret = -ENOMEM;
 
-- 
cgit v1.2.3


From 9a836de0c9944c42d006ec241712c72e74737c73 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:26 +0200
Subject: x86, AMD IOMMU: remove unnecessary free checks from init code

This patch removes unnecessary checks before memory is released.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 1f148393cf7..0f5a9115a69 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -417,9 +417,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
-	if (iommu->cmd_buf)
-		free_pages((unsigned long)iommu->cmd_buf,
-				get_order(CMD_BUFFER_SIZE));
+	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 }
 
 /* sets a specific bit in the device table entry. */
@@ -987,24 +985,19 @@ out:
 	return ret;
 
 free:
-	if (amd_iommu_pd_alloc_bitmap)
-		free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
+	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
 
-	if (amd_iommu_pd_table)
-		free_pages((unsigned long)amd_iommu_pd_table,
-				get_order(rlookup_table_size));
+	free_pages((unsigned long)amd_iommu_pd_table,
+		   get_order(rlookup_table_size));
 
-	if (amd_iommu_rlookup_table)
-		free_pages((unsigned long)amd_iommu_rlookup_table,
-				get_order(rlookup_table_size));
+	free_pages((unsigned long)amd_iommu_rlookup_table,
+		   get_order(rlookup_table_size));
 
-	if (amd_iommu_alias_table)
-		free_pages((unsigned long)amd_iommu_alias_table,
-				get_order(alias_table_size));
+	free_pages((unsigned long)amd_iommu_alias_table,
+		   get_order(alias_table_size));
 
-	if (amd_iommu_dev_table)
-		free_pages((unsigned long)amd_iommu_dev_table,
-				get_order(dev_table_size));
+	free_pages((unsigned long)amd_iommu_dev_table,
+		   get_order(dev_table_size));
 
 	free_iommu_all();
 
-- 
cgit v1.2.3


From 136f78a19cf94d469f31a4009c7c0ac2301fbbf0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:27 +0200
Subject: x86, AMD IOMMU: add an emergency exit to the completion wait loop

To keep the loop that waits for the completion wait command from spinning
forever, this patch adds a limit on the number of iterations of that loop.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 9098f047c1a..7fa2d5d57dd 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -32,6 +32,8 @@
 #define to_pages(addr, size) \
 	 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
+#define EXIT_LOOP_COUNT 10000000
+
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
 /*
@@ -106,6 +108,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	struct command cmd;
 	volatile u64 ready = 0;
 	unsigned long ready_phys = virt_to_phys(&ready);
+	unsigned long i = 0;
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
@@ -120,8 +123,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	if (ret)
 		return ret;
 
-	while (!ready)
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
 		cpu_relax();
+	}
+
+	if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
+		printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
 
 	return 0;
 }
-- 
cgit v1.2.3


From d64495366ff78fdbd5bd3176a7ada2f0c2cbfba6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:28 +0200
Subject: x86, AMD IOMMU: rename struct command to iommu_cmd

This patch gives the struct command a more descriptive and not so generic name.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 7fa2d5d57dd..dec10e1a397 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -39,7 +39,7 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 /*
  * general struct to manage commands send to an IOMMU
  */
-struct command {
+struct iommu_cmd {
 	u32 data[4];
 };
 
@@ -62,7 +62,7 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
  */
-static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
 	u32 tail, head;
 	u8 *target;
@@ -83,7 +83,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
  * General queuing function for commands. Takes iommu->lock and calls
  * __iommu_queue_command().
  */
-static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
 	unsigned long flags;
 	int ret;
@@ -105,7 +105,7 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	int ret;
-	struct command cmd;
+	struct iommu_cmd cmd;
 	volatile u64 ready = 0;
 	unsigned long ready_phys = virt_to_phys(&ready);
 	unsigned long i = 0;
@@ -139,7 +139,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
  */
 static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 {
-	struct command cmd;
+	struct iommu_cmd cmd;
 
 	BUG_ON(iommu == NULL);
 
@@ -158,7 +158,7 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 		u64 address, u16 domid, int pde, int s)
 {
-	struct command cmd;
+	struct iommu_cmd cmd;
 
 	memset(&cmd, 0, sizeof(cmd));
 	address &= PAGE_MASK;
-- 
cgit v1.2.3


From d0312b2142ac7665031755c1cc3dba827d4eb711 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:29 +0200
Subject: x86, AMD IOMMU: remove unneeded initializations from command buffer
 allocation

This patch removes an unneeded initialization from the alloc_command_buffer
function and replaces a memset with __GFP_ZERO.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0f5a9115a69..0124995c7b5 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -394,17 +394,15 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
  */
 static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 {
-	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
+	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 			get_order(CMD_BUFFER_SIZE));
-	u64 entry = 0;
+	u64 entry;
 
 	if (cmd_buf == NULL)
 		return NULL;
 
 	iommu->cmd_buf_size = CMD_BUFFER_SIZE;
 
-	memset(cmd_buf, 0, CMD_BUFFER_SIZE);
-
 	entry = (u64)virt_to_phys(cmd_buf);
 	entry |= MMIO_CMD_SIZE_512;
 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
-- 
cgit v1.2.3


From 58a3bee567b588a84cdde05fecc45439b396362c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:30 +0200
Subject: x86, AMD IOMMU: use true/false instead of 0/1 for bool value

This patch replaces the integer values used for the bool variable in ACPI
scanning code with true and false.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0124995c7b5..316fe2eaeef 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -509,7 +509,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 	u8 *end = p, flags = 0;
 	u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
 	u32 ext_flags = 0;
-	bool alias = 0;
+	bool alias = false;
 	struct ivhd_entry *e;
 
 	/*
@@ -559,7 +559,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = 0;
-			alias = 0;
+			alias = false;
 			break;
 		case IVHD_DEV_ALIAS:
 			devid = e->devid;
@@ -572,7 +572,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			flags = e->flags;
 			devid_to = e->ext >> 8;
 			ext_flags = 0;
-			alias = 1;
+			alias = true;
 			break;
 		case IVHD_DEV_EXT_SELECT:
 			devid = e->devid;
@@ -582,7 +582,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = e->ext;
-			alias = 0;
+			alias = false;
 			break;
 		case IVHD_DEV_RANGE_END:
 			devid = e->devid;
-- 
cgit v1.2.3


From 2e22847fbe05f2543ccebd0c2df94d9cf3c52aa5 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:31 +0200
Subject: x86, AMD IOMMU: do runtime list initialization at compile time

This patch changes the list initialization for the iommu list and the unity map
list from runtime to compile time.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 316fe2eaeef..0c247032308 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -118,12 +118,12 @@ static int __initdata amd_iommu_detected;
 
 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 					   to handle */
-struct list_head amd_iommu_unity_map;	/* a list of required unity mappings
+LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
 int amd_iommu_isolate;			/* if 1, device isolation is enabled */
 
-struct list_head amd_iommu_list;	/* list of all AMD IOMMUs in the
+LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
 					   system */
 
 /*
@@ -673,8 +673,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	struct amd_iommu *iommu;
 	int ret;
 
-	INIT_LIST_HEAD(&amd_iommu_list);
-
 	end += table->length;
 	p += IVRS_HEADER_LENGTH;
 
@@ -780,8 +778,6 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
 	u8 *p = (u8 *)table, *end = (u8 *)table;
 	struct ivmd_header *m;
 
-	INIT_LIST_HEAD(&amd_iommu_unity_map);
-
 	end += table->length;
 	p += IVRS_HEADER_LENGTH;
 
-- 
cgit v1.2.3


From 5dc8bff0f6d0dfeb1f1c6e694294ba7c33d099f1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:32 +0200
Subject: x86, AMD IOMMU: replace memset with __GFP_ZERO for table allocation

This patch removes the memset from the data structure initialization code and
allocates the structures with the __GFP_ZERO flag.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0c247032308..2efc3d59b7e 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -890,7 +890,7 @@ int __init amd_iommu_init(void)
 	ret = -ENOMEM;
 
 	/* Device table - directly used by all IOMMUs */
-	amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
+	amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 				      get_order(dev_table_size));
 	if (amd_iommu_dev_table == NULL)
 		goto out;
@@ -914,27 +914,23 @@ int __init amd_iommu_init(void)
 	 * Protection Domain table - maps devices to protection domains
 	 * This table has the same size as the rlookup_table
 	 */
-	amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
+	amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 				     get_order(rlookup_table_size));
 	if (amd_iommu_pd_table == NULL)
 		goto free;
 
-	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
+	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
+					    GFP_KERNEL | __GFP_ZERO,
 					    get_order(MAX_DOMAIN_ID/8));
 	if (amd_iommu_pd_alloc_bitmap == NULL)
 		goto free;
 
 	/*
-	 * memory is allocated now; initialize the device table with all zeroes
-	 * and let all alias entries point to itself
+	 * let all alias entries point to itself
 	 */
-	memset(amd_iommu_dev_table, 0, dev_table_size);
 	for (i = 0; i < amd_iommu_last_bdf; ++i)
 		amd_iommu_alias_table[i] = i;
 
-	memset(amd_iommu_pd_table, 0, rlookup_table_size);
-	memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
-
 	/*
 	 * never allocate domain 0 because its used as the non-allocated and
 	 * error value placeholder
-- 
cgit v1.2.3


From 0906372e6cf372f3162481f24a0b8ccae0eff4d7 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:33 +0200
Subject: x86, AMD IOMMU: replace self made size parsing with memparse call

This patch replaces the self-made parsing of the amd_iommu_size option with the
generic memparse function call.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 2efc3d59b7e..e0ff9404e6c 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1042,20 +1042,10 @@ static int __init parse_amd_iommu_options(char *str)
 
 static int __init parse_amd_iommu_size_options(char *str)
 {
-	for (; *str; ++str) {
-		if (strcmp(str, "32M") == 0)
-			amd_iommu_aperture_order = 25;
-		if (strcmp(str, "64M") == 0)
-			amd_iommu_aperture_order = 26;
-		if (strcmp(str, "128M") == 0)
-			amd_iommu_aperture_order = 27;
-		if (strcmp(str, "256M") == 0)
-			amd_iommu_aperture_order = 28;
-		if (strcmp(str, "512M") == 0)
-			amd_iommu_aperture_order = 29;
-		if (strcmp(str, "1G") == 0)
-			amd_iommu_aperture_order = 30;
-	}
+	unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
+
+	if ((order > 24) && (order < 31))
+		amd_iommu_aperture_order = order;
 
 	return 1;
 }
-- 
cgit v1.2.3


From d591b0a3ae25f587d0c4da1e1d1a425143590790 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 11 Jul 2008 17:14:35 +0200
Subject: x86, AMD IOMMU: replace DEVID macro with a function

This patch replaces the DEVID macro with a function and uses it where
appropriate (also in the core code).

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c      | 2 +-
 arch/x86/kernel/amd_iommu_init.c | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index dec10e1a397..8c3deb027d3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -664,7 +664,7 @@ static int get_device_resources(struct device *dev,
 	BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
 
 	pcidev = to_pci_dev(dev);
-	_bdf = (pcidev->bus->number << 8) | pcidev->devfn;
+	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
 
 	/* device not translated by any IOMMU in the system? */
 	if (_bdf >= amd_iommu_last_bdf) {
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index e0ff9404e6c..9bf1b8111b0 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -30,7 +30,6 @@
 /*
  * definitions for the ACPI scanning code
  */
-#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 #define IVRS_HEADER_LENGTH 48
 
@@ -295,7 +294,7 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
 	u32 cap;
 
 	cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
-	update_last_devid(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+	update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
 
 	return 0;
 }
@@ -494,8 +493,10 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 	iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
 
 	range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
-	iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
-	iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
+	iommu->first_device = calc_devid(MMIO_GET_BUS(range),
+					 MMIO_GET_FD(range));
+	iommu->last_device = calc_devid(MMIO_GET_BUS(range),
+					MMIO_GET_LD(range));
 }
 
 /*
-- 
cgit v1.2.3


From 2510495e208e7a69b64fcf5cdf8966d873536d9e Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 11 Jul 2008 12:13:59 +0200
Subject: x86/pci: Removing pci-y in Makefile

Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index e515e8db842..28451f41e0e 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig_$(BITS).o direct.o mmconfig-shared.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 obj-$(CONFIG_PCI_OLPC)		+= olpc.o
 
-pci-y				:= fixup.o
-pci-$(CONFIG_ACPI)		+= acpi.o
-pci-y				+= legacy.o irq.o
+obj-y				+= fixup.o
+obj-$(CONFIG_ACPI)		+= acpi.o
+obj-y				+= legacy.o irq.o
 
-pci-$(CONFIG_X86_VISWS)		+= visws.o
+obj-$(CONFIG_X86_VISWS)		+= visws.o
 
-pci-$(CONFIG_X86_NUMAQ)		+= numa.o
+obj-$(CONFIG_X86_NUMAQ)		+= numa.o
 
-obj-y				+= $(pci-y) common.o early.o
+obj-y				+= common.o early.o
 obj-y				+= amd_bus.o
-- 
cgit v1.2.3


From 060b9708a0c04cf9af69c128ef7954b6f0a84180 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 11 Jul 2008 12:14:27 +0200
Subject: x86/pci: Changing subsystem initialization order for NUMA

Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/legacy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 132876cc6fc..60e8caa1356 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -57,14 +57,14 @@ static int __init pci_legacy_init(void)
 
 int __init pci_subsys_init(void)
 {
+#ifdef CONFIG_X86_NUMAQ
+	pci_numa_init();
+#endif
 #ifdef CONFIG_ACPI
 	pci_acpi_init();
 #endif
 	pci_legacy_init();
 	pcibios_irq_init();
-#ifdef CONFIG_X86_NUMAQ
-	pci_numa_init();
-#endif
 	pcibios_init();
 
 	return 0;
-- 
cgit v1.2.3


From 9314d301390ad0d96986da3d893a21e81a287982 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 11 Jul 2008 12:18:40 +0200
Subject: x86/pci: renamed: numa.c -> numaq_32.c

Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/Makefile   |   2 +-
 arch/x86/pci/numa.c     | 178 ------------------------------------------------
 arch/x86/pci/numaq_32.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 179 deletions(-)
 delete mode 100644 arch/x86/pci/numa.c
 create mode 100644 arch/x86/pci/numaq_32.c

(limited to 'arch')

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 28451f41e0e..d49202e740e 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -11,7 +11,7 @@ obj-y				+= legacy.o irq.o
 
 obj-$(CONFIG_X86_VISWS)		+= visws.o
 
-obj-$(CONFIG_X86_NUMAQ)		+= numa.o
+obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 
 obj-y				+= common.o early.o
 obj-y				+= amd_bus.o
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
deleted file mode 100644
index 8b5ca196673..00000000000
--- a/arch/x86/pci/numa.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * numa.c - Low-level PCI access for NUMA-Q machines
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/nodemask.h>
-#include <mach_apic.h>
-#include <asm/mpspec.h>
-#include "pci.h"
-
-#define XQUAD_PORTIO_BASE 0xfe400000
-#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
-
-#define BUS2QUAD(global) (mp_bus_id_to_node[global])
-
-#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
-
-#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-EXPORT_SYMBOL(xquad_portio);
-
-#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
-
-#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
-	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
-
-static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
-{
-	unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
-	if (xquad_portio)
-		writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
-	else
-		outl(val, 0xCF8);
-}
-
-static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
-			     unsigned int devfn, int reg, int len, u32 *value)
-{
-	unsigned long flags;
-	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
-
-	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
-		return -EINVAL;
-
-	spin_lock_irqsave(&pci_config_lock, flags);
-
-	write_cf8(bus, devfn, reg);
-
-	switch (len) {
-	case 1:
-		if (xquad_portio)
-			*value = readb(adr + (reg & 3));
-		else
-			*value = inb(0xCFC + (reg & 3));
-		break;
-	case 2:
-		if (xquad_portio)
-			*value = readw(adr + (reg & 2));
-		else
-			*value = inw(0xCFC + (reg & 2));
-		break;
-	case 4:
-		if (xquad_portio)
-			*value = readl(adr);
-		else
-			*value = inl(0xCFC);
-		break;
-	}
-
-	spin_unlock_irqrestore(&pci_config_lock, flags);
-
-	return 0;
-}
-
-static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
-			      unsigned int devfn, int reg, int len, u32 value)
-{
-	unsigned long flags;
-	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
-
-	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 
-		return -EINVAL;
-
-	spin_lock_irqsave(&pci_config_lock, flags);
-
-	write_cf8(bus, devfn, reg);
-
-	switch (len) {
-	case 1:
-		if (xquad_portio)
-			writeb(value, adr + (reg & 3));
-		else
-			outb((u8)value, 0xCFC + (reg & 3));
-		break;
-	case 2:
-		if (xquad_portio)
-			writew(value, adr + (reg & 2));
-		else
-			outw((u16)value, 0xCFC + (reg & 2));
-		break;
-	case 4:
-		if (xquad_portio)
-			writel(value, adr + reg);
-		else
-			outl((u32)value, 0xCFC);
-		break;
-	}
-
-	spin_unlock_irqrestore(&pci_config_lock, flags);
-
-	return 0;
-}
-
-#undef PCI_CONF1_MQ_ADDRESS
-
-static struct pci_raw_ops pci_direct_conf1_mq = {
-	.read	= pci_conf1_mq_read,
-	.write	= pci_conf1_mq_write
-};
-
-
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
-{
-	/*
-	 * i450NX -- Find and scan all secondary buses on all PXB's.
-	 */
-	int pxb, reg;
-	u8 busno, suba, subb;
-	int quad = BUS2QUAD(d->bus->number);
-
-	printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
-	reg = 0xd0;
-	for(pxb=0; pxb<2; pxb++) {
-		pci_read_config_byte(d, reg++, &busno);
-		pci_read_config_byte(d, reg++, &suba);
-		pci_read_config_byte(d, reg++, &subb);
-		DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
-		if (busno) {
-			/* Bus A */
-			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
-		}
-		if (suba < subb) {
-			/* Bus B */
-			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1));
-		}
-	}
-	pcibios_last_bus = -1;
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
-
-int __init pci_numa_init(void)
-{
-	int quad;
-
-	if (!found_numaq)
-		return 0;
-
-	raw_pci_ops = &pci_direct_conf1_mq;
-
-	if (pcibios_scanned++)
-		return 0;
-
-	pci_root_bus = pcibios_scan_root(0);
-	if (pci_root_bus)
-		pci_bus_add_devices(pci_root_bus);
-	if (num_online_nodes() > 1)
-		for_each_online_node(quad) {
-			if (quad == 0)
-				continue;
-			printk("Scanning PCI bus %d for quad %d\n", 
-				QUADLOCAL2BUS(quad,0), quad);
-			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0));
-		}
-	return 0;
-}
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
new file mode 100644
index 00000000000..8b5ca196673
--- /dev/null
+++ b/arch/x86/pci/numaq_32.c
@@ -0,0 +1,178 @@
+/*
+ * numa.c - Low-level PCI access for NUMA-Q machines
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nodemask.h>
+#include <mach_apic.h>
+#include <asm/mpspec.h>
+#include "pci.h"
+
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
+
+#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+
+#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+
+#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+EXPORT_SYMBOL(xquad_portio);
+
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
+#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
+
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+	unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+	if (xquad_portio)
+		writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+	else
+		outl(val, 0xCF8);
+}
+
+static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
+			     unsigned int devfn, int reg, int len, u32 *value)
+{
+	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
+
+	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	write_cf8(bus, devfn, reg);
+
+	switch (len) {
+	case 1:
+		if (xquad_portio)
+			*value = readb(adr + (reg & 3));
+		else
+			*value = inb(0xCFC + (reg & 3));
+		break;
+	case 2:
+		if (xquad_portio)
+			*value = readw(adr + (reg & 2));
+		else
+			*value = inw(0xCFC + (reg & 2));
+		break;
+	case 4:
+		if (xquad_portio)
+			*value = readl(adr);
+		else
+			*value = inl(0xCFC);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
+			      unsigned int devfn, int reg, int len, u32 value)
+{
+	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
+
+	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 
+		return -EINVAL;
+
+	spin_lock_irqsave(&pci_config_lock, flags);
+
+	write_cf8(bus, devfn, reg);
+
+	switch (len) {
+	case 1:
+		if (xquad_portio)
+			writeb(value, adr + (reg & 3));
+		else
+			outb((u8)value, 0xCFC + (reg & 3));
+		break;
+	case 2:
+		if (xquad_portio)
+			writew(value, adr + (reg & 2));
+		else
+			outw((u16)value, 0xCFC + (reg & 2));
+		break;
+	case 4:
+		if (xquad_portio)
+			writel(value, adr + reg);
+		else
+			outl((u32)value, 0xCFC);
+		break;
+	}
+
+	spin_unlock_irqrestore(&pci_config_lock, flags);
+
+	return 0;
+}
+
+#undef PCI_CONF1_MQ_ADDRESS
+
+static struct pci_raw_ops pci_direct_conf1_mq = {
+	.read	= pci_conf1_mq_read,
+	.write	= pci_conf1_mq_write
+};
+
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+	/*
+	 * i450NX -- Find and scan all secondary buses on all PXB's.
+	 */
+	int pxb, reg;
+	u8 busno, suba, subb;
+	int quad = BUS2QUAD(d->bus->number);
+
+	printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
+	reg = 0xd0;
+	for(pxb=0; pxb<2; pxb++) {
+		pci_read_config_byte(d, reg++, &busno);
+		pci_read_config_byte(d, reg++, &suba);
+		pci_read_config_byte(d, reg++, &subb);
+		DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+		if (busno) {
+			/* Bus A */
+			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
+		}
+		if (suba < subb) {
+			/* Bus B */
+			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1));
+		}
+	}
+	pcibios_last_bus = -1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
+
+int __init pci_numa_init(void)
+{
+	int quad;
+
+	if (!found_numaq)
+		return 0;
+
+	raw_pci_ops = &pci_direct_conf1_mq;
+
+	if (pcibios_scanned++)
+		return 0;
+
+	pci_root_bus = pcibios_scan_root(0);
+	if (pci_root_bus)
+		pci_bus_add_devices(pci_root_bus);
+	if (num_online_nodes() > 1)
+		for_each_online_node(quad) {
+			if (quad == 0)
+				continue;
+			printk("Scanning PCI bus %d for quad %d\n", 
+				QUADLOCAL2BUS(quad,0), quad);
+			pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0));
+		}
+	return 0;
+}
-- 
cgit v1.2.3


From e27cf3a2e151b79375efadf71a5d383ad416fb44 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 11 Jul 2008 12:18:41 +0200
Subject: x86/pci: renaming numa into numaq

Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/legacy.c   | 2 +-
 arch/x86/pci/numaq_32.c | 4 ++--
 arch/x86/pci/pci.h      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 60e8caa1356..f405eb0b891 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -58,7 +58,7 @@ static int __init pci_legacy_init(void)
 int __init pci_subsys_init(void)
 {
 #ifdef CONFIG_X86_NUMAQ
-	pci_numa_init();
+	pci_numaq_init();
 #endif
 #ifdef CONFIG_ACPI
 	pci_acpi_init();
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 8b5ca196673..f4b16dc11da 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -1,5 +1,5 @@
 /*
- * numa.c - Low-level PCI access for NUMA-Q machines
+ * numaq_32.c - Low-level PCI access for NUMA-Q machines
  */
 
 #include <linux/pci.h>
@@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
 
-int __init pci_numa_init(void)
+int __init pci_numaq_init(void)
 {
 	int quad;
 
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index b2270a55b0c..36b8dd019fa 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -107,7 +107,7 @@ extern void __init dmi_check_skip_isa_align(void);
 /* some common used subsys_initcalls */
 extern int __init pci_acpi_init(void);
 extern int __init pcibios_irq_init(void);
-extern int __init pci_numa_init(void);
+extern int __init pci_numaq_init(void);
 extern int __init pcibios_init(void);
 
 /* pci-mmconfig.c */
-- 
cgit v1.2.3


From 3cabf37f6167125cb5185db05f5061650f685ab7 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 11 Jul 2008 12:26:59 +0200
Subject: x86/pci: Changing subsystem init for visws

I don't know whether this new code boots, but at least it
compiles. Someone should really test it.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/legacy.c |  3 +++
 arch/x86/pci/pci.h    |  1 +
 arch/x86/pci/visws.c  | 23 +++++++----------------
 3 files changed, 11 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index f405eb0b891..ec9ce35e44d 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -62,6 +62,9 @@ int __init pci_subsys_init(void)
 #endif
 #ifdef CONFIG_ACPI
 	pci_acpi_init();
+#endif
+#ifdef CONFIG_X86_VISWS
+	pci_visws_init();
 #endif
 	pci_legacy_init();
 	pcibios_irq_init();
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 36b8dd019fa..a2c55ee98af 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -107,6 +107,7 @@ extern void __init dmi_check_skip_isa_align(void);
 /* some common used subsys_initcalls */
 extern int __init pci_acpi_init(void);
 extern int __init pcibios_irq_init(void);
+extern int __init pci_visws_init(void);
 extern int __init pci_numaq_init(void);
 extern int __init pcibios_init(void);
 
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 1a7bed492bb..42f4cb19fac 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -86,8 +86,14 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-static int __init pci_visws_init(void)
+int __init pci_visws_init(void)
 {
+	if (!is_visws_box())
+		return -1;
+
+	pcibios_enable_irq = &pci_visws_enable_irq;
+	pcibios_disable_irq = &pci_visws_disable_irq;
+
 	/* The VISWS supports configuration access type 1 only */
 	pci_probe = (pci_probe | PCI_PROBE_CONF1) &
 		    ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2);
@@ -105,18 +111,3 @@ static int __init pci_visws_init(void)
 	pcibios_resource_survey();
 	return 0;
 }
-
-static __init int pci_subsys_init(void)
-{
-	if (!is_visws_box())
-		return -1;
-
-	pcibios_enable_irq = &pci_visws_enable_irq;
-	pcibios_disable_irq = &pci_visws_disable_irq;
-
-	pci_visws_init();
-	pcibios_init();
-
-	return 0;
-}
-subsys_initcall(pci_subsys_init);
-- 
cgit v1.2.3


From d54191b85e294c46f05a2249b1f55ae54930bcc7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 22 Apr 2008 15:09:30 +0200
Subject: Kprobe smoke test lockdep warning

On Mon, 2008-04-21 at 18:54 -0400, Masami Hiramatsu wrote:
> Thank you for reporting.
>
> Actually, kprobes tries to fixup thread's flags in post_kprobe_handler
> (which is called from kprobe_exceptions_notify) by
> trace_hardirqs_fixup_flags(pt_regs->flags). However, even the irq flag
> is set in pt_regs->flags, true hardirq is still off until returning
> from do_debug. Thus, lockdep assumes that hardirq is off without annotation.
>
> IMHO, one possible solution is that fixing hardirq flags right after
> notify_die in do_debug instead of in post_kprobe_handler.

My reply to BZ 10489:

> [    2.707509] Kprobe smoke test started
> [    2.709300] ------------[ cut here ]------------
> [    2.709420] WARNING: at kernel/lockdep.c:2658 check_flags+0x4d/0x12c()
> [    2.709541] Modules linked in:
> [    2.709588] Pid: 1, comm: swapper Not tainted 2.6.25.jml.057 #1
> [    2.709588]  [<c0126acc>] warn_on_slowpath+0x41/0x51
> [    2.709588]  [<c010bafc>] ? save_stack_trace+0x1d/0x3b
> [    2.709588]  [<c0140a83>] ? save_trace+0x37/0x89
> [    2.709588]  [<c011987d>] ? kernel_map_pages+0x103/0x11c
> [    2.709588]  [<c0109803>] ? native_sched_clock+0xca/0xea
> [    2.709588]  [<c0142958>] ? mark_held_locks+0x41/0x5c
> [    2.709588]  [<c0382580>] ? kprobe_exceptions_notify+0x322/0x3af
> [    2.709588]  [<c0142aff>] ? trace_hardirqs_on+0xf1/0x119
> [    2.709588]  [<c03825b3>] ? kprobe_exceptions_notify+0x355/0x3af
> [    2.709588]  [<c0140823>] check_flags+0x4d/0x12c
> [    2.709588]  [<c0143c9d>] lock_release+0x58/0x195
> [    2.709588]  [<c038347c>] ? __atomic_notifier_call_chain+0x0/0x80
> [    2.709588]  [<c03834d6>] __atomic_notifier_call_chain+0x5a/0x80
> [    2.709588]  [<c0383508>] atomic_notifier_call_chain+0xc/0xe
> [    2.709588]  [<c013b6d4>] notify_die+0x2d/0x2f
> [    2.709588]  [<c038168a>] do_debug+0x67/0xfe
> [    2.709588]  [<c0381287>] debug_stack_correct+0x27/0x30
> [    2.709588]  [<c01564c0>] ? kprobe_target+0x1/0x34
> [    2.709588]  [<c0156572>] ? init_test_probes+0x50/0x186
> [    2.709588]  [<c04fae48>] init_kprobes+0x85/0x8c
> [    2.709588]  [<c04e947b>] kernel_init+0x13d/0x298
> [    2.709588]  [<c04e933e>] ? kernel_init+0x0/0x298
> [    2.709588]  [<c04e933e>] ? kernel_init+0x0/0x298
> [    2.709588]  [<c0105ef7>] kernel_thread_helper+0x7/0x10
> [    2.709588]  =======================
> [    2.709588] ---[ end trace 778e504de7e3b1e3 ]---
> [    2.709588] possible reason: unannotated irqs-off.
> [    2.709588] irq event stamp: 370065
> [    2.709588] hardirqs last  enabled at (370065): [<c0382580>] kprobe_exceptions_notify+0x322/0x3af
> [    2.709588] hardirqs last disabled at (370064): [<c0381bb7>] do_int3+0x1d/0x7d
> [    2.709588] softirqs last  enabled at (370050): [<c012b464>] __do_softirq+0xfa/0x100
> [    2.709588] softirqs last disabled at (370045): [<c0107438>] do_softirq+0x74/0xd9
> [    2.714751] Kprobe smoke test passed successfully

how I love this stuff...

Ok, do_debug() is a trap, this can happen at any time regardless of the
machine's IRQ state. So the first thing we do is fix up the IRQ state.
Then we call this die notifier stuff; and return with messed up IRQ
state... YAY.

So, kprobes fudges it..

  notify_die(DIE_DEBUG)
    kprobe_exceptions_notify()
      post_kprobe_handler()
        modify regs->flags
        trace_hardirqs_fixup_flags(regs->flags);  <--- must be it

So what's the use of modifying flags if they're not meant to take effect
at some point?

/me tries to reproduce issue; enable kprobes test thingy && boot

OK, that reproduces..

So the below makes it work - but I'm not getting this code; at the time
I wrote that stuff I CC'ed each and every kprobe maintainer listed in
the usual places but got no response - can someone please explain this
stuff to me?

Are the saved flags only for the TF bit or are they made in full effect
later (and if so, where) ?

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b8c6743a13d..43c019f85f0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 
 	resume_execution(cur, regs, kcb);
 	regs->flags |= kcb->kprobe_saved_flags;
-	trace_hardirqs_fixup_flags(regs->flags);
 
 	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
 		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-- 
cgit v1.2.3


From a312b37b2a212fd2e227d1d6321f903b91b65ec7 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:06:23 -0700
Subject: x86/paravirt: call paravirt_pagetable_setup_{start, done}

Call paravirt_pagetable_setup_{start,done}

These paravirt_ops functions were not being called on x86_64.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c | 4 ++++
 arch/x86/kernel/setup.c    | 2 ++
 arch/x86/xen/enlighten.c   | 4 ++++
 3 files changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e0f571d58c1..2963ab5d91e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -29,6 +29,7 @@
 #include <asm/desc.h>
 #include <asm/setup.h>
 #include <asm/arch_hooks.h>
+#include <asm/pgtable.h>
 #include <asm/time.h>
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
@@ -373,6 +374,9 @@ struct pv_mmu_ops pv_mmu_ops = {
 #ifndef CONFIG_X86_64
 	.pagetable_setup_start = native_pagetable_setup_start,
 	.pagetable_setup_done = native_pagetable_setup_done,
+#else
+	.pagetable_setup_start = paravirt_nop,
+	.pagetable_setup_done = paravirt_nop,
 #endif
 
 	.read_cr2 = native_read_cr2,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 36c540d4ac4..8ce6a91ce10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -819,7 +819,9 @@ void __init setup_arch(char **cmdline_p)
 	vmi_init();
 #endif
 
+	paravirt_pagetable_setup_start(swapper_pg_dir);
 	paging_init();
+	paravirt_pagetable_setup_done(swapper_pg_dir);
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bb508456ef5..eaab6c9b4a8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -841,6 +841,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
+#ifdef CONFIG_X86_32
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 	int i;
 
@@ -886,6 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 	/* Unpin initial Xen pagetable */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
 			  PFN_DOWN(__pa(xen_start_info->pt_base)));
+#endif	/* CONFIG_X86_32 */
 }
 
 void xen_setup_shared_info(void)
@@ -927,9 +929,11 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 
 	xen_setup_shared_info();
 
+#ifdef CONFIG_X86_32
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
 	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
+#endif
 }
 
 static __init void xen_post_allocator_init(void)
-- 
cgit v1.2.3


From c1f2f09ef66d5dadd5fe42ea909e708470c9636d Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:06:24 -0700
Subject: pvops-64: call paravirt_post_allocator_init() on setup_arch()

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 1 +
 arch/x86/mm/init_32.c   | 2 --
 arch/x86/xen/mmu.c      | 8 +++++---
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8ce6a91ce10..2ed504b97d4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -822,6 +822,7 @@ void __init setup_arch(char **cmdline_p)
 	paravirt_pagetable_setup_start(swapper_pg_dir);
 	paging_init();
 	paravirt_pagetable_setup_done(swapper_pg_dir);
+	paravirt_post_allocator_init();
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9689a5138e6..7113acd8ac4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -868,8 +868,6 @@ void __init paging_init(void)
 	 */
 	sparse_init();
 	zone_sizes_init();
-
-	paravirt_post_allocator_init();
 }
 
 /*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ff0aa74afaa..ebd6900e331 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -656,9 +656,11 @@ void xen_mm_pin_all(void)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/* The init_mm pagetable is really pinned as soon as its created, but
-   that's before we have page structures to store the bits.  So do all
-   the book-keeping now. */
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now.
+ */
 static __init int mark_pinned(struct page *page, enum pt_level level)
 {
 	SetPagePinned(page);
-- 
cgit v1.2.3


From 8840c0ccd763936a8e730ece118197a51be8dc8e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:25 -0700
Subject: x86_64: there's no need to preallocate level1_fixmap_pgt

Early fixmap will allocate its own L1 pagetable page for fixmap
mappings, so there's no need to preallocate one.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b07ac7b217c..4b6bda21837 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -362,12 +362,6 @@ NEXT_PAGE(level3_kernel_pgt)
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 
 NEXT_PAGE(level2_fixmap_pgt)
-	.fill	506,8,0
-	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
-	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
-	.fill	5,8,0
-
-NEXT_PAGE(level1_fixmap_pgt)
 	.fill	512,8,0
 
 NEXT_PAGE(level2_ident_pgt)
-- 
cgit v1.2.3


From 87b935a0ef9a1ddf62f2f0c0fc17b10654ff41cd Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:26 -0700
Subject: x86: clean up formatting of __switch_to

process_64.c:__switch_to has some very old strange formatting, some of
it dating back to pre-git.  Fix it up.

No functional changes.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_64.c | 56 ++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a8e53626ac9..e8a8e1b9981 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
-	struct thread_struct *prev = &prev_p->thread,
-				 *next = &next_p->thread;
+	struct thread_struct *prev = &prev_p->thread;
+	struct thread_struct *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
@@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	/* 
 	 * Switch FS and GS.
+	 *
+	 * Segment register != 0 always requires a reload.  Also
+	 * reload when it has changed.  When prev process used 64bit
+	 * base always reload to avoid an information leak.
 	 */
-	{ 
-		/* segment register != 0 always requires a reload. 
-		   also reload when it has changed. 
-		   when prev process used 64bit base always reload
-		   to avoid an information leak. */
-		if (unlikely(fsindex | next->fsindex | prev->fs)) {
-			loadsegment(fs, next->fsindex);
-			/* check if the user used a selector != 0
-	                 * if yes clear 64bit base, since overloaded base
-                         * is always mapped to the Null selector
-                         */
-			if (fsindex)
+	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+		loadsegment(fs, next->fsindex);
+		/* 
+		 * Check if the user used a selector != 0; if yes
+		 *  clear 64bit base, since overloaded base is always
+		 *  mapped to the Null selector
+		 */
+		if (fsindex)
 			prev->fs = 0;				
-		}
-		/* when next process has a 64bit base use it */
-		if (next->fs) 
-			wrmsrl(MSR_FS_BASE, next->fs); 
-		prev->fsindex = fsindex;
-
-		if (unlikely(gsindex | next->gsindex | prev->gs)) {
-			load_gs_index(next->gsindex);
-			if (gsindex)
+	}
+	/* when next process has a 64bit base use it */
+	if (next->fs)
+		wrmsrl(MSR_FS_BASE, next->fs);
+	prev->fsindex = fsindex;
+
+	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+		load_gs_index(next->gsindex);
+		if (gsindex)
 			prev->gs = 0;				
-		}
-		if (next->gs)
-			wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
-		prev->gsindex = gsindex;
 	}
+	if (next->gs)
+		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+	prev->gsindex = gsindex;
 
 	/* Must be after DS reload */
 	unlazy_fpu(prev_p);
@@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	write_pda(pcurrent, next_p); 
 
 	write_pda(kernelstack,
-	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
+		  (unsigned long)task_stack_page(next_p) +
+		  THREAD_SIZE - PDA_STACKOFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	write_pda(stack_canary, next_p->stack_canary);
 	/*
-- 
cgit v1.2.3


From cbcd79c2e5b496b84845618cef734b4c40736576 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:27 -0700
Subject: x86: use __page_aligned_data/bss

Update arch/x86's use of page-aligned variables.  The change to
arch/x86/xen/mmu.c fixes an actual bug, but the rest are cleanups
and to set a precedent.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common_64.c |  4 ++--
 arch/x86/kernel/irq_32.c        |  7 ++-----
 arch/x86/xen/mmu.c              | 15 ++++++---------
 3 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 7b8cc72feb4..15419cd3c5a 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -16,6 +16,7 @@
 #include <asm/i387.h>
 #include <asm/msr.h>
 #include <asm/io.h>
+#include <asm/linkage.h>
 #include <asm/mmu_context.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
@@ -517,8 +518,7 @@ void pda_init(int cpu)
 }
 
 char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-			   DEBUG_STKSZ]
-__attribute__((section(".bss.page_aligned")));
+			   DEBUG_STKSZ] __page_aligned_bss;
 
 extern asmlinkage void ignore_sysret(void);
 
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 47a6f6f1247..1cf8c1fcc08 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -83,11 +83,8 @@ union irq_ctx {
 static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
 
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
-		__attribute__((__section__(".bss.page_aligned")));
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
 
 static void call_on_stack(void *func, void *stack)
 {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ebd6900e331..4fca9d88bef 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
+#include <asm/linkage.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -60,22 +61,18 @@
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
 /* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
 		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
 
  /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES]
-	__attribute__((section(".data.page_aligned"))) =
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
 		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
 
 /* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
 
-static unsigned long p2m_top_mfn_list[
-			PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
-	__attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+	__page_aligned_bss;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
-- 
cgit v1.2.3


From 360c044eb1b985a9ef29d952276a3e14973bed93 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:28 -0700
Subject: x86_64: adjust exception frame in ia32entry

The 32-bit compat int $0x80 entrypoint needs exception frame
adjustment.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32entry.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20371d0635e..0ae1e77eae5 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -321,6 +321,7 @@ ENTRY(ia32_syscall)
 	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET	cs,CS-RIP*/
 	CFI_REL_OFFSET	rip,RIP-RIP
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
 	/*
 	 * No need to follow this irqs on/off section: the syscall
-- 
cgit v1.2.3


From 7c33b1e6ee26d67551109aca04d46544d0ce55b1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:29 -0700
Subject: x86_64: unstatic get_local_pda

This allows Xen's xen_cpu_up() to allocate a pda for the new CPU.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 687376ab07e..1deb3b624a7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -768,7 +768,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
  *
  * Must be called after the _cpu_pda pointer table is initialized.
  */
-static int __cpuinit get_local_pda(int cpu)
+int __cpuinit get_local_pda(int cpu)
 {
 	struct x8664_pda *oldpda, *newpda;
 	unsigned long size = sizeof(struct x8664_pda);
-- 
cgit v1.2.3


From 8ba6c2b0958c332d2f3336f4ca9c116ed81f38e9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:30 -0700
Subject: xen: print backtrace on multicall failure

Print a backtrace if a multicall fails, to help with debugging.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/multicalls.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 3c63c4da7ed..9efd1c6c977 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -76,6 +76,7 @@ void xen_mc_flush(void)
 		if (ret) {
 			printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
 			       ret, smp_processor_id());
+			dump_stack();
 			for (i = 0; i < b->mcidx; i++) {
 				printk("  call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
 				       i+1, b->mcidx,
-- 
cgit v1.2.3


From ad55db9fed6d6cd09333045945cb03ba2c070085 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Tue, 8 Jul 2008 15:06:32 -0700
Subject: xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support

add xen_timer_resume() hook.

Timer resume should be done after event channel is resumed.
Add a xen_arch_resume() hook at the point where ipi becomes usable after
resume. After resume, some cpu-specific resources that can't be set by
another cpu must be reinitialized on ia64.

However, the available hooks are run only once, on a single cpu, so an
ipi has to be used.

During stop_machine_run() ipi can't be used because interrupts are masked.
So add another hook after stop_machine_run().
Another approach might be to use the resume hook which is run by
device_resume(). However, device_resume() may also be executed on the
suspend error recovery path.

So it is necessary to determine whether it is executed on real resume path
or error recovery path.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/suspend.c | 5 ++++-
 arch/x86/xen/xen-ops.h | 1 -
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 251669a932d..2a234db5949 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled)
 		xen_cpu_initialized_map = cpu_online_map;
 #endif
 		xen_vcpu_restore();
-		xen_timer_resume();
 	}
 
 }
 
+void xen_arch_resume(void)
+{
+	/* nothing */
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 6f4b1045c1c..77354d20425 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -37,7 +37,6 @@ void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
-void xen_timer_resume(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
-- 
cgit v1.2.3


From 851fa3c4e7b50d6a946d8b4c0a68683b5e56b2f1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:33 -0700
Subject: xen: define set_pte from the outset

We need set_pte to work from a relatively early point, so enable it
from the start.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index eaab6c9b4a8..c5f0b40aa39 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -845,9 +845,6 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 	int i;
 
-	/* special set_pte for pagetable initialization */
-	pv_mmu_ops.set_pte = xen_set_pte_init;
-
 	init_mm.pgd = base;
 	/*
 	 * copy top-level of Xen-supplied pagetable into place.  This
@@ -1174,7 +1171,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
-	.set_pte = NULL,	/* see xen_pagetable_setup_* */
+	.set_pte = xen_set_pte_init,
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 
-- 
cgit v1.2.3


From 48b5db20621388582ca11ac3c61d3403966dbe51 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:34 -0700
Subject: xen64: define asm/xen/interface for 64-bit

Copy 64-bit definitions of various interface structures into place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 297bf9f5b8b..7856e37f604 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -10,18 +10,6 @@ enum pt_level {
 	PT_PTE
 };
 
-/*
- * Page-directory addresses above 4GB do not fit into architectural %cr3.
- * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
- * must use the following accessor macros to pack/unpack valid MFNs.
- *
- * Note that Xen is using the fact that the pagetable base is always
- * page-aligned, and putting the 12 MSB of the address into the 12 LSB
- * of cr3.
- */
-#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
-#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
-
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-- 
cgit v1.2.3


From 7077c33d81a8d790135ae87cd19e6efcb075c23a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:35 -0700
Subject: xen: make ELF notes work for 32 and 64 bit

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/xen-head.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7c0cf6320a0..a9cac9dc04b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -5,7 +5,10 @@
 
 #include <linux/elfnote.h>
 #include <linux/init.h>
+
 #include <asm/boot.h>
+#include <asm/asm.h>
+
 #include <xen/interface/elfnote.h>
 #include <asm/xen/interface.h>
 
@@ -21,21 +24,21 @@ ENTRY(startup_xen)
 .pushsection .text
 	.align PAGE_SIZE_asm
 ENTRY(hypercall_page)
-	.skip 0x1000
+	.skip PAGE_SIZE_asm
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
 	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
-	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long  __PAGE_OFFSET)
-	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
-	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __PAGE_OFFSET)
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
+	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
-	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long __HYPERVISOR_VIRT_START)
+	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
 
 #endif /*CONFIG_XEN */
-- 
cgit v1.2.3


From f6e587325b3bc7e5c829a407ddc25b52c1e73851 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:38 -0700
Subject: xen64: add extra pv_mmu_ops

We need extra pv_mmu_ops for 64-bit, to deal with the extra level of
pagetable.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 33 ++++++++++++++++++++++++++++++-
 arch/x86/xen/mmu.c       | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/xen/mmu.h       | 15 ++++++++++++--
 3 files changed, 95 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c5f0b40aa39..afb047e30bd 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -803,6 +803,18 @@ static void xen_release_pmd(u32 pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
+#if PAGETABLE_LEVELS == 4
+static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+{
+	xen_alloc_ptpage(mm, pfn, PT_PUD);
+}
+
+static void xen_release_pud(u32 pfn)
+{
+	xen_release_ptpage(pfn, PT_PUD);
+}
+#endif
+
 #ifdef CONFIG_HIGHPTE
 static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 {
@@ -922,6 +934,11 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.alloc_pud = xen_alloc_pud;
+	pv_mmu_ops.release_pud = xen_release_pud;
+#endif
+
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	xen_setup_shared_info();
@@ -937,6 +954,9 @@ static __init void xen_post_allocator_init(void)
 {
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.set_pgd = xen_set_pgd;
+#endif
 
 	xen_mark_init_mm_pinned();
 }
@@ -1185,15 +1205,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
+#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
-	.set_pud = xen_set_pud_hyper,
 	.pte_clear = xen_pte_clear,
 	.pmd_clear = xen_pmd_clear,
+#endif	/* CONFIG_X86_PAE */
+	.set_pud = xen_set_pud_hyper,
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
 
+#if PAGETABLE_LEVELS == 4
+	.pud_val = xen_pud_val,
+	.make_pud = xen_make_pud,
+	.set_pgd = xen_set_pgd_hyper,
+
+	.alloc_pud = xen_alloc_pte_init,
+	.release_pud = xen_release_pte_init,
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
 	.exit_mmap = xen_exit_mmap,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4fca9d88bef..d0976b87cd2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -438,14 +438,19 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_X86_PAE
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
+#else
+	*ptep = pte;
+#endif
 }
 
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	set_64bit((u64 *)ptep, pte_val_ma(pte));
+	set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -459,6 +464,7 @@ void xen_pmd_clear(pmd_t *pmdp)
 {
 	set_pmd(pmdp, __pmd(0));
 }
+#endif	/* CONFIG_X86_PAE */
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
@@ -466,6 +472,49 @@ pmd_t xen_make_pmd(pmdval_t pmd)
 	return native_make_pmd(pmd);
 }
 
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud)
+{
+	return pte_mfn_to_pfn(pud.pud);
+}
+
+pud_t xen_make_pud(pudval_t pud)
+{
+	pud = pte_pfn_to_mfn(pud);
+
+	return native_make_pud(pud);
+}
+
+void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
+
+	preempt_disable();
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pgd_val_ma(val);
+	extend_mmu_update(&u);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
+}
+
+void xen_set_pgd(pgd_t *ptr, pgd_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pgd_hyper(ptr, val);
+}
+#endif	/* PAGETABLE_LEVELS == 4 */
+
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
   pagetable.  This means that it walks a pagetable and calls the
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 7856e37f604..19d544b0b6c 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -32,13 +32,24 @@ pgd_t xen_make_pgd(pgdval_t);
 void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
+
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void xen_pmd_clear(pmd_t *pmdp);
+#endif	/* CONFIG_X86_PAE */
+
 void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
 void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud_hyper(pud_t *ptr, pud_t val);
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_pmd_clear(pmd_t *pmdp);
+
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud);
+pud_t xen_make_pud(pudval_t pudval);
+void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
+void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
+#endif
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
-- 
cgit v1.2.3


From f5d36de069f4b343f64e858e7377cfc9c772c4fb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:39 -0700
Subject: xen64: random ifdefs to mask out 32-bit only code

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index afb047e30bd..ada2e1a141d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1296,6 +1296,7 @@ static const struct machine_ops __initdata xen_machine_ops = {
 
 static void __init xen_reserve_top(void)
 {
+#ifdef CONFIG_X86_32
 	unsigned long top = HYPERVISOR_VIRT_START;
 	struct xen_platform_parameters pp;
 
@@ -1303,6 +1304,7 @@ static void __init xen_reserve_top(void)
 		top = pp.virt_start;
 
 	reserve_top_address(-top + 2 * PAGE_SIZE);
+#endif	/* CONFIG_X86_32 */
 }
 
 /* First C function to be called on Xen boot */
@@ -1333,6 +1335,11 @@ asmlinkage void __init xen_start_kernel(void)
 
 	machine_ops = xen_machine_ops;
 
+#ifdef CONFIG_X86_64
+	/* Disable until direct per-cpu data access. */
+	have_vcpu_info_placement = 0;
+#endif
+
 #ifdef CONFIG_SMP
 	smp_ops = xen_smp_ops;
 #endif
@@ -1343,9 +1350,11 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
+#ifdef CONFIG_X86_32
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
+#endif
 
 	init_mm.pgd = pgd; /* use the Xen pagetables to start */
 
@@ -1372,7 +1381,9 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
+#ifdef CONFIG_X86_32
 	new_cpu_data.hard_math = 1;
+#endif
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 
 	/* Poke various useful things into boot_params */
@@ -1388,5 +1399,9 @@ asmlinkage void __init xen_start_kernel(void)
 	}
 
 	/* Start the world */
+#ifdef CONFIG_X86_32
 	i386_start_kernel();
+#else
+	x86_64_start_kernel((char *)&boot_params);
+#endif
 }
-- 
cgit v1.2.3


From ce87b3d326de733c72b47662f106ee6cd699a20f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:40 -0700
Subject: xen64: get active_mm from the pda

x86_64 stores the active_mm in the pda, so fetch it from there.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d0976b87cd2..2579e70cdd0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -805,8 +805,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 static void drop_other_mm_ref(void *info)
 {
 	struct mm_struct *mm = info;
+	struct mm_struct *active_mm;
 
-	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+#ifdef CONFIG_X86_64
+	active_mm = read_pda(active_mm);
+#else
+	active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
+#endif
+
+	if (active_mm == mm)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure
-- 
cgit v1.2.3


From a9e7062d7339f1a1df2b6d7e5d595c7d55b56bfb Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:41 -0700
Subject: xen: move smp setup into smp.c

Move all the smp_ops setup into smp.c, allowing a lot of things to
become static.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 19 +------------------
 arch/x86/xen/smp.c       | 34 ++++++++++++++++++++++++++--------
 arch/x86/xen/xen-ops.h   | 13 +++++--------
 3 files changed, 32 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ada2e1a141d..a85f447b8d0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1237,21 +1237,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_fixmap = xen_set_fixmap,
 };
 
-#ifdef CONFIG_SMP
-static const struct smp_ops xen_smp_ops __initdata = {
-	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = xen_smp_prepare_cpus,
-	.cpu_up = xen_cpu_up,
-	.smp_cpus_done = xen_smp_cpus_done,
-
-	.smp_send_stop = xen_smp_send_stop,
-	.smp_send_reschedule = xen_smp_send_reschedule,
-
-	.send_call_func_ipi = xen_smp_send_call_function_ipi,
-	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
-};
-#endif	/* CONFIG_SMP */
-
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
@@ -1340,9 +1325,7 @@ asmlinkage void __init xen_start_kernel(void)
 	have_vcpu_info_placement = 0;
 #endif
 
-#ifdef CONFIG_SMP
-	smp_ops = xen_smp_ops;
-#endif
+	xen_smp_init();
 
 	/* Get mfn list */
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 233156f39b7..91fae8ff756 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -152,7 +152,7 @@ void __init xen_fill_possible_map(void)
 	}
 }
 
-void __init xen_smp_prepare_boot_cpu(void)
+static void __init xen_smp_prepare_boot_cpu(void)
 {
 	int cpu;
 
@@ -176,7 +176,7 @@ void __init xen_smp_prepare_boot_cpu(void)
 	xen_setup_vcpu_info_placement();
 }
 
-void __init xen_smp_prepare_cpus(unsigned int max_cpus)
+static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
 
@@ -276,7 +276,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	return 0;
 }
 
-int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu)
 {
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
@@ -319,7 +319,7 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
 	return 0;
 }
 
-void xen_smp_cpus_done(unsigned int max_cpus)
+static void xen_smp_cpus_done(unsigned int max_cpus)
 {
 }
 
@@ -335,12 +335,12 @@ static void stop_self(void *v)
 	BUG();
 }
 
-void xen_smp_send_stop(void)
+static void xen_smp_send_stop(void)
 {
 	smp_call_function(stop_self, NULL, 0);
 }
 
-void xen_smp_send_reschedule(int cpu)
+static void xen_smp_send_reschedule(int cpu)
 {
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
@@ -355,7 +355,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 		xen_send_IPI_one(cpu, vector);
 }
 
-void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(cpumask_t mask)
 {
 	int cpu;
 
@@ -370,7 +370,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask)
 	}
 }
 
-void xen_smp_send_call_function_single_ipi(int cpu)
+static void xen_smp_send_call_function_single_ipi(int cpu)
 {
 	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
@@ -394,3 +394,21 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
+
+static const struct smp_ops xen_smp_ops __initdata = {
+	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
+	.smp_prepare_cpus = xen_smp_prepare_cpus,
+	.cpu_up = xen_cpu_up,
+	.smp_cpus_done = xen_smp_cpus_done,
+
+	.smp_send_stop = xen_smp_send_stop,
+	.smp_send_reschedule = xen_smp_send_reschedule,
+
+	.send_call_func_ipi = xen_smp_send_call_function_ipi,
+	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
+};
+
+void __init xen_smp_init(void)
+{
+	smp_ops = xen_smp_ops;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 77354d20425..81a779fc9b2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -47,17 +47,14 @@ void xen_mark_init_mm_pinned(void);
 void __init xen_fill_possible_map(void);
 
 void __init xen_setup_vcpu_info_placement(void);
-void xen_smp_prepare_boot_cpu(void);
-void xen_smp_prepare_cpus(unsigned int max_cpus);
-int xen_cpu_up(unsigned int cpu);
-void xen_smp_cpus_done(unsigned int max_cpus);
 
-void xen_smp_send_stop(void);
-void xen_smp_send_reschedule(int cpu);
-void xen_smp_send_call_function_ipi(cpumask_t mask);
-void xen_smp_send_call_function_single_ipi(int cpu);
+#ifdef CONFIG_SMP
+void xen_smp_init(void);
 
 extern cpumask_t xen_cpu_initialized_map;
+#else
+static inline void xen_smp_init(void) {}
+#endif
 
 
 /* Declare an asm function, along with symbols needed to make it
-- 
cgit v1.2.3


From 5b09b2876ed1a8e34a0da8f069575fc6174e2077 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:42 -0700
Subject: x86_64: add workaround for no %gs-based percpu

As a stopgap until Mike Travis's x86-64 gs-based percpu patches are
ready, provide workaround functions for x86_read/write_percpu for
Xen's use.

Specifically, this means that we can't really make use of vcpu
placement, because we can't use a single gs-based memory access to get
to vcpu fields.  So disable all that for now.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head64.c | 11 ++++++++---
 arch/x86/xen/enlighten.c |  5 +++++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c9781982914..1b318e903bf 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
 static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
 #endif
 
+void __init x86_64_init_pda(void)
+{
+	_cpu_pda = __cpu_pda;
+	cpu_pda(0) = &_boot_cpu_pda;
+	pda_init(0);
+}
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
@@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	early_printk("Kernel alive\n");
 
-	_cpu_pda = __cpu_pda;
-	cpu_pda(0) = &_boot_cpu_pda;
-	pda_init(0);
+	x86_64_init_pda();
 
 	early_printk("Kernel really alive\n");
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a85f447b8d0..f3f11acf785 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -971,6 +971,7 @@ void xen_setup_vcpu_info_placement(void)
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	   percpu area for all cpus, so make use of it */
+#ifdef CONFIG_X86_32
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
@@ -980,6 +981,7 @@ void xen_setup_vcpu_info_placement(void)
 		pv_irq_ops.irq_enable = xen_irq_enable_direct;
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
+#endif
 }
 
 static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1000,10 +1002,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	goto patch_site
 
 	switch (type) {
+#ifdef CONFIG_X86_32
 		SITE(pv_irq_ops, irq_enable);
 		SITE(pv_irq_ops, irq_disable);
 		SITE(pv_irq_ops, save_fl);
 		SITE(pv_irq_ops, restore_fl);
+#endif /* CONFIG_X86_32 */
 #undef SITE
 
 	patch_site:
@@ -1323,6 +1327,7 @@ asmlinkage void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_64
 	/* Disable until direct per-cpu data access. */
 	have_vcpu_info_placement = 0;
+	x86_64_init_pda();
 #endif
 
 	xen_smp_init();
-- 
cgit v1.2.3


From c7b75947f89d45493562ede6d9ee7311dfa5c4ce Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:43 -0700
Subject: xen64: smp.c compile hacking

A number of random changes to make xen/smp.c compile in 64-bit mode.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/setup.c   |  7 +---
 arch/x86/xen/smp.c     | 98 +++++++++++++++++++++++++++++---------------------
 arch/x86/xen/xen-ops.h |  2 --
 3 files changed, 58 insertions(+), 49 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e0a39595bde..f52f3855fb6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -98,7 +98,7 @@ void xen_enable_sysenter(void)
 	/* Mask events on entry, even though they get enabled immediately */
 	static struct callback_register sysenter = {
 		.type = CALLBACKTYPE_sysenter,
-		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target),
 		.flags = CALLBACKF_mask_events,
 	};
 
@@ -143,11 +143,6 @@ void __init xen_arch_setup(void)
 
 	pm_idle = xen_idle;
 
-#ifdef CONFIG_SMP
-	/* fill cpus_possible with all available cpus */
-	xen_fill_possible_map();
-#endif
-
 	paravirt_disable_iospace();
 
 	fiddle_vdso();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 91fae8ff756..800bb2191e2 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -66,13 +66,21 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	preempt_disable();
+
 	xen_enable_sysenter();
 
-	preempt_disable();
-	per_cpu(cpu_state, cpu) = CPU_ONLINE;
+	cpu = smp_processor_id();
+	smp_store_cpu_info(cpu);
+	cpu_data(cpu).x86_max_cores = 1;
+	set_cpu_sibling_map(cpu);
 
 	xen_setup_cpu_clockevents();
 
+	cpu_set(cpu, cpu_online_map);
+	x86_write_percpu(cpu_state, CPU_ONLINE);
+	wmb();
+
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
 
@@ -141,7 +149,7 @@ static int xen_smp_intr_init(unsigned int cpu)
 	return rc;
 }
 
-void __init xen_fill_possible_map(void)
+static void __init xen_fill_possible_map(void)
 {
 	int i, rc;
 
@@ -154,24 +162,12 @@ void __init xen_fill_possible_map(void)
 
 static void __init xen_smp_prepare_boot_cpu(void)
 {
-	int cpu;
-
 	BUG_ON(smp_processor_id() != 0);
 	native_smp_prepare_boot_cpu();
 
 	/* We've switched to the "real" per-cpu gdt, so make sure the
 	   old memory can be recycled */
-	make_lowmem_page_readwrite(&per_cpu__gdt_page);
-
-	for_each_possible_cpu(cpu) {
-		cpus_clear(per_cpu(cpu_sibling_map, cpu));
-		/*
-		 * cpu_core_map lives in a per cpu area that is cleared
-		 * when the per cpu array is allocated.
-		 *
-		 * cpus_clear(per_cpu(cpu_core_map, cpu));
-		 */
-	}
+	make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
 
 	xen_setup_vcpu_info_placement();
 }
@@ -180,17 +176,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
 
-	for_each_possible_cpu(cpu) {
-		cpus_clear(per_cpu(cpu_sibling_map, cpu));
-		/*
-		 * cpu_core_ map will be zeroed when the per
-		 * cpu area is allocated.
-		 *
-		 * cpus_clear(per_cpu(cpu_core_map, cpu));
-		 */
-	}
-
 	smp_store_cpu_info(0);
+	cpu_data(0).x86_max_cores = 1;
 	set_cpu_sibling_map(0);
 
 	if (xen_smp_intr_init(0))
@@ -225,7 +212,7 @@ static __cpuinit int
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
 	struct vcpu_guest_context *ctxt;
-	struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
+	struct desc_struct *gdt;
 
 	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
 		return 0;
@@ -234,12 +221,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
+	gdt = get_cpu_gdt_table(cpu);
+
 	ctxt->flags = VGCF_IN_KERNEL;
 	ctxt->user_regs.ds = __USER_DS;
 	ctxt->user_regs.es = __USER_DS;
-	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs = 0;
 	ctxt->user_regs.ss = __KERNEL_DS;
+#ifdef CONFIG_X86_32
+	ctxt->user_regs.fs = __KERNEL_PERCPU;
+#endif
 	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 
@@ -249,11 +239,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 
 	ctxt->ldt_ents = 0;
 
-	BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
-	make_lowmem_page_readonly(gdt->gdt);
+	BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+	make_lowmem_page_readonly(gdt);
 
-	ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
-	ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);
+	ctxt->gdt_frames[0] = virt_to_mfn(gdt);
+	ctxt->gdt_ents      = GDT_ENTRIES;
 
 	ctxt->user_regs.cs = __KERNEL_CS;
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
@@ -261,9 +251,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->kernel_ss = __KERNEL_DS;
 	ctxt->kernel_sp = idle->thread.sp0;
 
+#ifdef CONFIG_X86_32
 	ctxt->event_callback_cs     = __KERNEL_CS;
-	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
 	ctxt->failsafe_callback_cs  = __KERNEL_CS;
+#endif
+	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
 	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
 
 	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
@@ -287,11 +279,28 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 		return rc;
 #endif
 
+#ifdef CONFIG_X86_64
+	/* Allocate node local memory for AP pdas */
+	WARN_ON(cpu == 0);
+	if (cpu > 0) {
+		rc = get_local_pda(cpu);
+		if (rc)
+			return rc;
+	}
+#endif
+
+#ifdef CONFIG_X86_32
 	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
 	irq_ctx_init(cpu);
+#else
+	cpu_pda(cpu)->pcurrent = idle;
+	clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
 	xen_setup_timer(cpu);
 
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 
@@ -306,16 +315,14 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	if (rc)
 		return rc;
 
-	smp_store_cpu_info(cpu);
-	set_cpu_sibling_map(cpu);
-	/* This must be done before setting cpu_online_map */
-	wmb();
-
-	cpu_set(cpu, cpu_online_map);
-
 	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 	BUG_ON(rc);
 
+	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+		barrier();
+	}
+
 	return 0;
 }
 
@@ -379,7 +386,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_interrupt();
+#ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -389,7 +400,11 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -411,4 +426,5 @@ static const struct smp_ops xen_smp_ops __initdata = {
 void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
+	xen_fill_possible_map();
 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 81a779fc9b2..aca4a7803e2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -44,8 +44,6 @@ bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
 
-void __init xen_fill_possible_map(void);
-
 void __init xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 8c5e5ac32fe08793246709fbb94c055ec76a7c0e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:44 -0700
Subject: xen64: add xen-head code to head_64.S

Add the Xen entrypoint and ELF notes to head_64.S.  Adapts xen-head.S
to compile either 32-bit or 64-bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/asm-offsets_64.c |  3 +++
 arch/x86/kernel/head_64.S        |  1 +
 arch/x86/xen/xen-head.S          | 15 +++++++++++++--
 3 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bacf5deeec2..0f7e1f09aa0 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -131,5 +131,8 @@ int main(void)
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+
+	BLANK();
+	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
 	return 0;
 }
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4b6bda21837..2240f823676 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -401,6 +401,7 @@ ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
+#include "../../x86/xen/xen-head.S"
 	
 	.section .bss, "aw", @nobits
 	.align L1_CACHE_BYTES
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a9cac9dc04b..63d49a523ed 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -8,15 +8,21 @@
 
 #include <asm/boot.h>
 #include <asm/asm.h>
+#include <asm/page.h>
 
 #include <xen/interface/elfnote.h>
 #include <asm/xen/interface.h>
 
 	__INIT
 ENTRY(startup_xen)
-	movl %esi,xen_start_info
 	cld
-	movl $(init_thread_union+THREAD_SIZE),%esp
+#ifdef CONFIG_X86_32
+	mov %esi,xen_start_info
+	mov $init_thread_union+THREAD_SIZE,%esp
+#else
+	mov %rsi,xen_start_info
+	mov $init_thread_union+THREAD_SIZE,%rsp
+#endif
 	jmp xen_start_kernel
 
 	__FINIT
@@ -30,7 +36,11 @@ ENTRY(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
 	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
+#ifdef CONFIG_X86_32
 	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __PAGE_OFFSET)
+#else
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __START_KERNEL_map)
+#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
@@ -40,5 +50,6 @@ ENTRY(hypercall_page)
 		.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
+	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
 
 #endif /*CONFIG_XEN */
-- 
cgit v1.2.3


From 555cf2b5805a213ba262a2830c4d22ad635a249e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:45 -0700
Subject: xen64: add asm-offsets

Add Xen vcpu_info offsets to asm-offsets_64.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/asm-offsets_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 0f7e1f09aa0..aa89387006f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -18,6 +18,8 @@
 #include <asm/ia32.h>
 #include <asm/bootparam.h>
 
+#include <xen/interface/xen.h>
+
 #define __NO_STUBS 1
 #undef __SYSCALL
 #undef _ASM_X86_64_UNISTD_H_
@@ -134,5 +136,11 @@ int main(void)
 
 	BLANK();
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+#ifdef CONFIG_XEN
+	BLANK();
+	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#undef ENTRY
+#endif
 	return 0;
 }
-- 
cgit v1.2.3


From cdacc1278b12d929f9a053c245ff3d16eb7af9f8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:46 -0700
Subject: xen64: add 64-bit assembler

Split xen-asm into 32- and 64-bit files, and implement the 64-bit
variants.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Makefile     |   2 +-
 arch/x86/xen/xen-asm.S    | 305 ----------------------------------------------
 arch/x86/xen/xen-asm_32.S | 305 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-asm_64.S | 141 +++++++++++++++++++++
 4 files changed, 447 insertions(+), 306 deletions(-)
 delete mode 100644 arch/x86/xen/xen-asm.S
 create mode 100644 arch/x86/xen/xen-asm_32.S
 create mode 100644 arch/x86/xen/xen-asm_64.S

(limited to 'arch')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 2ba2d164913..59c1e539aed 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o xen-asm.o grant-table.o suspend.o
+			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
deleted file mode 100644
index 2497a30f41d..00000000000
--- a/arch/x86/xen/xen-asm.S
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
-	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-	The inline versions are the same as the direct-use versions, with the
-	pre- and post-amble chopped off.
-
-	This code is encoded for size rather than absolute efficiency,
-	with a view to being able to inline as much as possible.
-
-	We only bother with direct forms (ie, vcpu in pda) of the operations
-	here; the indirect forms are better handled in C, since they're
-	generally too large to inline anyway.
- */
-
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
-#include <xen/interface/xen.h>
-
-#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x)	.globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI	0x80000000
-
-/*
-	Enable events.  This clears the event mask and tests the pending
-	event status with one and operation.  If there are pending
-	events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
-	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
-
-	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-	jz 1f
-
-2:	call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
-	ret
-	ENDPROC(xen_irq_enable_direct)
-	RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
-	Disabling events is simply a matter of making the event mask
-	non-zero.
- */
-ENTRY(xen_irq_disable_direct)
-	movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
-	ret
-	ENDPROC(xen_irq_disable_direct)
-	RELOC(xen_irq_disable_direct, 0)
-
-/*
-	(xen_)save_fl is used to get the current interrupt enable status.
-	Callers expect the status to be in X86_EFLAGS_IF, and other bits
-	may be set in the return value.  We take advantage of this by
-	making sure that X86_EFLAGS_IF has the right value (and other bits
-	in that byte are 0), but other bits in the return value are
-	undefined.  We need to toggle the state of the bit, because
-	Xen and x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-	setz %ah
-	addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
-	ret
-	ENDPROC(xen_save_fl_direct)
-	RELOC(xen_save_fl_direct, 0)
-
-
-/*
-	In principle the caller should be passing us a value return
-	from xen_save_fl_direct, but for robustness sake we test only
-	the X86_EFLAGS_IF flag rather than the whole byte. After
-	setting the interrupt mask state, it checks for unmasked
-	pending events and enters the hypervisor to get them delivered
-	if so.
- */
-ENTRY(xen_restore_fl_direct)
-	testb $X86_EFLAGS_IF>>8, %ah
-	setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-	/* Preempt here doesn't matter because that will deal with
-	   any pending interrupts.  The pending check may end up being
-	   run on the wrong CPU, but that doesn't hurt. */
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-	jz 1f
-2:	call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
-	ret
-	ENDPROC(xen_restore_fl_direct)
-	RELOC(xen_restore_fl_direct, 2b+1)
-
-/*
-	We can't use sysexit directly, because we're not running in ring0.
-	But we can easily fake it up using iret.  Assuming xen_sysexit
-	is jumped to with a standard stack frame, we can just strip it
-	back to a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
-	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
-	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
-	lea PT_EIP(%esp), %esp
-
-	jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
-	This is run where a normal iret would be run, with the same stack setup:
-	      8: eflags
-	      4: cs
-	esp-> 0: eip
-
-	This attempts to make sure that any pending events are dealt
-	with on return to usermode, but there is a small window in
-	which an event can happen just before entering usermode.  If
-	the nested interrupt ends up setting one of the TIF_WORK_MASK
-	pending work flags, they will not be tested again before
-	returning to usermode. This means that a process can end up
-	with pending work, which will be unprocessed until the process
-	enters and leaves the kernel again, which could be an
-	unbounded amount of time.  This means that a pending signal or
-	reschedule event could be indefinitely delayed.
-
-	The fix is to notice a nested interrupt in the critical
-	window, and if one occurs, then fold the nested interrupt into
-	the current interrupt stack frame, and re-process it
-	iteratively rather than recursively.  This means that it will
-	exit via the normal path, and all pending work will be dealt
-	with appropriately.
-
-	Because the nested interrupt handler needs to deal with the
-	current stack state in whatever form its in, we keep things
-	simple by only using a single register which is pushed/popped
-	on the stack.
- */
-ENTRY(xen_iret)
-	/* test eflags for special cases */
-	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
-	jnz hyper_iret
-
-	push %eax
-	ESP_OFFSET=4	# bytes pushed onto stack
-
-	/* Store vcpu_info pointer for easy access.  Do it this
-	   way to avoid having to reload %fs */
-#ifdef CONFIG_SMP
-	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax),%eax
-	movl __per_cpu_offset(,%eax,4),%eax
-	mov per_cpu__xen_vcpu(%eax),%eax
-#else
-	movl per_cpu__xen_vcpu, %eax
-#endif
-
-	/* check IF state we're restoring */
-	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
-	/* Maybe enable events.  Once this happens we could get a
-	   recursive event, so the critical region starts immediately
-	   afterwards.  However, if that happens we don't end up
-	   resuming the code, so we don't have to be worried about
-	   being preempted to another CPU. */
-	setz XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
-
-	/* If there's something pending, mask events again so we
-	   can jump back into xen_hypervisor_callback */
-	sete XEN_vcpu_info_mask(%eax)
-
-	popl %eax
-
-	/* From this point on the registers are restored and the stack
-	   updated, so we don't need to worry about it if we're preempted */
-iret_restore_end:
-
-	/* Jump to hypervisor_callback after fixing up the stack.
-	   Events are masked, so jumping out of the critical
-	   region is OK. */
-	je xen_hypervisor_callback
-
-1:	iret
-xen_iret_end_crit:
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
-
-hyper_iret:
-	/* put this out of line since its very rarely used */
-	jmp hypercall_page + __HYPERVISOR_iret * 32
-
-	.globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
-   This is called by xen_hypervisor_callback in entry.S when it sees
-   that the EIP at the time of interrupt was between xen_iret_start_crit
-   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
-   a more refined determination of what to do.
-
-   The stack format at this point is:
-	----------------
-	 ss		: (ss/esp may be present if we came from usermode)
-	 esp		:
-	 eflags		}  outer exception info
-	 cs		}
-	 eip		}
-	---------------- <- edi (copy dest)
-	 eax		:  outer eax if it hasn't been restored
-	----------------
-	 eflags		}  nested exception info
-	 cs		}   (no ss/esp because we're nested
-	 eip		}    from the same ring)
-	 orig_eax	}<- esi (copy src)
-	 - - - - - - - -
-	 fs		}
-	 es		}
-	 ds		}  SAVE_ALL state
-	 eax		}
-	  :		:
-	 ebx		}<- esp
-	----------------
-
-   In order to deliver the nested exception properly, we need to shift
-   everything from the return addr up to the error code so it
-   sits just under the outer exception info.  This means that when we
-   handle the exception, we do it in the context of the outer exception
-   rather than starting a new one.
-
-   The only caveat is that if the outer eax hasn't been
-   restored yet (ie, it's still on stack), we need to insert
-   its value into the SAVE_ALL state before going on, since
-   it's usermode state which we eventually need to restore.
- */
-ENTRY(xen_iret_crit_fixup)
-	/*
-	   Paranoia: Make sure we're really coming from kernel space.
-	   One could imagine a case where userspace jumps into the
-	   critical range address, but just before the CPU delivers a GP,
-	   it decides to deliver an interrupt instead.  Unlikely?
-	   Definitely.  Easy to avoid?  Yes.  The Intel documents
-	   explicitly say that the reported EIP for a bad jump is the
-	   jump instruction itself, not the destination, but some virtual
-	   environments get this wrong.
-	 */
-	movl PT_CS(%esp), %ecx
-	andl $SEGMENT_RPL_MASK, %ecx
-	cmpl $USER_RPL, %ecx
-	je 2f
-
-	lea PT_ORIG_EAX(%esp), %esi
-	lea PT_EFLAGS(%esp), %edi
-
-	/* If eip is before iret_restore_end then stack
-	   hasn't been restored yet. */
-	cmp $iret_restore_end, %eax
-	jae 1f
-
-	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
-	movl %eax, PT_EAX(%esp)
-
-	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
-
-	/* set up the copy */
-1:	std
-	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
-	rep movsl
-	cld
-
-	lea 4(%edi),%esp		/* point esp to new frame */
-2:	jmp xen_do_upcall
-
-
-/*
-	Force an event check by making a hypercall,
-	but preserve regs before making the call.
- */
-check_events:
-	push %eax
-	push %ecx
-	push %edx
-	call force_evtchn_callback
-	pop %edx
-	pop %ecx
-	pop %eax
-	ret
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
new file mode 100644
index 00000000000..2497a30f41d
--- /dev/null
+++ b/arch/x86/xen/xen-asm_32.S
@@ -0,0 +1,305 @@
+/*
+	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
+	The inline versions are the same as the direct-use versions, with the
+	pre- and post-amble chopped off.
+
+	This code is encoded for size rather than absolute efficiency,
+	with a view to being able to inline as much as possible.
+
+	We only bother with direct forms (ie, vcpu in pda) of the operations
+	here; the indirect forms are better handled in C, since they're
+	generally too large to inline anyway.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+#include <xen/interface/xen.h>
+
+#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
+#define ENDPATCH(x)	.globl x##_end; x##_end=.
+
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI	0x80000000
+
+/*
+	Enable events.  This clears the event mask and tests the pending
+	event status with one and operation.  If there are pending
+	events, then enter the hypervisor to get them handled.
+ */
+ENTRY(xen_irq_enable_direct)
+	/* Unmask events */
+	movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* Test for pending */
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
+	jz 1f
+
+2:	call check_events
+1:
+ENDPATCH(xen_irq_enable_direct)
+	ret
+	ENDPROC(xen_irq_enable_direct)
+	RELOC(xen_irq_enable_direct, 2b+1)
+
+
+/*
+	Disabling events is simply a matter of making the event mask
+	non-zero.
+ */
+ENTRY(xen_irq_disable_direct)
+	movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+ENDPATCH(xen_irq_disable_direct)
+	ret
+	ENDPROC(xen_irq_disable_direct)
+	RELOC(xen_irq_disable_direct, 0)
+
+/*
+	(xen_)save_fl is used to get the current interrupt enable status.
+	Callers expect the status to be in X86_EFLAGS_IF, and other bits
+	may be set in the return value.  We take advantage of this by
+	making sure that X86_EFLAGS_IF has the right value (and other bits
+	in that byte are 0), but other bits in the return value are
+	undefined.  We need to toggle the state of the bit, because
+	Xen and x86 use opposite senses (mask vs enable).
+ */
+ENTRY(xen_save_fl_direct)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+	setz %ah
+	addb %ah,%ah
+ENDPATCH(xen_save_fl_direct)
+	ret
+	ENDPROC(xen_save_fl_direct)
+	RELOC(xen_save_fl_direct, 0)
+
+
+/*
+	In principle the caller should be passing us a value returned
+	from xen_save_fl_direct, but for robustness sake we test only
+	the X86_EFLAGS_IF flag rather than the whole byte. After
+	setting the interrupt mask state, it checks for unmasked
+	pending events and enters the hypervisor to get them delivered
+	if so.
+ */
+ENTRY(xen_restore_fl_direct)
+	testb $X86_EFLAGS_IF>>8, %ah
+	setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
+	jz 1f
+2:	call check_events
+1:
+ENDPATCH(xen_restore_fl_direct)
+	ret
+	ENDPROC(xen_restore_fl_direct)
+	RELOC(xen_restore_fl_direct, 2b+1)
+
+/*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
+	This is run where a normal iret would be run, with the same stack setup:
+	      8: eflags
+	      4: cs
+	esp-> 0: eip
+
+	This attempts to make sure that any pending events are dealt
+	with on return to usermode, but there is a small window in
+	which an event can happen just before entering usermode.  If
+	the nested interrupt ends up setting one of the TIF_WORK_MASK
+	pending work flags, they will not be tested again before
+	returning to usermode. This means that a process can end up
+	with pending work, which will be unprocessed until the process
+	enters and leaves the kernel again, which could be an
+	unbounded amount of time.  This means that a pending signal or
+	reschedule event could be indefinitely delayed.
+
+	The fix is to notice a nested interrupt in the critical
+	window, and if one occurs, then fold the nested interrupt into
+	the current interrupt stack frame, and re-process it
+	iteratively rather than recursively.  This means that it will
+	exit via the normal path, and all pending work will be dealt
+	with appropriately.
+
+	Because the nested interrupt handler needs to deal with the
+	current stack state in whatever form it's in, we keep things
+	simple by only using a single register which is pushed/popped
+	on the stack.
+ */
+ENTRY(xen_iret)
+	/* test eflags for special cases */
+	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
+	jnz hyper_iret
+
+	push %eax
+	ESP_OFFSET=4	# bytes pushed onto stack
+
+	/* Store vcpu_info pointer for easy access.  Do it this
+	   way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+	GET_THREAD_INFO(%eax)
+	movl TI_cpu(%eax),%eax
+	movl __per_cpu_offset(,%eax,4),%eax
+	mov per_cpu__xen_vcpu(%eax),%eax
+#else
+	movl per_cpu__xen_vcpu, %eax
+#endif
+
+	/* check IF state we're restoring */
+	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
+
+	/* Maybe enable events.  Once this happens we could get a
+	   recursive event, so the critical region starts immediately
+	   afterwards.  However, if that happens we don't end up
+	   resuming the code, so we don't have to be worried about
+	   being preempted to another CPU. */
+	setz XEN_vcpu_info_mask(%eax)
+xen_iret_start_crit:
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+
+	/* If there's something pending, mask events again so we
+	   can jump back into xen_hypervisor_callback */
+	sete XEN_vcpu_info_mask(%eax)
+
+	popl %eax
+
+	/* From this point on the registers are restored and the stack
+	   updated, so we don't need to worry about it if we're preempted */
+iret_restore_end:
+
+	/* Jump to hypervisor_callback after fixing up the stack.
+	   Events are masked, so jumping out of the critical
+	   region is OK. */
+	je xen_hypervisor_callback
+
+1:	iret
+xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+hyper_iret:
+	/* put this out of line since it's very rarely used */
+	jmp hypercall_page + __HYPERVISOR_iret * 32
+
+	.globl xen_iret_start_crit, xen_iret_end_crit
+
+/*
+   This is called by xen_hypervisor_callback in entry.S when it sees
+   that the EIP at the time of interrupt was between xen_iret_start_crit
+   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
+   a more refined determination of what to do.
+
+   The stack format at this point is:
+	----------------
+	 ss		: (ss/esp may be present if we came from usermode)
+	 esp		:
+	 eflags		}  outer exception info
+	 cs		}
+	 eip		}
+	---------------- <- edi (copy dest)
+	 eax		:  outer eax if it hasn't been restored
+	----------------
+	 eflags		}  nested exception info
+	 cs		}   (no ss/esp because we're nested
+	 eip		}    from the same ring)
+	 orig_eax	}<- esi (copy src)
+	 - - - - - - - -
+	 fs		}
+	 es		}
+	 ds		}  SAVE_ALL state
+	 eax		}
+	  :		:
+	 ebx		}<- esp
+	----------------
+
+   In order to deliver the nested exception properly, we need to shift
+   everything from the return addr up to the error code so it
+   sits just under the outer exception info.  This means that when we
+   handle the exception, we do it in the context of the outer exception
+   rather than starting a new one.
+
+   The only caveat is that if the outer eax hasn't been
+   restored yet (ie, it's still on stack), we need to insert
+   its value into the SAVE_ALL state before going on, since
+   it's usermode state which we eventually need to restore.
+ */
+ENTRY(xen_iret_crit_fixup)
+	/*
+	   Paranoia: Make sure we're really coming from kernel space.
+	   One could imagine a case where userspace jumps into the
+	   critical range address, but just before the CPU delivers a GP,
+	   it decides to deliver an interrupt instead.  Unlikely?
+	   Definitely.  Easy to avoid?  Yes.  The Intel documents
+	   explicitly say that the reported EIP for a bad jump is the
+	   jump instruction itself, not the destination, but some virtual
+	   environments get this wrong.
+	 */
+	movl PT_CS(%esp), %ecx
+	andl $SEGMENT_RPL_MASK, %ecx
+	cmpl $USER_RPL, %ecx
+	je 2f
+
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
+
+	/* If eip is before iret_restore_end then stack
+	   hasn't been restored yet. */
+	cmp $iret_restore_end, %eax
+	jae 1f
+
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
+
+	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
+
+	/* set up the copy */
+1:	std
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
+	rep movsl
+	cld
+
+	lea 4(%edi),%esp		/* point esp to new frame */
+2:	jmp xen_do_upcall
+
+
+/*
+	Force an event check by making a hypercall,
+	but preserve regs before making the call.
+ */
+check_events:
+	push %eax
+	push %ecx
+	push %edx
+	call force_evtchn_callback
+	pop %edx
+	pop %ecx
+	pop %eax
+	ret
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
new file mode 100644
index 00000000000..4ec10827370
--- /dev/null
+++ b/arch/x86/xen/xen-asm_64.S
@@ -0,0 +1,141 @@
+/*
+	Asm versions of Xen pv-ops, suitable for either direct use or inlining.
+	The inline versions are the same as the direct-use versions, with the
+	pre- and post-amble chopped off.
+
+	This code is encoded for size rather than absolute efficiency,
+	with a view to being able to inline as much as possible.
+
+	We only bother with direct forms (ie, vcpu in pda) of the operations
+	here; the indirect forms are better handled in C, since they're
+	generally too large to inline anyway.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
+
+#include <xen/interface/xen.h>
+
+#define RELOC(x, v)	.globl x##_reloc; x##_reloc=v
+#define ENDPATCH(x)	.globl x##_end; x##_end=.
+
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI	0x80000000
+
+#if 0
+#include <asm/percpu.h>
+
+/*
+	Enable events.  This clears the event mask and tests the pending
+	event status with one and operation.  If there are pending
+	events, then enter the hypervisor to get them handled.
+ */
+ENTRY(xen_irq_enable_direct)
+	/* Unmask events */
+	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* Test for pending */
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	jz 1f
+
+2:	call check_events
+1:
+ENDPATCH(xen_irq_enable_direct)
+	ret
+	ENDPROC(xen_irq_enable_direct)
+	RELOC(xen_irq_enable_direct, 2b+1)
+
+/*
+	Disabling events is simply a matter of making the event mask
+	non-zero.
+ */
+ENTRY(xen_irq_disable_direct)
+	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+ENDPATCH(xen_irq_disable_direct)
+	ret
+	ENDPROC(xen_irq_disable_direct)
+	RELOC(xen_irq_disable_direct, 0)
+
+/*
+	(xen_)save_fl is used to get the current interrupt enable status.
+	Callers expect the status to be in X86_EFLAGS_IF, and other bits
+	may be set in the return value.  We take advantage of this by
+	making sure that X86_EFLAGS_IF has the right value (and other bits
+	in that byte are 0), but other bits in the return value are
+	undefined.  We need to toggle the state of the bit, because
+	Xen and x86 use opposite senses (mask vs enable).
+ */
+ENTRY(xen_save_fl_direct)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	setz %ah
+	addb %ah,%ah
+ENDPATCH(xen_save_fl_direct)
+	ret
+	ENDPROC(xen_save_fl_direct)
+	RELOC(xen_save_fl_direct, 0)
+
+/*
+	In principle the caller should be passing us a value returned
+	from xen_save_fl_direct, but for robustness sake we test only
+	the X86_EFLAGS_IF flag rather than the whole byte. After
+	setting the interrupt mask state, it checks for unmasked
+	pending events and enters the hypervisor to get them delivered
+	if so.
+ */
+ENTRY(xen_restore_fl_direct)
+	testb $X86_EFLAGS_IF>>8, %ah
+	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	/* Preempt here doesn't matter because that will deal with
+	   any pending interrupts.  The pending check may end up being
+	   run on the wrong CPU, but that doesn't hurt. */
+
+	/* check for unmasked and pending */
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	jz 1f
+2:	call check_events
+1:
+ENDPATCH(xen_restore_fl_direct)
+	ret
+	ENDPROC(xen_restore_fl_direct)
+	RELOC(xen_restore_fl_direct, 2b+1)
+
+
+/*
+	Force an event check by making a hypercall,
+	but preserve regs before making the call.
+ */
+check_events:
+	push %rax
+	push %rcx
+	push %rdx
+	push %rsi
+	push %rdi
+	push %r8
+	push %r9
+	push %r10
+	push %r11
+	call force_evtchn_callback
+	pop %r11
+	pop %r10
+	pop %r9
+	pop %r8
+	pop %rdi
+	pop %rsi
+	pop %rdx
+	pop %rcx
+	pop %rax
+	ret
+#endif
+
+ENTRY(xen_iret)
+	pushq $0
+	jmp hypercall_page + __HYPERVISOR_iret * 32
+
+ENTRY(xen_sysexit)
+	ud2a
-- 
cgit v1.2.3


From 15664f968a95d8fbf4a0d7b462fcc20f88906bb3 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:47 -0700
Subject: xen64: use set_fixmap for shared_info structure

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f3f11acf785..dbe3549fad4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -902,18 +902,11 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-
-		/*
-		 * Create a mapping for the shared info page.
-		 * Should be set_fixmap(), but shared_info is a machine
-		 * address with no corresponding pseudo-phys address.
-		 */
-		set_pte_mfn(addr,
-			    PFN_DOWN(xen_start_info->shared_info),
-			    PAGE_KERNEL);
-
-		HYPERVISOR_shared_info = (struct shared_info *)addr;
+		set_fixmap(FIX_PARAVIRT_BOOTMAP,
+			   xen_start_info->shared_info);
+
+		HYPERVISOR_shared_info =
+			(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 	} else
 		HYPERVISOR_shared_info =
 			(struct shared_info *)__va(xen_start_info->shared_info);
@@ -1050,8 +1043,13 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_F00F_BUG
 	case FIX_F00F_IDT:
 #endif
+#ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+#else
+	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif
-- 
cgit v1.2.3


From 7d087b68d6ddb2398fb7f6e45990b7248de640ef Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:48 -0700
Subject: xen: cpu_detect is 32-bit only

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index dbe3549fad4..2b7bea3bb6f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1365,12 +1365,12 @@ asmlinkage void __init xen_start_kernel(void)
 	/* set the limit of our address space */
 	xen_reserve_top();
 
+#ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
-#ifdef CONFIG_X86_32
 	new_cpu_data.hard_math = 1;
-#endif
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+#endif
 
 	/* Poke various useful things into boot_params */
 	boot_params.hdr.type_of_loader = (9 << 4) | 0;
-- 
cgit v1.2.3


From 3d75e1b8ef1567348ceba93d4666a1c7c2333583 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:49 -0700
Subject: xen64: add hypervisor callbacks for events, etc

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 98 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ae63e584c34..7cc2de79614 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1312,3 +1312,101 @@ KPROBE_ENTRY(ignore_sysret)
 	sysret
 	CFI_ENDPROC
 ENDPROC(ignore_sysret)
+
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+	zeroentry xen_do_hypervisor_callback
+END(xen_hypervisor_callback)
+
+/*
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+*/
+ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
+	CFI_STARTPROC
+/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+   see the correct pointer to the pt_regs */
+	movq %rdi, %rsp            # we don't return, adjust the stack frame
+	CFI_ENDPROC
+	CFI_DEFAULT_STACK
+11:	incl %gs:pda_irqcount
+	movq %rsp,%rbp
+	CFI_DEF_CFA_REGISTER rbp
+	cmovzq %gs:pda_irqstackptr,%rsp
+	pushq %rbp			# backlink for old unwinder
+	call xen_evtchn_do_upcall
+	popq %rsp
+	CFI_DEF_CFA_REGISTER rsp
+	decl %gs:pda_irqcount
+	jmp  error_exit
+	CFI_ENDPROC
+END(do_hypervisor_callback)
+
+/*
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we do not need to fix up as Xen has already reloaded all segment
+# registers that could be reloaded and zeroed the others.
+# Category 2 we fix up by killing the current process. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by comparing each saved segment register
+# with its current contents: any discrepancy means we in category 1.
+*/
+ENTRY(xen_failsafe_callback)
+#if 1
+	ud2a
+#else
+	_frame (RIP-0x30)
+	CFI_REL_OFFSET rcx, 0
+	CFI_REL_OFFSET r11, 8
+	movw %ds,%cx
+	cmpw %cx,0x10(%rsp)
+	CFI_REMEMBER_STATE
+	jne 1f
+	movw %es,%cx
+	cmpw %cx,0x18(%rsp)
+	jne 1f
+	movw %fs,%cx
+	cmpw %cx,0x20(%rsp)
+	jne 1f
+	movw %gs,%cx
+	cmpw %cx,0x28(%rsp)
+	jne 1f
+	/* All segments match their saved values => Category 2 (Bad IRET). */
+	movq (%rsp),%rcx
+	CFI_RESTORE rcx
+	movq 8(%rsp),%r11
+	CFI_RESTORE r11
+	addq $0x30,%rsp
+	CFI_ADJUST_CFA_OFFSET -0x30
+	movq $11,%rdi	/* SIGSEGV */
+	jmp do_exit
+	CFI_RESTORE_STATE
+1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
+	movq (%rsp),%rcx
+	CFI_RESTORE rcx
+	movq 8(%rsp),%r11
+	CFI_RESTORE r11
+	addq $0x30,%rsp
+	CFI_ADJUST_CFA_OFFSET -0x30
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	SAVE_ALL
+	jmp error_exit
+	CFI_ENDPROC
+#endif
+END(xen_failsafe_callback)
+
+#endif /* CONFIG_XEN */
-- 
cgit v1.2.3


From 084a2a4e7656209ea93aac9778defa03213ca31d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:50 -0700
Subject: xen64: early mapping setup

Set up the initial pagetables to map the kernel mapping into the
physical mapping space.  This makes __va() usable, since it requires
physical mappings.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 192 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 176 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2b7bea3bb6f..a991ee7ade9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
 #include <asm/page.h>
@@ -1294,6 +1295,157 @@ static void __init xen_reserve_top(void)
 #endif	/* CONFIG_X86_32 */
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Like __va(), but returns address in the kernel mapping (which is
+ * all we have until the physical memory mapping has been set up.
+ */
+static void *__ka(phys_addr_t paddr)
+{
+	return (void *)(paddr + __START_KERNEL_map);
+}
+
+/* Convert a machine address to physical address */
+static unsigned long m2p(phys_addr_t maddr)
+{
+	phys_addr_t paddr;
+
+	maddr &= PTE_MASK;
+	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+/* Convert a machine address to kernel virtual */
+static void *m2v(phys_addr_t maddr)
+{
+	return __ka(m2p(maddr));
+}
+
+static void walk(pgd_t *pgd, unsigned long addr)
+{
+	unsigned l4idx = pgd_index(addr);
+	unsigned l3idx = pud_index(addr);
+	unsigned l2idx = pmd_index(addr);
+	unsigned l1idx = pte_index(addr);
+	pgd_t l4;
+	pud_t l3;
+	pmd_t l2;
+	pte_t l1;
+
+	xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+		       pgd, addr, l4idx, l3idx, l2idx, l1idx);
+
+	l4 = pgd[l4idx];
+	xen_raw_printk("  l4: %016lx\n", l4.pgd);
+	xen_raw_printk("      %016lx\n", pgd_val(l4));
+
+	l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+	xen_raw_printk("  l3: %016lx\n", l3.pud);
+	xen_raw_printk("      %016lx\n", pud_val(l3));
+
+	l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+	xen_raw_printk("  l2: %016lx\n", l2.pmd);
+	xen_raw_printk("      %016lx\n", pmd_val(l2));
+
+	l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+	xen_raw_printk("  l1: %016lx\n", l1.pte);
+	xen_raw_printk("      %016lx\n", pte_val(l1));
+}
+
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+	pte_t pte = pfn_pte(pfn, prot);
+
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n",
+		       addr, pfn, get_phys_to_machine(pfn),
+		       pgprot_val(prot), pte.pte);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+		BUG();
+}
+
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
+}
+
+/*
+ * Set up the inital kernel pagetable.
+ *
+ * We can construct this by grafting the Xen provided pagetable into
+ * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+ * means that only the kernel has a physical mapping to start with -
+ * but that's enough to get __va working.  We need to fill in the rest
+ * of the physical mapping once some sort of allocator has been set
+ * up.
+ */
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+{
+	pud_t *l3;
+	pmd_t *l2;
+
+	/* Zap identity mapping */
+	init_level4_pgt[0] = __pgd(0);
+
+	/* Pre-constructed entries are in pfn, so convert to mfn */
+	convert_pfn_mfn(init_level4_pgt);
+	convert_pfn_mfn(level3_ident_pgt);
+	convert_pfn_mfn(level3_kernel_pgt);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+
+	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+
+	/* Make pagetable pieces RO */
+	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+	/* Pin down new L4 */
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt)));
+
+	/* Unpin Xen-provided one */
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	/* Switch over */
+	pgd = init_level4_pgt;
+	xen_write_cr3(__pa(pgd));
+
+	max_pfn_mapped = PFN_DOWN(__pa(pgd) +
+				  xen_start_info->nr_pt_frames*PAGE_SIZE +
+				  512*1024);
+
+	return pgd;
+}
+#else
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+{
+	init_pg_tables_start = __pa(pgd);
+	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+
+	return pgd;
+}
+#endif	/* CONFIG_X86_64 */
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1336,32 +1488,29 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-#ifdef CONFIG_X86_32
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
-#endif
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
+	/* Don't do the full vcpu_info placement stuff until we have a
+	   possible map and a non-dummy shared_info. */
+	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+
+	xen_raw_console_write("mapping kernel into physical memory\n");
+	pgd = xen_setup_kernel_pagetable(pgd);
 
-	init_mm.pgd = pgd; /* use the Xen pagetables to start */
+	init_mm.pgd = pgd;
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
 	x86_write_percpu(xen_cr3, __pa(pgd));
 	x86_write_percpu(xen_current_cr3, __pa(pgd));
 
-	/* Don't do the full vcpu_info placement stuff until we have a
-	   possible map and a non-dummy shared_info. */
-	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
-	/* Prevent unwanted bits from being set in PTEs. */
-	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
-		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
-
 	/* set the limit of our address space */
 	xen_reserve_top();
 
@@ -1384,10 +1533,21 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("hvc", 0, NULL);
 	}
 
+	xen_raw_console_write("about to get started...\n");
+
+#if 0
+	xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+		       &boot_params, __pa_symbol(&boot_params),
+		       __va(__pa_symbol(&boot_params)));
+
+	walk(pgd, &boot_params);
+	walk(pgd, __va(__pa(&boot_params)));
+#endif
+
 	/* Start the world */
 #ifdef CONFIG_X86_32
 	i386_start_kernel();
 #else
-	x86_64_start_kernel((char *)&boot_params);
+	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
 #endif
 }
-- 
cgit v1.2.3


From 22911b3f1cf5431058e56b1727e8ef77be5e0ac9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:51 -0700
Subject: xen64: 64-bit starts using set_pte from very early

It also doesn't need the 32-bit hack version of set_pte for initial
pagetable construction, so just make it use the real thing.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a991ee7ade9..392450787aa 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1194,7 +1194,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
 #endif
 
+#ifdef CONFIG_X86_64
+	.set_pte = xen_set_pte,
+#else
 	.set_pte = xen_set_pte_init,
+#endif
 	.set_pte_at = xen_set_pte_at,
 	.set_pmd = xen_set_pmd_hyper,
 
-- 
cgit v1.2.3


From d114e1981cc1a51131230993a082c27c79ab370a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:52 -0700
Subject: xen64: map an initial chunk of physical memory

Early in boot, map a chunk of extra physical memory for use later on.
We need a pool of mapped pages to allocate further pages to construct
pagetables mapping all physical memory.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 79 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 392450787aa..e9e3bafe48c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1381,6 +1381,61 @@ static void convert_pfn_mfn(void *v)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
 
+/*
+ * Identity map, in addition to plain kernel map.  This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+static __init void xen_map_identity_early(unsigned long max_pfn)
+{
+	unsigned pmdidx, pteidx;
+	unsigned ident_pte;
+	unsigned long pfn;
+
+	ident_pte = 0;
+	pfn = 0;
+	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+		pte_t *pte_page;
+
+		BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd);
+
+		/* Reuse or allocate a page of ptes */
+		if (pmd_present(level2_ident_pgt[pmdidx]))
+			pte_page = m2v(level2_ident_pgt[pmdidx].pmd);
+		else {
+			/* Check for free pte pages */
+			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+				break;
+
+			pte_page = &level1_ident_pgt[ident_pte];
+			ident_pte += PTRS_PER_PTE;
+
+			/* Install new l1 in l2(s) */
+			level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+			level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx];
+		}
+
+		/* Install mappings */
+		for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+			pte_t pte;
+
+			if (pfn > max_pfn_mapped)
+				max_pfn_mapped = pfn;
+
+			if (!pte_none(pte_page[pteidx]))
+				continue;
+
+			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+			pte_page[pteidx] = pte;
+		}
+	}
+
+	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+}
+
 /*
  * Set up the inital kernel pagetable.
  *
@@ -1392,7 +1447,7 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
@@ -1415,6 +1470,9 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
 	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
 
+	/* Set up identity map */
+	xen_map_identity_early(max_pfn);
+
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
@@ -1424,7 +1482,7 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
 	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt)));
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt)));
 
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1433,19 +1491,23 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
 	pgd = init_level4_pgt;
 	xen_write_cr3(__pa(pgd));
 
-	max_pfn_mapped = PFN_DOWN(__pa(pgd) +
-				  xen_start_info->nr_pt_frames*PAGE_SIZE +
-				  512*1024);
+	reserve_early(__pa(xen_start_info->pt_base),
+		      __pa(xen_start_info->pt_base +
+			   xen_start_info->nr_pt_frames * PAGE_SIZE),
+		      "XEN PAGETABLES");
 
 	return pgd;
 }
 #else
-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd)
+static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
 
+	x86_write_percpu(xen_cr3, __pa(pgd));
+	x86_write_percpu(xen_current_cr3, __pa(pgd));
+
 	return pgd;
 }
 #endif	/* CONFIG_X86_64 */
@@ -1502,15 +1564,12 @@ asmlinkage void __init xen_start_kernel(void)
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd);
+	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 
 	init_mm.pgd = pgd;
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
-
 	pv_info.kernel_rpl = 1;
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
-- 
cgit v1.2.3


From 39dbc5bd345ebf93e066dde7f8e29467eb61b42e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:53 -0700
Subject: xen32: create initial mappings like 64-bit

Rearrange the pagetable initialization to share code with the 64-bit
kernel.  Rather than deferring anything to pagetable_setup_start, just
set up an initial pagetable in swapper_pg_dir early at startup, and
create an additional 8MB of physical memory mappings.  This matches
the native head_32.S mappings to a large degree, and allows the rest
of the pagetable setup to continue without much Xen vs. native
difference.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 130 +++++++++++++++++++----------------------------
 1 file changed, 52 insertions(+), 78 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e9e3bafe48c..19c12a6c731 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -854,50 +854,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_32
-	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-	int i;
-
-	init_mm.pgd = base;
-	/*
-	 * copy top-level of Xen-supplied pagetable into place.  This
-	 * is a stand-in while we copy the pmd pages.
-	 */
-	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
-	/*
-	 * For PAE, need to allocate new pmds, rather than
-	 * share Xen's, since Xen doesn't like pmd's being
-	 * shared between address spaces.
-	 */
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-
-			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-			       PAGE_SIZE);
-
-			make_lowmem_page_readonly(pmd);
-
-			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-		} else
-			pgd_clear(&base[i]);
-	}
-
-	/* make sure zero_page is mapped RO so we can use it in pagetables */
-	make_lowmem_page_readonly(empty_zero_page);
-	make_lowmem_page_readonly(base);
-	/*
-	 * Switch to new pagetable.  This is done before
-	 * pagetable_init has done anything so that the new pages
-	 * added to the table can be prepared properly for Xen.
-	 */
-	xen_write_cr3(__pa(base));
-
-	/* Unpin initial Xen pagetable */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-			  PFN_DOWN(__pa(xen_start_info->pt_base)));
-#endif	/* CONFIG_X86_32 */
 }
 
 void xen_setup_shared_info(void)
@@ -936,12 +892,6 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	xen_setup_shared_info();
-
-#ifdef CONFIG_X86_32
-	/* Actually pin the pagetable down, but we can't set PG_pinned
-	   yet because the page structures don't exist yet. */
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
-#endif
 }
 
 static __init void xen_post_allocator_init(void)
@@ -1299,14 +1249,17 @@ static void __init xen_reserve_top(void)
 #endif	/* CONFIG_X86_32 */
 }
 
-#ifdef CONFIG_X86_64
 /*
  * Like __va(), but returns address in the kernel mapping (which is
  * all we have until the physical memory mapping has been set up.
  */
 static void *__ka(phys_addr_t paddr)
 {
+#ifdef CONFIG_X86_64
 	return (void *)(paddr + __START_KERNEL_map);
+#else
+	return __va(paddr);
+#endif
 }
 
 /* Convert a machine address to physical address */
@@ -1326,6 +1279,7 @@ static void *m2v(phys_addr_t maddr)
 	return __ka(m2p(maddr));
 }
 
+#ifdef CONFIG_X86_64
 static void walk(pgd_t *pgd, unsigned long addr)
 {
 	unsigned l4idx = pgd_index(addr);
@@ -1356,13 +1310,14 @@ static void walk(pgd_t *pgd, unsigned long addr)
 	xen_raw_printk("  l1: %016lx\n", l1.pte);
 	xen_raw_printk("      %016lx\n", pte_val(l1));
 }
+#endif
 
 static void set_page_prot(void *addr, pgprot_t prot)
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
-	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n",
+	xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
 		       addr, pfn, get_phys_to_machine(pfn),
 		       pgprot_val(prot), pte.pte);
 
@@ -1370,17 +1325,6 @@ static void set_page_prot(void *addr, pgprot_t prot)
 		BUG();
 }
 
-static void convert_pfn_mfn(void *v)
-{
-	pte_t *pte = v;
-	int i;
-
-	/* All levels are converted the same way, so just treat them
-	   as ptes. */
-	for(i = 0; i < PTRS_PER_PTE; i++)
-		pte[i] = xen_make_pte(pte[i].pte);
-}
-
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -1388,7 +1332,7 @@ static void convert_pfn_mfn(void *v)
  */
 static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
 
-static __init void xen_map_identity_early(unsigned long max_pfn)
+static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
 	unsigned ident_pte;
@@ -1399,11 +1343,9 @@ static __init void xen_map_identity_early(unsigned long max_pfn)
 	for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
 		pte_t *pte_page;
 
-		BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd);
-
 		/* Reuse or allocate a page of ptes */
-		if (pmd_present(level2_ident_pgt[pmdidx]))
-			pte_page = m2v(level2_ident_pgt[pmdidx].pmd);
+		if (pmd_present(pmd[pmdidx]))
+			pte_page = m2v(pmd[pmdidx].pmd);
 		else {
 			/* Check for free pte pages */
 			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
@@ -1412,9 +1354,7 @@ static __init void xen_map_identity_early(unsigned long max_pfn)
 			pte_page = &level1_ident_pgt[ident_pte];
 			ident_pte += PTRS_PER_PTE;
 
-			/* Install new l1 in l2(s) */
-			level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
-			level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx];
+			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
 		}
 
 		/* Install mappings */
@@ -1434,6 +1374,20 @@ static __init void xen_map_identity_early(unsigned long max_pfn)
 
 	for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
 		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+
+	set_page_prot(pmd, PAGE_KERNEL_RO);
+}
+
+#ifdef CONFIG_X86_64
+static void convert_pfn_mfn(void *v)
+{
+	pte_t *pte = v;
+	int i;
+
+	/* All levels are converted the same way, so just treat them
+	   as ptes. */
+	for(i = 0; i < PTRS_PER_PTE; i++)
+		pte[i] = xen_make_pte(pte[i].pte);
 }
 
 /*
@@ -1471,18 +1425,18 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
 
 	/* Set up identity map */
-	xen_map_identity_early(max_pfn);
+	xen_map_identity_early(level2_ident_pgt, max_pfn);
 
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
 	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt)));
+	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
 
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1498,17 +1452,37 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 
 	return pgd;
 }
-#else
+#else	/* !CONFIG_X86_64 */
+static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+
 static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
+	pmd_t *kernel_pmd;
+
 	init_pg_tables_start = __pa(pgd);
 	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
 
-	x86_write_percpu(xen_cr3, __pa(pgd));
-	x86_write_percpu(xen_current_cr3, __pa(pgd));
+	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
 
-	return pgd;
+	xen_map_identity_early(level2_kernel_pgt, max_pfn);
+
+	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+
+	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+
+	xen_write_cr3(__pa(swapper_pg_dir));
+
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+
+	return swapper_pg_dir;
 }
 #endif	/* CONFIG_X86_64 */
 
-- 
cgit v1.2.3


From ebd879e397f6361727c36267a12d1650710e465a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:54 -0700
Subject: xen: fix truncation of machine address

arbitrary_virt_to_machine can truncate a machine address if it's above
4G.  Cast the problem away.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2579e70cdd0..05d7392a7a4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -186,7 +186,7 @@ xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 
 	BUG_ON(pte == NULL);
 
-	return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
+	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
 void make_lowmem_page_readonly(void *vaddr)
-- 
cgit v1.2.3


From ce803e705f1cbdd2703e83061622089b5b4a5417 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:55 -0700
Subject: xen64: use arbitrary_virt_to_machine for xen_set_pmd

When building initial pagetables in a 64-bit kernel the pud/pmd pointer may
be in ioremap/fixmap space, so we need to walk the pagetable to look up the
physical address.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 05d7392a7a4..a8f02327181 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -178,8 +178,9 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	p2m_top[topidx][idx] = mfn;
 }
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
+xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
+	unsigned long address = (unsigned long)vaddr;
 	unsigned int level;
 	pte_t *pte = lookup_address(address, &level);
 	unsigned offset = address & ~PAGE_MASK;
@@ -253,7 +254,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pmd_val_ma(val);
 	extend_mmu_update(&u);
 
@@ -415,7 +417,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptr).maddr;
+	/* ptr may be ioremapped for 64-bit pagetable setup */
+	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pud_val_ma(val);
 	extend_mmu_update(&u);
 
-- 
cgit v1.2.3


From 4560a2947e32670fc6ede108c2b032c396180649 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:56 -0700
Subject: xen: set num_processors

Someone's got to do it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 800bb2191e2..8310ca0ea37 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -155,8 +155,10 @@ static void __init xen_fill_possible_map(void)
 
 	for (i = 0; i < NR_CPUS; i++) {
 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-		if (rc >= 0)
+		if (rc >= 0) {
+			num_processors++;
 			cpu_set(i, cpu_possible_map);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 8745f8b0b914cf1d617ecc49726c24011858c74e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:57 -0700
Subject: xen64: defer setting pagetable alloc/release ops

We need to wait until the page structure is available to use the
proper pagetable page alloc/release operations, since they use struct
page to determine if a pagetable is pinned.

This happened to work in 32bit because nobody allocated new pagetable
pages in the interim between xen_pagetable_setup_done and
xen_post_allocator_init, but the 64-bit kernel needs to allocate more
pagetable levels.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 19c12a6c731..da91404fc66 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -878,30 +878,29 @@ void xen_setup_shared_info(void)
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
-	/* This will work as long as patching hasn't happened yet
-	   (which it hasn't) */
-	pv_mmu_ops.alloc_pte = xen_alloc_pte;
-	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-	pv_mmu_ops.release_pte = xen_release_pte;
-	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if PAGETABLE_LEVELS == 4
-	pv_mmu_ops.alloc_pud = xen_alloc_pud;
-	pv_mmu_ops.release_pud = xen_release_pud;
-#endif
-
-	pv_mmu_ops.set_pte = xen_set_pte;
-
 	xen_setup_shared_info();
 }
 
 static __init void xen_post_allocator_init(void)
 {
+	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
 #if PAGETABLE_LEVELS == 4
 	pv_mmu_ops.set_pgd = xen_set_pgd;
 #endif
 
+	/* This will work as long as patching hasn't happened yet
+	   (which it hasn't) */
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
+#if PAGETABLE_LEVELS == 4
+	pv_mmu_ops.alloc_pud = xen_alloc_pud;
+	pv_mmu_ops.release_pud = xen_release_pud;
+#endif
+
 	xen_mark_init_mm_pinned();
 }
 
-- 
cgit v1.2.3


From 836fe2f291cb450a6193fa713878efe7d32bec6e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:06:58 -0700
Subject: xen: use set_pte_vaddr

Make Xen's set_pte_mfn() use set_pte_vaddr rather than copying it.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a8f02327181..eb31ed291b9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -282,35 +282,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
  */
 void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	if (pgd_none(*pgd)) {
-		BUG();
-		return;
-	}
-	pud = pud_offset(pgd, vaddr);
-	if (pud_none(*pud)) {
-		BUG();
-		return;
-	}
-	pmd = pmd_offset(pud, vaddr);
-	if (pmd_none(*pmd)) {
-		BUG();
-		return;
-	}
-	pte = pte_offset_kernel(pmd, vaddr);
-	/* <mfn,flags> stored as-is, to permit clearing entries */
-	xen_set_pte(pte, mfn_pte(mfn, flags));
-
-	/*
-	 * It's enough to flush this one mapping.
-	 * (PGE mappings get flushed as well)
-	 */
-	__flush_tlb_one(vaddr);
+	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-- 
cgit v1.2.3


From e176d367d0cc8b8efd2e0960c9edf5d2fe7cd9f1 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@Rawhide-64.localdomain>
Date: Tue, 8 Jul 2008 15:06:59 -0700
Subject: xen64: xen_write_idt_entry() and cvt_gate_to_trap()

Changed to use the (to-be-)unified descriptor structs.

Signed-off-by: Eduardo Habkost <ehabkost@Rawhide-64.localdomain>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index da91404fc66..f5e96f7a4c5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -401,23 +401,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	u8 type, dpl;
-
-	type = (high >> 8) & 0x1f;
-	dpl = (high >> 13) & 3;
-
-	if (type != 0xf && type != 0xe)
+	if (val->type != 0xf && val->type != 0xe)
 		return 0;
 
 	info->vector = vector;
-	info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-	info->cs = low >> 16;
-	info->flags = dpl;
+	info->address = gate_offset(*val);
+	info->cs = gate_segment(*val);
+	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (type == 0xe)
+	if (val->type == 0xe)
 		info->flags |= 4;
 
 	return 1;
@@ -444,11 +439,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 
 	if (p >= start && (p + 8) <= end) {
 		struct trap_info info[2];
-		u32 *desc = (u32 *)g;
 
 		info[1].address = 0;
 
-		if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+		if (cvt_gate_to_trap(entrynum, g, &info[0]))
 			if (HYPERVISOR_set_trap_table(info))
 				BUG();
 	}
@@ -461,13 +455,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
 {
 	unsigned in, out, count;
 
-	count = (desc->size+1) / 8;
+	count = (desc->size+1) / sizeof(gate_desc);
 	BUG_ON(count > 256);
 
 	for (in = out = 0; in < count; in++) {
-		const u32 *entry = (u32 *)(desc->address + in * 8);
+		gate_desc *entry = (gate_desc*)(desc->address) + in;
 
-		if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+		if (cvt_gate_to_trap(in, entry, &traps[out]))
 			out++;
 	}
 	traps[out].address = 0;
-- 
cgit v1.2.3


From 997409d3d0bd6894f33e31ced251c0fdf523aa14 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:00 -0700
Subject: xen64: deal with extra words Xen pushes onto exception frames

Xen pushes two extra words containing the values of rcx and r11.  This
pvop hook copies the words back into their appropriate registers, and
cleans them off the stack.  This leaves the stack in native form, so
the normal handler can run unchanged.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c  | 2 +-
 arch/x86/xen/xen-asm_64.S | 5 +++++
 arch/x86/xen/xen-ops.h    | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f5e96f7a4c5..9d94483b3b5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1091,7 +1091,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
 #ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
+	.adjust_exception_frame = xen_adjust_exception_frame,
 #endif
 };
 
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 4ec10827370..b147b495dae 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -133,6 +133,11 @@ check_events:
 	ret
 #endif
 
+ENTRY(xen_adjust_exception_frame)
+	mov 8+0(%rsp),%rcx
+	mov 8+8(%rsp),%r11
+	ret $16
+
 ENTRY(xen_iret)
 	pushq $0
 	jmp hypercall_page + __HYPERVISOR_iret * 32
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index aca4a7803e2..c4800a2c5a4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -67,7 +67,9 @@ DECL_ASM(void, xen_irq_disable_direct, void);
 DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
+/* These are not functions, and cannot be called normally */
 void xen_iret(void);
 void xen_sysexit(void);
+void xen_adjust_exception_frame(void);
 
 #endif /* XEN_OPS_H */
-- 
cgit v1.2.3


From 952d1d7055c8cbf95b4ad2f90be5ed37db8a48ee Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:01 -0700
Subject: xen64: add pvop for swapgs

swapgs is a no-op under Xen, because the hypervisor makes sure the
right version of %gs is current when switching between user and kernel
modes.  This means that the swapgs "implementation" can be inlined and
used when the stack is unsafe (usermode).  Unfortunately, it means
that disabling patching will result in a non-booting kernel...

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9d94483b3b5..8b60982e457 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1076,6 +1076,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.set_iopl_mask = xen_set_iopl_mask,
 	.io_delay = xen_io_delay,
 
+	/* Xen takes care of %gs when switching to usermode for us */
+	.swapgs = paravirt_nop,
+
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_cpu,
 		.leave = xen_leave_lazy,
-- 
cgit v1.2.3


From 88459d4c7eb68c4a15609e00e5d100e2a305f040 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:02 -0700
Subject: xen64: register callbacks in arch-independent way

Use callback_op hypercall to register callbacks in a 32/64-bit
independent way (64-bit doesn't need a code segment, but that detail
is hidden in XEN_CALLBACK).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/setup.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f52f3855fb6..bea3d4f779d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -91,19 +91,25 @@ static void __init fiddle_vdso(void)
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
-void xen_enable_sysenter(void)
+static __cpuinit int register_callback(unsigned type, const void *func)
 {
-	int cpu = smp_processor_id();
-	extern void xen_sysenter_target(void);
-	/* Mask events on entry, even though they get enabled immediately */
-	static struct callback_register sysenter = {
-		.type = CALLBACKTYPE_sysenter,
-		.address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target),
+	struct callback_register callback = {
+		.type = type,
+		.address = XEN_CALLBACK(__KERNEL_CS, func),
 		.flags = CALLBACKF_mask_events,
 	};
 
+	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
+}
+
+void __cpuinit xen_enable_sysenter(void)
+{
+	int cpu = smp_processor_id();
+	extern void xen_sysenter_target(void);
+
 	if (!boot_cpu_has(X86_FEATURE_SEP) ||
-	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+	    register_callback(CALLBACKTYPE_sysenter,
+			      xen_sysenter_target) != 0) {
 		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 	}
@@ -120,8 +126,9 @@ void __init xen_arch_setup(void)
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
 
-	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
-				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
+	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
+	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
+		BUG();
 
 	xen_enable_sysenter();
 
-- 
cgit v1.2.3


From 0725cbb97793d4e65bf148e4872959cdbb8c6ddd Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:03 -0700
Subject: xen64: add identity irq->vector map

The x86_64 interrupt subsystem is oriented towards vectors, as opposed
to a flat irq space as it is in x86-32.  This patch adds a simple
identity irq->vector mapping so that we can continue to feed irqs into
do_IRQ() and get a good result.

Ideally x86_32 will unify with the 64-bit code and use vectors too.
At that point we can move to mapping event channels to vectors, which
will allow us to economise on irqs (so per-cpu event channels can
share irqs, rather than having to allocate one per cpu, for example).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8b60982e457..52f2292672c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1085,8 +1085,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	},
 };
 
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for(i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif	/* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
 static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = xen_init_IRQ,
+	.init_IRQ = __xen_init_IRQ,
 	.save_fl = xen_save_fl,
 	.restore_fl = xen_restore_fl,
 	.irq_disable = xen_irq_disable,
-- 
cgit v1.2.3


From a8fc1089e49caa5dca346dfacb5c84abf9a22a0c Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:07:05 -0700
Subject: xen64: implement xen_load_gs_index()

xen-64: implement xen_load_gs_index()

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 52f2292672c..3b6b7fcf5b5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -385,6 +385,14 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 		loadsegment(gs, 0);
 }
 
+#ifdef CONFIG_X86_64
+static void xen_load_gs_index(unsigned int idx)
+{
+	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+		BUG();
+}
+#endif
+
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
 {
@@ -1063,6 +1071,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
+#ifdef CONFIG_X86_64
+	.load_gs_index = xen_load_gs_index,
+#endif
 
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
-- 
cgit v1.2.3


From 5deb30d194d28b6bf7dacfb758267a51bf7c5b78 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:06 -0700
Subject: xen: rework pgd_walk to deal with 32/64 bit

Rewrite pgd_walk to deal with 64-bit address spaces.  There are two
notable features of 64-bit address spaces:

 1. The virtual address is only 48 bits wide, with the upper 16 bits
    being sign extension; kernel addresses are negative, and userspace is
    positive.

 2. The Xen hypervisor mapping is at the negative-most address, just above
    the sign-extension hole.

1. means that we can't easily use addresses when traversing the space,
since we must deal with sign extension.  This rewrite expresses
everything in terms of pgd/pud/pmd indices, which means we don't need
to worry about the exact configuration of the virtual memory space.
This approach works equally well in 32-bit.

To deal with 2, assume the hole is between the uppermost userspace
address and PAGE_OFFSET.  For 64-bit this skips the Xen mapping hole.
For 32-bit, the hole is zero-sized.

In all cases, the uppermost kernel address is FIXADDR_TOP.

A side-effect of this patch is that the upper boundary is actually
handled properly, exposing a long-standing bug in 32-bit, which failed
to pin the kernel pmd page.  The kernel pmd is not shared, and so must be
explicitly pinned, even though the kernel ptes are shared and don't
need pinning.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 115 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 75 insertions(+), 40 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index eb31ed291b9..046c1f23dd6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,6 +44,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
 #include <asm/linkage.h>
@@ -491,77 +492,103 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 /*
-  (Yet another) pagetable walker.  This one is intended for pinning a
-  pagetable.  This means that it walks a pagetable and calls the
-  callback function on each page it finds making up the page table,
-  at every level.  It walks the entire pagetable, but it only bothers
-  pinning pte pages which are below pte_limit.  In the normal case
-  this will be TASK_SIZE, but at boot we need to pin up to
-  FIXADDR_TOP.  But the important bit is that we don't pin beyond
-  there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker.  This one is intended for pinning a
+ * pagetable.  This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level.  It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit.  In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		    unsigned long limit)
 {
-	pgd_t *pgd = pgd_base;
 	int flush = 0;
-	unsigned long addr = 0;
-	unsigned long pgd_next;
+	unsigned hole_low, hole_high;
+	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+	unsigned pgdidx, pudidx, pmdidx;
 
-	BUG_ON(limit > FIXADDR_TOP);
+	/* The limit is the last byte to be touched */
+	limit--;
+	BUG_ON(limit >= FIXADDR_TOP);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+	/*
+	 * 64-bit has a great big hole in the middle of the address
+	 * space, which contains the Xen mappings.  On 32-bit these
+	 * will end up making a zero-sized hole and so is a no-op.
+	 */
+	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_high = pgd_index(PAGE_OFFSET);
+
+	pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+	pudidx_limit = pud_index(limit);
+#else
+	pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+	pmdidx_limit = pmd_index(limit);
+#else
+	pmdidx_limit = 0;
+#endif
+
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
-		unsigned long pud_limit, pud_next;
 
-		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
+		if (pgdidx >= hole_low && pgdidx < hole_high)
+			continue;
 
-		if (!pgd_val(*pgd))
+		if (!pgd_val(pgd[pgdidx]))
 			continue;
 
-		pud = pud_offset(pgd, 0);
+		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
 			flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-		for (; addr != pud_limit; pud++, addr = pud_next) {
+		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
-			unsigned long pmd_limit;
 
-			pud_next = pud_addr_end(addr, pud_limit);
-
-			if (pud_next < limit)
-				pmd_limit = pud_next;
-			else
-				pmd_limit = limit;
+			if (pgdidx == pgdidx_limit &&
+			    pudidx > pudidx_limit)
+				goto out;
 
-			if (pud_none(*pud))
+			if (pud_none(pud[pudidx]))
 				continue;
 
-			pmd = pmd_offset(pud, 0);
+			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
 				flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-			for (; addr != pmd_limit; pmd++) {
-				addr += (PAGE_SIZE * PTRS_PER_PTE);
-				if ((pmd_limit-1) < (addr-1)) {
-					addr = pmd_limit;
-					break;
-				}
+			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+				struct page *pte;
+
+				if (pgdidx == pgdidx_limit &&
+				    pudidx == pudidx_limit &&
+				    pmdidx > pmdidx_limit)
+					goto out;
 
-				if (pmd_none(*pmd))
+				if (pmd_none(pmd[pmdidx]))
 					continue;
 
-				flush |= (*func)(pmd_page(*pmd), PT_PTE);
+				pte = pmd_page(pmd[pmdidx]);
+				flush |= (*func)(pte, PT_PTE);
 			}
 		}
 	}
-
-	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
 	return flush;
 }
@@ -650,6 +677,11 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is pinnable */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
@@ -731,6 +763,10 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is unpinned */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
 	pgd_walk(pgd, unpin_page, TASK_SIZE);
 
 	xen_mc_issue(0);
@@ -750,7 +786,6 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			printk("unpinning pinned %p\n", page_address(page));
 			xen_pgd_unpin((pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
-- 
cgit v1.2.3


From b7c3c5c15936a40c79ef40af7b3bac801c7feb20 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:07 -0700
Subject: xen: make sure the kernel command line is right

Point the boot params cmd_line_ptr to the domain-builder-provided
command line.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3b6b7fcf5b5..0172ba77452 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1587,6 +1587,7 @@ asmlinkage void __init xen_start_kernel(void)
 	boot_params.hdr.ramdisk_image = xen_start_info->mod_start
 		? __pa(xen_start_info->mod_start) : 0;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
 	if (!is_initial_xendomain()) {
 		add_preferred_console("xenboot", 0, NULL);
-- 
cgit v1.2.3


From 4a5c3e77f70b3ea8b361d7fa9eb2e4dad18f70ae Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:09 -0700
Subject: xen64: implement failsafe callback

Implement the failsafe callback, so that iret and segment register
load exceptions are reported to the kernel.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_64.S | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7cc2de79614..6aa6932e21b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1365,10 +1365,8 @@ END(do_hypervisor_callback)
 # with its current contents: any discrepancy means we in category 1.
 */
 ENTRY(xen_failsafe_callback)
-#if 1
-	ud2a
-#else
-	_frame (RIP-0x30)
+	framesz = (RIP-0x30)	/* workaround buggy gas */
+	_frame framesz
 	CFI_REL_OFFSET rcx, 0
 	CFI_REL_OFFSET r11, 8
 	movw %ds,%cx
@@ -1391,8 +1389,13 @@ ENTRY(xen_failsafe_callback)
 	CFI_RESTORE r11
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
-	movq $11,%rdi	/* SIGSEGV */
-	jmp do_exit
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	pushq %r11
+	CFI_ADJUST_CFA_OFFSET 8
+	pushq %rcx
+	CFI_ADJUST_CFA_OFFSET 8
+	jmp general_protection
 	CFI_RESTORE_STATE
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq (%rsp),%rcx
@@ -1406,7 +1409,6 @@ ENTRY(xen_failsafe_callback)
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
-#endif
 END(xen_failsafe_callback)
 
 #endif /* CONFIG_XEN */
-- 
cgit v1.2.3


From 8a95408e183b3e4aaf3b6a66fa34bff4db53011b Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 8 Jul 2008 15:07:10 -0700
Subject: xen64: Clear %fs on xen_load_tls()

We need to do this, otherwise we can get a GPF on hypercall return
after TLS descriptor is cleared but %fs is still pointing to it.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0172ba77452..c13698faae5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -364,14 +364,6 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	xen_mc_batch();
-
-	load_TLS_descriptor(t, cpu, 0);
-	load_TLS_descriptor(t, cpu, 1);
-	load_TLS_descriptor(t, cpu, 2);
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
-
 	/*
 	 * XXX sleazy hack: If we're being called in a lazy-cpu zone,
 	 * it means we're in a context switch, and %gs has just been
@@ -380,9 +372,30 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	 * Either way, it has been saved, and the new value will get
 	 * loaded properly.  This will go away as soon as Xen has been
 	 * modified to not save/restore %gs for normal hypercalls.
+	 *
+	 * On x86_64, this hack is not used for %gs, because gs points
+	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
+	 * must not zero %gs on x86_64
+	 *
+	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * exception between the new %fs descriptor being loaded and
+	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+#ifdef CONFIG_X86_32
 		loadsegment(gs, 0);
+#else
+		loadsegment(fs, 0);
+#endif
+	}
+
+	xen_mc_batch();
+
+	load_TLS_descriptor(t, cpu, 0);
+	load_TLS_descriptor(t, cpu, 1);
+	load_TLS_descriptor(t, cpu, 2);
+
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From d6182fbf04164016cb6540db02eef3d6bdc967c3 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:13 -0700
Subject: xen64: allocate and manage user pagetables

Because the x86_64 architecture does not enforce segment limits, Xen
cannot protect itself with them as it does in 32-bit mode.  Therefore,
to protect itself, it runs the guest kernel in ring 3.  Since it also
runs the guest userspace in ring3, the guest kernel must maintain a
second pagetable for its userspace, which does not map kernel space.
Naturally, the guest kernel pagetables map both kernel and userspace.

The userspace pagetable is attached to the corresponding kernel
pagetable via the pgd's page->private field.  It is allocated and
freed at the same time as the kernel pgd via the
paravirt_pgd_alloc/free hooks.

Fortunately, the user pagetable is almost entirely shared with the
kernel pagetable; the only difference is the pgd page itself.  set_pgd
will populate all entries in the kernel pagetable, and also set the
corresponding user pgd entry if the address is less than
STACK_TOP_MAX.

The user pagetable must be pinned and unpinned with the kernel one,
but because the pagetables are aliased, pgd_walk() only needs to be
called on the kernel pagetable.  The user pgd page is then
pinned/unpinned along with the kernel pgd page.

xen_write_cr3 must write both the kernel and user cr3s.

The init_mm.pgd pagetable never has a user pagetable allocated for it,
because it can never be used while running usermode.

One awkward area is that early in boot the page structures are not
available.  No user pagetable can exist at that point, but it
complicates the logic to avoid looking at the page structure.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 99 ++++++++++++++++++++++++++++++++++++++++--------
 arch/x86/xen/mmu.c       | 91 +++++++++++++++++++++++++++++++++++++++-----
 arch/x86/xen/mmu.h       |  2 +
 3 files changed, 168 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c13698faae5..48f1a7eca8b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -46,7 +46,6 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
-#include <asm/pgalloc.h>
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -711,29 +710,57 @@ static void set_current_cr3(void *v)
 	x86_write_percpu(xen_current_cr3, (unsigned long)v);
 }
 
-static void xen_write_cr3(unsigned long cr3)
+static void __xen_write_cr3(bool kernel, unsigned long cr3)
 {
 	struct mmuext_op *op;
 	struct multicall_space mcs;
-	unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	unsigned long mfn;
 
-	BUG_ON(preemptible());
+	if (cr3)
+		mfn = pfn_to_mfn(PFN_DOWN(cr3));
+	else
+		mfn = 0;
 
-	mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+	WARN_ON(mfn == 0 && kernel);
 
-	/* Update while interrupts are disabled, so its atomic with
-	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	mcs = __xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
-	op->cmd = MMUEXT_NEW_BASEPTR;
+	op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
 	op->arg1.mfn = mfn;
 
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-	/* Update xen_update_cr3 once the batch has actually
-	   been submitted. */
-	xen_mc_callback(set_current_cr3, (void *)cr3);
+	if (kernel) {
+		x86_write_percpu(xen_cr3, cr3);
+
+		/* Update xen_current_cr3 once the batch has actually
+		   been submitted. */
+		xen_mc_callback(set_current_cr3, (void *)cr3);
+	}
+}
+
+static void xen_write_cr3(unsigned long cr3)
+{
+	BUG_ON(preemptible());
+
+	xen_mc_batch();  /* disables interrupts */
+
+	/* Update while interrupts are disabled, so its atomic with
+	   respect to ipis */
+	x86_write_percpu(xen_cr3, cr3);
+
+	__xen_write_cr3(true, cr3);
+
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+		if (user_pgd)
+			__xen_write_cr3(false, __pa(user_pgd));
+		else
+			__xen_write_cr3(false, 0);
+	}
+#endif
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
@@ -794,6 +821,40 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
 
+static int xen_pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = mm->pgd;
+	int ret = 0;
+
+	BUG_ON(PagePinned(virt_to_page(pgd)));
+
+#ifdef CONFIG_X86_64
+	{
+		struct page *page = virt_to_page(pgd);
+
+		BUG_ON(page->private != 0);
+
+		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (page->private == 0)
+			ret = -ENOMEM;
+
+		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+	}
+#endif
+
+	return ret;
+}
+
+static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+	if (user_pgd)
+		free_page((unsigned long)user_pgd);
+#endif
+}
+
 /* This should never happen until we're OK to use struct page */
 static void xen_release_ptpage(u32 pfn, unsigned level)
 {
@@ -1168,8 +1229,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.pgd_alloc = __paravirt_pgd_alloc,
-	.pgd_free = paravirt_nop,
+	.pgd_alloc = xen_pgd_alloc,
+	.pgd_free = xen_pgd_free,
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
@@ -1480,7 +1541,15 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 
 	/* Switch over */
 	pgd = init_level4_pgt;
-	xen_write_cr3(__pa(pgd));
+
+	/*
+	 * At this stage there can be no user pgd, and no page
+	 * structure to attach it to, so make sure we just set kernel
+	 * pgd.
+	 */
+	xen_mc_batch();
+	__xen_write_cr3(true, __pa(pgd));
+	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
 	reserve_early(__pa(xen_start_info->pt_base),
 		      __pa(xen_start_info->pt_base +
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 046c1f23dd6..a44d56e38bd 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,6 +58,13 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address.  STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
@@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud)
 	return native_make_pud(pud);
 }
 
-void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
 {
-	struct mmu_update u;
+	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+	unsigned offset = pgd - pgd_page;
+	pgd_t *user_ptr = NULL;
 
-	preempt_disable();
+	if (offset < pgd_index(USER_LIMIT)) {
+		struct page *page = virt_to_page(pgd_page);
+		user_ptr = (pgd_t *)page->private;
+		if (user_ptr)
+			user_ptr += offset;
+	}
 
-	xen_mc_batch();
+	return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	struct mmu_update u;
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
 	extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+	preempt_disable();
+
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
+	pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
 	if (!page_pinned(ptr)) {
 		*ptr = val;
+		if (user_ptr) {
+			WARN_ON(page_pinned(user_ptr));
+			*user_ptr = val;
+		}
 		return;
 	}
 
-	xen_set_pgd_hyper(ptr, val);
+	/* If it's pinned, then we can at least batch the kernel and
+	   user updates together. */
+	xen_mc_batch();
+
+	__xen_set_pgd_hyper(ptr, val);
+	if (user_ptr)
+		__xen_set_pgd_hyper(user_ptr, val);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif	/* PAGETABLE_LEVELS == 4 */
 
@@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 	 * space, which contains the Xen mappings.  On 32-bit these
 	 * will end up making a zero-sized hole and so is a no-op.
 	 */
-	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_low = pgd_index(USER_LIMIT);
 	hole_high = pgd_index(PAGE_OFFSET);
 
 	pgdidx_limit = pgd_index(limit);
@@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+		if (user_pgd) {
+			pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+		}
+	}
+#else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
@@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_64
+	{
+		pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+		if (user_pgd) {
+			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+			unpin_page(virt_to_page(user_pgd), PT_PGD);
+		}
+	}
+#endif
+
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
 	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
-	pgd_walk(pgd, unpin_page, TASK_SIZE);
+
+	pgd_walk(pgd, unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 19d544b0b6c..0f59bd03f9e 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -51,6 +51,8 @@ void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
 void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
 #endif
 
+pgd_t *xen_get_user_pgd(pgd_t *pgd);
+
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep, pte_t pte);
-- 
cgit v1.2.3


From 6fcac6d305e8238939e169f4c52e8ec8a552a31f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:14 -0700
Subject: xen64: set up syscall and sysenter entrypoints for 64-bit

We set up entrypoints for syscall and sysenter.  sysenter is only used
for 32-bit compat processes, whereas syscall can be used by both 32-bit
and 64-bit processes.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c  |   4 ++
 arch/x86/xen/setup.c      |  42 +++++++++++++--
 arch/x86/xen/smp.c        |   1 +
 arch/x86/xen/xen-asm_64.S | 129 +++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/xen/xen-ops.h    |   3 ++
 5 files changed, 174 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 48f1a7eca8b..87d36044054 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1139,6 +1139,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = xen_sysret32,
+	.usergs_sysret64 = xen_sysret64,
+#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index bea3d4f779d..9d7a1440289 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -86,9 +86,11 @@ static void xen_idle(void)
  */
 static void __init fiddle_vdso(void)
 {
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 	extern const char vdso32_default_start;
 	u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
+#endif
 }
 
 static __cpuinit int register_callback(unsigned type, const void *func)
@@ -106,15 +108,48 @@ void __cpuinit xen_enable_sysenter(void)
 {
 	int cpu = smp_processor_id();
 	extern void xen_sysenter_target(void);
+	int ret;
+
+#ifdef CONFIG_X86_32
+	if (!boot_cpu_has(X86_FEATURE_SEP)) {
+		return;
+	}
+#else
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) {
+		return;
+	}
+#endif
 
-	if (!boot_cpu_has(X86_FEATURE_SEP) ||
-	    register_callback(CALLBACKTYPE_sysenter,
-			      xen_sysenter_target) != 0) {
+	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
+	if(ret != 0) {
 		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
 	}
 }
 
+void __cpuinit xen_enable_syscall(void)
+{
+#ifdef CONFIG_X86_64
+	int cpu = smp_processor_id();
+	int ret;
+	extern void xen_syscall_target(void);
+	extern void xen_syscall32_target(void);
+
+	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+	if (ret != 0) {
+		printk("failed to set syscall: %d\n", ret);
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SYSCALL);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SYSCALL);
+	} else {
+		ret = register_callback(CALLBACKTYPE_syscall32,
+					xen_syscall32_target);
+		if (ret != 0)
+			printk("failed to set 32-bit syscall: %d\n", ret);
+	}
+#endif /* CONFIG_X86_64 */
+}
+
 void __init xen_arch_setup(void)
 {
 	struct physdev_set_iopl set_iopl;
@@ -131,6 +166,7 @@ void __init xen_arch_setup(void)
 		BUG();
 
 	xen_enable_sysenter();
+	xen_enable_syscall();
 
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8310ca0ea37..f702199312a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -69,6 +69,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	preempt_disable();
 
 	xen_enable_sysenter();
+	xen_enable_syscall();
 
 	cpu = smp_processor_id();
 	smp_store_cpu_info(cpu);
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index b147b495dae..4038cbfe333 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,8 @@
 
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
 
 #include <xen/interface/xen.h>
 
@@ -138,9 +140,132 @@ ENTRY(xen_adjust_exception_frame)
 	mov 8+8(%rsp),%r11
 	ret $16
 
+hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
+/*
+	Xen64 iret frame:
+
+	ss
+	rsp
+	rflags
+	cs
+	rip		<-- standard iret frame
+
+	flags
+
+	rcx		}
+	r11		}<-- pushed by hypercall page
+rsp ->	rax		}
+ */
 ENTRY(xen_iret)
 	pushq $0
-	jmp hypercall_page + __HYPERVISOR_iret * 32
+1:	jmp hypercall_iret
+ENDPATCH(xen_iret)
+RELOC(xen_iret, 1b+1)
 
+/*
+	sysexit is not used for 64-bit processes, so it's
+	only ever used to return to 32-bit compat userspace.
+ */
 ENTRY(xen_sysexit)
-	ud2a
+	pushq $__USER32_DS
+	pushq %rcx
+	pushq $X86_EFLAGS_IF
+	pushq $__USER32_CS
+	pushq %rdx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysexit)
+RELOC(xen_sysexit, 1b+1)
+
+ENTRY(xen_sysret64)
+	/* We're already on the usermode stack at this point, but still
+	   with the kernel gs, so we can easily switch back */
+	movq %rsp, %gs:pda_oldrsp
+	movq %gs:pda_kernelstack,%rsp
+
+	pushq $__USER_DS
+	pushq %gs:pda_oldrsp
+	pushq %r11
+	pushq $__USER_CS
+	pushq %rcx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysret64)
+RELOC(xen_sysret64, 1b+1)
+
+ENTRY(xen_sysret32)
+	/* We're already on the usermode stack at this point, but still
+	   with the kernel gs, so we can easily switch back */
+	movq %rsp, %gs:pda_oldrsp
+	movq %gs:pda_kernelstack, %rsp
+
+	pushq $__USER32_DS
+	pushq %gs:pda_oldrsp
+	pushq %r11
+	pushq $__USER32_CS
+	pushq %rcx
+
+	pushq $VGCF_in_syscall
+1:	jmp hypercall_iret
+ENDPATCH(xen_sysret32)
+RELOC(xen_sysret32, 1b+1)
+
+/*
+	Xen handles syscall callbacks much like ordinary exceptions,
+	which means we have:
+	 - kernel gs
+	 - kernel rsp
+	 - an iret-like stack frame on the stack (including rcx and r11):
+		ss
+		rsp
+		rflags
+		cs
+		rip
+		r11
+	rsp->	rcx
+
+	In all the entrypoints, we undo all that to make it look
+	like a CPU-generated syscall/sysenter and jump to the normal
+	entrypoint.
+ */
+
+.macro undo_xen_syscall
+	mov 0*8(%rsp),%rcx
+	mov 1*8(%rsp),%r11
+	mov 5*8(%rsp),%rsp
+.endm
+
+/* Normal 64-bit system call target */
+ENTRY(xen_syscall_target)
+	undo_xen_syscall
+	jmp system_call_after_swapgs
+ENDPROC(xen_syscall_target)
+
+#ifdef CONFIG_IA32_EMULATION
+
+/* 32-bit compat syscall target */
+ENTRY(xen_syscall32_target)
+	undo_xen_syscall
+	jmp ia32_cstar_target
+ENDPROC(xen_syscall32_target)
+
+/* 32-bit compat sysenter target */
+ENTRY(xen_sysenter_target)
+	undo_xen_syscall
+	jmp ia32_sysenter_target
+ENDPROC(xen_sysenter_target)
+
+#else /* !CONFIG_IA32_EMULATION */
+
+ENTRY(xen_syscall32_target)
+ENTRY(xen_sysenter_target)
+	lea 16(%rsp), %rsp	/* strip %rcx,%r11 */
+	mov $-ENOSYS, %rax
+	pushq $VGCF_in_syscall
+	jmp hypercall_iret
+ENDPROC(xen_syscall32_target)
+ENDPROC(xen_sysenter_target)
+
+#endif	/* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index c4800a2c5a4..dd3c23152a2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -26,6 +26,7 @@ char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
+void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
@@ -70,6 +71,8 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 /* These are not functions, and cannot be called normally */
 void xen_iret(void);
 void xen_sysexit(void);
+void xen_sysret32(void);
+void xen_sysret64(void);
 void xen_adjust_exception_frame(void);
 
 #endif /* XEN_OPS_H */
-- 
cgit v1.2.3


From bf18bf94dc72db998d0fbebc846c07c858a59c90 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:15 -0700
Subject: xen64: set up userspace syscall patch

64-bit userspace expects the vdso to be mapped at a specific fixed
address, which happens to be in the middle of the kernel address
space.  Because we have split user and kernel pagetables, we need to
make special arrangements for the vsyscall mapping to appear in the
kernel part of the user pagetable.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 87d36044054..f64b8729cd0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -56,6 +56,18 @@ EXPORT_SYMBOL_GPL(hypercall_page);
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+/*
+ * Identity map, in addition to plain kernel map.  This needs to be
+ * large enough to allocate page table pages to allocate the rest.
+ * Each page can map 2MB.
+ */
+static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+
+#ifdef CONFIG_X86_64
+/* l3 pud for userspace vsyscall mapping */
+static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+#endif /* CONFIG_X86_64 */
+
 /*
  * Note about cr3 (pagetable base) values:
  *
@@ -831,12 +843,20 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 #ifdef CONFIG_X86_64
 	{
 		struct page *page = virt_to_page(pgd);
+		pgd_t *user_pgd;
 
 		BUG_ON(page->private != 0);
 
-		page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO);
-		if (page->private == 0)
-			ret = -ENOMEM;
+		ret = -ENOMEM;
+
+		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		page->private = (unsigned long)user_pgd;
+
+		if (user_pgd != NULL) {
+			user_pgd[pgd_index(VSYSCALL_START)] =
+				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+			ret = 0;
+		}
 
 		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
 	}
@@ -977,6 +997,9 @@ static __init void xen_post_allocator_init(void)
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
 
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_mark_init_mm_pinned();
 }
 
@@ -1088,6 +1111,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 	}
 
 	__native_set_fixmap(idx, pte);
+
+#ifdef CONFIG_X86_64
+	/* Replicate changes to map the vsyscall page into the user
+	   pagetable vsyscall mapping. */
+	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+		unsigned long vaddr = __fix_to_virt(idx);
+		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+	}
+#endif
 }
 
 static const struct pv_info xen_info __initdata = {
@@ -1427,13 +1459,6 @@ static void set_page_prot(void *addr, pgprot_t prot)
 		BUG();
 }
 
-/*
- * Identity map, in addition to plain kernel map.  This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-
 static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1533,6 +1558,7 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
-- 
cgit v1.2.3


From 1153968a48e3ca3e2b7a437e8b82ec9e6f768e24 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:16 -0700
Subject: xen: implement Xen write_msr operation

64-bit uses MSRs for important things like the base for fs and
gs-prefixed addresses.  It's more efficient to use a hypercall to
update these, rather than go via the trap and emulate path.

Other MSR writes are just passed through; in an unprivileged domain
they do nothing, but it might be useful later.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f64b8729cd0..776c0fb77d6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -41,6 +41,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -777,6 +778,34 @@ static void xen_write_cr3(unsigned long cr3)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
+static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+{
+	int ret;
+
+	ret = 0;
+
+	switch(msr) {
+#ifdef CONFIG_X86_64
+		unsigned which;
+		u64 base;
+
+	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
+	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
+	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
+
+	set:
+		base = ((u64)high << 32) | low;
+		if (HYPERVISOR_set_segment_base(which, base) != 0)
+			ret = -EFAULT;
+		break;
+#endif
+	default:
+		ret = native_write_msr_safe(msr, low, high);
+	}
+
+	return ret;
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@ -1165,7 +1194,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.write_msr = xen_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 
-- 
cgit v1.2.3


From 51dd660a2cd6eab4d470cfe1009c7f473832b786 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 8 Jul 2008 15:07:17 -0700
Subject: xen: update Kconfig to allow 64-bit Xen

Allow Xen to be enabled on 64-bit.

Also extend domain size limit from 8 GB (on 32-bit) to 32 GB on 64-bit.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Kconfig | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c2cc9958087..20b49729bed 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,8 +6,8 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	select PARAVIRT_CLOCK
-	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER))
+	depends on X86_CMPXCHG && X86_TSC
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
@@ -15,10 +15,11 @@ config XEN
 
 config XEN_MAX_DOMAIN_MEMORY
        int "Maximum allowed size of a domain in gigabytes"
-       default 8
+       default 8 if X86_32
+       default 32 if X86_64
        depends on XEN
        help
          The pseudo-physical to machine address array is sized
          according to the maximum possible memory size of a Xen
          domain.  This array uses 1 page per gigabyte, so there's no
-         need to be too stingy here.
\ No newline at end of file
+         need to be too stingy here.
-- 
cgit v1.2.3


From b3fe124389f9dd97f0bbd954da2910e286648f0f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 13:45:33 +0200
Subject: xen64: fix build error on 32-bit + !HIGHMEM

fix:

arch/x86/xen/enlighten.c: In function 'xen_set_fixmap':
arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_BEGIN' undeclared (first use in this function)
arch/x86/xen/enlighten.c:1127: error: (Each undeclared identifier is reported only once
arch/x86/xen/enlighten.c:1127: error: for each function it appears in.)
arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_END' undeclared (first use in this function)
make[1]: *** [arch/x86/xen/enlighten.o] Error 1
make: *** [arch/x86/xen/enlighten.o] Error 2

FIX_KMAP_BEGIN is only available on HIGHMEM.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 776c0fb77d6..3da6acb7eaf 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1124,7 +1124,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_32
 	case FIX_WP_TEST:
 	case FIX_VDSO:
+# ifdef CONFIG_HIGHMEM
 	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+# endif
 #else
 	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
 #endif
-- 
cgit v1.2.3


From 6596f2422306a05be2170efc114da49f26a047dd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 22:32:33 +0200
Subject: Revert "x86_64: there's no need to preallocate level1_fixmap_pgt"

This reverts commit 033786969d1d1b5af12a32a19d3a760314d05329.

Suresh Siddha reported that this broke booting on his 2GB testbox.

Reported-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2240f823676..db3280afe88 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -362,6 +362,12 @@ NEXT_PAGE(level3_kernel_pgt)
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 
 NEXT_PAGE(level2_fixmap_pgt)
+	.fill	506,8,0
+	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
+	.fill	5,8,0
+
+NEXT_PAGE(level1_fixmap_pgt)
 	.fill	512,8,0
 
 NEXT_PAGE(level2_ident_pgt)
-- 
cgit v1.2.3


From 62541c376668042e20122864a044360707b2fb82 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 10 Jul 2008 16:24:08 -0700
Subject: xen64: disable 32-bit syscall/sysenter if not supported.

Old versions of Xen (3.1 and before) don't support sysenter or syscall
from 32-bit compat userspaces.  If we can't set the appropriate
syscall callback, then disable the corresponding feature bit, which
will cause the vdso32 setup to fall back appropriately.

Linux assumes that syscall is always available to 32-bit userspace,
and installs it by default if sysenter isn't available.  In that case,
we just disable vdso altogether, forcing userspace libc to fall back
to int $0x80.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/setup.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9d7a1440289..9cce4a92aac 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -106,46 +106,46 @@ static __cpuinit int register_callback(unsigned type, const void *func)
 
 void __cpuinit xen_enable_sysenter(void)
 {
-	int cpu = smp_processor_id();
 	extern void xen_sysenter_target(void);
 	int ret;
+	unsigned sysenter_feature;
 
 #ifdef CONFIG_X86_32
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		return;
-	}
+	sysenter_feature = X86_FEATURE_SEP;
 #else
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
-	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) {
-		return;
-	}
+	sysenter_feature = X86_FEATURE_SYSENTER32;
 #endif
 
+	if (!boot_cpu_has(sysenter_feature))
+		return;
+
 	ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
-	if(ret != 0) {
-		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
-	}
+	if(ret != 0)
+		setup_clear_cpu_cap(sysenter_feature);
 }
 
 void __cpuinit xen_enable_syscall(void)
 {
 #ifdef CONFIG_X86_64
-	int cpu = smp_processor_id();
 	int ret;
 	extern void xen_syscall_target(void);
 	extern void xen_syscall32_target(void);
 
 	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
 	if (ret != 0) {
-		printk("failed to set syscall: %d\n", ret);
-		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SYSCALL);
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SYSCALL);
-	} else {
+		printk(KERN_ERR "Failed to set syscall: %d\n", ret);
+		/* Pretty fatal; 64-bit userspace has no other
+		   mechanism for syscalls. */
+	}
+
+	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
 		ret = register_callback(CALLBACKTYPE_syscall32,
 					xen_syscall32_target);
-		if (ret != 0)
-			printk("failed to set 32-bit syscall: %d\n", ret);
+		if (ret != 0) {
+			printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n");
+			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+			sysctl_vsyscall32 = 0;
+		}
 	}
 #endif /* CONFIG_X86_64 */
 }
-- 
cgit v1.2.3


From 71415c6a0877d5944d5dc3060f3b03513746158d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 11 Jul 2008 22:41:34 +0200
Subject: x86, xen, vdso: fix build error

fix:

   arch/x86/xen/built-in.o: In function `xen_enable_syscall':
   (.cpuinit.text+0xdb): undefined reference to `sysctl_vsyscall32'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/setup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9cce4a92aac..3e11779755c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -144,7 +144,9 @@ void __cpuinit xen_enable_syscall(void)
 		if (ret != 0) {
 			printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n");
 			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+#ifdef CONFIG_COMPAT
 			sysctl_vsyscall32 = 0;
+#endif
 		}
 	}
 #endif /* CONFIG_X86_64 */
-- 
cgit v1.2.3


From 6a52e4b1cddd90fbfde8fb67021657936ee74b07 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sat, 12 Jul 2008 02:22:00 -0700
Subject: x86_64: further cleanup of 32-bit compat syscall mechanisms

AMD only supports "syscall" from 32-bit compat usermode.
Intel and Centaur(?) only support "sysenter" from 32-bit compat usermode.

Set the X86 feature bits accordingly, and set up the vdso in
accordance with those bits.  On the off chance that we run in a 64-bit
environment which supports neither syscall nor sysenter from 32-bit
mode, fall back to the int $0x80 vdso.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd_64.c    |  2 ++
 arch/x86/kernel/cpu/common_64.c |  3 ---
 arch/x86/vdso/Makefile          |  2 +-
 arch/x86/vdso/vdso32-setup.c    | 19 +++++++++----------
 arch/x86/vdso/vdso32.S          | 13 ++++++++-----
 arch/x86/xen/setup.c            | 10 +++++++---
 6 files changed, 27 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 7c36fb8a28d..d1692b2a41f 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
 	if (c->x86_power & (1<<8))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 15419cd3c5a..736f50fa433 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -317,9 +317,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 		c->x86_phys_bits = eax & 0xff;
 	}
 
-	/* Assume all 64-bit CPUs support 32-bit syscall */
-	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
-
 	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
 	    cpu_devs[c->x86_vendor]->c_early_init)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index b7ad9f89d21..4d6ef0a336d 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -62,7 +62,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
 # Build multiple 32-bit vDSO images to choose from at boot time.
 #
 obj-$(VDSO32-y)			+= vdso32-syms.lds
-vdso32.so-$(CONFIG_X86_32)	+= int80
+vdso32.so-$(VDSO32-y)		+= int80
 vdso32.so-$(CONFIG_COMPAT)	+= syscall
 vdso32.so-$(VDSO32-y)		+= sysenter
 
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0bce5429a51..513f330c583 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -193,17 +193,12 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
 	}
 }
 
-/*
- * These symbols are defined by vdso32.S to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vdso32_default_start, vdso32_default_end;
-extern const char vdso32_sysenter_start, vdso32_sysenter_end;
 static struct page *vdso32_pages[1];
 
 #ifdef CONFIG_X86_64
 
 #define	vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
+#define	vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32))
 
 /* May not be __init: called during resume */
 void syscall32_cpu_init(void)
@@ -226,6 +221,7 @@ static inline void map_compat_vdso(int map)
 #else  /* CONFIG_X86_32 */
 
 #define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))
+#define vdso32_syscall()	(0)
 
 void enable_sep_cpu(void)
 {
@@ -296,12 +292,15 @@ int __init sysenter_setup(void)
 	gate_vma_init();
 #endif
 
-	if (!vdso32_sysenter()) {
-		vsyscall = &vdso32_default_start;
-		vsyscall_len = &vdso32_default_end - &vdso32_default_start;
-	} else {
+	if (vdso32_syscall()) {
+		vsyscall = &vdso32_syscall_start;
+		vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
+	} else if (vdso32_sysenter()){
 		vsyscall = &vdso32_sysenter_start;
 		vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
+	} else {
+		vsyscall = &vdso32_int80_start;
+		vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
 	}
 
 	memcpy(syscall_page, vsyscall, vsyscall_len);
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index 1e36f72cab8..2ce5f82c333 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -2,14 +2,17 @@
 
 __INITDATA
 
-	.globl vdso32_default_start, vdso32_default_end
-vdso32_default_start:
-#ifdef CONFIG_X86_32
+	.globl vdso32_int80_start, vdso32_int80_end
+vdso32_int80_start:
 	.incbin "arch/x86/vdso/vdso32-int80.so"
-#else
+vdso32_int80_end:
+
+	.globl vdso32_syscall_start, vdso32_syscall_end
+vdso32_syscall_start:
+#ifdef CONFIG_COMPAT
 	.incbin "arch/x86/vdso/vdso32-syscall.so"
 #endif
-vdso32_default_end:
+vdso32_syscall_end:
 
 	.globl vdso32_sysenter_start, vdso32_sysenter_end
 vdso32_sysenter_start:
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3e11779755c..e3648e64a63 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -83,12 +83,16 @@ static void xen_idle(void)
 
 /*
  * Set the bit indicating "nosegneg" library variants should be used.
+ * We only need to bother in pure 32-bit mode; compat 32-bit processes
+ * can have un-truncated segments, so wrapping around is allowed.
  */
 static void __init fiddle_vdso(void)
 {
-#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-	extern const char vdso32_default_start;
-	u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK);
+#ifdef CONFIG_X86_32
+	u32 *mask;
+	mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
+	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
+	mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 #endif
 }
-- 
cgit v1.2.3


From d5303b811b9d6dad2e7396d545eb7db414d42a61 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sat, 12 Jul 2008 02:22:06 -0700
Subject: x86: xen: no need to disable vdso32

Now that the vdso32 code can cope with both syscall and sysenter
missing for 32-bit compat processes, just disable the features without
disabling vdso altogether.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/xen/setup.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e3648e64a63..b6acc3a0af4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -137,7 +137,7 @@ void __cpuinit xen_enable_syscall(void)
 
 	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
 	if (ret != 0) {
-		printk(KERN_ERR "Failed to set syscall: %d\n", ret);
+		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
 		/* Pretty fatal; 64-bit userspace has no other
 		   mechanism for syscalls. */
 	}
@@ -145,13 +145,8 @@ void __cpuinit xen_enable_syscall(void)
 	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
 		ret = register_callback(CALLBACKTYPE_syscall32,
 					xen_syscall32_target);
-		if (ret != 0) {
-			printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n");
+		if (ret != 0)
 			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
-#ifdef CONFIG_COMPAT
-			sysctl_vsyscall32 = 0;
-#endif
-		}
 	}
 #endif /* CONFIG_X86_64 */
 }
-- 
cgit v1.2.3


From 094029479be8eb380447f42eff1b35362ef1a464 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sat, 12 Jul 2008 02:22:12 -0700
Subject: x86_64: adjust exception frame on paranoid exceptions

Exceptions using paranoidentry need to have their exception frames
adjusted explicitly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6aa6932e21b..80d5663db3b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1189,6 +1189,7 @@ END(device_not_available)
 	/* runs on exception stack */
 KPROBE_ENTRY(debug)
  	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8		
 	paranoidentry do_debug, DEBUG_STACK
@@ -1198,6 +1199,7 @@ KPROBE_END(debug)
 	/* runs on exception stack */	
 KPROBE_ENTRY(nmi)
 	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $-1
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_nmi, 0, 0
@@ -1211,6 +1213,7 @@ KPROBE_END(nmi)
 
 KPROBE_ENTRY(int3)
  	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
  	pushq $0
  	CFI_ADJUST_CFA_OFFSET 8
  	paranoidentry do_int3, DEBUG_STACK
@@ -1237,6 +1240,7 @@ END(coprocessor_segment_overrun)
 	/* runs on exception stack */
 ENTRY(double_fault)
 	XCPT_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	paranoidentry do_double_fault
 	jmp paranoid_exit1
 	CFI_ENDPROC
@@ -1253,6 +1257,7 @@ END(segment_not_present)
 	/* runs on exception stack */
 ENTRY(stack_segment)
 	XCPT_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	paranoidentry do_stack_segment
 	jmp paranoid_exit1
 	CFI_ENDPROC
@@ -1278,6 +1283,7 @@ END(spurious_interrupt_bug)
 	/* runs on exception stack */
 ENTRY(machine_check)
 	INTR_FRAME
+	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8	
 	paranoidentry do_machine_check
-- 
cgit v1.2.3


From 74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:50 -0700
Subject: x86/paravirt: add hooks for spinlock operations

Ticket spinlocks have absolutely ghastly worst-case performance
characteristics in a virtual environment.  If there is any contention
for physical CPUs (ie, there are more runnable vcpus than cpus), then
ticket locks can cause the system to end up spending 90+% of its time
spinning.

The problem is that (v)cpus waiting on a ticket spinlock will be
granted access to the lock in strict order they got their tickets.  If
the hypervisor scheduler doesn't give the vcpus time in that order,
they will burn timeslices waiting for the scheduler to give the right
vcpu some time.  In the worst case it could take O(n^2) vcpu scheduler
timeslices for everyone waiting on the lock to get it, not counting
new cpus trying to take the lock while the log-jam is sorted out.

These hooks allow a paravirt backend to replace the spinlock
implementation.

At the very least, this could revert the implementation back to the
old lock algorithm, which allows the next scheduled vcpu to take the
lock, and has basically fairly good performance.

It also allows the spinlocks to take advantage of hypervisor
features to make locks more efficient (spin and block, for example).

The cost to native execution is an extra direct call when using a
spinlock function.  There's no overhead if CONFIG_PARAVIRT is turned
off.

The lock structure is fixed at a single "unsigned int", initialized to
zero, but the spinlock implementation can use it as it wishes.

Thanks to Thomas Friebel's Xen Summit talk "Preventing Guests from
Spinning Around" for pointing out this problem.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 2963ab5d91e..f3381686870 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -124,6 +124,7 @@ static void *get_call_destination(u8 type)
 		.pv_irq_ops = pv_irq_ops,
 		.pv_apic_ops = pv_apic_ops,
 		.pv_mmu_ops = pv_mmu_ops,
+		.pv_lock_ops = pv_lock_ops,
 	};
 	return *((void **)&tmpl + type);
 }
@@ -450,6 +451,15 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.set_fixmap = native_set_fixmap,
 };
 
+struct pv_lock_ops pv_lock_ops = {
+	.spin_is_locked = __ticket_spin_is_locked,
+	.spin_is_contended = __ticket_spin_is_contended,
+
+	.spin_lock = __ticket_spin_lock,
+	.spin_trylock = __ticket_spin_trylock,
+	.spin_unlock = __ticket_spin_unlock,
+};
+
 EXPORT_SYMBOL_GPL(pv_time_ops);
 EXPORT_SYMBOL    (pv_cpu_ops);
 EXPORT_SYMBOL    (pv_mmu_ops);
-- 
cgit v1.2.3


From 8efcbab674de2bee45a2e4cdf97de16b8e609ac8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:51 -0700
Subject: paravirt: introduce a "lock-byte" spinlock implementation

Implement a version of the old spinlock algorithm, in which everyone
spins waiting for a lock byte.  In order to be compatible with the
ticket-lock's use of a zero initializer, this uses the convention of
'0' for unlocked and '1' for locked.

This algorithm is much better than ticket locks in a virtual
environment, because it doesn't interact badly with the vcpu scheduler.
If there are multiple vcpus spinning on a lock and the lock is
released, the next vcpu to be scheduled will take the lock, rather
than cycling around until the next ticketed vcpu gets it.

To use this, you must call paravirt_use_bytelocks() very early, before
any spinlocks have been taken.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f3381686870..bba4041bb7f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,15 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
+void __init paravirt_use_bytelocks(void)
+{
+	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+	pv_lock_ops.spin_lock = __byte_spin_lock;
+	pv_lock_ops.spin_trylock = __byte_spin_trylock;
+	pv_lock_ops.spin_unlock = __byte_spin_unlock;
+}
+
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
-- 
cgit v1.2.3


From 56397f8dadb40055479a8ffff23f21a890098a31 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:52 -0700
Subject: xen: use lock-byte spinlock implementation

Switch to using the lock-byte spinlock implementation, to avoid the
worst of the performance hit from ticket locks.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index f702199312a..a8ebafc09d4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -430,4 +430,5 @@ void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
+	paravirt_use_bytelocks();
 }
-- 
cgit v1.2.3


From 2d9e1e2f58b5612aa4eab0ab54c84308a29dbd79 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 7 Jul 2008 12:07:53 -0700
Subject: xen: implement Xen-specific spinlocks

The standard ticket spinlocks are very expensive in a virtual
environment, because their performance depends on Xen's scheduler
giving vcpus time in the order that they're supposed to take the
spinlock.

This implements a Xen-specific spinlock, which should be much more
efficient.

The fast-path is essentially the old Linux-x86 locks, using a single
lock byte.  The locker decrements the byte; if the result is 0, then
they have the lock.  If the lock is negative, then locker must spin
until the lock is positive again.

When there's contention, the locker spins for 2^16[*] iterations waiting
to get the lock.  If it fails to get the lock in that time, it adds
itself to the contention count in the lock and blocks on a per-cpu
event channel.

When unlocking the spinlock, the locker looks to see if there's anyone
blocked waiting for the lock by checking for a non-zero waiter count.
If there's a waiter, it traverses the per-cpu "lock_spinners"
variable, which contains which lock each CPU is waiting on.  It picks
one CPU waiting on the lock and sends it an event to wake it up.

This allows efficient fast-path spinlock operation, while allowing
spinning vcpus to give up their processor time while waiting for a
contended lock.

[*] 2^16 iterations is the threshold at which 98% of locks have been
taken, according to Thomas Friebel's Xen Summit talk "Preventing Guests
from Spinning Around".  Therefore, we'd expect the lock and unlock slow
paths to be entered only 2% of the time.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/smp.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 171 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a8ebafc09d4..e693812ac59 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,6 +15,7 @@
  * This does not handle HOTPLUG_CPU yet.
  */
 #include <linux/sched.h>
+#include <linux/kernel_stat.h>
 #include <linux/err.h>
 #include <linux/smp.h>
 
@@ -35,6 +36,8 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
+static void __cpuinit xen_init_lock_cpu(int cpu);
+
 cpumask_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
@@ -179,6 +182,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
 
+	xen_init_lock_cpu(0);
+
 	smp_store_cpu_info(0);
 	cpu_data(0).x86_max_cores = 1;
 	set_cpu_sibling_map(0);
@@ -301,6 +306,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	clear_tsk_thread_flag(idle, TIF_FORK);
 #endif
 	xen_setup_timer(cpu);
+	xen_init_lock_cpu(cpu);
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
@@ -413,6 +419,170 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+struct xen_spinlock {
+	unsigned char lock;		/* 0 -> free; 1 -> locked */
+	unsigned short spinners;	/* count of waiting cpus */
+};
+
+static int xen_spin_is_locked(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	return xl->lock != 0;
+}
+
+static int xen_spin_is_contended(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	/* Not strictly true; this is only the count of contended
+	   lock-takers entering the slow path. */
+	return xl->spinners != 0;
+}
+
+static int xen_spin_trylock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	u8 old = 1;
+
+	asm("xchgb %b0,%1"
+	    : "+q" (old), "+m" (xl->lock) : : "memory");
+
+	return old == 0;
+}
+
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+
+static inline void spinning_lock(struct xen_spinlock *xl)
+{
+	__get_cpu_var(lock_spinners) = xl;
+	wmb();			/* set lock of interest before count */
+	asm(LOCK_PREFIX " incw %0"
+	    : "+m" (xl->spinners) : : "memory");
+}
+
+static inline void unspinning_lock(struct xen_spinlock *xl)
+{
+	asm(LOCK_PREFIX " decw %0"
+	    : "+m" (xl->spinners) : : "memory");
+	wmb();			/* decrement count before clearing lock */
+	__get_cpu_var(lock_spinners) = NULL;
+}
+
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	int irq = __get_cpu_var(lock_kicker_irq);
+	int ret;
+
+	/* If kicker interrupts not initialized yet, just spin */
+	if (irq == -1)
+		return 0;
+
+	/* announce we're spinning */
+	spinning_lock(xl);
+
+	/* clear pending */
+	xen_clear_irq_pending(irq);
+
+	/* check again make sure it didn't become free while
+	   we weren't looking  */
+	ret = xen_spin_trylock(lock);
+	if (ret)
+		goto out;
+
+	/* block until irq becomes pending */
+	xen_poll_irq(irq);
+	kstat_this_cpu.irqs[irq]++;
+
+out:
+	unspinning_lock(xl);
+	return ret;
+}
+
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+	int timeout;
+	u8 oldval;
+
+	do {
+		timeout = 1 << 10;
+
+		asm("1: xchgb %1,%0\n"
+		    "   testb %1,%1\n"
+		    "   jz 3f\n"
+		    "2: rep;nop\n"
+		    "   cmpb $0,%0\n"
+		    "   je 1b\n"
+		    "   dec %2\n"
+		    "   jnz 2b\n"
+		    "3:\n"
+		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
+		    : "1" (1)
+		    : "memory");
+
+	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+}
+
+static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		/* XXX should mix up next cpu selection */
+		if (per_cpu(lock_spinners, cpu) == xl) {
+			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
+		}
+	}
+}
+
+static void xen_spin_unlock(struct raw_spinlock *lock)
+{
+	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+	smp_wmb();		/* make sure no writes get moved after unlock */
+	xl->lock = 0;		/* release lock */
+
+	/* make sure unlock happens before kick */
+	barrier();
+
+	if (unlikely(xl->spinners))
+		xen_spin_unlock_slow(xl);
+}
+
+static __cpuinit void xen_init_lock_cpu(int cpu)
+{
+	int irq;
+	const char *name;
+
+	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+				     cpu,
+				     xen_reschedule_interrupt,
+				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				     name,
+				     NULL);
+
+	if (irq >= 0) {
+		disable_irq(irq); /* make sure it's never delivered */
+		per_cpu(lock_kicker_irq, cpu) = irq;
+	}
+
+	printk("cpu %d spinlock event irq %d\n", cpu, irq);
+}
+
+static void __init xen_init_spinlocks(void)
+{
+	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
+	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
+	pv_lock_ops.spin_lock = xen_spin_lock;
+	pv_lock_ops.spin_trylock = xen_spin_trylock;
+	pv_lock_ops.spin_unlock = xen_spin_unlock;
+}
+
 static const struct smp_ops xen_smp_ops __initdata = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -430,5 +600,5 @@ void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
-	paravirt_use_bytelocks();
+	xen_init_spinlocks();
 }
-- 
cgit v1.2.3


From 4bb689eee12ceb6d669a0c9a519037c049a8af38 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 14:33:33 +0200
Subject: x86: paravirt spinlocks, !CONFIG_SMP build fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bba4041bb7f..6aa8aed06d5 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -270,11 +270,13 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 
 void __init paravirt_use_bytelocks(void)
 {
+#ifdef CONFIG_SMP
 	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
 	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
 	pv_lock_ops.spin_lock = __byte_spin_lock;
 	pv_lock_ops.spin_trylock = __byte_spin_trylock;
 	pv_lock_ops.spin_unlock = __byte_spin_unlock;
+#endif
 }
 
 struct pv_info pv_info = {
@@ -461,12 +463,14 @@ struct pv_mmu_ops pv_mmu_ops = {
 };
 
 struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
 	.spin_is_locked = __ticket_spin_is_locked,
 	.spin_is_contended = __ticket_spin_is_contended,
 
 	.spin_lock = __ticket_spin_lock,
 	.spin_trylock = __ticket_spin_trylock,
 	.spin_unlock = __ticket_spin_unlock,
+#endif
 };
 
 EXPORT_SYMBOL_GPL(pv_time_ops);
-- 
cgit v1.2.3


From 9af98578d6af588f52d0dacd64fe42caa405a327 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 14:39:15 +0200
Subject: x86: paravirt spinlocks, modular build fix

fix:

  MODPOST 408 modules
ERROR: "pv_lock_ops" [net/dccp/dccp.ko] undefined!
ERROR: "pv_lock_ops" [fs/jbd2/jbd2.ko] undefined!
ERROR: "pv_lock_ops" [drivers/media/common/saa7146_vv.ko] undefined!

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 6aa8aed06d5..3edfd7af22a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -472,6 +472,7 @@ struct pv_lock_ops pv_lock_ops = {
 	.spin_unlock = __ticket_spin_unlock,
 #endif
 };
+EXPORT_SYMBOL_GPL(pv_lock_ops);
 
 EXPORT_SYMBOL_GPL(pv_time_ops);
 EXPORT_SYMBOL    (pv_cpu_ops);
-- 
cgit v1.2.3


From 34646bca474142e1424e5f6c4a33cb2ba0930ea1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 9 Jul 2008 15:42:09 +0200
Subject: x86, paravirt-spinlocks: fix boot hang

the paravirt-spinlock patches caused a boot hang with this config:

 http://redhat.com/~mingo/misc/config-Wed_Jul__9_14_47_04_CEST_2008.bad

i have bisected it down to:

|  commit e17b58c2e85bc2ad2afc07fb8d898017c2b75ed1
|  Author: Jeremy Fitzhardinge <jeremy@goop.org>
|  Date:   Mon Jul 7 12:07:53 2008 -0700
|
|      xen: implement Xen-specific spinlocks

i.e. applying that patch alone causes the hang. The hang happens in the
ftrace self-test:

  initcall utsname_sysctl_init+0x0/0x19 returned 0 after 0 msecs
  calling  init_sched_switch_trace+0x0/0x4c
  Testing tracer sched_switch: PASSED
  initcall init_sched_switch_trace+0x0/0x4c returned 0 after 167 msecs
  calling  init_function_trace+0x0/0x12
  Testing tracer ftrace:
  [hard hang]

it should have continued like this:

  Testing tracer ftrace: PASSED
  initcall init_function_trace+0x0/0x12 returned 0 after 198 msecs
  calling  init_irqsoff_tracer+0x0/0x14
  Testing tracer irqsoff: PASSED
  initcall init_irqsoff_tracer+0x0/0x14 returned 0 after 3 msecs
  calling  init_mmio_trace+0x0/0x12
  initcall init_mmio_trace+0x0/0x12 returned 0 after 0 msecs

the problem is that such lowlevel primitives as spinlocks should never
be built with -pg (which ftrace does). Marking paravirt.o as non-pg and
marking all spinlock ops as always-inline solve the hang.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5112c84f542..78d52171400 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -7,10 +7,11 @@ extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
 ifdef CONFIG_FTRACE
-# Do not profile debug utilities
+# Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc_64.o = -pg
 CFLAGS_REMOVE_tsc_32.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
+CFLAGS_REMOVE_paravirt.o = -pg
 endif
 
 #
-- 
cgit v1.2.3


From 6718d0d6da2749d3bff522e6057e97e6aa85e4d1 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 01:07:02 -0700
Subject: x86 ptrace: block-step fix

The enable_single_step() logic bails out early if TF is already set.
That skips some of the bookkeeping that keeps things straight.
This makes PTRACE_SINGLEBLOCK break the behavior of a user task
that was already setting TF itself in user mode.

Fix the bookkeeping to notice the old TF setting as it should.

Test case at: http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/step-jump-cont-strict.c?cvsroot=systemtap

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/step.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 92c20fee678..0d2cb363ea7 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -105,6 +105,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 static int enable_single_step(struct task_struct *child)
 {
 	struct pt_regs *regs = task_pt_regs(child);
+	unsigned long oflags;
 
 	/*
 	 * Always set TIF_SINGLESTEP - this guarantees that
@@ -113,11 +114,7 @@ static int enable_single_step(struct task_struct *child)
 	 */
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 
-	/*
-	 * If TF was already set, don't do anything else
-	 */
-	if (regs->flags & X86_EFLAGS_TF)
-		return 0;
+	oflags = regs->flags;
 
 	/* Set TF on the kernel stack.. */
 	regs->flags |= X86_EFLAGS_TF;
@@ -126,9 +123,22 @@ static int enable_single_step(struct task_struct *child)
 	 * ..but if TF is changed by the instruction we will trace,
 	 * don't mark it as being "us" that set it, so that we
 	 * won't clear it by hand later.
+	 *
+	 * Note that if we don't actually execute the popf because
+	 * of a signal arriving right now or suchlike, we will lose
+	 * track of the fact that it really was "us" that set it.
 	 */
-	if (is_setting_trap_flag(child, regs))
+	if (is_setting_trap_flag(child, regs)) {
+		clear_tsk_thread_flag(child, TIF_FORCED_TF);
 		return 0;
+	}
+
+	/*
+	 * If TF was already set, check whether it was us who set it.
+	 * If not, we should never attempt a block step.
+	 */
+	if (oflags & X86_EFLAGS_TF)
+		return test_tsk_thread_flag(child, TIF_FORCED_TF);
 
 	set_tsk_thread_flag(child, TIF_FORCED_TF);
 
-- 
cgit v1.2.3


From 64f097331928b01d704047c1dbc738bb6d2a9bf9 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 01:33:14 -0700
Subject: x86 ptrace: unify TIF_SINGLESTEP

This unifies the treatment of TIF_SINGLESTEP on i386 and x86_64.
The bit is now excluded from _TIF_WORK_MASK on i386 as it has been
on x86_64.  This means the do_notify_resume() path using it is never
used, so TIF_SINGLESTEP is not cleared on returning to user mode.

Both now leave TIF_SINGLESTEP set when returning to user, so that
it's already set on an int $0x80 system call entry.  This removes
the need for testing TF on the system_call path.  Doing it this way
fixes the regression for PTRACE_SINGLESTEP into a sigreturn syscall,
introduced by commit 1e2e99f0e4aa6363e8515ed17011c210c8f1b52a.

The clear_TF_reenable case that sets TIF_SINGLESTEP can only happen
on a non-exception kernel entry, i.e. sysenter/syscall instruction.
That will always get to the syscall exit tracing path.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/entry_32.S  | 4 ----
 arch/x86/kernel/signal_32.c | 6 ------
 arch/x86/kernel/signal_64.c | 6 ------
 3 files changed, 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6bc07f0f120..0ad987d02b7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -383,10 +383,6 @@ syscall_exit:
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
-	testl $X86_EFLAGS_TF,PT_EFLAGS(%esp)	# If tracing set singlestep flag on exit
-	jz no_singlestep
-	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
-no_singlestep:
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d9237363096..295b5f5c938 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-	/* Pending single-step? */
-	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->flags |= X86_EFLAGS_TF;
-		clear_thread_flag(TIF_SINGLESTEP);
-	}
-
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index e53b267662e..bf87684474f 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs)
 void do_notify_resume(struct pt_regs *regs, void *unused,
 		      __u32 thread_info_flags)
 {
-	/* Pending single-step? */
-	if (thread_info_flags & _TIF_SINGLESTEP) {
-		regs->flags |= X86_EFLAGS_TF;
-		clear_thread_flag(TIF_SINGLESTEP);
-	}
-
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
-- 
cgit v1.2.3


From d4d67150165df8bf1cc05e532f6efca96f907cab Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 02:38:07 -0700
Subject: x86 ptrace: unify syscall tracing

This unifies and cleans up the syscall tracing code on i386 and x86_64.

Using a single function for entry and exit tracing on 32-bit made the
do_syscall_trace() into some terrible spaghetti.  The logic is clear and
simple using separate syscall_trace_enter() and syscall_trace_leave()
functions as on 64-bit.

The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks.  It behaves just like 32-bit.

Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/ia32/ia32entry.S  |  17 +++---
 arch/x86/kernel/entry_32.S |  19 +++---
 arch/x86/kernel/entry_64.S |  14 +++--
 arch/x86/kernel/ptrace.c   | 141 +++++++++++++++------------------------------
 4 files changed, 71 insertions(+), 120 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20371d0635e..8796d190525 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -37,6 +37,11 @@
 	movq	%rax,R8(%rsp)
 	.endm
 
+	/*
+	 * Reload arg registers from stack in case ptrace changed them.
+	 * We don't reload %eax because syscall_trace_enter() returned
+	 * the value it wants us to use in the table lookup.
+	 */
 	.macro LOAD_ARGS32 offset
 	movl \offset(%rsp),%r11d
 	movl \offset+8(%rsp),%r10d
@@ -46,7 +51,6 @@
 	movl \offset+48(%rsp),%edx
 	movl \offset+56(%rsp),%esi
 	movl \offset+64(%rsp),%edi
-	movl \offset+72(%rsp),%eax
 	.endm
 	
 	.macro CFI_STARTPROC32 simple
@@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target)
  	.previous	
 	GET_THREAD_INFO(%r10)
 	orl    $TS_COMPAT,TI_status(%r10)
-	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		 TI_flags(%r10)
+	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
-sysenter_do_call:	
 	cmpl	$(IA32_NR_syscalls-1),%eax
 	ja	ia32_badsys
+sysenter_do_call:
 	IA32_ARG_FIXUP 1
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX-ARGOFFSET(%rsp)
@@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target)
 	.previous	
 	GET_THREAD_INFO(%r10)
 	orl   $TS_COMPAT,TI_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%r10)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 cstar_do_call:	
@@ -336,8 +338,7 @@ ENTRY(ia32_syscall)
 	SAVE_ARGS 0,0,1
 	GET_THREAD_INFO(%r10)
 	orl   $TS_COMPAT,TI_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%r10)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
 ia32_do_syscall:	
 	cmpl $(IA32_NR_syscalls-1),%eax
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0ad987d02b7..cadf73f70d3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -332,7 +332,7 @@ sysenter_past_esp:
 	GET_THREAD_INFO(%ebp)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
 	GET_THREAD_INFO(%ebp)
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
@@ -510,12 +510,8 @@ END(work_pending)
 syscall_trace_entry:
 	movl $-ENOSYS,PT_EAX(%esp)
 	movl %esp, %eax
-	xorl %edx,%edx
-	call do_syscall_trace
-	cmpl $0, %eax
-	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
-					# so must skip actual syscall
-	movl PT_ORIG_EAX(%esp), %eax
+	call syscall_trace_enter
+	/* What it returned is what we'll actually use.  */
 	cmpl $(nr_syscalls), %eax
 	jnae syscall_call
 	jmp syscall_exit
@@ -524,14 +520,13 @@ END(syscall_trace_entry)
 	# perform syscall exit tracing
 	ALIGN
 syscall_exit_work:
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+	testb $_TIF_WORK_SYSCALL_EXIT, %cl
 	jz work_pending
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
 					# schedule() instead
 	movl %esp, %eax
-	movl $1, %edx
-	call do_syscall_trace
+	call syscall_trace_leave
 	jmp resume_userspace
 END(syscall_exit_work)
 	CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ae63e584c34..63001c6ecf6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
-		TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
 	jnz tracesys
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
@@ -430,7 +429,12 @@ tracesys:
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp,%rdi
 	call syscall_trace_enter
-	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	/*
+	 * Reload arg registers from stack in case ptrace changed them.
+	 * We don't reload %rax because syscall_trace_enter() returned
+	 * the value it wants us to use in the table lookup.
+	 */
+	LOAD_ARGS ARGOFFSET, 1
 	RESTORE_REST
 	cmpq $__NR_syscall_max,%rax
 	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	SAVE_REST
 	/* Check for syscall exit trace */	
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
+	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
 	call syscall_trace_leave
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
-	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
+	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
 	jmp int_restore_rest
 	
 int_signal:
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 77040b6070e..34e77b16a42 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 {
 	struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
-	int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
-	/*
-	 * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
-	 * interception
-	 */
-	int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
-	int ret = 0;
-
-	/* do the secure computing check first */
-	if (!entryexit)
-		secure_computing(regs->orig_ax);
-
-	if (unlikely(current->audit_context)) {
-		if (entryexit)
-			audit_syscall_exit(AUDITSC_RESULT(regs->ax),
-						regs->ax);
-		/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
-		 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
-		 * not used, entry.S will call us only on syscall exit, not
-		 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
-		 * calling send_sigtrap() on syscall entry.
-		 *
-		 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
-		 * is_singlestep is false, despite his name, so we will still do
-		 * the correct thing.
-		 */
-		else if (is_singlestep)
-			goto out;
-	}
-
-	if (!(current->ptrace & PT_PTRACED))
-		goto out;
-
-	/* If a process stops on the 1st tracepoint with SYSCALL_TRACE
-	 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
-	 * here. We have to check this and return */
-	if (is_sysemu && entryexit)
-		return 0;
-
-	/* Fake a debug trap */
-	if (is_singlestep)
-		send_sigtrap(current, regs, 0);
-
- 	if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
-		goto out;
-
-	/* the 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	/* Note that the debugger could change the result of test_thread_flag!*/
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-	ret = is_sysemu;
-out:
-	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
-				    regs->bx, regs->cx, regs->dx, regs->si);
-	if (ret == 0)
-		return 0;
-
-	regs->orig_ax = -1; /* force skip of syscall restarting */
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
-	return 1;
-}
-
-#else  /* CONFIG_X86_64 */
-
 static void syscall_trace(struct pt_regs *regs)
 {
+	if (!(current->ptrace & PT_PTRACED))
+		return;
 
 #if 0
 	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs)
 	}
 }
 
-asmlinkage void syscall_trace_enter(struct pt_regs *regs)
+#ifdef CONFIG_X86_32
+# define IS_IA32	1
+#elif defined CONFIG_IA32_EMULATION
+# define IS_IA32	test_thread_flag(TIF_IA32)
+#else
+# define IS_IA32	0
+#endif
+
+/*
+ * We must return the syscall number to actually look up in the table.
+ * This can be -1L to skip running any syscall at all.
+ */
+asmregparm long syscall_trace_enter(struct pt_regs *regs)
 {
+	long ret = 0;
+
 	/* do the secure computing check first */
 	secure_computing(regs->orig_ax);
 
-	if (test_thread_flag(TIF_SYSCALL_TRACE)
-	    && (current->ptrace & PT_PTRACED))
+	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+		ret = -1L;
+
+	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
 		syscall_trace(regs);
 
 	if (unlikely(current->audit_context)) {
-		if (test_thread_flag(TIF_IA32)) {
+		if (IS_IA32)
 			audit_syscall_entry(AUDIT_ARCH_I386,
 					    regs->orig_ax,
 					    regs->bx, regs->cx,
 					    regs->dx, regs->si);
-		} else {
+#ifdef CONFIG_X86_64
+		else
 			audit_syscall_entry(AUDIT_ARCH_X86_64,
 					    regs->orig_ax,
 					    regs->di, regs->si,
 					    regs->dx, regs->r10);
-		}
+#endif
 	}
+
+	return ret ?: regs->orig_ax;
 }
 
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+asmregparm void syscall_trace_leave(struct pt_regs *regs)
 {
 	if (unlikely(current->audit_context))
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
-	if ((test_thread_flag(TIF_SYSCALL_TRACE)
-	     || test_thread_flag(TIF_SINGLESTEP))
-	    && (current->ptrace & PT_PTRACED))
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		syscall_trace(regs);
-}
 
-#endif	/* CONFIG_X86_32 */
+	/*
+	 * If TIF_SYSCALL_EMU is set, we only get here because of
+	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+	 * We already reported this syscall instruction in
+	 * syscall_trace_enter(), so don't do any more now.
+	 */
+	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
+		return;
+
+	/*
+	 * If we are single-stepping, synthesize a trap to follow the
+	 * system call instruction.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP) &&
+	    (current->ptrace & PT_PTRACED))
+		send_sigtrap(current, regs, 0);
+}
-- 
cgit v1.2.3


From 380fdd7585a4c2f41b48925eba85c0654b7b858b Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 9 Jul 2008 02:39:29 -0700
Subject: x86 ptrace: user-sets-TF nits

This closes some arcane holes in single-step handling that can arise
only when user programs set TF directly (via popf or sigreturn) and
then use vDSO (syscall/sysenter) system call entry.  In those entry
paths, the clear_TF_reenable case hits and we must check TIF_SINGLESTEP
to be sure our bookkeeping stays correct wrt the user's view of TF.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/ptrace.c | 10 ++++++++++
 arch/x86/kernel/step.c   | 13 +++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 34e77b16a42..e37dccce85d 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1416,6 +1416,16 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
+	/*
+	 * If we stepped into a sysenter/syscall insn, it trapped in
+	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+	 * If user-mode had set TF itself, then it's still clear from
+	 * do_debug() and we need to set it again to restore the user
+	 * state.  If we entered on the slow path, TF was already set.
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		regs->flags |= X86_EFLAGS_TF;
+
 	/* do the secure computing check first */
 	secure_computing(regs->orig_ax);
 
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 0d2cb363ea7..e8b9863ef8c 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -107,6 +107,19 @@ static int enable_single_step(struct task_struct *child)
 	struct pt_regs *regs = task_pt_regs(child);
 	unsigned long oflags;
 
+	/*
+	 * If we stepped into a sysenter/syscall insn, it trapped in
+	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
+	 * If user-mode had set TF itself, then it's still clear from
+	 * do_debug() and we need to set it again to restore the user
+	 * state so we don't wrongly set TIF_FORCED_TF below.
+	 * If enable_single_step() was used last and that is what
+	 * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are
+	 * already set and our bookkeeping is fine.
+	 */
+	if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP)))
+		regs->flags |= X86_EFLAGS_TF;
+
 	/*
 	 * Always set TIF_SINGLESTEP - this guarantees that
 	 * we single-step system calls etc..  This will also
-- 
cgit v1.2.3


From fab3b58d3b242b5903f78d60d86803a8aecdf6de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 17 Jul 2008 13:50:15 +0200
Subject: x86 reboot quirks: add Dell Precision WorkStation T5400

as reported in:

  "reboot=bios is mandatory on Dell T5400 server."
  http://bugzilla.kernel.org/show_bug.cgi?id=11108

add a DMI reboot quirk.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f8a62160e15..9dcf39c0297 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
 		},
 	},
+	{	/* Handle problems with rebooting on Dell T5400's */
+		.callback = set_bios_reboot,
+		.ident = "Dell Precision T5400",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
+		},
+	},
 	{	/* Handle problems with rebooting on HP laptops */
 		.callback = set_bios_reboot,
 		.ident = "HP Compaq Laptop",
-- 
cgit v1.2.3


From 93a0886e2368eafb9df5e2021fb185195cee88b2 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 15 Jul 2008 13:43:42 -0700
Subject: x86, xen, power: fix up config dependencies on PM

Xen save/restore needs bits of code enabled by PM_SLEEP, and PM_SLEEP
depends on PM.  So make XEN_SAVE_RESTORE depend on PM and PM_SLEEP
depend on XEN_SAVE_RESTORE.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/Kconfig | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 20b49729bed..3815e425f47 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -23,3 +23,8 @@ config XEN_MAX_DOMAIN_MEMORY
          according to the maximum possible memory size of a Xen
          domain.  This array uses 1 page per gigabyte, so there's no
          need to be too stingy here.
+
+config XEN_SAVE_RESTORE
+       bool
+       depends on PM
+       default y
\ No newline at end of file
-- 
cgit v1.2.3


From 60192db82952ad56ef7bbc4a318e2041ca65ba7d Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Thu, 17 Jul 2008 11:11:17 -0700
Subject: [IA64] improper printk format in acpi-cpufreq

When dprintk is enabled the following warnings are generated:
arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_set_pstate':
arch/ia64/kernel/cpufreq/acpi-cpufreq.c:54: warning: format '%x' expects type 'unsigned int', but argument 3 has type 's64'
arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_get_pstate':
arch/ia64/kernel/cpufreq/acpi-cpufreq.c:76: warning: format '%x' expects type 'unsigned int', but argument 2 has type 's64'

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
index b8498ea6206..7b435451b3d 100644
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -51,7 +51,7 @@ processor_set_pstate (
 	retval = ia64_pal_set_pstate((u64)value);
 
 	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%x\n",
+		dprintk("Failed to set freq to 0x%x, with error 0x%lx\n",
 		        value, retval);
 		return -ENODEV;
 	}
@@ -74,7 +74,7 @@ processor_get_pstate (
 
 	if (retval)
 		dprintk("Failed to get current freq with "
-		        "error 0x%x, idx 0x%x\n", retval, *value);
+			"error 0x%lx, idx 0x%x\n", retval, *value);
 
 	return (int)retval;
 }
-- 
cgit v1.2.3


From 740a8de0796dd12890b3c8ddcfabfcb528b78d40 Mon Sep 17 00:00:00 2001
From: "Akiyama, Nobuyuki" <akiyama.nobuyuk@jp.fujitsu.com>
Date: Thu, 17 Jul 2008 11:22:01 -0700
Subject:     [IA64] adding parameter check to module_free()

    module_free() dereferences its first parameter without checking it for NULL.
    However, kernel/kprobes calls it as shown below, where the first parameter is always NULL.
This happens when many probe points (>1024) are set by kprobes.
I encountered this while using SystemTap, which can easily set many probes.

static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
...
    if (kip->nused == 0) {
	    hlist_del(&kip->hlist);
	    if (hlist_empty(&kprobe_insn_pages)) {
		...
	    } else {
		    module_free(NULL, kip->insns); //<<< 1st param always NULL
		    kfree(kip);
	    }
	    return 1;
    }
    return 0;
}

Signed-off-by: Akiyama, Nobuyuki <akiyama.nobuyuk@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index e83e2ea3b3e..29aad349e0c 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -321,7 +321,8 @@ module_alloc (unsigned long size)
 void
 module_free (struct module *mod, void *module_region)
 {
-	if (mod->arch.init_unw_table && module_region == mod->module_init) {
+	if (mod && mod->arch.init_unw_table &&
+	    module_region == mod->module_init) {
 		unw_remove_unwind_table(mod->arch.init_unw_table);
 		mod->arch.init_unw_table = NULL;
 	}
-- 
cgit v1.2.3


From efc7508c9e29944fb3d9edf166d3d584557c33d1 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 16 Jul 2008 12:47:08 -0600
Subject: [IA64] Avoid overflowing ia64_cpu_to_sapicid in acpi_map_lsapic()

acpi_map_lsapic tries to stuff a long into ia64_cpu_to_sapicid[],
which can only hold ints, so let's fix that.

We need to update the signature of acpi_map_cpu2node() too.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/acpi.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 43687cc60df..5d1eb7ee2bf 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -774,7 +774,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
 	int pxm_id;
@@ -854,8 +854,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	union acpi_object *obj;
 	struct acpi_madt_local_sapic *lsapic;
 	cpumask_t tmp_map;
-	long physid;
-	int cpu;
+	int cpu, physid;
 
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
-- 
cgit v1.2.3


From 4fdf08b5bf8d449cc9897395895157c6ff8ddc41 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 17 Jul 2008 11:29:24 -0700
Subject: x86: unify and correct the GDT_ENTRY() macro

Merge the GDT_ENTRY() macro between arch/x86/boot/pm.c and
arch/x86/kernel/acpi/sleep.c and put the new one in
<asm-x86/segment.h>.

While we're at it, correct the bitmasks for the limit and flags.  The
new version relies on using ULL constants in order to cause type
promotion rather than explicit casts; this avoids having to include
<linux/types.h> in <asm-x86/segments.h>.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/pm.c           |  6 ------
 arch/x86/kernel/acpi/sleep.c | 10 +---------
 2 files changed, 1 insertion(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 328956fdb59..85a1cd8a8ff 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -98,12 +98,6 @@ static void reset_coprocessor(void)
 /*
  * Set up the GDT
  */
-#define GDT_ENTRY(flags, base, limit)		\
-	(((u64)(base & 0xff000000) << 32) |	\
-	 ((u64)flags << 40) |			\
-	 ((u64)(limit & 0x00ff0000) << 32) |	\
-	 ((u64)(base & 0x00ffffff) << 16) |	\
-	 ((u64)(limit & 0x0000ffff)))
 
 struct gdt_ptr {
 	u16 len;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 868de3d5c39..a3ddad18aaa 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -9,6 +9,7 @@
 #include <linux/bootmem.h>
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
+#include <asm/segment.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -23,15 +24,6 @@ static unsigned long acpi_realmode;
 static char temp_stack[10240];
 #endif
 
-/* XXX: this macro should move to asm-x86/segment.h and be shared with the
-   boot code... */
-#define GDT_ENTRY(flags, base, limit)		\
-	(((u64)(base & 0xff000000) << 32) |	\
-	 ((u64)flags << 40) |			\
-	 ((u64)(limit & 0x00ff0000) << 32) |	\
-	 ((u64)(base & 0x00ffffff) << 16) |	\
-	 ((u64)(limit & 0x0000ffff)))
-
 /**
  * acpi_save_state_mem - save kernel state
  *
-- 
cgit v1.2.3


From fb86611f8f3251865784d5938a485a0238ec1427 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Thu, 26 Jun 2008 14:53:11 +0200
Subject: [IA64] Remove experimental status of kdump

This patch removes the experimental status of kdump on IA64. kdump has been
available on IA64 for more than a year and has proven to be stable.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 18bcc10903b..451f2ffb137 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -540,8 +540,8 @@ config KEXEC
 	  strongly in flux, so no good recommendation can be made.
 
 config CRASH_DUMP
-	  bool "kernel crash dumps (EXPERIMENTAL)"
-	  depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+	  bool "kernel crash dumps"
+	  depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
 	  help
 	    Generate crash dump after being started by kexec.
 
-- 
cgit v1.2.3


From 64d206d896ff70b828138577d5ff39deda5f1c4d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Jul 2008 00:26:59 +0200
Subject: x86: rename CONFIG_NONPROMISC_DEVMEM to CONFIG_PROMISC_DEVMEM

Linus observed:

> The real bug is that we shouldn't have "double negatives", and
> certainly not negative config options. Making that "promiscuous
> /dev/mem" option a negated thing as a config option was bad.

right ... let's rename this option. There should never be a negation
in config options.

[ that reminds me of CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER, but that
  is for another commit ;-) ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug | 7 ++++---
 arch/x86/mm/pat.c      | 6 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ae36bfa814e..f0cf5d99079 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,10 +5,11 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
-config NONPROMISC_DEVMEM
-	bool "Filter access to /dev/mem"
+config PROMISC_DEVMEM
+	bool "Allow unlimited access to /dev/mem"
+	default y
 	help
-	  If this option is left off, you allow userspace access to all
+	  If this option is left on, you allow userspace (root) access to all
 	  of memory, including kernel and userspace memory. Accidental
 	  access to this is obviously disastrous, but specific access can
 	  be used by people debugging the kernel.
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d4585077977..c34dc483839 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -373,8 +373,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 	return vma_prot;
 }
 
-#ifdef CONFIG_NONPROMISC_DEVMEM
-/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
+#ifndef CONFIG_PROMISC_DEVMEM
+/* This check is done in drivers/char/mem.c in case of !PROMISC_DEVMEM*/
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -398,7 +398,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	}
 	return 1;
 }
-#endif /* CONFIG_NONPROMISC_DEVMEM */
+#endif /* CONFIG_PROMISC_DEVMEM */
 
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
-- 
cgit v1.2.3


From 6879827f4e08da219c99b91e4e1d793a924103e3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Jul 2008 01:21:53 +0200
Subject: x86: remove arch/x86/kernel/smpcommon_32.c

Yinghai Lu noticed that arch/x86/kernel/smpcommon_32.c got
renamed to arch/x86/kernel/smpcommon.c but the old almost-empty
file stayed around. Zap it.

Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpcommon_32.c | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 arch/x86/kernel/smpcommon_32.c

(limited to 'arch')

diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
deleted file mode 100644
index 8b137891791..00000000000
--- a/arch/x86/kernel/smpcommon_32.c
+++ /dev/null
@@ -1 +0,0 @@
-
-- 
cgit v1.2.3


From c61c65cdcd1021cfbd7be8685ff1cf4f86c68c44 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 5 Jun 2008 11:40:58 -0700
Subject: sparc/kernel/: possible cleanups

This patch contains the following possible cleanups:
- make the following needlessly global code static:
  - apc.c: apc_swift_idle()
  - ebus.c: ebus_blacklist_irq()
  - ebus.c: fill_ebus_child()
  - ebus.c: fill_ebus_device()
  - entry.S: syscall_is_too_hard
  - etrap.S: tsetup_sun4c_stackchk
  - head.S: cputyp
  - head.S: prom_vector_p
  - idprom.c: Sun_Machines[]
  - ioport.c: _sparc_find_resource()
  - ioport.c: create_proc_read_entry()
  - irq.c: struct sparc_irq[]
  - rtrap.S: sun4c_rett_stackchk
  - setup.c: prom_sync_me()
  - setup.c: boot_flags
  - sun4c_irq.c: sun4c_sbint_to_irq()
  - sun4d_irq.c: sbus_tid[]
  - sun4d_irq.c: struct sbus_actions
  - sun4d_irq.c: sun4d_sbint_to_irq()
  - sun4m_irq.c: sun4m_sbint_to_irq()
  - sun4m_irq.c: sun4m_get_irqmask()
  - sun4m_irq.c: sun4m_timers
  - sun4m_smp.c: smp4m_cross_call()
  - sun4m_smp.c: smp4m_blackbox_id()
  - sun4m_smp.c: smp4m_blackbox_current()
  - time.c: sp_clock_typ
  - time.c: sbus_time_init()
  - traps.c: instruction_dump()
  - wof.S: spwin_sun4c_stackchk
  - wuf.S: sun4c_fwin_stackchk
- #if 0 the following unused code:
  - process.c: sparc_backtrace_lock
  - process.c: __show_backtrace()
  - process.c: show_backtrace()
  - process.c: smp_show_backtrace_all_cpus()
- remove the following unused code:
  - entry.S: __handle_exception
  - smp.c: smp_num_cpus
  - smp.c: smp_activated
  - smp.c: __cpu_number_map[]
  - smp.c: __cpu_logical_map[]
  - smp.c: bitops_spinlock
  - traps.c: trap_curbuf
  - traps.c: trapbuf[]
  - traps.c: linux_smp_still_initting
  - traps.c: thiscpus_tbr
  - traps.c: thiscpus_mid

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/apc.c       |  2 +-
 arch/sparc/kernel/ebus.c      |  9 +++++----
 arch/sparc/kernel/entry.S     |  4 +---
 arch/sparc/kernel/etrap.S     |  1 -
 arch/sparc/kernel/head.S      |  2 --
 arch/sparc/kernel/idprom.c    |  2 +-
 arch/sparc/kernel/ioport.c    | 13 +++++++------
 arch/sparc/kernel/irq.c       |  2 +-
 arch/sparc/kernel/process.c   |  3 ++-
 arch/sparc/kernel/rtrap.S     |  2 --
 arch/sparc/kernel/setup.c     |  4 ++--
 arch/sparc/kernel/smp.c       |  7 -------
 arch/sparc/kernel/sun4c_irq.c |  3 ++-
 arch/sparc/kernel/sun4d_irq.c |  7 ++++---
 arch/sparc/kernel/sun4m_irq.c |  7 ++++---
 arch/sparc/kernel/sun4m_smp.c |  9 +++++----
 arch/sparc/kernel/time.c      |  4 ++--
 arch/sparc/kernel/traps.c     |  9 +--------
 arch/sparc/kernel/wof.S       |  1 -
 arch/sparc/kernel/wuf.S       |  1 -
 20 files changed, 38 insertions(+), 54 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index 6707422c984..54f7ccd7455 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -56,7 +56,7 @@ __setup("apc=", apc_setup);
  * CPU idle callback function
  * See .../arch/sparc/kernel/process.c
  */
-void apc_swift_idle(void)
+static void apc_swift_idle(void)
 {
 #ifdef APC_DEBUG_LED
 	set_auxio(0x00, AUXIO_LED); 
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index 92c6fc07e59..97294232259 100644
--- a/arch/sparc/kernel/ebus.c
+++ b/arch/sparc/kernel/ebus.c
@@ -69,7 +69,7 @@ static inline unsigned long ebus_alloc(size_t size)
 
 /*
  */
-int __init ebus_blacklist_irq(const char *name)
+static int __init ebus_blacklist_irq(const char *name)
 {
 	struct ebus_device_irq *dp;
 
@@ -83,8 +83,8 @@ int __init ebus_blacklist_irq(const char *name)
 	return 0;
 }
 
-void __init fill_ebus_child(struct device_node *dp,
-			    struct linux_ebus_child *dev)
+static void __init fill_ebus_child(struct device_node *dp,
+				   struct linux_ebus_child *dev)
 {
 	const int *regs;
 	const int *irqs;
@@ -144,7 +144,8 @@ void __init fill_ebus_child(struct device_node *dp,
 	}
 }
 
-void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *dev)
+static void __init fill_ebus_device(struct device_node *dp,
+				    struct linux_ebus_device *dev)
 {
 	const struct linux_prom_registers *regs;
 	struct linux_ebus_child *child;
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 4bcfe54f878..55d3be1b5d8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -1317,7 +1317,6 @@ linux_sparc_syscall:
 	bne	linux_fast_syscall
 	 /* Just do first insn from SAVE_ALL in the delay slot */
 
-	.globl	syscall_is_too_hard
 syscall_is_too_hard:
 	SAVE_ALL_HEAD
 	 rd	%wim, %l3
@@ -1544,8 +1543,7 @@ kgdb_trap_low:
 #endif
 
 	.align	4
-	.globl	__handle_exception, flush_patch_exception
-__handle_exception:
+	.globl	flush_patch_exception
 flush_patch_exception:
 	FLUSH_ALL_KERNEL_WINDOWS;
 	ldd	[%o0], %o6
diff --git a/arch/sparc/kernel/etrap.S b/arch/sparc/kernel/etrap.S
index f37d961d67a..e806fcdc46d 100644
--- a/arch/sparc/kernel/etrap.S
+++ b/arch/sparc/kernel/etrap.S
@@ -228,7 +228,6 @@ tsetup_mmu_patchme:
 	 */
 #define glob_tmp     g1
 
-	.globl	tsetup_sun4c_stackchk
 tsetup_sun4c_stackchk:
 	/* Done by caller: andcc %sp, 0x7, %g0 */
 	bne	trap_setup_user_stack_is_bolixed
diff --git a/arch/sparc/kernel/head.S b/arch/sparc/kernel/head.S
index 3bfd6085a91..50d9a16af79 100644
--- a/arch/sparc/kernel/head.S
+++ b/arch/sparc/kernel/head.S
@@ -32,7 +32,6 @@
  */
 
 	.align 4
-        .globl  cputyp
 cputyp:
         .word   1
 
@@ -1280,7 +1279,6 @@ halt_me:
  * gets initialized in c-code so all routines can use it.
  */
 
-	.globl	prom_vector_p
 prom_vector_p:
 		.word 0
 
diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c
index 7220562cdb3..fc511f3c4c1 100644
--- a/arch/sparc/kernel/idprom.c
+++ b/arch/sparc/kernel/idprom.c
@@ -24,7 +24,7 @@ static struct idprom idprom_buffer;
  * of the Sparc CPU and have a meaningful IDPROM machtype value that we
  * know about.  See asm-sparc/machines.h for empirical constants.
  */
-struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = {
+static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = {
 /* First, Sun4's */
 { "Sun 4/100 Series", (SM_SUN4 | SM_4_110) },
 { "Sun 4/200 Series", (SM_SUN4 | SM_4_260) },
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 7b17522f59b..487960919f1 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -49,13 +49,16 @@
 
 #define mmu_inval_dma_area(p, l)	/* Anton pulled it out for 2.4.0-xx */
 
-struct resource *_sparc_find_resource(struct resource *r, unsigned long);
+static struct resource *_sparc_find_resource(struct resource *r,
+					     unsigned long);
 
 static void __iomem *_sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz);
 static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys,
     unsigned long size, char *name);
 static void _sparc_free_io(struct resource *res);
 
+static void register_proc_sparc_ioport(void);
+
 /* This points to the next to use virtual memory for DVMA mappings */
 static struct resource _sparc_dvma = {
 	.name = "sparc_dvma", .start = DVMA_VADDR, .end = DVMA_END - 1
@@ -539,8 +542,6 @@ void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp)
 
 int __init sbus_arch_preinit(void)
 {
-	extern void register_proc_sparc_ioport(void);
-
 	register_proc_sparc_ioport();
 
 #ifdef CONFIG_SUN4
@@ -853,8 +854,8 @@ _sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof,
  * XXX Too slow. Can have 8192 DVMA pages on sun4m in the worst case.
  * This probably warrants some sort of hashing.
  */
-struct resource *
-_sparc_find_resource(struct resource *root, unsigned long hit)
+static struct resource *_sparc_find_resource(struct resource *root,
+					     unsigned long hit)
 {
         struct resource *tmp;
 
@@ -865,7 +866,7 @@ _sparc_find_resource(struct resource *root, unsigned long hit)
 	return NULL;
 }
 
-void register_proc_sparc_ioport(void)
+static void register_proc_sparc_ioport(void)
 {
 #ifdef CONFIG_PROC_FS
 	create_proc_read_entry("io_map",0,NULL,_sparc_io_get_info,&sparc_iomap);
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 087390b092b..93e1d1c6529 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -154,7 +154,7 @@ void (*sparc_init_timers)(irq_handler_t ) =
 struct irqaction static_irqaction[MAX_STATIC_ALLOC];
 int static_irq_count;
 
-struct {
+static struct {
 	struct irqaction *action;
 	int flags;
 } sparc_irq[NR_IRQS];
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index da48d248cc1..e18a5da025d 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -177,6 +177,8 @@ void machine_power_off(void)
 	machine_halt();
 }
 
+#if 0
+
 static DEFINE_SPINLOCK(sparc_backtrace_lock);
 
 void __show_backtrace(unsigned long fp)
@@ -228,7 +230,6 @@ void smp_show_backtrace_all_cpus(void)
 }
 #endif
 
-#if 0
 void show_stackframe(struct sparc_stackf *sf)
 {
 	unsigned long size;
diff --git a/arch/sparc/kernel/rtrap.S b/arch/sparc/kernel/rtrap.S
index ce30082ab26..891f460b7b9 100644
--- a/arch/sparc/kernel/rtrap.S
+++ b/arch/sparc/kernel/rtrap.S
@@ -224,8 +224,6 @@ ret_trap_user_stack_is_bolixed:
 	b	signal_p
 	 ld	[%curptr + TI_FLAGS], %g2
 
-
-	.globl	sun4c_rett_stackchk
 sun4c_rett_stackchk:
 	be	1f
 	 and	%fp, 0xfff, %g1		! delay slot
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c
index a0ea0bc6f47..9e451b21202 100644
--- a/arch/sparc/kernel/setup.c
+++ b/arch/sparc/kernel/setup.c
@@ -67,7 +67,7 @@ struct screen_info screen_info = {
 extern unsigned long trapbase;
 
 /* Pretty sick eh? */
-void prom_sync_me(void)
+static void prom_sync_me(void)
 {
 	unsigned long prom_tbr, flags;
 
@@ -97,7 +97,7 @@ void prom_sync_me(void)
 	return;
 }
 
-unsigned int boot_flags __initdata = 0;
+static unsigned int boot_flags __initdata = 0;
 #define BOOTME_DEBUG  0x1
 
 /* Exported for mm/init.c:paging_init. */
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index 6724ab90f82..1619ec15c09 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -35,13 +35,9 @@
 
 #include "irq.h"
 
-int smp_num_cpus = 1;
 volatile unsigned long cpu_callin_map[NR_CPUS] __initdata = {0,};
 unsigned char boot_cpu_id = 0;
 unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
-int smp_activated = 0;
-volatile int __cpu_number_map[NR_CPUS];
-volatile int __cpu_logical_map[NR_CPUS];
 
 cpumask_t cpu_online_map = CPU_MASK_NONE;
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -55,9 +51,6 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
  * instruction which is much better...
  */
 
-/* Used to make bitops atomic */
-unsigned char bitops_spinlock = 0;
-
 void __cpuinit smp_store_cpu_info(int id)
 {
 	int cpu_node;
diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c
index c6ac9fc5256..340fc395fe2 100644
--- a/arch/sparc/kernel/sun4c_irq.c
+++ b/arch/sparc/kernel/sun4c_irq.c
@@ -68,7 +68,8 @@ unsigned char *interrupt_enable = NULL;
 
 static int sun4c_pil_map[] = { 0, 1, 2, 3, 5, 7, 8, 9 };
 
-unsigned int sun4c_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint)
+static unsigned int sun4c_sbint_to_irq(struct sbus_dev *sdev,
+				       unsigned int sbint)
 {
 	if (sbint >= sizeof(sun4c_pil_map)) {
 		printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint);
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index 8ac5661caff..1290b5998f8 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -52,13 +52,13 @@ extern struct irqaction static_irqaction[MAX_STATIC_ALLOC];
 extern int static_irq_count;
 unsigned char cpu_leds[32];
 #ifdef CONFIG_SMP
-unsigned char sbus_tid[32];
+static unsigned char sbus_tid[32];
 #endif
 
 static struct irqaction *irq_action[NR_IRQS];
 extern spinlock_t irq_action_lock;
 
-struct sbus_action {
+static struct sbus_action {
 	struct irqaction *action;
 	/* For SMP this needs to be extended */
 } *sbus_actions;
@@ -267,7 +267,8 @@ unsigned int sun4d_build_irq(struct sbus_dev *sdev, int irq)
 		return irq;
 }
 
-unsigned int sun4d_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint)
+static unsigned int sun4d_sbint_to_irq(struct sbus_dev *sdev,
+				       unsigned int sbint)
 {
 	if (sbint >= sizeof(sbus_to_pil)) {
 		printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint);
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index b92d6d2d5b0..94e02de960e 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -154,7 +154,8 @@ static unsigned long irq_mask[] = {
 
 static int sun4m_pil_map[] = { 0, 2, 3, 5, 7, 9, 11, 13 };
 
-unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) 
+static unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev,
+				       unsigned int sbint)
 {
 	if (sbint >= sizeof(sun4m_pil_map)) {
 		printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint);
@@ -163,7 +164,7 @@ unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint)
 	return sun4m_pil_map[sbint] | 0x30;
 }
 
-inline unsigned long sun4m_get_irqmask(unsigned int irq)
+static unsigned long sun4m_get_irqmask(unsigned int irq)
 {
 	unsigned long mask;
     
@@ -281,7 +282,7 @@ static void sun4m_set_udt(int cpu)
 #define TIMER_IRQ  	(OBIO_INTR | 10)
 #define PROFILE_IRQ	(OBIO_INTR | 14)
 
-struct sun4m_timer_regs *sun4m_timers;
+static struct sun4m_timer_regs *sun4m_timers;
 unsigned int lvl14_resolution = (((1000000/HZ) + 1) << 10);
 
 static void sun4m_clear_clock_irq(void)
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index ffb875aacb7..406ac1abc83 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -244,8 +244,9 @@ static struct smp_funcall {
 static DEFINE_SPINLOCK(cross_call_lock);
 
 /* Cross calls must be serialized, at least currently. */
-void smp4m_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2,
-		    unsigned long arg3, unsigned long arg4, unsigned long arg5)
+static void smp4m_cross_call(smpfunc_t func, unsigned long arg1,
+			     unsigned long arg2, unsigned long arg3,
+			     unsigned long arg4, unsigned long arg5)
 {
 		register int ncpus = SUN4M_NCPUS;
 		unsigned long flags;
@@ -344,7 +345,7 @@ static void __init smp_setup_percpu_timer(void)
 		enable_pil_irq(14);
 }
 
-void __init smp4m_blackbox_id(unsigned *addr)
+static void __init smp4m_blackbox_id(unsigned *addr)
 {
 	int rd = *addr & 0x3e000000;
 	int rs1 = rd >> 11;
@@ -354,7 +355,7 @@ void __init smp4m_blackbox_id(unsigned *addr)
 	addr[2] = 0x80082003 | rd | rs1;	/* and reg, 3, reg */
 }
 
-void __init smp4m_blackbox_current(unsigned *addr)
+static void __init smp4m_blackbox_current(unsigned *addr)
 {
 	int rd = *addr & 0x3e000000;
 	int rs1 = rd >> 11;
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c
index 53caacbb398..ab3dd0b257d 100644
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -46,7 +46,7 @@
 #include "irq.h"
 
 DEFINE_SPINLOCK(rtc_lock);
-enum sparc_clock_type sp_clock_typ;
+static enum sparc_clock_type sp_clock_typ;
 DEFINE_SPINLOCK(mostek_lock);
 void __iomem *mstk48t02_regs = NULL;
 static struct mostek48t08 __iomem *mstk48t08_regs = NULL;
@@ -366,7 +366,7 @@ static int __init clock_init(void)
 fs_initcall(clock_init);
 #endif /* !CONFIG_SUN4 */
 
-void __init sbus_time_init(void)
+static void __init sbus_time_init(void)
 {
 
 	BTFIXUPSET_CALL(bus_do_settimeofday, sbus_do_settimeofday, BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c
index 978e9d85949..ac8ee6ab133 100644
--- a/arch/sparc/kernel/traps.c
+++ b/arch/sparc/kernel/traps.c
@@ -33,9 +33,6 @@ struct trap_trace_entry {
 	unsigned long type;
 };
 
-int trap_curbuf = 0;
-struct trap_trace_entry trapbuf[1024];
-
 void syscall_trace_entry(struct pt_regs *regs)
 {
 	printk("%s[%d]: ", current->comm, task_pid_nr(current));
@@ -72,7 +69,7 @@ void sun4d_nmi(struct pt_regs *regs)
 	prom_halt();
 }
 
-void instruction_dump (unsigned long *pc)
+static void instruction_dump(unsigned long *pc)
 {
 	int i;
 	
@@ -479,10 +476,6 @@ void do_BUG(const char *file, int line)
 
 extern void sparc_cpu_startup(void);
 
-int linux_smp_still_initting;
-unsigned int thiscpus_tbr;
-int thiscpus_mid;
-
 void trap_init(void)
 {
 	extern void thread_info_offsets_are_bolixed_pete(void);
diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S
index 4bce38dfe3c..3bbcd8dc9ab 100644
--- a/arch/sparc/kernel/wof.S
+++ b/arch/sparc/kernel/wof.S
@@ -306,7 +306,6 @@ spwin_bad_ustack_from_kernel:
  * As noted above %curptr cannot be touched by this routine at all.
  */
 
-	.globl	spwin_sun4c_stackchk
 spwin_sun4c_stackchk:
 	/* LOCATION: Window to be saved on the stack */
 
diff --git a/arch/sparc/kernel/wuf.S b/arch/sparc/kernel/wuf.S
index 82e5145b0f7..779ff750603 100644
--- a/arch/sparc/kernel/wuf.S
+++ b/arch/sparc/kernel/wuf.S
@@ -243,7 +243,6 @@ fwin_user_finish_up:
 	 */
 
 	.align	4
-	.globl	sun4c_fwin_stackchk
 sun4c_fwin_stackchk:
 	/* LOCATION: Window 'W' */
 
-- 
cgit v1.2.3


From 50215d6511265d46ba14038640b16c5dd7731ff4 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 5 Jun 2008 11:41:51 -0700
Subject: sparc/mm/: possible cleanups

This patch contains the following possible cleanups:
- make the following needlessly global code static:
  - fault.c: force_user_fault()
  - init.c: calc_max_low_pfn()
  - init.c: pgt_cache_water[]
  - init.c: map_high_region()
  - srmmu.c: hwbug_bitmask
  - srmmu.c: srmmu_swapper_pg_dir
  - srmmu.c: srmmu_context_table
  - srmmu.c: is_hypersparc
  - srmmu.c: srmmu_cache_pagetables
  - srmmu.c: srmmu_nocache_size
  - srmmu.c: srmmu_nocache_end
  - srmmu.c: srmmu_get_nocache()
  - srmmu.c: srmmu_free_nocache()
  - srmmu.c: srmmu_early_allocate_ptable_skeleton()
  - srmmu.c: srmmu_nocache_calcsize()
  - srmmu.c: srmmu_nocache_init()
  - srmmu.c: srmmu_alloc_thread_info()
  - srmmu.c: early_pgtable_allocfail()
  - srmmu.c: srmmu_early_allocate_ptable_skeleton()
  - srmmu.c: srmmu_allocate_ptable_skeleton()
  - srmmu.c: srmmu_inherit_prom_mappings()
  - tsunami.S: tsunami_copy_1page
- remove the following unused code:
  - init.c: struct sparc_aliases

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/fault.c   |  2 +-
 arch/sparc/mm/init.c    |  8 +++-----
 arch/sparc/mm/srmmu.c   | 38 +++++++++++++++++++++-----------------
 arch/sparc/mm/tsunami.S |  1 -
 4 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c
index 0a3cd8f6cfe..3604c2e8670 100644
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -451,7 +451,7 @@ asmlinkage void do_sun4c_fault(struct pt_regs *regs, int text_fault, int write,
 }
 
 /* This always deals with user addresses. */
-inline void force_user_fault(unsigned long address, int write)
+static void force_user_fault(unsigned long address, int write)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk = current;
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index 7794ecb896e..8f94a2d62f1 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -128,7 +128,7 @@ unsigned long calc_highpages(void)
 	return nr;
 }
 
-unsigned long calc_max_low_pfn(void)
+static unsigned long calc_max_low_pfn(void)
 {
 	int i;
 	unsigned long tmp = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
@@ -292,7 +292,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
  *
  * We simply copy the 2.4 implementation for now.
  */
-int pgt_cache_water[2] = { 25, 50 };
+static int pgt_cache_water[2] = { 25, 50 };
 
 void check_pgt_cache(void)
 {
@@ -356,8 +356,6 @@ void __init paging_init(void)
 	device_scan();
 }
 
-struct cache_palias *sparc_aliases;
-
 static void __init taint_real_pages(void)
 {
 	int i;
@@ -375,7 +373,7 @@ static void __init taint_real_pages(void)
 	}
 }
 
-void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
+static void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long tmp;
 
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 23d3291a3e8..c624e04ff03 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -50,7 +50,7 @@
 #include <asm/btfixup.h>
 
 enum mbus_module srmmu_modtype;
-unsigned int hwbug_bitmask;
+static unsigned int hwbug_bitmask;
 int vac_cache_size;
 int vac_line_size;
 
@@ -60,7 +60,7 @@ extern unsigned long last_valid_pfn;
 
 extern unsigned long page_kernel;
 
-pgd_t *srmmu_swapper_pg_dir;
+static pgd_t *srmmu_swapper_pg_dir;
 
 #ifdef CONFIG_SMP
 #define FLUSH_BEGIN(mm)
@@ -83,12 +83,12 @@ BTFIXUPDEF_CALL(void, local_flush_page_for_dma, unsigned long)
 char *srmmu_name;
 
 ctxd_t *srmmu_ctx_table_phys;
-ctxd_t *srmmu_context_table;
+static ctxd_t *srmmu_context_table;
 
 int viking_mxcc_present;
 static DEFINE_SPINLOCK(srmmu_context_spinlock);
 
-int is_hypersparc;
+static int is_hypersparc;
 
 /*
  * In general all page table modifications should use the V8 atomic
@@ -112,11 +112,11 @@ static inline int srmmu_device_memory(unsigned long x)
 	return ((x & 0xF0000000) != 0);
 }
 
-int srmmu_cache_pagetables;
+static int srmmu_cache_pagetables;
 
 /* these will be initialized in srmmu_nocache_calcsize() */
-unsigned long srmmu_nocache_size;
-unsigned long srmmu_nocache_end;
+static unsigned long srmmu_nocache_size;
+static unsigned long srmmu_nocache_end;
 
 /* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */
 #define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4)
@@ -324,7 +324,7 @@ static unsigned long __srmmu_get_nocache(int size, int align)
 	return (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT));
 }
 
-unsigned inline long srmmu_get_nocache(int size, int align)
+static unsigned long srmmu_get_nocache(int size, int align)
 {
 	unsigned long tmp;
 
@@ -336,7 +336,7 @@ unsigned inline long srmmu_get_nocache(int size, int align)
 	return tmp;
 }
 
-void srmmu_free_nocache(unsigned long vaddr, int size)
+static void srmmu_free_nocache(unsigned long vaddr, int size)
 {
 	int offset;
 
@@ -369,7 +369,8 @@ void srmmu_free_nocache(unsigned long vaddr, int size)
 	bit_map_clear(&srmmu_nocache_map, offset, size);
 }
 
-void srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end);
+static void srmmu_early_allocate_ptable_skeleton(unsigned long start,
+						 unsigned long end);
 
 extern unsigned long probe_memory(void);	/* in fault.c */
 
@@ -377,7 +378,7 @@ extern unsigned long probe_memory(void);	/* in fault.c */
  * Reserve nocache dynamically proportionally to the amount of
  * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002
  */
-void srmmu_nocache_calcsize(void)
+static void srmmu_nocache_calcsize(void)
 {
 	unsigned long sysmemavail = probe_memory() / 1024;
 	int srmmu_nocache_npages;
@@ -398,7 +399,7 @@ void srmmu_nocache_calcsize(void)
 	srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size;
 }
 
-void __init srmmu_nocache_init(void)
+static void __init srmmu_nocache_init(void)
 {
 	unsigned int bitmap_bits;
 	pgd_t *pgd;
@@ -645,7 +646,7 @@ static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
  * mappings on the kernel stack without any special code as we did
  * need on the sun4c.
  */
-struct thread_info *srmmu_alloc_thread_info(void)
+static struct thread_info *srmmu_alloc_thread_info(void)
 {
 	struct thread_info *ret;
 
@@ -1045,13 +1046,14 @@ extern void hypersparc_setup_blockops(void);
  *       around 8mb mapped for us.
  */
 
-void __init early_pgtable_allocfail(char *type)
+static void __init early_pgtable_allocfail(char *type)
 {
 	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
 	prom_halt();
 }
 
-void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end)
+static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
+							unsigned long end)
 {
 	pgd_t *pgdp;
 	pmd_t *pmdp;
@@ -1081,7 +1083,8 @@ void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned l
 	}
 }
 
-void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long end)
+static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
+						  unsigned long end)
 {
 	pgd_t *pgdp;
 	pmd_t *pmdp;
@@ -1116,7 +1119,8 @@ void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long en
  * looking at the prom's page table directly which is what most
  * other OS's do.  Yuck... this is much better.
  */
-void __init srmmu_inherit_prom_mappings(unsigned long start,unsigned long end)
+static void __init srmmu_inherit_prom_mappings(unsigned long start,
+					       unsigned long end)
 {
 	pgd_t *pgdp;
 	pmd_t *pmdp;
diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
index db0d6de33a8..4e55e8f7664 100644
--- a/arch/sparc/mm/tsunami.S
+++ b/arch/sparc/mm/tsunami.S
@@ -93,7 +93,6 @@ tsunami_flush_tlb_page_out:
 	ldd	[src + offset + 0x00], t2; \
 	std	t2, [dst + offset + 0x00];
 
-	.globl	tsunami_copy_1page
 tsunami_copy_1page:
 /* NOTE: This routine has to be shorter than 70insns --jj */
 	or	%g0, (PAGE_SIZE >> 8), %g1
-- 
cgit v1.2.3


From 908f5162ca59ed46a928e8416db159777d432e77 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 5 Jun 2008 11:42:40 -0700
Subject: sparc64/kernel/: make code static

This patch makes the following needlessly global code static:
- central.c: struct central_bus
- central.c: struct fhc_list
- central.c: apply_fhc_ranges()
- central.c: apply_central_ranges()
- ds.c: struct ds_states_template[]
- pci_msi.c: sparc64_setup_msi_irq()
- pci_msi.c: sparc64_teardown_msi_irq()
- pci_sun4v.c: struct sun4v_dma_ops
- sys_sparc32.c: cp_compat_stat64()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/central.c     | 14 +++++++-------
 arch/sparc64/kernel/ds.c          |  2 +-
 arch/sparc64/kernel/pci_msi.c     | 10 +++++-----
 arch/sparc64/kernel/pci_sun4v.c   |  2 +-
 arch/sparc64/kernel/sys_sparc32.c |  3 ++-
 5 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/kernel/central.c b/arch/sparc64/kernel/central.c
index b61b8dfb09c..f2e87d0d7e1 100644
--- a/arch/sparc64/kernel/central.c
+++ b/arch/sparc64/kernel/central.c
@@ -16,8 +16,8 @@
 #include <asm/fhc.h>
 #include <asm/starfire.h>
 
-struct linux_central *central_bus = NULL;
-struct linux_fhc *fhc_list = NULL;
+static struct linux_central *central_bus = NULL;
+static struct linux_fhc *fhc_list = NULL;
 
 #define IS_CENTRAL_FHC(__fhc)	((__fhc) == central_bus->child)
 
@@ -79,9 +79,9 @@ static void adjust_regs(struct linux_prom_registers *regp, int nregs,
 }
 
 /* Apply probed fhc ranges to registers passed, if no ranges return. */
-void apply_fhc_ranges(struct linux_fhc *fhc,
-		      struct linux_prom_registers *regs,
-		      int nregs)
+static void apply_fhc_ranges(struct linux_fhc *fhc,
+			     struct linux_prom_registers *regs,
+			     int nregs)
 {
 	if (fhc->num_fhc_ranges)
 		adjust_regs(regs, nregs, fhc->fhc_ranges,
@@ -89,8 +89,8 @@ void apply_fhc_ranges(struct linux_fhc *fhc,
 }
 
 /* Apply probed central ranges to registers passed, if no ranges return. */
-void apply_central_ranges(struct linux_central *central,
-			  struct linux_prom_registers *regs, int nregs)
+static void apply_central_ranges(struct linux_central *central,
+				 struct linux_prom_registers *regs, int nregs)
 {
 	if (central->num_central_ranges)
 		adjust_regs(regs, nregs, central->central_ranges,
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
index edb74f5a118..d0fa5aa3893 100644
--- a/arch/sparc64/kernel/ds.c
+++ b/arch/sparc64/kernel/ds.c
@@ -159,7 +159,7 @@ static void ds_var_data(struct ds_info *dp,
 			struct ds_cap_state *cp,
 			void *buf, int len);
 
-struct ds_cap_state ds_states_template[] = {
+static struct ds_cap_state ds_states_template[] = {
 	{
 		.service_id	= "md-update",
 		.data		= md_update_data,
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
index db5e8fd8f67..60c71e35021 100644
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -120,9 +120,9 @@ static struct irq_chip msi_irq = {
 	/* XXX affinity XXX */
 };
 
-int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
-			  struct pci_dev *pdev,
-			  struct msi_desc *entry)
+static int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
+				 struct pci_dev *pdev,
+				 struct msi_desc *entry)
 {
 	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
@@ -179,8 +179,8 @@ out_err:
 	return err;
 }
 
-void sparc64_teardown_msi_irq(unsigned int virt_irq,
-			      struct pci_dev *pdev)
+static void sparc64_teardown_msi_irq(unsigned int virt_irq,
+				     struct pci_dev *pdev)
 {
 	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index e2bb9790039..a104c80d319 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -531,7 +531,7 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev,
 	/* Nothing to do... */
 }
 
-const struct dma_ops sun4v_dma_ops = {
+static const struct dma_ops sun4v_dma_ops = {
 	.alloc_coherent			= dma_4v_alloc_coherent,
 	.free_coherent			= dma_4v_free_coherent,
 	.map_single			= dma_4v_map_single,
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index ba5bd626b39..590679795ce 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -359,7 +359,8 @@ int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
 	return err;
 }
 
-int cp_compat_stat64(struct kstat *stat, struct compat_stat64 __user *statbuf)
+static int cp_compat_stat64(struct kstat *stat,
+			    struct compat_stat64 __user *statbuf)
 {
 	int err;
 
-- 
cgit v1.2.3


From 9ae95bce73ef2d12fbe32a03ed230a9bef667328 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Mon, 7 Jul 2008 22:30:35 +0200
Subject: sparc: add -m64 when building vmlinux.lds

David Miller noticed that the build of vmlinux.lds
failed to use the -m64 specifier.
This caused the build to break with a bi-arch gcc with
unified headers.

Add the -m64 option to CPPFLAGS_vmlinux.lds so we
have the correct defines available when building
vmlinux.lds.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 arch/sparc64/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile
index 4b8f2b084c2..b785a395b12 100644
--- a/arch/sparc64/Makefile
+++ b/arch/sparc64/Makefile
@@ -9,7 +9,9 @@
 
 CHECKFLAGS	+= -D__sparc__ -D__sparc_v9__ -m64
 
-CPPFLAGS_vmlinux.lds += -Usparc
+# Undefine sparc when processing vmlinux.lds - it is used
+# And teach CPP we are doing 64 bit builds (for this case)
+CPPFLAGS_vmlinux.lds += -m64 -Usparc
 
 LDFLAGS		:= -m elf64_sparc
 
-- 
cgit v1.2.3


From f92ffa12f41efab4d4ad2827422d2e0a6c4e0fd2 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 6 Jun 2008 20:51:20 +0200
Subject: sparc: Merge asm-sparc{,64}/mman.h

Renaming the function sparc64_mmap_check() to
sparc_mmap_check() was enough to make the two
header files identical.

:$ diff -u include/asm-sparc/mman.h include/asm-sparc64/mman.h
:-- include/asm-sparc/mman.h	2008-06-13 06:46:39.000000000 +0200
:++ include/asm-sparc64/mman.h	2008-06-13 06:46:39.000000000 +0200
:@@ -1,5 +1,5 @@
:-#ifndef __SPARC_MMAN_H__
:-#define __SPARC_MMAN_H__
:+#ifndef __SPARC64_MMAN_H__
:+#define __SPARC64_MMAN_H__
:
: #include <asm-generic/mman.h>
:
:@@ -23,9 +23,9 @@
:
: #ifdef __KERNEL__
: #ifndef __ASSEMBLY__
:-#define arch_mmap_check(addr,len,flags)	sparc_mmap_check(addr,len)
:-int sparc_mmap_check(unsigned long addr, unsigned long len);
:+#define arch_mmap_check(addr,len,flags)	sparc64_mmap_check(addr,len)
:+int sparc64_mmap_check(unsigned long addr, unsigned long len);
: #endif
: #endif
:
:-#endif /* __SPARC_MMAN_H__ */
:+#endif /* __SPARC64_MMAN_H__ */

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 arch/sparc64/kernel/sys_sparc.c   | 6 +++---
 arch/sparc64/kernel/sys_sparc32.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index ac1bff58c1a..e1f4eba2e57 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -542,7 +542,7 @@ asmlinkage long sparc64_personality(unsigned long personality)
 	return ret;
 }
 
-int sparc64_mmap_check(unsigned long addr, unsigned long len)
+int sparc_mmap_check(unsigned long addr, unsigned long len)
 {
 	if (test_thread_flag(TIF_32BIT)) {
 		if (len >= STACK_TOP32)
@@ -614,9 +614,9 @@ asmlinkage unsigned long sys64_mremap(unsigned long addr,
 		goto out;
 	if (unlikely(new_len >= VA_EXCLUDE_START))
 		goto out;
-	if (unlikely(sparc64_mmap_check(addr, old_len)))
+	if (unlikely(sparc_mmap_check(addr, old_len)))
 		goto out;
-	if (unlikely(sparc64_mmap_check(new_addr, new_len)))
+	if (unlikely(sparc_mmap_check(new_addr, new_len)))
 		goto out;
 
 	down_write(&current->mm->mmap_sem);
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index 590679795ce..97b77fb5c50 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -871,9 +871,9 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr,
 	unsigned long ret = -EINVAL;
 	unsigned long new_addr = __new_addr;
 
-	if (unlikely(sparc64_mmap_check(addr, old_len)))
+	if (unlikely(sparc_mmap_check(addr, old_len)))
 		goto out;
-	if (unlikely(sparc64_mmap_check(new_addr, new_len)))
+	if (unlikely(sparc_mmap_check(new_addr, new_len)))
 		goto out;
 	down_write(&current->mm->mmap_sem);
 	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
-- 
cgit v1.2.3


From b1a8bf92a0303301f3e013e2a2f45a4916453ce7 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 13 Jun 2008 20:20:54 +0200
Subject: sparc: export openprom.h to userspace

sparc64 exports openprom.h to userspace so let sparc follow
the example.
As openprom.h pulled in another not-for-export vaddrs.h header
file it required a few changes to fix the build.

The definitions of VMALLOC_* were moved to pgtable as this is
where sparc64 has them.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 arch/sparc/kernel/entry.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 55d3be1b5d8..2f96256dc51 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -19,6 +19,7 @@
 #include <asm/vaddrs.h>
 #include <asm/memreg.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #ifdef CONFIG_SUN4
 #include <asm/pgtsun4.h>
 #else
-- 
cgit v1.2.3


From ab772027ca42a06f7e480077182434a2a30a8037 Mon Sep 17 00:00:00 2001
From: Stoyan Gaydarov <stoyboyker@gmail.com>
Date: Mon, 14 Jul 2008 22:12:29 -0700
Subject: sparc: arch/sparc/kernel/apc.c to unlocked_ioctl

This changes arch/sparc/kernel/apc.c to use unlocked_ioctl

Signed-off-by: Stoyan Gaydarov <stoyboyker@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/apc.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index 54f7ccd7455..5267d48fb2c 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -85,54 +85,70 @@ static int apc_release(struct inode *inode, struct file *f)
 	return 0;
 }
 
-static int apc_ioctl(struct inode *inode, struct file *f, 
-		     unsigned int cmd, unsigned long __arg)
+static long apc_ioctl(struct file *f, unsigned int cmd, unsigned long __arg)
 {
 	__u8 inarg, __user *arg;
 
 	arg = (__u8 __user *) __arg;
+
+	lock_kernel();
+
 	switch (cmd) {
 	case APCIOCGFANCTL:
-		if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg))
-				return -EFAULT;
+		if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg)) {
+			unlock_kernel();
+			return -EFAULT;
+		}
 		break;
 
 	case APCIOCGCPWR:
-		if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg))
+		if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg)) {
+			unlock_kernel();
 			return -EFAULT;
+		}
 		break;
 
 	case APCIOCGBPORT:
-		if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg))
+		if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg)) {
+			unlock_kernel();
 			return -EFAULT;
+		}
 		break;
 
 	case APCIOCSFANCTL:
-		if (get_user(inarg, arg))
+		if (get_user(inarg, arg)) {
+			unlock_kernel();
 			return -EFAULT;
+		}
 		apc_writeb(inarg & APC_REGMASK, APC_FANCTL_REG);
 		break;
 	case APCIOCSCPWR:
-		if (get_user(inarg, arg))
+		if (get_user(inarg, arg)) {
+			unlock_kernel();
 			return -EFAULT;
+		}
 		apc_writeb(inarg & APC_REGMASK, APC_CPOWER_REG);
 		break;
 	case APCIOCSBPORT:
-		if (get_user(inarg, arg))
+		if (get_user(inarg, arg)) {
+			unlock_kernel();
 			return -EFAULT;
+		}
 		apc_writeb(inarg & APC_BPMASK, APC_BPORT_REG);
 		break;
 	default:
+		unlock_kernel();
 		return -EINVAL;
 	};
 
+	unlock_kernel();
 	return 0;
 }
 
 static const struct file_operations apc_fops = {
-	.ioctl =	apc_ioctl,
-	.open =		apc_open,
-	.release =	apc_release,
+	.unlocked_ioctl =	apc_ioctl,
+	.open =			apc_open,
+	.release =		apc_release,
 };
 
 static struct miscdevice apc_miscdev = { APC_MINOR, APC_DEVNAME, &apc_fops };
-- 
cgit v1.2.3


From 932d06139443ebebcd8d9232941b7216091966d5 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 14 Jul 2008 22:46:33 -0700
Subject: sparc64: Config category "Processor type and features" absent

kernel bugzilla #11059:

sparc64 config menu is missing "Processor type and features",
so add that and move General Setup before Processor menu.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index fca9246470b..794d22fdf46 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -81,6 +81,10 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	def_bool y
 
+source "init/Kconfig"
+
+menu "Processor type and features"
+
 choice
 	prompt "Kernel page size"
 	default SPARC64_PAGE_SIZE_8KB
@@ -136,14 +140,10 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
 
-source "init/Kconfig"
-
 config GENERIC_HARDIRQS
 	bool
 	default y
 
-menu "General machine setup"
-
 source "kernel/time/Kconfig"
 
 config SMP
-- 
cgit v1.2.3


From f538f3df4f92f34f5d8bc024d54c12387541cdee Mon Sep 17 00:00:00 2001
From: Robert Reif <reif@earthlink.net>
Date: Mon, 14 Jul 2008 22:57:29 -0700
Subject: sparc32: fix init.c allnoconfig build error

Fix allnoconfig build error.

Signed-off-by: Robert Reif <reif@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index 8f94a2d62f1..e103f1bb377 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
+#include <linux/pagemap.h>
 
 #include <asm/system.h>
 #include <asm/vac-ops.h>
-- 
cgit v1.2.3


From 4fe3ebec122f23a095cc1d17557c175caaa55ca1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 22:11:32 -0700
Subject: sparc: Use new '%pS' infrastructure to print symbols.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/process.c     | 11 +++++------
 arch/sparc/kernel/traps.c       |  7 +++----
 arch/sparc64/kernel/process.c   | 26 +++++++++-----------------
 arch/sparc64/kernel/traps.c     | 37 +++++++++++++++----------------------
 arch/sparc64/kernel/unaligned.c |  7 +++----
 arch/sparc64/mm/fault.c         |  5 ++---
 6 files changed, 37 insertions(+), 56 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index e18a5da025d..4bb430940a6 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -1,6 +1,6 @@
 /*  linux/arch/sparc/kernel/process.c
  *
- *  Copyright (C) 1995 David S. Miller (davem@davemloft.net)
+ *  Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net)
  *  Copyright (C) 1996 Eddie C. Dost   (ecd@skynet.be)
  */
 
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/kallsyms.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/ptrace.h>
@@ -198,7 +197,7 @@ void __show_backtrace(unsigned long fp)
 		       rw->ins[4], rw->ins[5],
 		       rw->ins[6],
 		       rw->ins[7]);
-		print_symbol("%s\n", rw->ins[7]);
+		printk("%pS\n", (void *) rw->ins[7]);
 		rw = (struct reg_window *) rw->ins[6];
 	}
 	spin_unlock_irqrestore(&sparc_backtrace_lock, flags);
@@ -265,14 +264,14 @@ void show_regs(struct pt_regs *r)
 
         printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx    %s\n",
 	       r->psr, r->pc, r->npc, r->y, print_tainted());
-	print_symbol("PC: <%s>\n", r->pc);
+	printk("PC: <%pS>\n", (void *) r->pc);
 	printk("%%G: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
 	       r->u_regs[0], r->u_regs[1], r->u_regs[2], r->u_regs[3],
 	       r->u_regs[4], r->u_regs[5], r->u_regs[6], r->u_regs[7]);
 	printk("%%O: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
 	       r->u_regs[8], r->u_regs[9], r->u_regs[10], r->u_regs[11],
 	       r->u_regs[12], r->u_regs[13], r->u_regs[14], r->u_regs[15]);
-	print_symbol("RPC: <%s>\n", r->u_regs[15]);
+	printk("RPC: <%pS>\n", (void *) r->u_regs[15]);
 
 	printk("%%L: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
 	       rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3],
@@ -307,7 +306,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 		rw = (struct reg_window *) fp;
 		pc = rw->ins[7];
 		printk("[%08lx : ", pc);
-		print_symbol("%s ] ", pc);
+		printk("%pS ] ", (void *) pc);
 		fp = rw->ins[6];
 	} while (++count < 16);
 	printk("\n");
diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c
index ac8ee6ab133..5d45d5fd8c9 100644
--- a/arch/sparc/kernel/traps.c
+++ b/arch/sparc/kernel/traps.c
@@ -1,7 +1,7 @@
 /*
  * arch/sparc/kernel/traps.c
  *
- * Copyright 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright 1995, 2008 David S. Miller (davem@davemloft.net)
  * Copyright 2000 Jakub Jelinek (jakub@redhat.com)
  */
 
@@ -11,7 +11,6 @@
 
 #include <linux/sched.h>  /* for jiffies */
 #include <linux/kernel.h>
-#include <linux/kallsyms.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
@@ -116,8 +115,8 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 		      count++ < 30				&&
                       (((unsigned long) rw) >= PAGE_OFFSET)	&&
 		      !(((unsigned long) rw) & 0x7)) {
-			printk("Caller[%08lx]", rw->ins[7]);
-			print_symbol(": %s\n", rw->ins[7]);
+			printk("Caller[%08lx]: %pS\n", rw->ins[7],
+			       (void *) rw->ins[7]);
 			rw = (struct reg_window *)rw->ins[6];
 		}
 	}
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2084f81a76e..c1f1ff29e97 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/kallsyms.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
@@ -211,7 +210,7 @@ static void show_regwindow(struct pt_regs *regs)
 	printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n",
 	       rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]);
 	if (regs->tstate & TSTATE_PRIV)
-		print_symbol("I7: <%s>\n", rwk->ins[7]);
+		printk("I7: <%pS>\n", (void *) rwk->ins[7]);
 }
 
 #ifdef CONFIG_SMP
@@ -232,7 +231,7 @@ void __show_regs(struct pt_regs * regs)
 #endif
 	printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x    %s\n", regs->tstate,
 	       regs->tpc, regs->tnpc, regs->y, print_tainted());
-	print_symbol("TPC: <%s>\n", regs->tpc);
+	printk("TPC: <%pS>\n", (void *) regs->tpc);
 	printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n",
 	       regs->u_regs[0], regs->u_regs[1], regs->u_regs[2],
 	       regs->u_regs[3]);
@@ -245,7 +244,7 @@ void __show_regs(struct pt_regs * regs)
 	printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n",
 	       regs->u_regs[12], regs->u_regs[13], regs->u_regs[14],
 	       regs->u_regs[15]);
-	print_symbol("RPC: <%s>\n", regs->u_regs[15]);
+	printk("RPC: <%pS>\n", (void *) regs->u_regs[15]);
 	show_regwindow(regs);
 #ifdef CONFIG_SMP
 	spin_unlock(&regdump_lock);
@@ -346,9 +345,6 @@ static void sysrq_handle_globreg(int key, struct tty_struct *tty)
 {
 	struct thread_info *tp = current_thread_info();
 	struct pt_regs *regs = get_irq_regs();
-#ifdef CONFIG_KALLSYMS
-	char buffer[KSYM_SYMBOL_LEN];
-#endif
 	unsigned long flags;
 	int this_cpu, cpu;
 
@@ -377,17 +373,13 @@ static void sysrq_handle_globreg(int key, struct tty_struct *tty)
 		       gp->tstate, gp->tpc, gp->tnpc,
 		       ((tp && tp->task) ? tp->task->comm : "NULL"),
 		       ((tp && tp->task) ? tp->task->pid : -1));
-#ifdef CONFIG_KALLSYMS
+
 		if (gp->tstate & TSTATE_PRIV) {
-			sprint_symbol(buffer, gp->tpc);
-			printk("             TPC[%s] ", buffer);
-			sprint_symbol(buffer, gp->o7);
-			printk("O7[%s] ", buffer);
-			sprint_symbol(buffer, gp->i7);
-			printk("I7[%s]\n", buffer);
-		} else
-#endif
-		{
+			printk("             TPC[%pS] O7[%pS] I7[%pS]\n",
+			       (void *) gp->tpc,
+			       (void *) gp->o7,
+			       (void *) gp->i7);
+		} else {
 			printk("             TPC[%lx] O7[%lx] I7[%lx]\n",
 			       gp->tpc, gp->o7, gp->i7);
 		}
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 36974926265..0aa819c29db 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -1,6 +1,6 @@
 /* arch/sparc64/kernel/traps.c
  *
- * Copyright (C) 1995,1997 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1995,1997,2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1997,1999,2000 Jakub Jelinek (jakub@redhat.com)
  */
 
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/kallsyms.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -74,7 +73,7 @@ static void dump_tl1_traplog(struct tl1_traplog *p)
 		       i + 1,
 		       p->trapstack[i].tstate, p->trapstack[i].tpc,
 		       p->trapstack[i].tnpc, p->trapstack[i].tt);
-		print_symbol("TRAPLOG: TPC<%s>\n", p->trapstack[i].tpc);
+		printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc);
 	}
 }
 
@@ -1081,7 +1080,7 @@ static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *in
 	       regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate);
 	printk("%s" "ERROR(%d): ",
 	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id());
-	print_symbol("TPC<%s>\n", regs->tpc);
+	printk("TPC<%pS>\n", (void *) regs->tpc);
 	printk("%s" "ERROR(%d): M_SYND(%lx),  E_SYND(%lx)%s%s\n",
 	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
 	       (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT,
@@ -1689,7 +1688,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs)
 		       smp_processor_id(),
 		       (type & 0x1) ? 'I' : 'D',
 		       regs->tpc);
-		print_symbol(KERN_EMERG "TPC<%s>\n", regs->tpc);
+		printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc);
 		panic("Irrecoverable Cheetah+ parity error.");
 	}
 
@@ -1697,7 +1696,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs)
 	       smp_processor_id(),
 	       (type & 0x1) ? 'I' : 'D',
 	       regs->tpc);
-	print_symbol(KERN_WARNING "TPC<%s>\n", regs->tpc);
+	printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc);
 }
 
 struct sun4v_error_entry {
@@ -1904,9 +1903,10 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
 
 	printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
 	       regs->tpc, tl);
-	print_symbol(KERN_EMERG "SUN4V-ITLB: TPC<%s>\n", regs->tpc);
+	printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc);
 	printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
-	print_symbol(KERN_EMERG "SUN4V-ITLB: O7<%s>\n", regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n",
+	       (void *) regs->u_regs[UREG_I7]);
 	printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] "
 	       "pte[%lx] error[%lx]\n",
 	       sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
@@ -1927,9 +1927,10 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
 
 	printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
 	       regs->tpc, tl);
-	print_symbol(KERN_EMERG "SUN4V-DTLB: TPC<%s>\n", regs->tpc);
+	printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc);
 	printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
-	print_symbol(KERN_EMERG "SUN4V-DTLB: O7<%s>\n", regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n",
+	       (void *) regs->u_regs[UREG_I7]);
 	printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] "
 	       "pte[%lx] error[%lx]\n",
 	       sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
@@ -2111,10 +2112,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 	fp = ksp + STACK_BIAS;
 	thread_base = (unsigned long) tp;
 
-	printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
-	printk("\n");
-#endif
+	printk("Call Trace:\n");
 	do {
 		struct sparc_stackf *sf;
 		struct pt_regs *regs;
@@ -2137,12 +2135,8 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 			fp = (unsigned long)sf->fp + STACK_BIAS;
 		}
 
-		printk(" [%016lx] ", pc);
-		print_symbol("%s\n", pc);
+		printk(" [%016lx] %pS\n", pc, (void *) pc);
 	} while (++count < 16);
-#ifndef CONFIG_KALLSYMS
-	printk("\n");
-#endif
 }
 
 void dump_stack(void)
@@ -2211,9 +2205,8 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 		while (rw &&
 		       count++ < 30&&
 		       is_kernel_stack(current, rw)) {
-			printk("Caller[%016lx]", rw->ins[7]);
-			print_symbol(": %s", rw->ins[7]);
-			printk("\n");
+			printk("Caller[%016lx]: %pS\n", rw->ins[7],
+			       (void *) rw->ins[7]);
 
 			rw = kernel_stack_up(rw);
 		}
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
index afa7fc4f519..203ddfad9f2 100644
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -2,7 +2,7 @@
  * unaligned.c: Unaligned load/store trap handling with special
  *              cases for the kernel to do them more quickly.
  *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
@@ -20,7 +20,6 @@
 #include <asm/uaccess.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
-#include <linux/kallsyms.h>
 #include <asm/fpumacro.h>
 
 /* #define DEBUG_MNA */
@@ -289,8 +288,8 @@ static void log_unaligned(struct pt_regs *regs)
 	if (count < 5) {
 		last_time = jiffies;
 		count++;
-		printk("Kernel unaligned access at TPC[%lx] ", regs->tpc);
-		print_symbol("%s\n", regs->tpc);
+		printk("Kernel unaligned access at TPC[%lx] %pS\n",
+		       regs->tpc, (void *) regs->tpc);
 	}
 }
 
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 236f4d228d2..ea7d7ae76bc 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -1,7 +1,7 @@
 /*
  * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc.
  *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
@@ -18,7 +18,6 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
-#include <linux/kallsyms.h>
 #include <linux/kdebug.h>
 
 #include <asm/page.h>
@@ -115,7 +114,7 @@ static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
 	printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
 	       regs->tpc);
 	printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]);
-	print_symbol("RPC: <%s>\n", regs->u_regs[15]);
+	printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]);
 	printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
 	dump_stack();
 	unhandled_fault(regs->tpc, current, regs);
-- 
cgit v1.2.3


From d172ad18f9914f70c761a6cad470efc986d5e07e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 23:44:50 -0700
Subject: sparc64: Convert to generic helpers for IPI function calls.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/Kconfig                |  1 +
 arch/sparc64/kernel/smp.c           | 87 ++++++++-----------------------------
 arch/sparc64/kernel/sparc64_ksyms.c |  2 -
 arch/sparc64/kernel/ttable.S        |  7 ++-
 arch/sparc64/mm/ultra.S             |  5 +++
 5 files changed, 29 insertions(+), 73 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 794d22fdf46..1aeb1da9829 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -16,6 +16,7 @@ config SPARC64
 	select HAVE_IDE
 	select HAVE_LMB
 	select HAVE_ARCH_KGDB
+	select USE_GENERIC_SMP_HELPERS if SMP
 
 config GENERIC_TIME
 	bool
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index c099d96f123..7cf72b4bb10 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -788,89 +788,36 @@ static void smp_start_sync_tick_client(int cpu)
 			      0, 0, 0, mask);
 }
 
-/* Send cross call to all processors except self. */
-#define smp_cross_call(func, ctx, data1, data2) \
-	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t finished;
-	int wait;
-};
-
-static struct call_data_struct *call_data;
-
 extern unsigned long xcall_call_function;
 
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info,
-					  int wait, cpumask_t mask)
+void arch_send_call_function_ipi(cpumask_t mask)
 {
-	struct call_data_struct data;
-	int cpus;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.finished, 0);
-	data.wait = wait;
-
-	spin_lock(&call_lock);
-
-	cpu_clear(smp_processor_id(), mask);
-	cpus = cpus_weight(mask);
-	if (!cpus)
-		goto out_unlock;
-
-	call_data = &data;
-	mb();
-
 	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
+}
 
-	/* Wait for response */
-	while (atomic_read(&data.finished) != cpus)
-		cpu_relax();
+extern unsigned long xcall_call_function_single;
 
-out_unlock:
-	spin_unlock(&call_lock);
+void arch_send_call_function_single_ipi(int cpu)
+{
+	cpumask_t mask = cpumask_of_cpu(cpu);
 
-	return 0;
+	smp_cross_call_masked(&xcall_call_function_single, 0, 0, 0, mask);
 }
 
-int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-	return sparc64_smp_call_function_mask(func, info, wait, cpu_online_map);
-}
+/* Send cross call to all processors except self. */
+#define smp_cross_call(func, ctx, data1, data2) \
+	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
 
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
+	clear_softint(1 << irq);
+	generic_smp_call_function_interrupt();
+}
 
+void smp_call_function_single_client(int irq, struct pt_regs *regs)
+{
 	clear_softint(1 << irq);
-	if (call_data->wait) {
-		/* let initiator proceed only after completion */
-		func(info);
-		atomic_inc(&call_data->finished);
-	} else {
-		/* let initiator proceed after getting data */
-		atomic_inc(&call_data->finished);
-		func(info);
-	}
+	generic_smp_call_function_single_interrupt();
 }
 
 static void tsb_sync(void *info)
@@ -890,7 +837,7 @@ static void tsb_sync(void *info)
 
 void smp_tsb_sync(struct mm_struct *mm)
 {
-	sparc64_smp_call_function_mask(tsb_sync, mm, 1, mm->cpu_vm_mask);
+	smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
 }
 
 extern unsigned long xcall_flush_tlb_mm;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 49d3ea50c24..504e678ee12 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -108,8 +108,6 @@ EXPORT_SYMBOL(__read_unlock);
 EXPORT_SYMBOL(__write_lock);
 EXPORT_SYMBOL(__write_unlock);
 EXPORT_SYMBOL(__write_trylock);
-
-EXPORT_SYMBOL(smp_call_function);
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_MCOUNT
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 450053af039..1ade3d6fb7f 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -58,7 +58,12 @@ tl0_irq3:	BTRAP(0x43)
 tl0_irq4:	BTRAP(0x44)
 #endif
 tl0_irq5:	TRAP_IRQ(handler_irq, 5)
-tl0_irq6:	BTRAP(0x46) BTRAP(0x47) BTRAP(0x48) BTRAP(0x49)
+#ifdef CONFIG_SMP
+tl0_irq6:	TRAP_IRQ(smp_call_function_single_client, 6)
+#else
+tl0_irq6:	BTRAP(0x46)
+#endif
+tl0_irq7:	BTRAP(0x47) BTRAP(0x48) BTRAP(0x49)
 tl0_irq10:	BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d)
 tl0_irq14:	TRAP_IRQ(timer_interrupt, 14)
 tl0_irq15:	TRAP_IRQ(handler_irq, 15)
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 9bb2d90a9df..4c8ca131ffa 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -688,6 +688,11 @@ xcall_call_function:
 	wr		%g0, (1 << PIL_SMP_CALL_FUNC), %set_softint
 	retry
 
+	.globl		xcall_call_function_single
+xcall_call_function_single:
+	wr		%g0, (1 << PIL_SMP_CALL_FUNC_SNGL), %set_softint
+	retry
+
 	.globl		xcall_receive_signal
 xcall_receive_signal:
 	wr		%g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint
-- 
cgit v1.2.3


From f7fe93344fd3f4ccd406a35f751a61b77f94b0fc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 17 Jul 2008 23:43:55 -0700
Subject: sparc64: Remove 4MB and 512K base page size options.

Adrian Bunk reported that enabling 4MB page size breaks the build.
The problem is that MAX_ORDER combined with the page shift exceeds the
SECTION_SIZE_BITS we use in asm-sparc64/sparsemem.h.

There are several ways I suppose we could work around this.  For one
we could define a CONFIG_FORCE_MAX_ZONEORDER to decrease MAX_ORDER in
these higher page size cases.

But I also know that these page size cases are broken wrt. TLB miss
handling especially on pre-hypervisor systems, and there isn't an easy
way to fix that.

These options were meant to be fun experimental hacks anyways, and
only 8K and 64K make any sense to support.

So remove 512K and 4M base page size support.  Of course, we still
support these page sizes for huge pages.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/Kconfig         | 11 +----------
 arch/sparc64/lib/copy_page.S |  6 +++---
 arch/sparc64/mm/tsb.c        |  6 ------
 3 files changed, 4 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 1aeb1da9829..7c88263256a 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -98,19 +98,11 @@ config SPARC64_PAGE_SIZE_8KB
 	  8KB and 64KB work quite well, since SPARC ELF sections
 	  provide for up to 64KB alignment.
 
-	  Therefore, 512KB and 4MB are for expert hackers only.
-
 	  If you don't know what to do, choose 8KB.
 
 config SPARC64_PAGE_SIZE_64KB
 	bool "64KB"
 
-config SPARC64_PAGE_SIZE_512KB
-	bool "512KB"
-
-config SPARC64_PAGE_SIZE_4MB
-	bool "4MB"
-
 endchoice
 
 config SECCOMP
@@ -226,11 +218,10 @@ config HUGETLB_PAGE_SIZE_4MB
 	bool "4MB"
 
 config HUGETLB_PAGE_SIZE_512K
-	depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB
 	bool "512K"
 
 config HUGETLB_PAGE_SIZE_64K
-	depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB && !SPARC64_PAGE_SIZE_64KB
+	depends on !SPARC64_PAGE_SIZE_64KB
 	bool "64K"
 
 endchoice
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S
index 37460666a5c..b243d3b606b 100644
--- a/arch/sparc64/lib/copy_page.S
+++ b/arch/sparc64/lib/copy_page.S
@@ -25,9 +25,9 @@
 
 #define	DCACHE_SIZE	(PAGE_SIZE * 2)
 
-#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19)
+#if (PAGE_SHIFT == 13)
 #define PAGE_SIZE_REM	0x80
-#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+#elif (PAGE_SHIFT == 16)
 #define PAGE_SIZE_REM	0x100
 #else
 #error Wrong PAGE_SHIFT specified
@@ -198,7 +198,7 @@ cheetah_copy_page_insn:
 	cmp		%o2, PAGE_SIZE_REM
 	bne,pt		%xcc, 1b
 	 add		%o0, 0x40, %o0
-#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22)
+#if (PAGE_SHIFT == 16)
 	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
 	ldda		[%o1] ASI_BLK_P, %f32
 	stda		%f48, [%o0] %asi
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index fe70c8a557b..3547937b17a 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -96,12 +96,6 @@ void flush_tsb_user(struct mmu_gather *mp)
 #elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
 #define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_64K
 #define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_64K
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
-#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_512K
-#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_512K
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
-#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_4MB
-#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_4MB
 #else
 #error Broken base page size setting...
 #endif
-- 
cgit v1.2.3


From 432e8765f0206de5bbddcbd4eb1d9611c79b1eaa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jul 2008 00:43:52 -0700
Subject: sparc64: Add missing hypervisor service group numbers.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/hvapi.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/sparc64/kernel/hvapi.c b/arch/sparc64/kernel/hvapi.c
index f34f5d6181e..691760b5b01 100644
--- a/arch/sparc64/kernel/hvapi.c
+++ b/arch/sparc64/kernel/hvapi.c
@@ -34,8 +34,12 @@ static struct api_info api_table[] = {
 	{ .group = HV_GRP_LDOM,					},
 	{ .group = HV_GRP_SVC_CHAN,	.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_NCS,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_RNG,					},
 	{ .group = HV_GRP_NIAG_PERF,	.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_FIRE_PERF,				},
+	{ .group = HV_GRP_N2_CPU,				},
+	{ .group = HV_GRP_NIU,					},
+	{ .group = HV_GRP_VF_CPU,				},
 	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},
 };
 
-- 
cgit v1.2.3


From e7eb32eb3d28788fd66c233618e3f7fbe7beb9e4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jul 2008 01:49:06 -0700
Subject: sparc64: Update defconfig.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/defconfig | 145 ++++++++++++++++++++++++++-----------------------
 1 file changed, 76 insertions(+), 69 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 76eb832527f..82cab5cc807 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.26-rc2
-# Fri May 16 13:36:07 2008
+# Linux kernel version: 2.6.26
+# Fri Jul 18 00:47:07 2008
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
@@ -22,18 +22,6 @@ CONFIG_HAVE_SETUP_PER_CPU_AREA=y
 CONFIG_ARCH_NO_VIRT_TO_BUS=y
 CONFIG_OF=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
-CONFIG_SPARC64_PAGE_SIZE_8KB=y
-# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
-# CONFIG_SPARC64_PAGE_SIZE_512KB is not set
-# CONFIG_SPARC64_PAGE_SIZE_4MB is not set
-CONFIG_SECCOMP=y
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_300 is not set
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
-# CONFIG_SCHED_HRTICK is not set
-CONFIG_HOTPLUG_CPU=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
@@ -105,6 +93,7 @@ CONFIG_KRETPROBES=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 # CONFIG_HAVE_DMA_ATTRS is not set
+CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -121,6 +110,7 @@ CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
+# CONFIG_BLK_DEV_INTEGRITY is not set
 CONFIG_BLOCK_COMPAT=y
 
 #
@@ -136,11 +126,21 @@ CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="anticipatory"
 CONFIG_CLASSIC_RCU=y
-CONFIG_GENERIC_HARDIRQS=y
 
 #
-# General machine setup
+# Processor type and features
 #
+CONFIG_SPARC64_PAGE_SIZE_8KB=y
+# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
+CONFIG_SECCOMP=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_HOTPLUG_CPU=y
+CONFIG_GENERIC_HARDIRQS=y
 CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -342,6 +342,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_STANDALONE=y
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
 # CONFIG_DEBUG_DRIVER is not set
 # CONFIG_DEBUG_DEVRES is not set
 # CONFIG_SYS_HYPERVISOR is not set
@@ -366,6 +368,7 @@ CONFIG_CDROM_PKTCDVD_BUFFERS=8
 CONFIG_CDROM_PKTCDVD_WCACHE=y
 CONFIG_ATA_OVER_ETH=m
 CONFIG_SUNVDC=m
+# CONFIG_BLK_DEV_HD is not set
 CONFIG_MISC_DEVICES=y
 # CONFIG_PHANTOM is not set
 # CONFIG_EEPROM_93CX6 is not set
@@ -379,6 +382,7 @@ CONFIG_BLK_DEV_IDE=y
 #
 # Please see Documentation/ide/ide.txt for help/info on IDE drives
 #
+CONFIG_IDE_TIMINGS=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
 CONFIG_BLK_DEV_IDEDISK=y
 # CONFIG_IDEDISK_MULTI_MODE is not set
@@ -429,8 +433,6 @@ CONFIG_BLK_DEV_ALI15X3=y
 # CONFIG_BLK_DEV_VIA82CXXX is not set
 # CONFIG_BLK_DEV_TC86C001 is not set
 CONFIG_BLK_DEV_IDEDMA=y
-# CONFIG_BLK_DEV_HD_ONLY is not set
-# CONFIG_BLK_DEV_HD is not set
 
 #
 # SCSI device support
@@ -504,6 +506,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_SUNESP is not set
 # CONFIG_SCSI_SRP is not set
+# CONFIG_SCSI_DH is not set
 # CONFIG_ATA is not set
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
@@ -529,6 +532,10 @@ CONFIG_DM_ZERO=m
 #
 # IEEE 1394 (FireWire) support
 #
+
+#
+# Enable only one of the two stacks, unless you know what you are doing
+#
 # CONFIG_FIREWIRE is not set
 # CONFIG_IEEE1394 is not set
 # CONFIG_I2O is not set
@@ -745,7 +752,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
+CONFIG_HW_RANDOM=m
+CONFIG_HW_RANDOM_N2RNG=m
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
 # CONFIG_RAW_DRIVER is not set
@@ -759,38 +767,58 @@ CONFIG_I2C_ALGOBIT=y
 #
 # I2C Hardware Bus support
 #
+
+#
+# PC SMBus host controller drivers
+#
 # CONFIG_I2C_ALI1535 is not set
 # CONFIG_I2C_ALI1563 is not set
 # CONFIG_I2C_ALI15X3 is not set
 # CONFIG_I2C_AMD756 is not set
 # CONFIG_I2C_AMD8111 is not set
 # CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_ISCH is not set
 # CONFIG_I2C_PIIX4 is not set
 # CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIMTEC is not set
 # CONFIG_I2C_SIS5595 is not set
 # CONFIG_I2C_SIS630 is not set
 # CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_TAOS_EVM is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_TINY_USB is not set
 # CONFIG_I2C_VIA is not set
 # CONFIG_I2C_VIAPRO is not set
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Graphics adapter I2C/DDC channel drivers
+#
 # CONFIG_I2C_VOODOO3 is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
 # CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
 
 #
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
+# CONFIG_AT24 is not set
 # CONFIG_SENSORS_EEPROM is not set
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
 # CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
@@ -856,6 +884,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 # CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
 
 #
@@ -985,15 +1014,7 @@ CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
 CONFIG_LOGO_SUN_CLUT224=y
-
-#
-# Sound
-#
 CONFIG_SOUND=m
-
-#
-# Advanced Linux Sound Architecture
-#
 CONFIG_SND=m
 CONFIG_SND_TIMER=m
 CONFIG_SND_PCM=m
@@ -1010,21 +1031,17 @@ CONFIG_SND_SUPPORT_OLD_API=y
 CONFIG_SND_VERBOSE_PROCFS=y
 # CONFIG_SND_VERBOSE_PRINTK is not set
 # CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
+CONFIG_SND_VMASTER=y
 CONFIG_SND_MPU401_UART=m
 CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_DRIVERS=y
 CONFIG_SND_DUMMY=m
 CONFIG_SND_VIRMIDI=m
 CONFIG_SND_MTPAV=m
 # CONFIG_SND_SERIAL_U16550 is not set
 # CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
+# CONFIG_SND_AC97_POWER_SAVE is not set
+CONFIG_SND_PCI=y
 # CONFIG_SND_AD1889 is not set
 # CONFIG_SND_ALS300 is not set
 CONFIG_SND_ALI5451=m
@@ -1084,37 +1101,14 @@ CONFIG_SND_ALI5451=m
 # CONFIG_SND_VIRTUOSO is not set
 # CONFIG_SND_VX222 is not set
 # CONFIG_SND_YMFPCI is not set
-# CONFIG_SND_AC97_POWER_SAVE is not set
-
-#
-# USB devices
-#
+CONFIG_SND_USB=y
 # CONFIG_SND_USB_AUDIO is not set
 # CONFIG_SND_USB_CAIAQ is not set
-
-#
-# ALSA Sparc devices
-#
+CONFIG_SND_SPARC=y
 # CONFIG_SND_SUN_AMD7930 is not set
 CONFIG_SND_SUN_CS4231=m
 # CONFIG_SND_SUN_DBRI is not set
-
-#
-# System on Chip audio support
-#
 # CONFIG_SND_SOC is not set
-
-#
-# ALSA SoC audio for Freescale SOCs
-#
-
-#
-# SoC Audio for the Texas Instruments OMAP
-#
-
-#
-# Open Sound System
-#
 # CONFIG_SOUND_PRIME is not set
 CONFIG_AC97_BUS=m
 CONFIG_HID_SUPPORT=y
@@ -1167,6 +1161,7 @@ CONFIG_USB_UHCI_HCD=m
 #
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
 
 #
 # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1226,6 +1221,7 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
 # CONFIG_USB_TEST is not set
+# CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_GADGET is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1420,6 +1416,12 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
+CONFIG_HAVE_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1486,6 +1488,10 @@ CONFIG_CRYPTO_CRC32C=m
 CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
 CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
@@ -1527,6 +1533,7 @@ CONFIG_BITREVERSE=y
 # CONFIG_GENERIC_FIND_FIRST_BIT is not set
 CONFIG_CRC_CCITT=m
 CONFIG_CRC16=m
+# CONFIG_CRC_T10DIF is not set
 # CONFIG_CRC_ITU_T is not set
 CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
-- 
cgit v1.2.3


From 29cbeb0e17d9d2ca824f62f71cfa7360b3157112 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Thu, 17 Jul 2008 21:50:23 -0700
Subject: x86: use cpu_clear in remove_cpu_from_maps

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 687376ab07e..27456574f07 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1311,7 +1311,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 	cpu_clear(cpu, cpu_callout_map);
 	cpu_clear(cpu, cpu_callin_map);
 	/* was set by cpu_init() */
-	clear_bit(cpu, (unsigned long *)&cpu_initialized);
+	cpu_clear(cpu, cpu_initialized);
 	numa_remove_cpu(cpu);
 }
 
-- 
cgit v1.2.3


From 95c7c23b06bc92f1772b9c9460845f179ba8c39e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 15 Jul 2008 13:42:34 -0700
Subject: xen: report hypervisor version

Various versions of the hypervisor have differences in what ABIs and
features they support.  Print some details into the boot log to help
with remote debugging.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bb508456ef5..5328e46d9cf 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -167,10 +167,14 @@ void xen_vcpu_restore(void)
 
 static void __init xen_banner(void)
 {
+	unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	struct xen_extraversion extra;
+	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
+
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 	       pv_info.name);
-	printk(KERN_INFO "Hypervisor signature: %s%s\n",
-	       xen_start_info->magic,
+	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
+	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
-- 
cgit v1.2.3


From fbdb7da91b0382d4b148d8b43c2eb4bab642bb5b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 14 Jul 2008 15:34:09 -0700
Subject: x86_64: ia32_signal.c: use macro instead of immediate

Make and use macro FIX_EFLAGS, instead of immediate value 0x40DD5 in
ia32_restore_sigcontext().

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cb3856a18c8..dc9b9b9803f 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -36,6 +36,11 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
+#define FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
+			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+			 X86_EFLAGS_CF)
+
 asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
@@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	regs->ss |= 3;
 
 	err |= __get_user(tmpflags, &sc->flags);
-	regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+	regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 	/* disable syscall checks */
 	regs->orig_ax = -1;
 
-- 
cgit v1.2.3


From 1f067167a83d1c7f80437fd1d32b55508aaca009 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 15 Jul 2008 00:02:28 -0700
Subject: x86: separate memtest from init_64.c

it's separate functionality that deserves its own file.

This also prepares 32-bit memtest support.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/Makefile  |   1 +
 arch/x86/mm/init_64.c | 112 ---------------------------------------------
 arch/x86/mm/memtest.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+), 112 deletions(-)
 create mode 100644 arch/x86/mm/memtest.c

(limited to 'arch')

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9873716e9f7..1fbb844c3d7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
 endif
 obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o
 
+obj-$(CONFIG_MEMTEST)		+= memtest.o
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 306049edd55..ec37121f670 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -517,118 +517,6 @@ static void __init init_gbpages(void)
 		direct_gbpages = 0;
 }
 
-#ifdef CONFIG_MEMTEST
-
-static void __init memtest(unsigned long start_phys, unsigned long size,
-				 unsigned pattern)
-{
-	unsigned long i;
-	unsigned long *start;
-	unsigned long start_bad;
-	unsigned long last_bad;
-	unsigned long val;
-	unsigned long start_phys_aligned;
-	unsigned long count;
-	unsigned long incr;
-
-	switch (pattern) {
-	case 0:
-		val = 0UL;
-		break;
-	case 1:
-		val = -1UL;
-		break;
-	case 2:
-		val = 0x5555555555555555UL;
-		break;
-	case 3:
-		val = 0xaaaaaaaaaaaaaaaaUL;
-		break;
-	default:
-		return;
-	}
-
-	incr = sizeof(unsigned long);
-	start_phys_aligned = ALIGN(start_phys, incr);
-	count = (size - (start_phys_aligned - start_phys))/incr;
-	start = __va(start_phys_aligned);
-	start_bad = 0;
-	last_bad = 0;
-
-	for (i = 0; i < count; i++)
-		start[i] = val;
-	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-		if (*start != val) {
-			if (start_phys_aligned == last_bad + incr) {
-				last_bad += incr;
-			} else {
-				if (start_bad) {
-					printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-						val, start_bad, last_bad + incr);
-					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-				}
-				start_bad = last_bad = start_phys_aligned;
-			}
-		}
-	}
-	if (start_bad) {
-		printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-			val, start_bad, last_bad + incr);
-		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-	}
-
-}
-
-/* default is disabled */
-static int memtest_pattern __initdata;
-
-static int __init parse_memtest(char *arg)
-{
-	if (arg)
-		memtest_pattern = simple_strtoul(arg, NULL, 0);
-	return 0;
-}
-
-early_param("memtest", parse_memtest);
-
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
-	u64 t_start, t_size;
-	unsigned pattern;
-
-	if (!memtest_pattern)
-		return;
-
-	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
-	for (pattern = 0; pattern < memtest_pattern; pattern++) {
-		t_start = start;
-		t_size = 0;
-		while (t_start < end) {
-			t_start = find_e820_area_size(t_start, &t_size, 1);
-
-			/* done ? */
-			if (t_start >= end)
-				break;
-			if (t_start + t_size > end)
-				t_size = end - t_start;
-
-			printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
-				(unsigned long long)t_start,
-				(unsigned long long)t_start + t_size, pattern);
-
-			memtest(t_start, t_size, pattern);
-
-			t_start += t_size;
-		}
-	}
-	printk(KERN_CONT "\n");
-}
-#else
-static void __init early_memtest(unsigned long start, unsigned long end)
-{
-}
-#endif
-
 static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 						unsigned long end,
 						unsigned long page_size_mask)
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
new file mode 100644
index 00000000000..672e17f8262
--- /dev/null
+++ b/arch/x86/mm/memtest.c
@@ -0,0 +1,123 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pfn.h>
+
+#include <asm/e820.h>
+
+static void __init memtest(unsigned long start_phys, unsigned long size,
+				 unsigned pattern)
+{
+	unsigned long i;
+	unsigned long *start;
+	unsigned long start_bad;
+	unsigned long last_bad;
+	unsigned long val;
+	unsigned long start_phys_aligned;
+	unsigned long count;
+	unsigned long incr;
+
+	switch (pattern) {
+	case 0:
+		val = 0UL;
+		break;
+	case 1:
+		val = -1UL;
+		break;
+	case 2:
+#ifdef CONFIG_X86_64
+		val = 0x5555555555555555UL;
+#else
+		val = 0x55555555UL;
+#endif
+		break;
+	case 3:
+#ifdef CONFIG_X86_64
+		val = 0xaaaaaaaaaaaaaaaaUL;
+#else
+		val = 0xaaaaaaaaUL;
+#endif
+		break;
+	default:
+		return;
+	}
+
+	incr = sizeof(unsigned long);
+	start_phys_aligned = ALIGN(start_phys, incr);
+	count = (size - (start_phys_aligned - start_phys))/incr;
+	start = __va(start_phys_aligned);
+	start_bad = 0;
+	last_bad = 0;
+
+	for (i = 0; i < count; i++)
+		start[i] = val;
+	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
+		if (*start != val) {
+			if (start_phys_aligned == last_bad + incr) {
+				last_bad += incr;
+			} else {
+				if (start_bad) {
+					printk(KERN_CONT "\n  %010lx bad mem addr %010lx - %010lx reserved",
+						val, start_bad, last_bad + incr);
+					reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+				}
+				start_bad = last_bad = start_phys_aligned;
+			}
+		}
+	}
+	if (start_bad) {
+		printk(KERN_CONT "\n  %016lx bad mem addr %010lx - %010lx reserved",
+			val, start_bad, last_bad + incr);
+		reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+	}
+
+}
+
+/* default is disabled */
+static int memtest_pattern __initdata;
+
+static int __init parse_memtest(char *arg)
+{
+	if (arg)
+		memtest_pattern = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("memtest", parse_memtest);
+
+void __init early_memtest(unsigned long start, unsigned long end)
+{
+	u64 t_start, t_size;
+	unsigned pattern;
+
+	if (!memtest_pattern)
+		return;
+
+	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
+	for (pattern = 0; pattern < memtest_pattern; pattern++) {
+		t_start = start;
+		t_size = 0;
+		while (t_start < end) {
+			t_start = find_e820_area_size(t_start, &t_size, 1);
+
+			/* done ? */
+			if (t_start >= end)
+				break;
+			if (t_start + t_size > end)
+				t_size = end - t_start;
+
+			printk(KERN_CONT "\n  %010llx - %010llx pattern %d",
+				(unsigned long long)t_start,
+				(unsigned long long)t_start + t_size, pattern);
+
+			memtest(t_start, t_size, pattern);
+
+			t_start += t_size;
+		}
+	}
+	printk(KERN_CONT "\n");
+}
-- 
cgit v1.2.3


From caadbdce240c43e3e46c82fce6c00eb7f01e1beb Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 15 Jul 2008 00:03:44 -0700
Subject: x86: enable memory tester support on 32-bit

only supports memory below max_low_pfn.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig      | 1 -
 arch/x86/mm/init_32.c | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96e0c2ebc38..03980cb0429 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG
 
 config MEMTEST
 	bool "Memtest"
-	depends on X86_64
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
 	  to be set.
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9689a5138e6..3eeab6d0065 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -844,6 +844,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 		reserve_early(table_start << PAGE_SHIFT,
 				 table_end << PAGE_SHIFT, "PGTABLE");
 
+	if (!after_init_bootmem)
+		early_memtest(start, end);
+
 	return end >> PAGE_SHIFT;
 }
 
-- 
cgit v1.2.3


From 78cbac65fd77242f3e5d77f4d7a71e8bc869fe4d Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@fastmail.fm>
Date: Thu, 10 Jul 2008 21:14:52 +0200
Subject: x86: traps_xx: refactor die() like in x86_64

Make the diff between the traps_32.c and traps_64.c a bit smaller.

Change traps_32.c to look more like traps_64.c:
 - move lock information to file scope
 - split out oops_begin() and oops_end() from die()
 - increment nest counter in oops_begin

Only whitespace change in traps_64.c

No functional changes intended.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps_32.c | 95 +++++++++++++++++++++++++---------------------
 arch/x86/kernel/traps_64.c |  2 +-
 2 files changed, 52 insertions(+), 45 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 8a768973c4f..51cccde376a 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -383,6 +383,54 @@ int is_valid_bugaddr(unsigned long ip)
 	return ud2 == 0x0b0f;
 }
 
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+	unsigned long flags;
+
+	oops_enter();
+
+	if (die_owner != raw_smp_processor_id()) {
+		console_verbose();
+		raw_local_irq_save(flags);
+		__raw_spin_lock(&die_lock);
+		die_owner = smp_processor_id();
+		die_nest_count = 0;
+		bust_spinlocks(1);
+	} else {
+		raw_local_irq_save(flags);
+	}
+	die_nest_count++;
+	return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+	bust_spinlocks(0);
+	die_owner = -1;
+	add_taint(TAINT_DIE);
+	__raw_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
+
+	if (!regs)
+		return;
+
+	if (kexec_should_crash(current))
+		crash_kexec(regs);
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+
+	oops_exit();
+	do_exit(signr);
+}
+
 int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned short ss;
@@ -423,31 +471,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
  */
 void die(const char *str, struct pt_regs *regs, long err)
 {
-	static struct {
-		raw_spinlock_t lock;
-		u32 lock_owner;
-		int lock_owner_depth;
-	} die = {
-		.lock =			__RAW_SPIN_LOCK_UNLOCKED,
-		.lock_owner =		-1,
-		.lock_owner_depth =	0
-	};
-	unsigned long flags;
-
-	oops_enter();
-
-	if (die.lock_owner != raw_smp_processor_id()) {
-		console_verbose();
-		raw_local_irq_save(flags);
-		__raw_spin_lock(&die.lock);
-		die.lock_owner = smp_processor_id();
-		die.lock_owner_depth = 0;
-		bust_spinlocks(1);
-	} else {
-		raw_local_irq_save(flags);
-	}
+	unsigned long flags = oops_begin();
 
-	if (++die.lock_owner_depth < 3) {
+	if (die_nest_count < 3) {
 		report_bug(regs->ip, regs);
 
 		if (__die(str, regs, err))
@@ -456,26 +482,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 		printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
 	}
 
-	bust_spinlocks(0);
-	die.lock_owner = -1;
-	add_taint(TAINT_DIE);
-	__raw_spin_unlock(&die.lock);
-	raw_local_irq_restore(flags);
-
-	if (!regs)
-		return;
-
-	if (kexec_should_crash(current))
-		crash_kexec(regs);
-
-	if (in_interrupt())
-		panic("Fatal exception in interrupt");
-
-	if (panic_on_oops)
-		panic("Fatal exception");
-
-	oops_exit();
-	do_exit(SIGSEGV);
+	oops_end(flags, regs, SIGSEGV);
 }
 
 static inline void
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 2696a683778..babdbe673b7 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -518,7 +518,7 @@ unsigned __kprobes long oops_begin(void)
 }
 
 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{ 
+{
 	die_owner = -1;
 	bust_spinlocks(0);
 	die_nest_count--;
-- 
cgit v1.2.3


From 7dedcee394a3f61475d08002bd12e8068d044216 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Thu, 10 Jul 2008 21:16:39 +0200
Subject: x86: traps_xx: modify x86_64 to use _log_lvl variants

i386 has show_trace_log_lvl and show_stack_log_lvl, allowing
traces to be emitted with log-level annotations. This patch
introduces them to x86_64, but log_lvl is only ever set to
an empty string. Output of traces is unchanged.

i386-chunk is whitespace-only.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps_32.c |  2 +-
 arch/x86/kernel/traps_64.c | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 51cccde376a..c971dce3847 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -256,7 +256,7 @@ static const struct stacktrace_ops print_trace_ops = {
 
 static void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, unsigned long bp, char *log_lvl)
+		unsigned long *stack, unsigned long bp, char *log_lvl)
 {
 	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
 	printk("%s =======================\n", log_lvl);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index babdbe673b7..c664e696200 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -355,17 +355,24 @@ static const struct stacktrace_ops print_trace_ops = {
 	.address = print_trace_address,
 };
 
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp, char *log_lvl)
 {
 	printk("\nCall Trace:\n");
-	dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
+	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
 	printk("\n");
 }
 
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp)
+{
+	show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
 static void
-_show_stack(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *sp, unsigned long bp)
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *sp, unsigned long bp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
@@ -399,12 +406,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs,
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	show_trace(task, regs, sp, bp);
+	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	_show_stack(task, NULL, sp, 0);
+	show_stack_log_lvl(task, NULL, sp, 0, "");
 }
 
 /*
@@ -454,7 +461,8 @@ void show_registers(struct pt_regs *regs)
 		u8 *ip;
 
 		printk("Stack: ");
-		_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
+		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
+				regs->bp, "");
 		printk("\n");
 
 		printk(KERN_EMERG "Code: ");
-- 
cgit v1.2.3


From 3f9b5cc018566ad9562df0648395649aebdbc5e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Jul 2008 16:30:05 +0200
Subject: x86: re-enable OPTIMIZE_INLINING

re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining
outstanding issue in this commit:

| commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
| Author: Jeff Dike <jdike@addtoit.com>
| Date:   Mon Jul 7 13:36:56 2008 -0400
|
|     [UML] fix gcc ICEs and unresolved externs
[...]
|    This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing back the
|    possibility of Uli's crash.  If that happens, we'll debug it.

it's still default-off and thus opt-in.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ae36bfa814e..ffd5913b35d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -287,7 +287,6 @@ config CPA_DEBUG
 
 config OPTIMIZE_INLINING
 	bool "Allow gcc to uninline functions marked 'inline'"
-	depends on BROKEN
 	help
 	  This option determines if the kernel forces gcc to inline the functions
 	  developers have marked 'inline'. Doing so takes away freedom from gcc to
@@ -298,5 +297,7 @@ config OPTIMIZE_INLINING
 	  become the default in the future, until then this option is there to
 	  test gcc for this.
 
+	  If unsure, say N.
+
 endmenu
 
-- 
cgit v1.2.3


From 8b2b9c1af065a45ef00c26964420489a53581779 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 15 Jul 2008 17:09:03 +0900
Subject: x86, intel_cacheinfo: fix use-after-free cache_kobject

This avoids calling kobject_uevent() with cache_kobject that has
already been deallocated in an error path.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 2c8afafa18e..ff517f0b8cc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 			}
 			kobject_put(per_cpu(cache_kobject, cpu));
 			cpuid4_cache_sysfs_exit(cpu);
-			break;
+			return retval;
 		}
 		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
 	}
-	if (!retval)
-		cpu_set(cpu, cache_dev_map);
+	cpu_set(cpu, cache_dev_map);
 
 	kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
-	return retval;
+	return 0;
 }
 
 static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
-- 
cgit v1.2.3


From 47129654226b5bd418afe533ce4e11d6a0b6d6e4 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Sun, 6 Jul 2008 20:13:49 +0400
Subject: x86 setup.c: cleanup includes

x86: remove double includes in setup.c

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Cc: yhlu.kernel@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 531b55b8e81..4a2b8acc1d9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -57,12 +57,8 @@
 #include <linux/slab.h>
 #include <linux/user.h>
 #include <linux/delay.h>
-#include <linux/highmem.h>
 
 #include <linux/kallsyms.h>
-#include <linux/edd.h>
-#include <linux/iscsi_ibft.h>
-#include <linux/kexec.h>
 #include <linux/cpufreq.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
@@ -104,7 +100,6 @@
 #include <asm/paravirt.h>
 
 #include <asm/percpu.h>
-#include <asm/sections.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From 9781f39fd209cd93ab98b669814191acc67f32fd Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 10 Jul 2008 17:13:19 +0200
Subject: x86: consolidate the definition of the force_mwait variable

The force_mwait variable is defined either in
arch/x86/kernel/cpu/amd.c or in arch/x86/kernel/setup_64.c, but it is
only initialized and used in arch/x86/kernel/process.c. This patch
moves the declaration to arch/x86/kernel/process.c.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: michael@free-electrons.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c | 2 --
 arch/x86/kernel/process.c | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 81a07ca65d4..cae9cabc303 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,8 +24,6 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
-int force_mwait __cpuinitdata;
-
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	if (cpuid_eax(0x80000000) >= 0x80000007) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4d629c62f4f..74f2d196adb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,6 +15,7 @@ unsigned long idle_nomwait;
 EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
+static int force_mwait __cpuinitdata;
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-- 
cgit v1.2.3


From 5ff4789d045cdaec7629e027e4f8ff8e34308b81 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 14 Jul 2008 20:11:18 +0200
Subject: AMD IOMMU: set iommu for device from ACPI code too

The device<->iommu relationship has to be set from the information in the ACPI
table too. This patch adds this logic to the driver.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9bf1b8111b0..7661b02d720 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -426,11 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
 	amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
 }
 
+/* Writes the specific IOMMU for a device into the rlookup table */
+static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
+{
+	amd_iommu_rlookup_table[devid] = iommu;
+}
+
 /*
  * This function takes the device specific flags read from the ACPI
  * table and sets up the device table entry with that information
  */
-static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
+static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+					   u16 devid, u32 flags, u32 ext_flags)
 {
 	if (flags & ACPI_DEVFLAG_INITPASS)
 		set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
@@ -446,12 +453,8 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
 		set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
 	if (flags & ACPI_DEVFLAG_LINT1)
 		set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
-}
 
-/* Writes the specific IOMMU for a device into the rlookup table */
-static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
-{
-	amd_iommu_rlookup_table[devid] = iommu;
+	set_iommu_for_device(iommu, devid);
 }
 
 /*
@@ -550,11 +553,12 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 		case IVHD_DEV_ALL:
 			for (dev_i = iommu->first_device;
 					dev_i <= iommu->last_device; ++dev_i)
-				set_dev_entry_from_acpi(dev_i, e->flags, 0);
+				set_dev_entry_from_acpi(iommu, dev_i,
+							e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT:
 			devid = e->devid;
-			set_dev_entry_from_acpi(devid, e->flags, 0);
+			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT_RANGE_START:
 			devid_start = e->devid;
@@ -565,7 +569,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 		case IVHD_DEV_ALIAS:
 			devid = e->devid;
 			devid_to = e->ext >> 8;
-			set_dev_entry_from_acpi(devid, e->flags, 0);
+			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
 			amd_iommu_alias_table[devid] = devid_to;
 			break;
 		case IVHD_DEV_ALIAS_RANGE:
@@ -577,7 +581,8 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			break;
 		case IVHD_DEV_EXT_SELECT:
 			devid = e->devid;
-			set_dev_entry_from_acpi(devid, e->flags, e->ext);
+			set_dev_entry_from_acpi(iommu, devid, e->flags,
+						e->ext);
 			break;
 		case IVHD_DEV_EXT_SELECT_RANGE:
 			devid_start = e->devid;
@@ -590,7 +595,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
 				if (alias)
 					amd_iommu_alias_table[dev_i] = devid_to;
-				set_dev_entry_from_acpi(
+				set_dev_entry_from_acpi(iommu,
 						amd_iommu_alias_table[dev_i],
 						flags, ext_flags);
 			}
-- 
cgit v1.2.3


From 6ac8d51f01d345af5ea4209004a9ea29b2f20891 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 15 Jul 2008 21:09:13 +0530
Subject: x86: introducing asm-x86/traps.h

Declaring x86 traps under one hood.
Declaring x86 do_traps before defining them.

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps_32.c | 21 +--------------------
 arch/x86/kernel/traps_64.c | 22 +---------------------
 2 files changed, 2 insertions(+), 41 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index c971dce3847..03df8e45e5a 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -58,6 +58,7 @@
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include <asm/io.h>
+#include <asm/traps.h>
 
 #include "mach_traps.h"
 
@@ -77,26 +78,6 @@ char ignore_fpu_irq;
 gate_desc idt_table[256]
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
 int panic_on_unrecovered_nmi;
 int kstack_depth_to_print = 24;
 static unsigned int code_bytes = 64;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index c664e696200..3f18d73f420 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -51,30 +51,10 @@
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pda.h>
+#include <asm/traps.h>
 
 #include <mach_traps.h>
 
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void double_fault(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
 int panic_on_unrecovered_nmi;
 int kstack_depth_to_print = 12;
 static unsigned int code_bytes = 64;
-- 
cgit v1.2.3


From 1181f8b5f0302580af0958169ef4497c3eb57a61 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 3 Jul 2008 13:12:13 -0700
Subject: x86_32: remove redundant KERN_INFO

This printk has a KERN_ facility level in the format string.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/signal_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d9237363096..d633d801f85 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 
 badframe:
 	if (show_unhandled_signals && printk_ratelimit()) {
-		printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+		printk("%s%s[%d] bad frame in sigreturn frame:"
 			"%p ip:%lx sp:%lx oeax:%lx",
 		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
 		    current->comm, task_pid_nr(current), frame, regs->ip,
-- 
cgit v1.2.3


From fa10c51a04a43ced5fd6033f19a74d2c82198b34 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Wed, 9 Jul 2008 22:28:24 +0400
Subject: arch/x86/kernel/cpu/common_64.c: remove double inclusions

x86: remove double inclusions in arch/x86/kernel/cpu/common_64.c

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Cc: yhlu.kernel@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common_64.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 7b8cc72feb4..2a4475beea4 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -7,12 +7,9 @@
 #include <linux/module.h>
 #include <linux/kgdb.h>
 #include <linux/topology.h>
-#include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
-#include <linux/module.h>
 #include <linux/percpu.h>
-#include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/msr.h>
 #include <asm/io.h>
-- 
cgit v1.2.3


From 812b121d55316333a3480b294523d4e52f9dd366 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Wed, 16 Jul 2008 19:21:31 -0700
Subject: x86_64: ia32_signal.c: remove signal number conversion

This was old code that was needed for iBCS and x86-64 never supported that.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index dc9b9b9803f..20af4c79579 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -520,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			compat_sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	struct exec_domain *ed = current_thread_info()->exec_domain;
 	void __user *restorer;
 	int err = 0;
 
@@ -543,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	err |= __put_user((ed && ed->signal_invmap && sig < 32
-			   ? ed->signal_invmap[sig] : sig), &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
 	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
 	err |= copy_siginfo_to_user32(&frame->info, info);
-- 
cgit v1.2.3


From f2ba93929fdb91fd806be20e959a50f7db82790e Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:35:37 +0100
Subject: x86: check function status in EDD boot code

Without checking the return value of get_edd_info() and adding the
entry only in the success case, 6 devices show up under
/sys/firmware/edd/, no matter how many devices are actually present.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/boot/edd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 03399d64013..d93cbc6464d 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -167,9 +167,8 @@ void query_edd(void)
 		 * Scan the BIOS-supported hard disks and query EDD
 		 * information...
 		 */
-		get_edd_info(devno, &ei);
-
-		if (boot_params.eddbuf_entries < EDDMAXNR) {
+		if (!get_edd_info(devno, &ei)
+		    && boot_params.eddbuf_entries < EDDMAXNR) {
 			memcpy(edp, &ei, sizeof ei);
 			edp++;
 			boot_params.eddbuf_entries++;
-- 
cgit v1.2.3


From 369c99205f633d1e4038b15f5dc4a5500a4359c3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:37:53 +0100
Subject: x86: fix two modpost warnings

Even though it's only the difference of the two __initdata symbols
that's being calculated, modpost still doesn't like this. So rather
calculate the size once in an __init function and store it for later
use.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/vdso/vma.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 19a6cfaf5db..257ba4a10ab 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -21,7 +21,8 @@ unsigned int __read_mostly vdso_enabled = 1;
 extern char vdso_start[], vdso_end[];
 extern unsigned short vdso_sync_cpuid;
 
-struct page **vdso_pages;
+static struct page **vdso_pages;
+static unsigned vdso_size;
 
 static inline void *var_ref(void *p, char *name)
 {
@@ -38,6 +39,7 @@ static int __init init_vdso_vars(void)
 	int i;
 	char *vbase;
 
+	vdso_size = npages << PAGE_SHIFT;
 	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
 	if (!vdso_pages)
 		goto oom;
@@ -101,20 +103,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
-	unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
 
 	if (!vdso_enabled)
 		return 0;
 
 	down_write(&mm->mmap_sem);
-	addr = vdso_addr(mm->start_stack, len);
-	addr = get_unmapped_area(NULL, addr, len, 0, 0);
+	addr = vdso_addr(mm->start_stack, vdso_size);
+	addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
 	}
 
-	ret = install_special_mapping(mm, addr, len,
+	ret = install_special_mapping(mm, addr, vdso_size,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 				      VM_ALWAYSDUMP,
-- 
cgit v1.2.3


From 08e1a13e7d14ba5d6a22bf4b8c6e11128d3bcdfe Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:44:16 +0100
Subject: x86: reduce forbid_dac's visibility

It's not used anywhere outside its declaring file.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/pci-dma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8467ec2320f..702714bd151 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -9,8 +9,7 @@
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
 
-int forbid_dac __read_mostly;
-EXPORT_SYMBOL(forbid_dac);
+static int forbid_dac __read_mostly;
 
 const struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
-- 
cgit v1.2.3


From 08ad8afaa0f7343e9c64eec5dbbb178e390e03a2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:45:20 +0100
Subject: x86: reduce force_mwait visibility

It's not used anywhere outside its single referencing file.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 2 --
 arch/x86/kernel/process.c | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 81a07ca65d4..cae9cabc303 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,8 +24,6 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
-int force_mwait __cpuinitdata;
-
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	if (cpuid_eax(0x80000000) >= 0x80000007) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 158bd6a16f6..9f94bb1c811 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -199,6 +199,7 @@ static void poll_idle(void)
  *
  * idle=mwait overrides this decision and forces the usage of mwait.
  */
+static int __cpuinitdata force_mwait;
 
 #define MWAIT_INFO			0x05
 #define MWAIT_ECX_EXTENDED_INFO		0x01
-- 
cgit v1.2.3


From 2ddf9b7b3e6660199269e34cfa27148440ddc3bf Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 18 Jul 2008 13:32:23 +0100
Subject: i386/xen: add proper unwind annotations to xen_sysenter_target

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/entry_32.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6bc07f0f120..ad5264c29e9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1024,6 +1024,7 @@ ENDPROC(kernel_thread_helper)
 ENTRY(xen_sysenter_target)
 	RING0_INT_FRAME
 	addl $5*4, %esp		/* remove xen-provided frame */
+	CFI_ADJUST_CFA_OFFSET -5*4
 	jmp sysenter_past_esp
 	CFI_ENDPROC
 
-- 
cgit v1.2.3


From ae79cdaacb5599781f8bb49f4bdd5723029669cf Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 18 Jul 2008 16:08:13 -0700
Subject: x86: Add an arch directory for x86 under debugfs

Add a directory for x86 arch under debugfs. Can be used to accumulate all
x86 specific debugfs files.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/kdebugfs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index c0320599171..f2d43bc7551 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -12,9 +12,13 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 
 #include <asm/setup.h>
 
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
 #ifdef CONFIG_DEBUG_BOOT_PARAMS
 struct setup_data_node {
 	u64 paddr;
@@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void)
 {
 	int error = 0;
 
+	arch_debugfs_dir = debugfs_create_dir("x86", NULL);
+	if (!arch_debugfs_dir)
+		return -ENOMEM;
+
 #ifdef CONFIG_DEBUG_BOOT_PARAMS
 	error = boot_params_kdebugfs_init();
 #endif
-- 
cgit v1.2.3


From fec0962e0bed407927b9ff54bb0596a3ab7e4b61 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 18 Jul 2008 16:08:14 -0700
Subject: x86: Add a debugfs interface to dump PAT memtype

Add a debugfs interface to list out all the PAT memtype reservations.
Appears at debugfs x86/pat_memtype_list and output format is
type @ <start addr>-<end addr>

We do not hold the lock while printing the entire list. So, the list may not be
a consistent copy in case where regions are getting added or deleted
at the same time.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/pat.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d4585077977..0917a540a55 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,6 +12,8 @@
 #include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <asm/msr.h>
 #include <asm/tlbflush.h>
@@ -489,3 +491,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
 
 	free_memtype(addr, addr + size);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* get Nth element of the linked list */
+static struct memtype *memtype_get_idx(loff_t pos)
+{
+	struct memtype *list_node, *print_entry;
+	int i = 1;
+
+	print_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+	if (!print_entry)
+		return NULL;
+
+	spin_lock(&memtype_lock);
+	list_for_each_entry(list_node, &memtype_list, nd) {
+		if (pos == i) {
+			*print_entry = *list_node;
+			spin_unlock(&memtype_lock);
+			return print_entry;
+		}
+		++i;
+	}
+	spin_unlock(&memtype_lock);
+	kfree(print_entry);
+	return NULL;
+}
+
+static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos == 0) {
+		++*pos;
+		seq_printf(seq, "PAT memtype list:\n");
+	}
+
+	return memtype_get_idx(*pos);
+}
+
+static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return memtype_get_idx(*pos);
+}
+
+static void memtype_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int memtype_seq_show(struct seq_file *seq, void *v)
+{
+	struct memtype *print_entry = (struct memtype *)v;
+
+	seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
+			print_entry->start, print_entry->end);
+	kfree(print_entry);
+	return 0;
+}
+
+static struct seq_operations memtype_seq_ops = {
+	.start = memtype_seq_start,
+	.next  = memtype_seq_next,
+	.stop  = memtype_seq_stop,
+	.show  = memtype_seq_show,
+};
+
+static int memtype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &memtype_seq_ops);
+}
+
+static const struct file_operations memtype_fops = {
+	.open    = memtype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static int __init pat_memtype_list_init(void)
+{
+	debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
+				NULL, &memtype_fops);
+	return 0;
+}
+
+late_initcall(pat_memtype_list_init);
+
+#endif /* CONFIG_DEBUG_FS */
-- 
cgit v1.2.3


From e5849e71adcbb774ce40f09c1bcb48acca3b6da7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 18 Jul 2008 17:28:40 -0700
Subject: x86: remove arch_get_ram_range

no user now

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/e820.c | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 28c29180b38..df1b32fa88d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1367,24 +1367,3 @@ void __init setup_memory_map(void)
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
 	e820_print_map(who);
 }
-
-#ifdef CONFIG_X86_64
-int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
-{
-	int i;
-
-	if (slot < 0 || slot >= e820.nr_map)
-		return -1;
-	for (i = slot; i < e820.nr_map; i++) {
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		break;
-	}
-	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
-		return -1;
-	*addr = e820.map[i].addr;
-	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
-		max_pfn << PAGE_SHIFT) - *addr;
-	return i + 1;
-}
-#endif
-- 
cgit v1.2.3


From e4f25060b87a627f5cda84b8134911d43c919458 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 19 Jul 2008 00:44:32 -0700
Subject: sparc: Remove Sparc's asm-offsets for sclow.S

Remove Sparc's asm-offsets for sclow.S as the (E)UID/(E)GID size and
offset definitions will cease to be correct if COW credentials are
merged.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/asm-offsets.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
index cd3f7694e9b..b5bb99ed892 100644
--- a/arch/sparc/kernel/asm-offsets.c
+++ b/arch/sparc/kernel/asm-offsets.c
@@ -18,18 +18,6 @@ int foo(void)
 {
 	DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread));
 	BLANK();
-	/* XXX This is the stuff for sclow.S, kill it. */
-	DEFINE(AOFF_task_pid, offsetof(struct task_struct, pid));
-	DEFINE(AOFF_task_uid, offsetof(struct task_struct, uid));
-	DEFINE(AOFF_task_gid, offsetof(struct task_struct, gid));
-	DEFINE(AOFF_task_euid, offsetof(struct task_struct, euid));
-	DEFINE(AOFF_task_egid, offsetof(struct task_struct, egid));
-	/* DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); */
-	DEFINE(ASIZ_task_uid,	sizeof(current->uid));
-	DEFINE(ASIZ_task_gid,	sizeof(current->gid));
-	DEFINE(ASIZ_task_euid,	sizeof(current->euid));
-	DEFINE(ASIZ_task_egid,	sizeof(current->egid));
-	BLANK();
 	DEFINE(AOFF_thread_fork_kpsr,
 			offsetof(struct thread_struct, fork_kpsr));
 	BLANK();
-- 
cgit v1.2.3


From d092633bff3b19faffc480fe9810805e7792a029 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Jul 2008 00:26:59 +0200
Subject: devmem, x86: fix rename of CONFIG_NONPROMISC_DEVMEM

From: Arjan van de Ven <arjan@infradead.org>
Date: Sat, 19 Jul 2008 15:47:17 -0700

CONFIG_NONPROMISC_DEVMEM was a rather confusing name - but renaming it
to CONFIG_PROMISC_DEVMEM causes problems on architectures that do not
support this feature; this patch renames it to CONFIG_STRICT_DEVMEM,
so that architectures can opt-in into it.

( the polarity of the option is still the same as it was originally; it
  needs to be for now to not break architectures that don't have the
  infrastructure yet to support this feature)

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "V.Radhakrishnan" <rk@atr-labs.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
---
 arch/x86/Kconfig.debug            | 9 +++++----
 arch/x86/configs/i386_defconfig   | 2 +-
 arch/x86/configs/x86_64_defconfig | 2 +-
 arch/x86/mm/pat.c                 | 6 +++---
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f0cf5d99079..51c82147795 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,14 +5,15 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
-config PROMISC_DEVMEM
-	bool "Allow unlimited access to /dev/mem"
-	default y
+config STRICT_DEVMEM
+	bool "Filter access to /dev/mem"
 	help
 	  If this option is left on, you allow userspace (root) access to all
 	  of memory, including kernel and userspace memory. Accidental
 	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel.
+	  be used by people debugging the kernel. Note that with PAT support
+	  enabled, even in this case there are restrictions on /dev/mem
+	  use due to the cache aliasing requirements.
 
 	  If this option is switched on, the /dev/mem file only allows
 	  userspace access to PCI space and the BIOS code and data regions.
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 9bc34e2033e..4d73f53287b 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -2047,7 +2047,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 # CONFIG_SAMPLES is not set
 # CONFIG_KGDB is not set
 CONFIG_HAVE_ARCH_KGDB=y
-# CONFIG_NONPROMISC_DEVMEM is not set
+# CONFIG_STRICT_DEVMEM is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index ae5124e064d..a4045242962 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -2012,7 +2012,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 # CONFIG_SAMPLES is not set
 # CONFIG_KGDB is not set
 CONFIG_HAVE_ARCH_KGDB=y
-# CONFIG_NONPROMISC_DEVMEM is not set
+# CONFIG_STRICT_DEVMEM is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index c34dc483839..6bb597f4d70 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -373,8 +373,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 	return vma_prot;
 }
 
-#ifndef CONFIG_PROMISC_DEVMEM
-/* This check is done in drivers/char/mem.c in case of !PROMISC_DEVMEM*/
+#ifdef CONFIG_STRICT_DEVMEM
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -398,7 +398,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	}
 	return 1;
 }
-#endif /* CONFIG_PROMISC_DEVMEM */
+#endif /* CONFIG_STRICT_DEVMEM */
 
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
-- 
cgit v1.2.3


From 5f1f2b3d9dbaee82cd532f28da459adcbf611499 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Fri, 18 Jul 2008 16:16:23 -0700
Subject: x86: improve debug printout: add target bootmem range in
 early_res_to_bootmem()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index df1b32fa88d..6c60aeaac15 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end)
 	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
 		count++;
 
-	printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count);
+	printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
+			 count, start, end);
 	for (i = 0; i < count; i++) {
 		struct early_res *r = &early_res[i];
 		printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
-- 
cgit v1.2.3


From 3c9cb6de1e5ad37d1558fdb0d9d2bed5a7bac0d9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 02:07:25 -0700
Subject: x86: introduce x86_quirks

introduce x86_quirks array of boot-time quirk methods.

No change in functionality intended.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/e820.c         |  9 ++-------
 arch/x86/kernel/mpparse.c      | 17 +++++------------
 arch/x86/kernel/setup.c        |  4 ++++
 arch/x86/kernel/visws_quirks.c | 42 ++++++++++++++++++++----------------------
 arch/x86/mach-default/setup.c  | 24 ++++++++----------------
 5 files changed, 39 insertions(+), 57 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6c60aeaac15..9af89078f7b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1299,11 +1299,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/*
- * Non-standard memory setup can be specified via this quirk:
- */
-char * (*arch_memory_setup_quirk)(void);
-
 char *__init default_machine_specific_memory_setup(void)
 {
 	char *who = "BIOS-e820";
@@ -1344,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void)
 
 char *__init __attribute__((weak)) machine_specific_memory_setup(void)
 {
-	if (arch_memory_setup_quirk) {
-		char *who = arch_memory_setup_quirk();
+	if (x86_quirks->arch_memory_setup) {
+		char *who = x86_quirks->arch_memory_setup();
 
 		if (who)
 			return who;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3b25e49380c..3cbd2df3abe 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
 #include <asm/bios_ebda.h>
 #include <asm/e820.h>
 #include <asm/trampoline.h>
+#include <asm/setup.h>
 
 #include <mach_apic.h>
 #ifdef CONFIG_X86_32
@@ -725,12 +726,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 
 static struct intel_mp_floating *mpf_found;
 
-/*
- * Machine specific quirk for finding the SMP config before other setup
- * activities destroy the table:
- */
-int (*mach_get_smp_config_quirk)(unsigned int early);
-
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
@@ -738,8 +733,8 @@ static void __init __get_smp_config(unsigned int early)
 {
 	struct intel_mp_floating *mpf = mpf_found;
 
-	if (mach_get_smp_config_quirk) {
-		if (mach_get_smp_config_quirk(early))
+	if (x86_quirks->mach_get_smp_config) {
+		if (x86_quirks->mach_get_smp_config(early))
 			return;
 	}
 	if (acpi_lapic && early)
@@ -899,14 +894,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	return 0;
 }
 
-int (*mach_find_smp_config_quirk)(unsigned int reserve);
-
 static void __init __find_smp_config(unsigned int reserve)
 {
 	unsigned int address;
 
-	if (mach_find_smp_config_quirk) {
-		if (mach_find_smp_config_quirk(reserve))
+	if (x86_quirks->mach_find_smp_config) {
+		if (x86_quirks->mach_find_smp_config(reserve))
 			return;
 	}
 	/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4a2b8acc1d9..bbcc13d0b56 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -574,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
+static struct x86_quirks default_x86_quirks __initdata;
+
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index e94bdb6add1..41e01b145c4 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -73,7 +73,7 @@ int is_visws_box(void)
 	return visws_board_type >= 0;
 }
 
-static int __init visws_time_init_quirk(void)
+static int __init visws_time_init(void)
 {
 	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
 
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void)
 	return 0;
 }
 
-static int __init visws_pre_intr_init_quirk(void)
+static int __init visws_pre_intr_init(void)
 {
 	init_VISWS_APIC_irqs();
 
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size);
 
 long long mem_size __initdata = 0;
 
-static char * __init visws_memory_setup_quirk(void)
+static char * __init visws_memory_setup(void)
 {
 	long long gfx_mem_size = 8 * MB;
 
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void)
 	outl(PIIX_SPECIAL_STOP, 0xCFC);
 }
 
-static int __init visws_get_smp_config_quirk(unsigned int early)
+static int __init visws_get_smp_config(unsigned int early)
 {
 	/*
 	 * Prevent MP-table parsing by the generic code:
@@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus;
  * No problem for Linux.
  */
 
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
 {
 	int ver, logical_apicid;
 	physid_mask_t apic_cpus;
@@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	apic_version[m->mpc_apicid] = ver;
 }
 
-int __init visws_find_smp_config_quirk(unsigned int reserve)
+static int __init visws_find_smp_config(unsigned int reserve)
 {
 	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
 	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve)
 	return 1;
 }
 
-extern int visws_trap_init_quirk(void);
+static int visws_trap_init(void);
+
+static struct x86_quirks visws_x86_quirks __initdata = {
+	.arch_time_init		= visws_time_init,
+	.arch_pre_intr_init	= visws_pre_intr_init,
+	.arch_memory_setup	= visws_memory_setup,
+	.arch_intr_init		= NULL,
+	.arch_trap_init		= visws_trap_init,
+	.mach_get_smp_config	= visws_get_smp_config,
+	.mach_find_smp_config	= visws_find_smp_config,
+};
 
 void __init visws_early_detect(void)
 {
@@ -272,16 +282,10 @@ void __init visws_early_detect(void)
 
 	/*
 	 * Install special quirks for timer, interrupt and memory setup:
-	 */
-	arch_time_init_quirk		= visws_time_init_quirk;
-	arch_pre_intr_init_quirk	= visws_pre_intr_init_quirk;
-	arch_memory_setup_quirk		= visws_memory_setup_quirk;
-
-	/*
 	 * Fall back to generic behavior for traps:
+	 * Override generic MP-table parsing:
 	 */
-	arch_intr_init_quirk		= NULL;
-	arch_trap_init_quirk		= visws_trap_init_quirk;
+	x86_quirks = &visws_x86_quirks;
 
 	/*
 	 * Install reboot quirks:
@@ -294,12 +298,6 @@ void __init visws_early_detect(void)
 	 */
 	no_broadcast = 0;
 
-	/*
-	 * Override generic MP-table parsing:
-	 */
-	mach_get_smp_config_quirk	= visws_get_smp_config_quirk;
-	mach_find_smp_config_quirk	= visws_find_smp_config_quirk;
-
 #ifdef CONFIG_X86_IO_APIC
 	/*
 	 * Turn off IO-APIC detection and initialization:
@@ -426,7 +424,7 @@ static __init void cobalt_init(void)
 		co_apic_read(CO_APIC_ID));
 }
 
-int __init visws_trap_init_quirk(void)
+static int __init visws_trap_init(void)
 {
 	lithium_init();
 	cobalt_init();
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 48278fa7d3d..631dbed9fb9 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,14 +10,6 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
-/*
- * Any quirks to be performed to initialize timers/irqs/etc?
- */
-int (*arch_time_init_quirk)(void);
-int (*arch_pre_intr_init_quirk)(void);
-int (*arch_intr_init_quirk)(void);
-int (*arch_trap_init_quirk)(void);
-
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
@@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI;
  **/
 void __init pre_intr_init_hook(void)
 {
-	if (arch_pre_intr_init_quirk) {
-		if (arch_pre_intr_init_quirk())
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
 			return;
 	}
 	init_ISA_irqs();
@@ -64,8 +56,8 @@ static struct irqaction irq2 = {
  **/
 void __init intr_init_hook(void)
 {
-	if (arch_intr_init_quirk) {
-		if (arch_intr_init_quirk())
+	if (x86_quirks->arch_intr_init) {
+		if (x86_quirks->arch_intr_init())
 			return;
 	}
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void)
  **/
 void __init trap_init_hook(void)
 {
-	if (arch_trap_init_quirk) {
-		if (arch_trap_init_quirk())
+	if (x86_quirks->arch_trap_init) {
+		if (x86_quirks->arch_trap_init())
 			return;
 	}
 }
@@ -119,13 +111,13 @@ static struct irqaction irq0  = {
  **/
 void __init time_init_hook(void)
 {
-	if (arch_time_init_quirk) {
+	if (x86_quirks->arch_time_init) {
 		/*
 		 * A nonzero return code does not mean failure, it means
 		 * that the architecture quirk does not want any
 		 * generic (timer) setup to be performed after this:
 		 */
-		if (arch_time_init_quirk())
+		if (x86_quirks->arch_time_init())
 			return;
 	}
 
-- 
cgit v1.2.3


From 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 18:01:16 -0700
Subject: x86: extend and use x86_quirks to clean up NUMAQ code

add these new x86_quirks methods:

	int *mpc_record;
	int (*mpc_apic_id)(struct mpc_config_processor *m);
	void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
	void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                    unsigned short oemsize);

... and move NUMAQ related mps table handling to numaq_32.c.

also move the call to smp_read_mpc_oem() to smp_read_mpc() directly.

Should not change functionality, albeit it would be nice to get it
tested on real NUMAQ as well ...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c  | 191 +++++----------------------------------------
 arch/x86/kernel/numaq_32.c | 190 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 200 insertions(+), 181 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3cbd2df3abe..6ae005ccaed 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 	return sum & 0xFF;
 }
 
-#ifdef CONFIG_X86_NUMAQ
-int found_numaq;
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-struct mpc_config_translation {
-	unsigned char mpc_type;
-	unsigned char trans_len;
-	unsigned char trans_type;
-	unsigned char trans_quad;
-	unsigned char trans_global;
-	unsigned char trans_local;
-	unsigned short trans_reserved;
-};
-
-
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
-    __cpuinitdata;
-
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
-	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
-
-static inline int mpc_apic_id(struct mpc_config_processor *m,
-			struct mpc_config_translation *translation_record)
-{
-	int quad = translation_record->trans_quad;
-	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
-
-	printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver, quad, logical_apicid);
-	return logical_apicid;
-}
-
-int mp_bus_id_to_node[MAX_MP_BUSSES];
-
-int mp_bus_id_to_local[MAX_MP_BUSSES];
-
-static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-	struct mpc_config_translation *translation)
-{
-	int quad = translation->trans_quad;
-	int local = translation->trans_local;
-
-	mp_bus_id_to_node[m->mpc_busid] = quad;
-	mp_bus_id_to_local[m->mpc_busid] = local;
-	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-	       m->mpc_busid, name, quad);
-}
-
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-static void mpc_oem_pci_bus(struct mpc_config_bus *m,
-	struct mpc_config_translation *translation)
-{
-	int quad = translation->trans_quad;
-	int local = translation->trans_local;
-
-	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
-}
-
-#endif
-
 static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 {
 	int apicid;
@@ -128,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 		disabled_cpus++;
 		return;
 	}
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
-		apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+	if (x86_quirks->mpc_apic_id)
+		apicid = x86_quirks->mpc_apic_id(m);
 	else
 		apicid = m->mpc_apicid;
-#else
-	apicid = m->mpc_apicid;
-#endif
+
 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
 		bootup_cpu = " (Bootup-CPU)";
 		boot_cpu_physical_apicid = m->mpc_apicid;
@@ -152,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 	memcpy(str, m->mpc_bustype, 6);
 	str[6] = 0;
 
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
-		mpc_oem_bus_info(m, str, translation_table[mpc_record]);
-#else
-	printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
-#endif
+	if (x86_quirks->mpc_oem_bus_info)
+		x86_quirks->mpc_oem_bus_info(m, str);
+	else
+		printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
 
 #if MAX_MP_BUSSES < 256
 	if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -174,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-#ifdef CONFIG_X86_NUMAQ
-		if (found_numaq)
-			mpc_oem_pci_bus(m, translation_table[mpc_record]);
-#endif
+		if (x86_quirks->mpc_oem_pci_bus)
+			x86_quirks->mpc_oem_pci_bus(m);
+
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
@@ -317,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
 		m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
 }
 
-#ifdef CONFIG_X86_NUMAQ
-static void __init MP_translation_info(struct mpc_config_translation *m)
-{
-	printk(KERN_INFO
-	       "Translation: record %d, type %d, quad %d, global %d, local %d\n",
-	       mpc_record, m->trans_type, m->trans_quad, m->trans_global,
-	       m->trans_local);
-
-	if (mpc_record >= MAX_MPC_ENTRY)
-		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
-	else
-		translation_table[mpc_record] = m;	/* stash this for later */
-	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
-		node_set_online(m->trans_quad);
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
-				    unsigned short oemsize)
-{
-	int count = sizeof(*oemtable);	/* the header size */
-	unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-
-	mpc_record = 0;
-	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
-	       oemtable);
-	if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
-		printk(KERN_WARNING
-		       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
-		       oemtable->oem_signature[0], oemtable->oem_signature[1],
-		       oemtable->oem_signature[2], oemtable->oem_signature[3]);
-		return;
-	}
-	if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
-		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
-		return;
-	}
-	while (count < oemtable->oem_length) {
-		switch (*oemptr) {
-		case MP_TRANSLATION:
-			{
-				struct mpc_config_translation *m =
-				    (struct mpc_config_translation *)oemptr;
-				MP_translation_info(m);
-				oemptr += sizeof(*m);
-				count += sizeof(*m);
-				++mpc_record;
-				break;
-			}
-		default:
-			{
-				printk(KERN_WARNING
-				       "Unrecognised OEM table entry type! - %d\n",
-				       (int)*oemptr);
-				return;
-			}
-		}
-	}
-}
-
-void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-				 char *productid)
-{
-	if (strncmp(oem, "IBM NUMA", 8))
-		printk("Warning!  Not a NUMA-Q system!\n");
-	else
-		found_numaq = 1;
-
-	if (mpc->mpc_oemptr)
-		smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
-				 mpc->mpc_oemsize);
-}
-#endif /* CONFIG_X86_NUMAQ */
-
 /*
  * Read/parse the MPC
  */
@@ -458,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 	} else
 		mps_oem_check(mpc, oem, str);
 #endif
-
 	/* save the local APIC address, it might be non-default */
 	if (!acpi_lapic)
 		mp_lapic_addr = mpc->mpc_lapic;
@@ -466,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 	if (early)
 		return 1;
 
+	if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
+		struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
+		x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
+	}
+
 	/*
 	 *      Now process the configuration blocks.
 	 */
-#ifdef CONFIG_X86_NUMAQ
-	mpc_record = 0;
-#endif
+	if (x86_quirks->mpc_record)
+		*x86_quirks->mpc_record = 0;
+
 	while (count < mpc->mpc_length) {
 		switch (*mpt) {
 		case MP_PROCESSOR:
@@ -537,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
 			count = mpc->mpc_length;
 			break;
 		}
-#ifdef CONFIG_X86_NUMAQ
-		++mpc_record;
-#endif
+		if (x86_quirks->mpc_record)
+			(*x86_quirks->mpc_record)++;
 	}
 
 #ifdef CONFIG_X86_GENERICARCH
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index a23e8233b9a..7f4e00d1d89 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -33,6 +33,7 @@
 #include <asm/processor.h>
 #include <asm/mpspec.h>
 #include <asm/e820.h>
+#include <asm/setup.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
@@ -71,6 +72,181 @@ static void __init smp_dump_qct(void)
 	}
 }
 
+
+void __init numaq_tsc_disable(void)
+{
+	if (!found_numaq)
+		return;
+
+	if (num_online_nodes() > 1) {
+		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+		setup_clear_cpu_cap(X86_FEATURE_TSC);
+	}
+}
+
+int found_numaq;
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+struct mpc_config_translation {
+	unsigned char mpc_type;
+	unsigned char trans_len;
+	unsigned char trans_type;
+	unsigned char trans_quad;
+	unsigned char trans_global;
+	unsigned char trans_local;
+	unsigned short trans_reserved;
+};
+
+/* x86_quirks member */
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
+    __cpuinitdata;
+
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+/* x86_quirks member */
+static int mpc_apic_id(struct mpc_config_processor *m)
+{
+	int quad = translation_table[mpc_record]->trans_quad;
+	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+
+	printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver, quad, logical_apicid);
+	return logical_apicid;
+}
+
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+
+/* x86_quirks member */
+static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
+{
+	int quad = translation_table[mpc_record]->trans_quad;
+	int local = translation_table[mpc_record]->trans_local;
+
+	mp_bus_id_to_node[m->mpc_busid] = quad;
+	mp_bus_id_to_local[m->mpc_busid] = local;
+	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
+	       m->mpc_busid, name, quad);
+}
+
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+
+/* x86_quirks member */
+static void mpc_oem_pci_bus(struct mpc_config_bus *m)
+{
+	int quad = translation_table[mpc_record]->trans_quad;
+	int local = translation_table[mpc_record]->trans_local;
+
+	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+}
+
+static void __init MP_translation_info(struct mpc_config_translation *m)
+{
+	printk(KERN_INFO
+	       "Translation: record %d, type %d, quad %d, global %d, local %d\n",
+	       mpc_record, m->trans_type, m->trans_quad, m->trans_global,
+	       m->trans_local);
+
+	if (mpc_record >= MAX_MPC_ENTRY)
+		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+	else
+		translation_table[mpc_record] = m;	/* stash this for later */
+	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+		node_set_online(m->trans_quad);
+}
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+	int sum = 0;
+
+	while (len--)
+		sum += *mp++;
+
+	return sum & 0xFF;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+				    unsigned short oemsize)
+{
+	int count = sizeof(*oemtable);	/* the header size */
+	unsigned char *oemptr = ((unsigned char *)oemtable) + count;
+
+	mpc_record = 0;
+	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
+	       oemtable);
+	if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
+		printk(KERN_WARNING
+		       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+		       oemtable->oem_signature[0], oemtable->oem_signature[1],
+		       oemtable->oem_signature[2], oemtable->oem_signature[3]);
+		return;
+	}
+	if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
+		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+		return;
+	}
+	while (count < oemtable->oem_length) {
+		switch (*oemptr) {
+		case MP_TRANSLATION:
+			{
+				struct mpc_config_translation *m =
+				    (struct mpc_config_translation *)oemptr;
+				MP_translation_info(m);
+				oemptr += sizeof(*m);
+				count += sizeof(*m);
+				++mpc_record;
+				break;
+			}
+		default:
+			{
+				printk(KERN_WARNING
+				       "Unrecognised OEM table entry type! - %d\n",
+				       (int)*oemptr);
+				return;
+			}
+		}
+	}
+}
+
+static struct x86_quirks numaq_x86_quirks __initdata = {
+	.arch_time_init		= NULL,
+	.arch_pre_intr_init	= NULL,
+	.arch_memory_setup	= NULL,
+	.arch_intr_init		= NULL,
+	.arch_trap_init		= NULL,
+	.mach_get_smp_config	= NULL,
+	.mach_find_smp_config	= NULL,
+	.mpc_record		= &mpc_record,
+	.mpc_apic_id		= mpc_apic_id,
+	.mpc_oem_bus_info	= mpc_oem_bus_info,
+	.mpc_oem_pci_bus	= mpc_oem_pci_bus,
+	.smp_read_mpc_oem	= smp_read_mpc_oem,
+};
+
+void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+				 char *productid)
+{
+	if (strncmp(oem, "IBM NUMA", 8))
+		printk("Warning!  Not a NUMA-Q system!\n");
+	else
+		found_numaq = 1;
+}
+
 static __init void early_check_numaq(void)
 {
 	/*
@@ -82,6 +258,9 @@ static __init void early_check_numaq(void)
 	 */
 	if (smp_found_config)
 		early_get_smp_config();
+
+	if (found_numaq)
+		x86_quirks = &numaq_x86_quirks;
 }
 
 int __init get_memcfg_numaq(void)
@@ -92,14 +271,3 @@ int __init get_memcfg_numaq(void)
 	smp_dump_qct();
 	return 1;
 }
-
-void __init numaq_tsc_disable(void)
-{
-	if (!found_numaq)
-		return;
-
-	if (num_online_nodes() > 1) {
-		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
-		setup_clear_cpu_cap(X86_FEATURE_TSC);
-	}
-}
-- 
cgit v1.2.3


From 63b5d7af2556a7de6bf72c5dd0b85a32fb4c3767 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sat, 19 Jul 2008 18:02:26 -0700
Subject: x86: add ->pre_time_init to x86_quirks

so NUMAQ can use that to call numaq_pre_time_init()

This allows us to remove a NUMAQ special case from arch/x86/kernel/setup.c.

(and paves the way to remove the NUMAQ subarch)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/numaq_32.c    |  7 +++++++
 arch/x86/kernel/setup.c       |  8 --------
 arch/x86/kernel/time_32.c     |  1 +
 arch/x86/mach-default/setup.c | 10 ++++++++++
 4 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 7f4e00d1d89..b8c45610b20 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -84,6 +84,12 @@ void __init numaq_tsc_disable(void)
 	}
 }
 
+static int __init numaq_pre_time_init(void)
+{
+	numaq_tsc_disable();
+	return 0;
+}
+
 int found_numaq;
 /*
  * Have to match translation table entries to main table entries by counter
@@ -224,6 +230,7 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
 }
 
 static struct x86_quirks numaq_x86_quirks __initdata = {
+	.arch_pre_time_init	= numaq_pre_time_init,
 	.arch_time_init		= NULL,
 	.arch_pre_intr_init	= NULL,
 	.arch_memory_setup	= NULL,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbcc13d0b56..4064616cfa8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -853,14 +853,6 @@ void __init setup_arch(char **cmdline_p)
 	init_cpu_to_node();
 #endif
 
-#ifdef CONFIG_X86_NUMAQ
-	/*
-	 * need to check online nodes num, call it
-	 * here before time_init/tsc_init
-	 */
-	numaq_tsc_disable();
-#endif
-
 	init_apic_mappings();
 	ioapic_init_mappings();
 
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 059ca6ee59b..ffe3c664afc 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -129,6 +129,7 @@ void __init hpet_time_init(void)
  */
 void __init time_init(void)
 {
+	pre_time_init_hook();
 	tsc_init();
 	late_time_init = choose_time_init();
 }
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 631dbed9fb9..3d317836be9 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -102,6 +102,16 @@ static struct irqaction irq0  = {
 	.name = "timer"
 };
 
+/**
+ * pre_time_init_hook - do any specific initialisations before.
+ *
+ **/
+void __init pre_time_init_hook(void)
+{
+	if (x86_quirks->arch_pre_time_init)
+		x86_quirks->arch_pre_time_init();
+}
+
 /**
  * time_init_hook - do any specific initialisations for the system timer.
  *
-- 
cgit v1.2.3


From e3a61b0a8c0e342e700a61cd554b01050f333a36 Mon Sep 17 00:00:00 2001
From: Simon Arlott <simon@fire.lp0.eu>
Date: Sat, 19 Jul 2008 23:32:54 +0100
Subject: x86: add unknown_nmi_panic kernel parameter

It's not possible to enable the unknown_nmi_panic sysctl option
until init is run. It's useful to be able to panic the kernel
during boot too, so this adds a kernel parameter to enable the option.

Signed-off-by: Simon Arlott <simon@fire.lp0.eu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ec024b3baad..e0b44b7b717 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 
 #ifdef CONFIG_SYSCTL
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 {
 	unsigned char reason = get_nmi_reason();
-- 
cgit v1.2.3


From 8b2cf73cc11cf29a21c51c453a3205f23d888915 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Sun, 27 Apr 2008 12:14:13 -0700
Subject: KVM: add statics where possible, function declaration in lapic.h

Noticed by sparse:
arch/x86/kvm/vmx.c:1583:6: warning: symbol 'vmx_disable_intercept_for_msr' was not declared. Should it be static?
arch/x86/kvm/x86.c:3406:5: warning: symbol 'kvm_task_switch_16' was not declared. Should it be static?
arch/x86/kvm/x86.c:3429:5: warning: symbol 'kvm_task_switch_32' was not declared. Should it be static?
arch/x86/kvm/mmu.c:1968:6: warning: symbol 'kvm_mmu_remove_one_alloc_mmu_page' was not declared. Should it be static?
arch/x86/kvm/mmu.c:2014:6: warning: symbol 'mmu_destroy_caches' was not declared. Should it be static?
arch/x86/kvm/lapic.c:862:5: warning: symbol 'kvm_lapic_get_base' was not declared. Should it be static?
arch/x86/kvm/i8254.c:94:5: warning: symbol 'pit_get_gate' was not declared. Should it be static?
arch/x86/kvm/i8254.c:196:5: warning: symbol '__pit_timer_fn' was not declared. Should it be static?
arch/x86/kvm/i8254.c:561:6: warning: symbol '__inject_pit_timer_intr' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 6 +++---
 arch/x86/kvm/lapic.h | 1 +
 arch/x86/kvm/mmu.c   | 2 +-
 arch/x86/kvm/vmx.c   | 2 +-
 arch/x86/kvm/x86.c   | 4 ++--
 5 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 3829aa7b663..735ec9a0b36 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -91,7 +91,7 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
 	c->gate = val;
 }
 
-int pit_get_gate(struct kvm *kvm, int channel)
+static int pit_get_gate(struct kvm *kvm, int channel)
 {
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
@@ -193,7 +193,7 @@ static void pit_latch_status(struct kvm *kvm, int channel)
 	}
 }
 
-int __pit_timer_fn(struct kvm_kpit_state *ps)
+static int __pit_timer_fn(struct kvm_kpit_state *ps)
 {
 	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
 	struct kvm_kpit_timer *pt = &ps->pit_timer;
@@ -575,7 +575,7 @@ void kvm_free_pit(struct kvm *kvm)
 	}
 }
 
-void __inject_pit_timer_intr(struct kvm *kvm)
+static void __inject_pit_timer_intr(struct kvm *kvm)
 {
 	mutex_lock(&kvm->lock);
 	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 676c396c9ce..81858881287 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,6 +31,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e7c3969f7a..8e449dbcc59 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1948,7 +1948,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	kvm_flush_remote_tlbs(kvm);
 }
 
-void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
 {
 	struct kvm_mmu_page *page;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10ce6ee4c49..39739305980 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1821,7 +1821,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
 {
 	void *va;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0faa2546b1c..45dc2b6a9c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3449,7 +3449,7 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
+static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
 		       struct desc_struct *cseg_desc,
 		       struct desc_struct *nseg_desc)
 {
@@ -3472,7 +3472,7 @@ out:
 	return ret;
 }
 
-int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
+static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
 		       struct desc_struct *cseg_desc,
 		       struct desc_struct *nseg_desc)
 {
-- 
cgit v1.2.3


From c7bf23babc959b186335d2640959a1b8633588de Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:55:59 +0200
Subject: KVM: VMX: move APIC_ACCESS trace entry to generic code

This patch moves the trace entry for APIC accesses from the VMX code to the
generic lapic code. This way APIC accesses from SVM will also be traced.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/lapic.c | 4 ++++
 arch/x86/kvm/vmx.c   | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ebc03f5ae16..f9201fbc61d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -572,6 +572,8 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
 
+	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+
 	if (offset >= LAPIC_MMIO_LENGTH)
 		return 0;
 
@@ -695,6 +697,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 	offset &= 0xff0;
 
+	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+
 	switch (offset) {
 	case APIC_ID:		/* Local APIC ID */
 		apic_set_reg(apic, APIC_ID, val);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 39739305980..8c951d3eab3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2554,8 +2554,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 	offset = exit_qualification & 0xffful;
 
-	KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
-
 	er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
 	if (er !=  EMULATE_DONE) {
-- 
cgit v1.2.3


From c47f098d69ed2bd7343e54095ff4aa2533253bee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:00 +0200
Subject: KVM: SVM: implement dedicated NMI exit handler

With an exit handler for NMI intercepts it's possible to account for them
using kvmtrace.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b0d5fa5bab..8a2118b09fd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1081,6 +1081,11 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
 
+static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
 static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	return 1;
@@ -1365,7 +1370,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
 	[SVM_EXIT_EXCP_BASE + MC_VECTOR] 	= mc_interception,
 	[SVM_EXIT_INTR] 			= nop_on_interception,
-	[SVM_EXIT_NMI]				= nop_on_interception,
+	[SVM_EXIT_NMI]				= nmi_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
 	[SVM_EXIT_INIT]				= nop_on_interception,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
-- 
cgit v1.2.3


From a069805579a390f0fa91694f6963bcc4b2cecc6b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:01 +0200
Subject: KVM: SVM: implement dedicated INTR exit handler

With an exit handler for INTR intercepts it's possible to account for them
using kvmtrace.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8a2118b09fd..0eac1a5060a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1086,6 +1086,12 @@ static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	++svm->vcpu.stat.irq_exits;
+	return 1;
+}
+
 static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	return 1;
@@ -1369,7 +1375,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
 	[SVM_EXIT_EXCP_BASE + MC_VECTOR] 	= mc_interception,
-	[SVM_EXIT_INTR] 			= nop_on_interception,
+	[SVM_EXIT_INTR] 			= intr_interception,
 	[SVM_EXIT_NMI]				= nmi_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
 	[SVM_EXIT_INIT]				= nop_on_interception,
-- 
cgit v1.2.3


From 54e445ca8411ec892f986d9f8c11b8c1806ecde4 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:02 +0200
Subject: KVM: add missing kvmtrace bits

This patch adds some kvmtrace bits to the generic x86 code
where it is instrumented from SVM.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 45dc2b6a9c8..59084a3981c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2020,6 +2020,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
+	KVMTRACE_0D(CLTS, vcpu, handler);
 	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
 	return X86EMUL_CONTINUE;
 }
@@ -2600,27 +2601,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
+	unsigned long value;
+
 	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 	switch (cr) {
 	case 0:
-		return vcpu->arch.cr0;
+		value = vcpu->arch.cr0;
+		break;
 	case 2:
-		return vcpu->arch.cr2;
+		value = vcpu->arch.cr2;
+		break;
 	case 3:
-		return vcpu->arch.cr3;
+		value = vcpu->arch.cr3;
+		break;
 	case 4:
-		return vcpu->arch.cr4;
+		value = vcpu->arch.cr4;
+		break;
 	case 8:
-		return kvm_get_cr8(vcpu);
+		value = kvm_get_cr8(vcpu);
+		break;
 	default:
 		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 		return 0;
 	}
+	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
+		    (u32)((u64)value >> 32), handler);
+
+	return value;
 }
 
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 		     unsigned long *rflags)
 {
+	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
+		    (u32)((u64)val >> 32), handler);
+
 	switch (cr) {
 	case 0:
 		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
-- 
cgit v1.2.3


From af9ca2d703f4cefbf6441bfe127c4191092ad394 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:03 +0200
Subject: KVM: SVM: add missing kvmtrace markers

This patch adds the missing kvmtrace markers to the svm
module of kvm.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0eac1a5060a..8953292acfd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -949,7 +949,9 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
-	return to_svm(vcpu)->db_regs[dr];
+	unsigned long val = to_svm(vcpu)->db_regs[dr];
+	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
+	return val;
 }
 
 static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
@@ -1004,6 +1006,12 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
+
+	if (!npt_enabled)
+		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
+			    (u32)fault_address, (u32)(fault_address >> 32),
+			    handler);
+
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
@@ -1083,12 +1091,14 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
+	KVMTRACE_0D(NMI, &svm->vcpu, handler);
 	return 1;
 }
 
 static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.irq_exits;
+	KVMTRACE_0D(INTR, &svm->vcpu, handler);
 	return 1;
 }
 
@@ -1230,6 +1240,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
 		kvm_inject_gp(&svm->vcpu, 0);
 	else {
+		KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
+			    (u32)(data >> 32), handler);
+
 		svm->vmcb->save.rax = data & 0xffffffff;
 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
 		svm->next_rip = svm->vmcb->save.rip + 2;
@@ -1315,6 +1328,10 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vmcb->save.rax & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+
+	KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
+		    handler);
+
 	svm->next_rip = svm->vmcb->save.rip + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
 		kvm_inject_gp(&svm->vcpu, 0);
@@ -1334,6 +1351,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int interrupt_window_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
+	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
+
 	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -1408,6 +1427,9 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 
+	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
+		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+
 	if (npt_enabled) {
 		int mmu_reload = 0;
 		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1481,6 +1503,8 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
 	struct vmcb_control_area *control;
 
+	KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
+
 	control = &svm->vmcb->control;
 	control->int_vector = irq;
 	control->int_ctl &= ~V_INTR_PRIO_MASK;
-- 
cgit v1.2.3


From d2ebb4103ff349af6dac14955bf93e57487a6694 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 30 Apr 2008 17:56:04 +0200
Subject: KVM: SVM: add tracing support for TDP page faults

To distinguish between real page faults and nested page faults they should be
traced as different events. This is implemented by this patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8953292acfd..218949cce1a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1011,6 +1011,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
 			    (u32)fault_address, (u32)(fault_address >> 32),
 			    handler);
+	else
+		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
+			    (u32)fault_address, (u32)(fault_address >> 32),
+			    handler);
 
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
-- 
cgit v1.2.3


From f697554515b06e8d7264f316b25e6da943407142 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Fri, 2 May 2008 17:02:23 +0200
Subject: KVM: PIT: support mode 3

The in-kernel PIT emulation ignores pending timers if operating
under mode 3, which for example Hurd uses.

This mode should output a square wave, high for (N+1)/2 counts and low
for (N-1)/2 counts. As we only care about the resulting interrupts, the
period is N, and mode 3 is the same as mode 2 with regard to
interrupts.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 735ec9a0b36..60074dc66bd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -308,6 +308,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
+	case 3:
 		create_pit_timer(&ps->pit_timer, val, 1);
 		break;
 	default:
-- 
cgit v1.2.3


From 14ae51b6c068ef7ab52dc2d53fe226e6189f2ab2 Mon Sep 17 00:00:00 2001
From: Chris Lalancette <clalance@redhat.com>
Date: Mon, 5 May 2008 13:05:16 -0400
Subject: KVM: SVM: Fake MSR_K7 performance counters

Attached is a patch that fixes a guest crash when booting older Linux kernels.
The problem stems from the fact that we are currently emulating
MSR_K7_EVNTSEL[0-3], but not emulating MSR_K7_PERFCTR[0-3].  Because of this,
setup_k7_watchdog() in the Linux kernel receives a GPF when it attempts to
write into MSR_K7_PERFCTR, which causes an oops.

The patch fixes it by just "fake" emulating the appropriate MSRs, throwing
away the data in the process.  This causes the NMI watchdog to not actually
work, but it's not such a big deal in a virtualized environment.

When we get a write to one of these counters, we printk_ratelimit() a warning.
I decided to print it out for all writes, even if the data is 0; it doesn't
seem to make sense to me to special case when data == 0.

Tested by myself on a RHEL-4 guest, and Joerg Roedel on a Windows XP 64-bit
guest.

Signed-off-by: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 218949cce1a..992ab711587 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1312,16 +1312,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
 	case MSR_K7_EVNTSEL3:
+	case MSR_K7_PERFCTR0:
+	case MSR_K7_PERFCTR1:
+	case MSR_K7_PERFCTR2:
+	case MSR_K7_PERFCTR3:
 		/*
-		 * only support writing 0 to the performance counters for now
-		 * to make Windows happy. Should be replaced by a real
-		 * performance counter emulation later.
+		 * Just discard all writes to the performance counters; this
+		 * should keep both older linux and windows 64-bit guests
+		 * happy
 		 */
-		if (data != 0)
-			goto unhandled;
+		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
+
 		break;
 	default:
-	unhandled:
 		return kvm_set_msr_common(vcpu, ecx, data);
 	}
 	return 0;
-- 
cgit v1.2.3


From 7682f2d0dd3ff5bd2756eac018a5b4e7e30ef16c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 12 May 2008 19:25:43 +0300
Subject: KVM: VMX: Trivial vmcs_write64() code simplification

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8c951d3eab3..fff8e23433d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -431,10 +431,8 @@ static void vmcs_write32(unsigned long field, u32 value)
 
 static void vmcs_write64(unsigned long field, u64 value)
 {
-#ifdef CONFIG_X86_64
-	vmcs_writel(field, value);
-#else
 	vmcs_writel(field, value);
+#ifndef CONFIG_X86_64
 	asm volatile ("");
 	vmcs_writel(field+1, value >> 32);
 #endif
-- 
cgit v1.2.3


From 1b7fcd3263e5f12dba43d27b64e1578bec070c28 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 15 May 2008 13:51:35 +0300
Subject: KVM: MMU: Fix false flooding when a pte points to page table

The KVM MMU tries to detect when a speculative pte update is not actually
used by demand fault, by checking the accessed bit of the shadow pte.  If
the shadow pte has not been accessed, we deem that page table flooded and
remove the shadow page table, allowing further pte updates to proceed
without emulation.

However, if the pte itself points at a page table and is only used for write
operations, the accessed bit will never be set since all accesses will happen
through the emulator.

This is exactly what happens with kscand on old (2.4.x) HIGHMEM kernels.
The kernel points a kmap_atomic() pte at a page table, and then
proceeds with read-modify-write operations to look at the dirty and accessed
bits.  We get a false flood trigger on the kmap ptes, which results in the
mmu spending all its time setting up and tearing down shadows.

Fix by setting the shadow accessed bit on emulated accesses.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 17 ++++++++++++++++-
 arch/x86/kvm/mmu.h |  3 ++-
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8e449dbcc59..53f1ed852ca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1122,8 +1122,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		else
 			kvm_release_pfn_clean(pfn);
 	}
-	if (!ptwrite || !*ptwrite)
+	if (speculative) {
 		vcpu->arch.last_pte_updated = shadow_pte;
+		vcpu->arch.last_pte_gfn = gfn;
+	}
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -1671,6 +1673,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	vcpu->arch.update_pte.pfn = pfn;
 }
 
+static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u64 *spte = vcpu->arch.last_pte_updated;
+
+	if (spte
+	    && vcpu->arch.last_pte_gfn == gfn
+	    && shadow_accessed_mask
+	    && !(*spte & shadow_accessed_mask)
+	    && is_shadow_present_pte(*spte))
+		set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+}
+
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes)
 {
@@ -1694,6 +1708,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1730757bbc7..258e5d56298 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -15,7 +15,8 @@
 #define PT_USER_MASK (1ULL << 2)
 #define PT_PWT_MASK (1ULL << 3)
 #define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_ACCESSED_SHIFT 5
+#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
 #define PT_DIRTY_MASK (1ULL << 6)
 #define PT_PAGE_SIZE_MASK (1ULL << 7)
 #define PT_PAT_MASK (1ULL << 7)
-- 
cgit v1.2.3


From 4ecac3fd6dc2629ad76a658a486f081c44aef10e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 13 May 2008 13:23:38 +0300
Subject: KVM: Handle virtualization instruction #UD faults during reboot

KVM turns off hardware virtualization extensions during reboot, in order
to disassociate the memory used by the virtualization extensions from the
processor, and in order to have the system in a consistent state.
Unfortunately virtual machines may still be running while this goes on,
and once virtualization extensions are turned off, any virtualization
instruction will #UD on execution.

Fix by adding an exception handler to virtualization instructions; if we get
an exception during reboot, we simply spin waiting for the reset to complete.
If it's a true exception, BUG() so we can have our stack trace.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 20 +++++++++++---------
 arch/x86/kvm/vmx.c | 25 ++++++++++++++-----------
 2 files changed, 25 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 992ab711587..9390a31c06f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -27,6 +27,8 @@
 
 #include <asm/desc.h>
 
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -129,17 +131,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 
 static inline void clgi(void)
 {
-	asm volatile (SVM_CLGI);
+	asm volatile (__ex(SVM_CLGI));
 }
 
 static inline void stgi(void)
 {
-	asm volatile (SVM_STGI);
+	asm volatile (__ex(SVM_STGI));
 }
 
 static inline void invlpga(unsigned long addr, u32 asid)
 {
-	asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid));
+	asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
 }
 
 static inline unsigned long kvm_read_cr2(void)
@@ -1758,17 +1760,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		/* Enter guest mode */
 		"push %%rax \n\t"
 		"mov %c[vmcb](%[svm]), %%rax \n\t"
-		SVM_VMLOAD "\n\t"
-		SVM_VMRUN "\n\t"
-		SVM_VMSAVE "\n\t"
+		__ex(SVM_VMLOAD) "\n\t"
+		__ex(SVM_VMRUN) "\n\t"
+		__ex(SVM_VMSAVE) "\n\t"
 		"pop %%rax \n\t"
 #else
 		/* Enter guest mode */
 		"push %%eax \n\t"
 		"mov %c[vmcb](%[svm]), %%eax \n\t"
-		SVM_VMLOAD "\n\t"
-		SVM_VMRUN "\n\t"
-		SVM_VMSAVE "\n\t"
+		__ex(SVM_VMLOAD) "\n\t"
+		__ex(SVM_VMRUN) "\n\t"
+		__ex(SVM_VMSAVE) "\n\t"
 		"pop %%eax \n\t"
 #endif
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fff8e23433d..b80b4d14163 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -30,6 +30,8 @@
 #include <asm/io.h>
 #include <asm/desc.h>
 
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -278,7 +280,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
 	u64 gva;
     } operand = { vpid, 0, gva };
 
-    asm volatile (ASM_VMX_INVVPID
+    asm volatile (__ex(ASM_VMX_INVVPID)
 		  /* CF==1 or ZF==1 --> rc = -1 */
 		  "; ja 1f ; ud2 ; 1:"
 		  : : "a"(&operand), "c"(ext) : "cc", "memory");
@@ -290,7 +292,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
 		u64 eptp, gpa;
 	} operand = {eptp, gpa};
 
-	asm volatile (ASM_VMX_INVEPT
+	asm volatile (__ex(ASM_VMX_INVEPT)
 			/* CF==1 or ZF==1 --> rc = -1 */
 			"; ja 1f ; ud2 ; 1:\n"
 			: : "a" (&operand), "c" (ext) : "cc", "memory");
@@ -311,7 +313,7 @@ static void vmcs_clear(struct vmcs *vmcs)
 	u64 phys_addr = __pa(vmcs);
 	u8 error;
 
-	asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
+	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
 		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 		      : "cc", "memory");
 	if (error)
@@ -378,7 +380,7 @@ static unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
 
-	asm volatile (ASM_VMX_VMREAD_RDX_RAX
+	asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
 		      : "=a"(value) : "d"(field) : "cc");
 	return value;
 }
@@ -413,7 +415,7 @@ static void vmcs_writel(unsigned long field, unsigned long value)
 {
 	u8 error;
 
-	asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
+	asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
 		       : "=q"(error) : "a"(value), "d"(field) : "cc");
 	if (unlikely(error))
 		vmwrite_error(field, value);
@@ -621,7 +623,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		u8 error;
 
 		per_cpu(current_vmcs, cpu) = vmx->vmcs;
-		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
+		asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 			      : "cc");
 		if (error)
@@ -1030,13 +1032,14 @@ static void hardware_enable(void *garbage)
 		       MSR_IA32_FEATURE_CONTROL_LOCKED |
 		       MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED);
 	write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
-	asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr)
+	asm volatile (ASM_VMX_VMXON_RAX
+		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
 }
 
 static void hardware_disable(void *garbage)
 {
-	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
@@ -2834,7 +2837,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"push %%edx; push %%ebp;"
 		"push %%ecx \n\t"
 #endif
-		ASM_VMX_VMWRITE_RSP_RDX "\n\t"
+		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
@@ -2869,9 +2872,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
 		/* Enter guest mode */
 		"jne .Llaunched \n\t"
-		ASM_VMX_VMLAUNCH "\n\t"
+		__ex(ASM_VMX_VMLAUNCH) "\n\t"
 		"jmp .Lkvm_vmx_return \n\t"
-		".Llaunched: " ASM_VMX_VMRESUME "\n\t"
+		".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
 		".Lkvm_vmx_return: "
 		/* Save guest registers, load host registers, keep flags */
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From 543e42436643d68ad007d0bae2f485caac9c8a02 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 13 May 2008 16:22:47 +0300
Subject: KVM: VMX: Add list of potentially locally cached vcpus

VMX hardware can cache the contents of a vcpu's vmcs.  This cache needs
to be flushed when migrating a vcpu to another cpu, or (which is the case
that interests us here) when disabling hardware virtualization on a cpu.

The current implementation of decaching iterates over the list of all vcpus,
picks the ones that are potentially cached on the cpu that is being offlined,
and flushes the cache.  The problem is that it uses mutex_trylock() to gain
exclusive access to the vcpu, which fires off a (benign) warning about using
the mutex in an interrupt context.

To avoid this, and to make things generally nicer, add a new per-cpu list
of potentially cached vcpus.  This makes the decaching code much simpler.  The
list is vmx-specific since other hardware doesn't have this issue.

[andrea: fix crash on suspend/resume]

Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 24 ++++++++++++++++++++++--
 arch/x86/kvm/x86.c | 27 ---------------------------
 2 files changed, 22 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b80b4d14163..4d179d10637 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -55,6 +55,7 @@ struct vmcs {
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
+	struct list_head      local_vcpus_link;
 	int                   launched;
 	u8                    fail;
 	u32                   idt_vectoring_info;
@@ -93,6 +94,7 @@ static int init_rmode(struct kvm *kvm);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
@@ -331,6 +333,9 @@ static void __vcpu_clear(void *arg)
 	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
 	rdtscll(vmx->vcpu.arch.host_tsc);
+	list_del(&vmx->local_vcpus_link);
+	vmx->vcpu.cpu = -1;
+	vmx->launched = 0;
 }
 
 static void vcpu_clear(struct vcpu_vmx *vmx)
@@ -338,7 +343,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 	if (vmx->vcpu.cpu == -1)
 		return;
 	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
-	vmx->launched = 0;
 }
 
 static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
@@ -617,6 +621,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu_clear(vmx);
 		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
+		local_irq_disable();
+		list_add(&vmx->local_vcpus_link,
+			 &per_cpu(vcpus_on_cpu, cpu));
+		local_irq_enable();
 	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
@@ -1022,6 +1030,7 @@ static void hardware_enable(void *garbage)
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED |
 		    MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
@@ -1037,8 +1046,19 @@ static void hardware_enable(void *garbage)
 		      : "memory", "cc");
 }
 
+static void vmclear_local_vcpus(void)
+{
+	int cpu = raw_smp_processor_id();
+	struct vcpu_vmx *vmx, *n;
+
+	list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
+				 local_vcpus_link)
+		__vcpu_clear(vmx);
+}
+
 static void hardware_disable(void *garbage)
 {
+	vmclear_local_vcpus();
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
@@ -2967,7 +2987,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (vmx->vmcs) {
-		on_each_cpu(__vcpu_clear, vmx, 1);
+		vcpu_clear(vmx);
 		free_vmcs(vmx->vmcs);
 		vmx->vmcs = NULL;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59084a3981c..8c14ddcaba7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -823,33 +823,6 @@ out:
  */
 void decache_vcpus_on_cpu(int cpu)
 {
-	struct kvm *vm;
-	struct kvm_vcpu *vcpu;
-	int i;
-
-	spin_lock(&kvm_lock);
-	list_for_each_entry(vm, &vm_list, vm_list)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = vm->vcpus[i];
-			if (!vcpu)
-				continue;
-			/*
-			 * If the vcpu is locked, then it is running on some
-			 * other cpu and therefore it is not cached on the
-			 * cpu in question.
-			 *
-			 * If it's not locked, check the last cpu it executed
-			 * on.
-			 */
-			if (mutex_trylock(&vcpu->mutex)) {
-				if (vcpu->cpu == cpu) {
-					kvm_x86_ops->vcpu_decache(vcpu);
-					vcpu->cpu = -1;
-				}
-				mutex_unlock(&vcpu->mutex);
-			}
-		}
-	spin_unlock(&kvm_lock);
 }
 
 int kvm_dev_ioctl_check_extension(long ext)
-- 
cgit v1.2.3


From 7cc8883074b040aa8c1ebd3a17463b0ea3a9ef16 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 13 May 2008 16:29:20 +0300
Subject: KVM: Remove decache_vcpus_on_cpu() and related callbacks

Obsoleted by the vmx-specific per-cpu list.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/kvm-ia64.c   | 8 --------
 arch/powerpc/kvm/powerpc.c | 4 ----
 arch/s390/kvm/kvm-s390.c   | 4 ----
 arch/x86/kvm/svm.c         | 5 -----
 arch/x86/kvm/vmx.c         | 6 ------
 arch/x86/kvm/x86.c         | 8 --------
 6 files changed, 35 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 68c978be9a5..7c504be5797 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1035,14 +1035,6 @@ static void kvm_free_vmm_area(void)
 	}
 }
 
-/*
- * Make sure that a cpu that is being hot-unplugged does not have any vcpus
- * cached on it. Leave it as blank for IA64.
- */
-void decache_vcpus_on_cpu(int cpu)
-{
-}
-
 static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 777e0f34e0e..0513b359851 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -240,10 +240,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
-void decache_vcpus_on_cpu(int cpu)
-{
-}
-
 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
                                     struct kvm_debug_guest *dbg)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6558b09ff57..4585c8ac2b0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -79,10 +79,6 @@ void kvm_arch_hardware_disable(void *garbage)
 {
 }
 
-void decache_vcpus_on_cpu(int cpu)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9390a31c06f..238e8f3afaf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -709,10 +709,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	rdtscll(vcpu->arch.host_tsc);
 }
 
-static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
-{
-}
-
 static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1933,7 +1929,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.prepare_guest_switch = svm_prepare_guest_switch,
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
-	.vcpu_decache = svm_vcpu_decache,
 
 	.set_guest_debug = svm_guest_debug,
 	.get_msr = svm_get_msr,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4d179d10637..b99bb37e5de 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -692,11 +692,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 	update_exception_bitmap(vcpu);
 }
 
-static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
-{
-	vcpu_clear(to_vmx(vcpu));
-}
-
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return vmcs_readl(GUEST_RFLAGS);
@@ -3114,7 +3109,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.prepare_guest_switch = vmx_save_host_state,
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
-	.vcpu_decache = vmx_vcpu_decache,
 
 	.set_guest_debug = set_guest_debug,
 	.guest_debug_pre = kvm_guest_debug_pre,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8c14ddcaba7..fd03b4465bc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -817,14 +817,6 @@ out:
 	return r;
 }
 
-/*
- * Make sure that a cpu that is being hot-unplugged does not have any vcpus
- * cached on it.
- */
-void decache_vcpus_on_cpu(int cpu)
-{
-}
-
 int kvm_dev_ioctl_check_extension(long ext)
 {
 	int r;
-- 
cgit v1.2.3


From 50d40d7fb9b09e68a657c68837fcfa067b70cc42 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 25 May 2008 14:38:15 +0300
Subject: KVM: Remove unnecessary ->decache_regs() call

Since we aren't modifying any register, there's no need to decache
the register state.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fd03b4465bc..5f00c60f0af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2297,7 +2297,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
 	kvm_x86_ops->cache_regs(vcpu);
 	memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
-	kvm_x86_ops->decache_regs(vcpu);
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-- 
cgit v1.2.3


From 3419ffc8e45a5344abc87684cbca6cdc5c9c8a01 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Thu, 15 May 2008 09:52:48 +0800
Subject: KVM: IOAPIC/LAPIC: Enable NMI support

[avi: fix ia64 build breakage]

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/lapic.c | 3 ++-
 arch/x86/kvm/x86.c   | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f9201fbc61d..e48d1939403 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -356,8 +356,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_SMI:
 		printk(KERN_DEBUG "Ignoring guest SMI\n");
 		break;
+
 	case APIC_DM_NMI:
-		printk(KERN_DEBUG "Ignoring guest NMI\n");
+		kvm_inject_nmi(vcpu);
 		break;
 
 	case APIC_DM_INIT:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5f00c60f0af..19974dde656 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -173,6 +173,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
 
+void kvm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.nmi_pending = 1;
+}
+EXPORT_SYMBOL_GPL(kvm_inject_nmi);
+
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
 	WARN_ON(vcpu->arch.exception.pending);
-- 
cgit v1.2.3


From f08864b42a45581a64558aa5b6b673c77b97ee5d Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Thu, 15 May 2008 18:23:25 +0800
Subject: KVM: VMX: Enable NMI with in-kernel irqchip

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 124 +++++++++++++++++++++++++++++++++++++++++++++--------
 arch/x86/kvm/vmx.h |  12 +++++-
 arch/x86/kvm/x86.c |   1 +
 3 files changed, 118 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b99bb37e5de..1bb99465720 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -264,6 +264,11 @@ static inline int cpu_has_vmx_vpid(void)
 		SECONDARY_EXEC_ENABLE_VPID);
 }
 
+static inline int cpu_has_virtual_nmis(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -1088,7 +1093,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmentry_control = 0;
 
 	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = 0;
+	opt = PIN_BASED_VIRTUAL_NMIS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;
@@ -2130,6 +2135,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+	vcpu->arch.nmi_pending = 0;
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2653,6 +2665,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u32 cpu_based_vm_exec_control;
+
+	/* clear pending NMI */
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	++vcpu->stat.nmi_window_exits;
+
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2663,6 +2688,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
+	[EXIT_REASON_NMI_WINDOW]	      = handle_nmi_window,
 	[EXIT_REASON_IO_INSTRUCTION]          = handle_io,
 	[EXIT_REASON_CR_ACCESS]               = handle_cr,
 	[EXIT_REASON_DR_ACCESS]               = handle_dr,
@@ -2750,17 +2776,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis())
+		return;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return !(guest_intr & (GUEST_INTR_STATE_NMI |
+			       GUEST_INTR_STATE_MOV_SS |
+			       GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+			       GUEST_INTR_STATE_STI)) &&
+		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu))
+		enable_irq_window(vcpu);
+}
+
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 idtv_info_field, intr_info_field;
-	int has_ext_irq, interrupt_window_open;
+	u32 idtv_info_field, intr_info_field, exit_intr_info_field;
 	int vector;
 
 	update_tpr_threshold(vcpu);
 
-	has_ext_irq = kvm_cpu_has_interrupt(vcpu);
 	intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+	exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
 	idtv_info_field = vmx->idt_vectoring_info;
 	if (intr_info_field & INTR_INFO_VALID_MASK) {
 		if (idtv_info_field & INTR_INFO_VALID_MASK) {
@@ -2768,8 +2829,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			if (printk_ratelimit())
 				printk(KERN_ERR "Fault when IDT_Vectoring\n");
 		}
-		if (has_ext_irq)
-			enable_irq_window(vcpu);
+		enable_intr_window(vcpu);
 		return;
 	}
 	if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
@@ -2779,30 +2839,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
 
 			vmx_inject_irq(vcpu, vect);
-			if (unlikely(has_ext_irq))
-				enable_irq_window(vcpu);
+			enable_intr_window(vcpu);
 			return;
 		}
 
 		KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
 
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+		/*
+		 * SDM 3: 25.7.1.2
+		 * Clear bit "block by NMI" before VM entry if a NMI delivery
+		 * faulted.
+		 */
+		if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
+		    == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
+			vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+				vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+				~GUEST_INTR_STATE_NMI);
+
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
+				& ~INTR_INFO_RESVD_BITS_MASK);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 
 		if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
 			vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
 				vmcs_read32(IDT_VECTORING_ERROR_CODE));
-		if (unlikely(has_ext_irq))
-			enable_irq_window(vcpu);
+		enable_intr_window(vcpu);
 		return;
 	}
-	if (!has_ext_irq)
+	if (cpu_has_virtual_nmis()) {
+		/*
+		 * SDM 3: 25.7.1.2
+		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
+		 * a guest IRET fault.
+		 */
+		if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
+		    (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
+			vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+				vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
+				GUEST_INTR_STATE_NMI);
+		else if (vcpu->arch.nmi_pending) {
+			if (vmx_nmi_enabled(vcpu))
+				vmx_inject_nmi(vcpu);
+			enable_intr_window(vcpu);
+			return;
+		}
+
+	}
+	if (!kvm_cpu_has_interrupt(vcpu))
 		return;
-	interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
-	if (interrupt_window_open) {
+	if (vmx_irq_enabled(vcpu)) {
 		vector = kvm_cpu_get_interrupt(vcpu);
 		vmx_inject_irq(vcpu, vector);
 		kvm_timer_intr_post(vcpu, vector);
@@ -2963,7 +3049,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		fixup_rmode_irq(vmx);
 
 	vcpu->arch.interrupt_window_open =
-		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
+		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
@@ -2971,7 +3058,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
 	}
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 79d94c610df..425a13436b3 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -40,6 +40,7 @@
 #define CPU_BASED_CR8_LOAD_EXITING              0x00080000
 #define CPU_BASED_CR8_STORE_EXITING             0x00100000
 #define CPU_BASED_TPR_SHADOW                    0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
 #define CPU_BASED_MOV_DR_EXITING                0x00800000
 #define CPU_BASED_UNCOND_IO_EXITING             0x01000000
 #define CPU_BASED_USE_IO_BITMAPS                0x02000000
@@ -216,7 +217,7 @@ enum vmcs_field {
 #define EXIT_REASON_TRIPLE_FAULT        2
 
 #define EXIT_REASON_PENDING_INTERRUPT   7
-
+#define EXIT_REASON_NMI_WINDOW		8
 #define EXIT_REASON_TASK_SWITCH         9
 #define EXIT_REASON_CPUID               10
 #define EXIT_REASON_HLT                 12
@@ -251,7 +252,9 @@ enum vmcs_field {
 #define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
 #define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
 #define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
+#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
 #define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
+#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
 
 #define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
 #define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
@@ -259,9 +262,16 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 
+/* GUEST_INTERRUPTIBILITY_INFO flags. */
+#define GUEST_INTR_STATE_STI		0x00000001
+#define GUEST_INTR_STATE_MOV_SS		0x00000002
+#define GUEST_INTR_STATE_SMI		0x00000004
+#define GUEST_INTR_STATE_NMI		0x00000008
+
 /*
  * Exit Qualifications for MOV for Control Register Access
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19974dde656..05b54976c89 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmio_exits", VCPU_STAT(mmio_exits) },
 	{ "signal_exits", VCPU_STAT(signal_exits) },
 	{ "irq_window", VCPU_STAT(irq_window_exits) },
+	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
-- 
cgit v1.2.3


From 9ba075a664dff836fd6fb93f90fcc827f7683d91 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 26 May 2008 20:06:35 +0300
Subject: KVM: MTRR support

Add emulation for the memory type range registers, needed by VMware esx 3.5,
and by pci device assignment.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05b54976c89..5f67a7c54e8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -611,6 +611,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static bool msr_mtrr_valid(unsigned msr)
+{
+	switch (msr) {
+	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
+	case MSR_MTRRfix64K_00000:
+	case MSR_MTRRfix16K_80000:
+	case MSR_MTRRfix16K_A0000:
+	case MSR_MTRRfix4K_C0000:
+	case MSR_MTRRfix4K_C8000:
+	case MSR_MTRRfix4K_D0000:
+	case MSR_MTRRfix4K_D8000:
+	case MSR_MTRRfix4K_E0000:
+	case MSR_MTRRfix4K_E8000:
+	case MSR_MTRRfix4K_F0000:
+	case MSR_MTRRfix4K_F8000:
+	case MSR_MTRRdefType:
+	case MSR_IA32_CR_PAT:
+		return true;
+	case 0x2f8:
+		return true;
+	}
+	return false;
+}
+
+static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	if (!msr_mtrr_valid(msr))
+		return 1;
+
+	vcpu->arch.mtrr[msr - 0x200] = data;
+	return 0;
+}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
@@ -632,8 +664,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
-	case 0x200 ... 0x2ff: /* MTRRs */
 		break;
+	case 0x200 ... 0x2ff:
+		return set_msr_mtrr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
 		kvm_set_apic_base(vcpu, data);
 		break;
@@ -691,6 +724,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
 }
 
+static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	if (!msr_mtrr_valid(msr))
+		return 1;
+
+	*pdata = vcpu->arch.mtrr[msr - 0x200];
+	return 0;
+}
+
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
@@ -712,11 +754,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MC0_MISC+16:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_EBL_CR_POWERON:
-		/* MTRR registers */
-	case 0xfe:
-	case 0x200 ... 0x2ff:
 		data = 0;
 		break;
+	case MSR_MTRRcap:
+		data = 0x500 | KVM_NR_VAR_MTRR;
+		break;
+	case 0x200 ... 0x2ff:
+		return get_msr_mtrr(vcpu, msr, pdata);
 	case 0xcd: /* fsb frequency */
 		data = 3;
 		break;
-- 
cgit v1.2.3


From 3e6e0aab1ba1e8b354ce01f5659336f9aee69437 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 10:18:46 +0200
Subject: KVM: Prefixes segment functions that will be exported with "kvm_"

Prefixes functions that will be exported with kvm_.
We also prefix set_segment(), even though it is still static,
for consistency.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5f67a7c54e8..4c94fad7f01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3100,8 +3100,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
-static void get_segment(struct kvm_vcpu *vcpu,
-			struct kvm_segment *var, int seg)
+void kvm_get_segment(struct kvm_vcpu *vcpu,
+		     struct kvm_segment *var, int seg)
 {
 	kvm_x86_ops->get_segment(vcpu, var, seg);
 }
@@ -3110,7 +3110,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
 	struct kvm_segment cs;
 
-	get_segment(vcpu, &cs, VCPU_SREG_CS);
+	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	*db = cs.db;
 	*l = cs.l;
 }
@@ -3124,15 +3124,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	vcpu_load(vcpu);
 
-	get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
-	get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
-	get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
-	get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
-	get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
-	get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
+	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
+	kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
+	kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
+	kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
 
-	get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
-	get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+	kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
+	kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
 	kvm_x86_ops->get_idt(vcpu, &dt);
 	sregs->idt.limit = dt.limit;
@@ -3184,7 +3184,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void set_segment(struct kvm_vcpu *vcpu,
+static void kvm_set_segment(struct kvm_vcpu *vcpu,
 			struct kvm_segment *var, int seg)
 {
 	kvm_x86_ops->set_segment(vcpu, var, seg);
@@ -3221,7 +3221,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
 	if (selector & 1 << 2) {
 		struct kvm_segment kvm_seg;
 
-		get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
+		kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
 
 		if (kvm_seg.unusable)
 			dtable->limit = 0;
@@ -3327,7 +3327,7 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
 {
 	struct kvm_segment kvm_seg;
 
-	get_segment(vcpu, &kvm_seg, seg);
+	kvm_get_segment(vcpu, &kvm_seg, seg);
 	return kvm_seg.selector;
 }
 
@@ -3343,8 +3343,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
-				   int type_bits, int seg)
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+				int type_bits, int seg)
 {
 	struct kvm_segment kvm_seg;
 
@@ -3357,7 +3357,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 		if (!kvm_seg.s)
 			kvm_seg.unusable = 1;
 
-	set_segment(vcpu, &kvm_seg, seg);
+	kvm_set_segment(vcpu, &kvm_seg, seg);
 	return 0;
 }
 
@@ -3403,25 +3403,25 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
 	vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
 	vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
 
-	if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+	if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+	if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
 		return 1;
 	return 0;
 }
@@ -3462,19 +3462,19 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 	vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
 	vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
 
-	if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
 		return 1;
 
-	if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
 		return 1;
 	return 0;
 }
@@ -3532,7 +3532,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	struct desc_struct nseg_desc;
 	int ret = 0;
 
-	get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+	kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
 
 	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
 		goto out;
@@ -3591,7 +3591,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
 	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
 	tr_seg.type = 11;
-	set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
 out:
 	kvm_x86_ops->decache_regs(vcpu);
 	return ret;
@@ -3658,15 +3658,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 		}
 	}
 
-	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
-	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
-	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
-	set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
-	set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
-	set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
+	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
+	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
+	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
+	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
 
-	set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
-	set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
+	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
+	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
 	vcpu_put(vcpu);
 
-- 
cgit v1.2.3


From 89c696383d6eb493351a89d450d8ad7a55cbe1da Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 10:22:20 +0200
Subject: KVM: x86 emulator: Update c->dst.bytes in decode instruction

Update c->dst.bytes in the instruction decode stage instead of in the
emulation of the instruction itself.  This is needed because if
c->dst.bytes is equal to 0, the instruction is not emulated.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 932f216d890..a928aa6cdad 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1049,6 +1049,7 @@ done_prefixes:
 		break;
 	case DstMem:
 		if ((c->d & ModRM) && c->modrm_mod == 3) {
+			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 			c->dst.type = OP_REG;
 			c->dst.val = c->dst.orig_val = c->modrm_val;
 			c->dst.ptr = c->modrm_ptr;
-- 
cgit v1.2.3


From 954cd36f7613ac6d084abe33114dd45a8e0dbe92 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 10:19:08 +0200
Subject: KVM: x86 emulator: add support for jmp far 0xea

Add support for jmp far (opcode 0xea) instruction.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index a928aa6cdad..48b62cc3bd0 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -168,7 +168,8 @@ static u16 opcode_table[256] = {
 	/* 0xE0 - 0xE7 */
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xE8 - 0xEF */
-	ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps,
+	ImplicitOps | Stack, SrcImm | ImplicitOps,
+	ImplicitOps, SrcImmByte | ImplicitOps,
 	0, 0, 0, 0,
 	/* 0xF0 - 0xF7 */
 	0, 0, 0, 0,
@@ -1661,7 +1662,33 @@ special_insn:
 		break;
 	}
 	case 0xe9: /* jmp rel */
-	case 0xeb: /* jmp rel short */
+		goto jmp;
+	case 0xea: /* jmp far */ {
+		uint32_t eip;
+		uint16_t sel;
+
+		switch (c->op_bytes) {
+		case 2:
+			eip = insn_fetch(u16, 2, c->eip);
+			break;
+		case 4:
+			eip = insn_fetch(u32, 4, c->eip);
+			break;
+		default:
+			DPRINTF("jmp far: Invalid op_bytes\n");
+			goto cannot_emulate;
+		}
+		sel = insn_fetch(u16, 2, c->eip);
+		if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
+			DPRINTF("jmp far: Failed to load CS descriptor\n");
+			goto cannot_emulate;
+		}
+
+		c->eip = eip;
+		break;
+	}
+	case 0xeb:
+	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
 		c->dst.type = OP_NONE; /* Disable writeback. */
 		break;
-- 
cgit v1.2.3


From 615ac125618dc7b40ecb418e8b353d31ccf0e518 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 10:19:16 +0200
Subject: KVM: x86 emulator: adds support to mov r,imm (opcode 0xb8)
 instruction

Add support for the mov r, imm (0xb8) instruction.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 48b62cc3bd0..21d7ff6a8ec 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -152,7 +152,8 @@ static u16 opcode_table[256] = {
 	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
 	ByteOp | ImplicitOps | String, ImplicitOps | String,
 	/* 0xB0 - 0xBF */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xC0 - 0xC7 */
 	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
 	0, ImplicitOps | Stack, 0, 0,
@@ -1624,6 +1625,8 @@ special_insn:
 	case 0xae ... 0xaf:	/* scas */
 		DPRINTF("Urk! I don't handle SCAS.\n");
 		goto cannot_emulate;
+	case 0xb8: /* mov r, imm */
+		goto mov;
 	case 0xc0 ... 0xc1:
 		emulate_grp2(ctxt);
 		break;
-- 
cgit v1.2.3


From 4257198ae2c36e030a0947fef661c8de973778be Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 14:49:15 +0200
Subject: KVM: x86 emulator: Add support for mov seg, r (0x8e) instruction

Add support for the mov seg, r/m16 (0x8e) instruction.

[avi: drop the sreg decoding table in favor of 1:1 encoding]

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 21d7ff6a8ec..b049b6bf9a7 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -138,7 +138,8 @@ static u16 opcode_table[256] = {
 	/* 0x88 - 0x8F */
 	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	0, ModRM | DstReg, 0, Group | Group1A,
+	0, ModRM | DstReg,
+	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
@@ -1520,6 +1521,28 @@ special_insn:
 	case 0x8d: /* lea r16/r32, m */
 		c->dst.val = c->modrm_ea;
 		break;
+	case 0x8e: { /* mov seg, r/m16 */
+		uint16_t sel;
+		int type_bits;
+		int err;
+
+		sel = c->src.val;
+		if (c->modrm_reg <= 5) {
+			type_bits = (c->modrm_reg == 1) ? 9 : 1;
+			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
+							  type_bits, c->modrm_reg);
+		} else {
+			printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
+					c->modrm);
+			goto cannot_emulate;
+		}
+
+		if (err < 0)
+			goto cannot_emulate;
+
+		c->dst.type = OP_NONE;  /* Disable writeback. */
+		break;
+	}
 	case 0x8f:		/* pop (sole member of Grp1a) */
 		rc = emulate_grp1a(ctxt, ops);
 		if (rc != 0)
-- 
cgit v1.2.3


From 38d5bc6d50a4368be08b39b02efb9cbbe1dd60d0 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 27 May 2008 15:13:28 +0200
Subject: KVM: x86 emulator: Add support for mov r, sreg (0x8c) instruction

Add support for mov r, sreg (0x8c) instruction

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index b049b6bf9a7..2a9db4d90ba 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -138,7 +138,7 @@ static u16 opcode_table[256] = {
 	/* 0x88 - 0x8F */
 	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	0, ModRM | DstReg,
+	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
 	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0,
@@ -1518,6 +1518,19 @@ special_insn:
 		break;
 	case 0x88 ... 0x8b:	/* mov */
 		goto mov;
+	case 0x8c: { /* mov r/m, sreg */
+		struct kvm_segment segreg;
+
+		if (c->modrm_reg <= 5)
+			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
+		else {
+			printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n",
+			       c->modrm);
+			goto cannot_emulate;
+		}
+		c->dst.val = segreg.selector;
+		break;
+	}
 	case 0x8d: /* lea r16/r32, m */
 		c->dst.val = c->modrm_ea;
 		break;
-- 
cgit v1.2.3


From eab9f71feb1851b5b700ca12ae614b6a0a441021 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 29 May 2008 14:20:16 +0300
Subject: KVM: MMU: Optimize prefetch_page()

Instead of reading each pte individually, read 256 bytes worth of ptes and
batch process them.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/paging_tmpl.h | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 934c7b61939..4d918220bae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -460,8 +460,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 				 struct kvm_mmu_page *sp)
 {
-	int i, offset = 0, r = 0;
-	pt_element_t pt;
+	int i, j, offset, r;
+	pt_element_t pt[256 / sizeof(pt_element_t)];
+	gpa_t pte_gpa;
 
 	if (sp->role.metaphysical
 	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
@@ -469,19 +470,20 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (PTTYPE == 32)
+	pte_gpa = gfn_to_gpa(sp->gfn);
+	if (PTTYPE == 32) {
 		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+		pte_gpa += offset * sizeof(pt_element_t);
+	}
 
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		gpa_t pte_gpa = gfn_to_gpa(sp->gfn);
-		pte_gpa += (i+offset) * sizeof(pt_element_t);
-
-		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt,
-					  sizeof(pt_element_t));
-		if (r || is_present_pte(pt))
-			sp->spt[i] = shadow_trap_nonpresent_pte;
-		else
-			sp->spt[i] = shadow_notrap_nonpresent_pte;
+	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
+		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
+		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
+		for (j = 0; j < ARRAY_SIZE(pt); ++j)
+			if (r || is_present_pte(pt[j]))
+				sp->spt[i+j] = shadow_trap_nonpresent_pte;
+			else
+				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
 	}
 }
 
-- 
cgit v1.2.3


From 19e43636b5af1c8b9cc8406af674835284abab0c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 29 May 2008 14:26:29 +0300
Subject: KVM: x86 emulator: simplify push imm8 emulation

Instead of fetching the data explicitly, use SrcImmByte.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2a9db4d90ba..4e037ea8fe6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -121,7 +121,7 @@ static u16 opcode_table[256] = {
 	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
 	0, 0, 0, 0,
 	/* 0x68 - 0x6F */
-	0, 0, ImplicitOps | Mov | Stack, 0,
+	0, 0, SrcImmByte | Mov | Stack, 0,
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
 	/* 0x70 - 0x77 */
@@ -1425,8 +1425,6 @@ special_insn:
 		c->dst.val = (s32) c->src.val;
 		break;
 	case 0x6a: /* push imm8 */
-		c->src.val = 0L;
-		c->src.val = insn_fetch(s8, 1, c->eip);
 		emulate_push(ctxt);
 		break;
 	case 0x6c:		/* insb */
-- 
cgit v1.2.3


From 91ed7a0e15c6f6ff57f5cf70feabdba56a999863 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 29 May 2008 14:38:38 +0300
Subject: KVM: x86 emulator: implement 'push imm' (opcode 0x68)

Encountered in FC6 boot sequence, now that we don't force ss.rpl = 0 during
the protected mode transition.  Not really necessary, but nice to have.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 4e037ea8fe6..b90857c7656 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -121,7 +121,7 @@ static u16 opcode_table[256] = {
 	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
 	0, 0, 0, 0,
 	/* 0x68 - 0x6F */
-	0, 0, SrcImmByte | Mov | Stack, 0,
+	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
 	/* 0x70 - 0x77 */
@@ -1424,6 +1424,7 @@ special_insn:
 			goto cannot_emulate;
 		c->dst.val = (s32) c->src.val;
 		break;
+	case 0x68: /* push imm */
 	case 0x6a: /* push imm8 */
 		emulate_push(ctxt);
 		break;
-- 
cgit v1.2.3


From d761a501cf9cd4fa08ff35d252ff08b8c31ce677 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 29 May 2008 14:55:03 +0300
Subject: KVM: MMU: Move nonpaging_prefetch_page()

In preparation for next patch. No code change.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 53f1ed852ca..62741b7c422 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -776,6 +776,15 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 	BUG();
 }
 
+static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+		sp->spt[i] = shadow_trap_nonpresent_pte;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -1213,15 +1222,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 }
 
 
-static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
-				    struct kvm_mmu_page *sp)
-{
-	int i;
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
-		sp->spt[i] = shadow_trap_nonpresent_pte;
-}
-
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
-- 
cgit v1.2.3


From 131d82791b628d4aeafd94ddc74a9b68f3d15a83 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 29 May 2008 14:56:28 +0300
Subject: KVM: MMU: Avoid page prefetch on SVM

SVM cannot benefit from page prefetching since guest page fault bypass
cannot be made to work there.  Avoid accessing the guest page table in
this case.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 62741b7c422..5ebb2788bd7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -850,7 +850,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
-	vcpu->arch.mmu.prefetch_page(vcpu, sp);
+	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
+		vcpu->arch.mmu.prefetch_page(vcpu, sp);
+	else
+		nonpaging_prefetch_page(vcpu, sp);
 	return sp;
 }
 
-- 
cgit v1.2.3


From 92760499d01ef91518119908eb9b8798b6c9bd3f Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Fri, 30 May 2008 16:05:53 +0200
Subject: KVM: kvm_io_device: extend in_range() to manage len and write
 attribute

Modify member in_range() of structure kvm_io_device to pass length and the type
of the I/O (write or read).

This modification allows kvm_io_device to be used with coalesced MMIO.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/kvm-ia64.c |  6 +++---
 arch/x86/kvm/i8254.c     |  6 ++++--
 arch/x86/kvm/i8259.c     |  3 ++-
 arch/x86/kvm/lapic.c     |  3 ++-
 arch/x86/kvm/x86.c       | 28 +++++++++++++++++-----------
 5 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7c504be5797..bb58df7cc41 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -195,11 +195,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 }
 
 static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-					gpa_t addr)
+					gpa_t addr, int len, int is_write)
 {
 	struct kvm_io_device *dev;
 
-	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
 
 	return dev;
 }
@@ -231,7 +231,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_run->exit_reason = KVM_EXIT_MMIO;
 	return 0;
 mmio:
-	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
+	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
 	if (mmio_dev) {
 		if (!p->dir)
 			kvm_iodevice_write(mmio_dev, p->addr, p->size,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 60074dc66bd..9e3391e9a1b 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -460,7 +460,8 @@ static void pit_ioport_read(struct kvm_io_device *this,
 	mutex_unlock(&pit_state->lock);
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
+			int len, int is_write)
 {
 	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
 		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
@@ -501,7 +502,8 @@ static void speaker_ioport_read(struct kvm_io_device *this,
 	mutex_unlock(&pit_state->lock);
 }
 
-static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
+			    int len, int is_write)
 {
 	return (addr == KVM_SPEAKER_BASE_ADDRESS);
 }
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index ab29cf2def4..5857f59ad4a 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -346,7 +346,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
 	return s->elcr;
 }
 
-static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
+static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int is_write)
 {
 	switch (addr) {
 	case 0x20:
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e48d1939403..180ba7316da 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -785,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 }
 
-static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
+static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int size)
 {
 	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
 	int ret = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c94fad7f01..ab3f5552d69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1797,13 +1797,14 @@ static void kvm_init_msr_list(void)
  * Only apic need an MMIO device hook, so shortcut now..
  */
 static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
-						gpa_t addr)
+						gpa_t addr, int len,
+						int is_write)
 {
 	struct kvm_io_device *dev;
 
 	if (vcpu->arch.apic) {
 		dev = &vcpu->arch.apic->dev;
-		if (dev->in_range(dev, addr))
+		if (dev->in_range(dev, addr, len, is_write))
 			return dev;
 	}
 	return NULL;
@@ -1811,13 +1812,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
 
 
 static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-						gpa_t addr)
+						gpa_t addr, int len,
+						int is_write)
 {
 	struct kvm_io_device *dev;
 
-	dev = vcpu_find_pervcpu_dev(vcpu, addr);
+	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
 	if (dev == NULL)
-		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
+					  is_write);
 	return dev;
 }
 
@@ -1885,7 +1888,7 @@ mmio:
 	 * Is this MMIO handled locally?
 	 */
 	mutex_lock(&vcpu->kvm->lock);
-	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
 		mutex_unlock(&vcpu->kvm->lock);
@@ -1940,7 +1943,7 @@ mmio:
 	 * Is this MMIO handled locally?
 	 */
 	mutex_lock(&vcpu->kvm->lock);
-	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
 	if (mmio_dev) {
 		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
 		mutex_unlock(&vcpu->kvm->lock);
@@ -2317,9 +2320,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-					       gpa_t addr)
+					       gpa_t addr, int len,
+					       int is_write)
 {
-	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
+	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
 }
 
 int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
@@ -2351,7 +2355,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-	pio_dev = vcpu_find_pio_dev(vcpu, port);
+	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2433,7 +2437,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		}
 	}
 
-	pio_dev = vcpu_find_pio_dev(vcpu, port);
+	pio_dev = vcpu_find_pio_dev(vcpu, port,
+				    vcpu->arch.pio.cur_count,
+				    !vcpu->arch.pio.in);
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
-- 
cgit v1.2.3


From 542472b53ea9e0add0ba23976018210191d84754 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Fri, 30 May 2008 16:05:55 +0200
Subject: KVM: Add coalesced MMIO support (x86 part)

This patch enables coalesced MMIO for x86 architecture.
It defines KVM_COALESCED_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO.
It enables the compilation of coalesced_mmio.c.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/Makefile | 3 ++-
 arch/x86/kvm/x86.c    | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c97d35c218d..d0e940bb6f4 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,7 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+                coalesced_mmio.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab3f5552d69..d731d4fff1a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -885,6 +885,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MP_STATE:
 		r = 1;
 		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
 	case KVM_CAP_VAPIC:
 		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
 		break;
-- 
cgit v1.2.3


From 588968b6b7d34e6a88f538d1db9aca47b203623e Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Fri, 30 May 2008 16:05:56 +0200
Subject: KVM: Add coalesced MMIO support (powerpc part)

This patch enables coalesced MMIO for powerpc architecture.
It defines KVM_COALESCED_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO.
It enables the compilation of coalesced_mmio.c.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/Makefile  | 2 +-
 arch/powerpc/kvm/powerpc.c | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index d0d358d367e..04e3449e1f4 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -4,7 +4,7 @@
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0513b359851..b850d249702 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -145,6 +145,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_USER_MEMORY:
 		r = 1;
 		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
 	default:
 		r = 0;
 		break;
-- 
cgit v1.2.3


From 7f39f8ac177db258200053074aa7a3d98656b1cf Mon Sep 17 00:00:00 2001
From: Laurent Vivier <Laurent.Vivier@bull.net>
Date: Fri, 30 May 2008 16:05:57 +0200
Subject: KVM: Add coalesced MMIO support (ia64 part)

This patch enables coalesced MMIO for ia64 architecture.
It defines KVM_COALESCED_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO.
It enables the compilation of coalesced_mmio.c.

[akpm: fix compile error on ia64]

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/Makefile   | 3 ++-
 arch/ia64/kvm/kvm-ia64.c | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 112791dd254..bf22fb9e6dc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -43,7 +43,8 @@ $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+		coalesced_mmio.o)
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bb58df7cc41..9408b30576d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 		r = 1;
 		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
 	default:
 		r = 0;
 	}
-- 
cgit v1.2.3


From 622395a9e63bf87a16faecf555ed02375cbae5b7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 11 Jun 2008 19:52:53 -0300
Subject: KVM: only abort guest entry if timer count goes from 0->1

Only abort guest entry if the timer count went from 0->1, since for 1->2
or larger the bit will either be set already or a timer irq will have
been injected.

Using atomic_inc_and_test() for it also introduces an SMP barrier
to the LAPIC version (thought it was unnecessary because of timer
migration, but guest can be scheduled to a different pCPU between exit
and kvm_vcpu_block(), so there is the possibility for a race).

Noticed by Avi.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 11 ++++-------
 arch/x86/kvm/lapic.c |  4 ++--
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 9e3391e9a1b..c0f7872a912 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -198,14 +198,11 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
 	struct kvm_kpit_timer *pt = &ps->pit_timer;
 
-	atomic_inc(&pt->pending);
-	smp_mb__after_atomic_inc();
-	if (vcpu0) {
+	if (!atomic_inc_and_test(&pt->pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-		if (waitqueue_active(&vcpu0->wq)) {
-			vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-			wake_up_interruptible(&vcpu0->wq);
-		}
+	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		wake_up_interruptible(&vcpu0->wq);
 	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 180ba7316da..73f43de69f6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -945,8 +945,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	int result = 0;
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
-	atomic_inc(&apic->timer.pending);
-	set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
+	if(!atomic_inc_and_test(&apic->timer.pending))
+		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
 	if (waitqueue_active(q)) {
 		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
-- 
cgit v1.2.3


From 25be46080f1a446cb2bda3daadbd22a5682b955e Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Tue, 10 Jun 2008 10:46:53 -0300
Subject: KVM: Do not calculate linear rip in emulation failure report

If we're not gonna do anything (case in which failure is already
reported), we do not need to even bother with calculating the linear rip.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d731d4fff1a..5d21bb69d88 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2081,11 +2081,11 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 	unsigned long rip = vcpu->arch.rip;
 	unsigned long rip_linear;
 
-	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
-
 	if (reported)
 		return;
 
+	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
+
 	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
 
 	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
-- 
cgit v1.2.3


From f76c710d759250a43976bcfcab6af6ebb94b7dc2 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Fri, 13 Jun 2008 22:45:42 +0300
Subject: KVM: Use printk_ratelimit() instead of reporting emulation failures
 just once

Emulation failure reports are useful, so allow more than one per the lifetime
of the module.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d21bb69d88..d1db5aa5c7f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2076,12 +2076,11 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 
 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 {
-	static int reported;
 	u8 opcodes[4];
 	unsigned long rip = vcpu->arch.rip;
 	unsigned long rip_linear;
 
-	if (reported)
+	if (!printk_ratelimit())
 		return;
 
 	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
@@ -2090,7 +2089,6 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
 	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
-	reported = 1;
 }
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
-- 
cgit v1.2.3


From b13354f8f092884fa8d79472404de4907b25d579 Mon Sep 17 00:00:00 2001
From: Mohammed Gamal <m.gamal005@gmail.com>
Date: Sun, 15 Jun 2008 19:37:38 +0300
Subject: KVM: x86 emulator: emulate nop and xchg reg, acc (opcodes 0x90 -
 0x97)

Signed-off-by: Mohammed Gamal <m.gamal005@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index b90857c7656..28082913919 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -140,8 +140,9 @@ static u16 opcode_table[256] = {
 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
 	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
-	/* 0x90 - 0x9F */
-	0, 0, 0, 0, 0, 0, 0, 0,
+	/* 0x90 - 0x97 */
+	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
+	/* 0x98 - 0x9F */
 	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
 	/* 0xA0 - 0xA7 */
 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -1493,6 +1494,7 @@ special_insn:
 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
 		break;
 	case 0x86 ... 0x87:	/* xchg */
+	xchg:
 		/* Write back the register source. */
 		switch (c->dst.bytes) {
 		case 1:
@@ -1560,6 +1562,17 @@ special_insn:
 		if (rc != 0)
 			goto done;
 		break;
+	case 0x90: /* nop / xchg r8,rax */
+		if (!(c->rex_prefix & 1)) { /* nop */
+			c->dst.type = OP_NONE;
+			break;
+		}
+	case 0x91 ... 0x97: /* xchg reg,rax */
+		c->src.type = c->dst.type = OP_REG;
+		c->src.bytes = c->dst.bytes = c->op_bytes;
+		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
+		c->src.val = *(c->src.ptr);
+		goto xchg;
 	case 0x9c: /* pushf */
 		c->src.val =  (unsigned long) ctxt->eflags;
 		emulate_push(ctxt);
-- 
cgit v1.2.3


From 8684c0af0b2bab770c257e2a04e1546eed35fa56 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 21:13:41 -0700
Subject: KVM: x86 emulator: handle undecoded rex.b with r/m = 5 in certain
 cases

x86_64 does not decode rex.b in certain cases, where the r/m field = 5.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 28082913919..3721cfddc97 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -750,6 +750,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 
 			switch (base_reg) {
 			case 5:
+			case 13:
 				if (c->modrm_mod != 0)
 					c->modrm_ea += c->regs[base_reg];
 				else
@@ -767,6 +768,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			}
 			break;
 		case 5:
+		case 13:
 			if (c->modrm_mod != 0)
 				c->modrm_ea += c->regs[c->modrm_rm];
 			else if (ctxt->mode == X86EMUL_MODE_PROT64)
-- 
cgit v1.2.3


From dc71d0f1620790ec8e54101ca37e7b31e31208a8 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 21:23:17 -0700
Subject: KVM: x86 emulator: simplify sib decoding

Instead of using sparse switches, use simpler if/else sequences.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 3721cfddc97..ca7ab2469a4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -748,24 +748,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			base_reg |= sib & 7;
 			scale = sib >> 6;
 
-			switch (base_reg) {
-			case 5:
-			case 13:
-				if (c->modrm_mod != 0)
-					c->modrm_ea += c->regs[base_reg];
-				else
-					c->modrm_ea +=
-						insn_fetch(s32, 4, c->eip);
-				break;
-			default:
+			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
+				c->modrm_ea += insn_fetch(s32, 4, c->eip);
+			else
 				c->modrm_ea += c->regs[base_reg];
-			}
-			switch (index_reg) {
-			case 4:
-				break;
-			default:
+			if (index_reg != 4)
 				c->modrm_ea += c->regs[index_reg] << scale;
-			}
 			break;
 		case 5:
 		case 13:
-- 
cgit v1.2.3


From 84411d85dacdb6665578608c6a70fc8b819761a8 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 21:53:26 -0700
Subject: KVM: x86 emulator: simplify r/m decoding

Consolidate the duplicated code when not in any special case.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ca7ab2469a4..c3a823174f3 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -740,9 +740,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		c->modrm_ea = (u16)c->modrm_ea;
 	} else {
 		/* 32/64-bit ModR/M decode. */
-		switch (c->modrm_rm) {
-		case 4:
-		case 12:
+		if ((c->modrm_rm & 7) == 4) {
 			sib = insn_fetch(u8, 1, c->eip);
 			index_reg |= (sib >> 3) & 7;
 			base_reg |= sib & 7;
@@ -754,18 +752,11 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 				c->modrm_ea += c->regs[base_reg];
 			if (index_reg != 4)
 				c->modrm_ea += c->regs[index_reg] << scale;
-			break;
-		case 5:
-		case 13:
-			if (c->modrm_mod != 0)
-				c->modrm_ea += c->regs[c->modrm_rm];
-			else if (ctxt->mode == X86EMUL_MODE_PROT64)
+		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
+			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				rip_relative = 1;
-			break;
-		default:
+		} else
 			c->modrm_ea += c->regs[c->modrm_rm];
-			break;
-		}
 		switch (c->modrm_mod) {
 		case 0:
 			if (c->modrm_rm == 5)
-- 
cgit v1.2.3


From f5b4edcd52e78556800f90d08bfc9126416ac82f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 22:09:11 -0700
Subject: KVM: x86 emulator: simplify rip relative decoding

rip relative decoding is relative to the instruction pointer of the next
instruction; by moving address adjustment until after decoding is complete,
we remove the need to determine the instruction size.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c3a823174f3..20b604489c3 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -664,7 +664,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 {
 	struct decode_cache *c = &ctxt->decode;
 	u8 sib;
-	int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+	int index_reg = 0, base_reg = 0, scale;
 	int rc = 0;
 
 	if (c->rex_prefix) {
@@ -754,7 +754,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 				c->modrm_ea += c->regs[index_reg] << scale;
 		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
-				rip_relative = 1;
+				c->rip_relative = 1;
 		} else
 			c->modrm_ea += c->regs[c->modrm_rm];
 		switch (c->modrm_mod) {
@@ -770,22 +770,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			break;
 		}
 	}
-	if (rip_relative) {
-		c->modrm_ea += c->eip;
-		switch (c->d & SrcMask) {
-		case SrcImmByte:
-			c->modrm_ea += 1;
-			break;
-		case SrcImm:
-			if (c->d & ByteOp)
-				c->modrm_ea += 1;
-			else
-				if (c->op_bytes == 8)
-					c->modrm_ea += 4;
-				else
-					c->modrm_ea += c->op_bytes;
-		}
-	}
 done:
 	return rc;
 }
@@ -1044,6 +1028,9 @@ done_prefixes:
 		break;
 	}
 
+	if (c->rip_relative)
+		c->modrm_ea += c->eip;
+
 done:
 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
 }
-- 
cgit v1.2.3


From 0adc8675d645940139d12477e5e05b8a0a7a1117 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 15 Jun 2008 22:45:54 -0700
Subject: KVM: x86 emulator: avoid segment base adjust for lea

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 20b604489c3..38926b7da64 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -940,7 +940,7 @@ done_prefixes:
 	    c->override_base != &ctxt->gs_base)
 		c->override_base = NULL;
 
-	if (c->override_base)
+	if (c->override_base && !(!c->twobyte && c->b == 0x8d))
 		c->modrm_ea += *c->override_base;
 
 	if (c->ad_bytes != 8)
-- 
cgit v1.2.3


From 7a5b56dfd3a682a51fc84682290d5147872a8e99 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 22 Jun 2008 16:22:51 +0300
Subject: KVM: x86 emulator: lazily evaluate segment registers

Instead of prefetching all segment bases before emulation, read them at the
last moment.  Since most of them are unneeded, we save some cycles on
Intel machines where this is a bit expensive.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c         | 21 ----------
 arch/x86/kvm/x86_emulate.c | 96 +++++++++++++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 60 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d1db5aa5c7f..f726ba79fd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2126,27 +2126,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 			? X86EMUL_MODE_PROT64 :	cs_db
 			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 
-		if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
-			vcpu->arch.emulate_ctxt.cs_base = 0;
-			vcpu->arch.emulate_ctxt.ds_base = 0;
-			vcpu->arch.emulate_ctxt.es_base = 0;
-			vcpu->arch.emulate_ctxt.ss_base = 0;
-		} else {
-			vcpu->arch.emulate_ctxt.cs_base =
-					get_segment_base(vcpu, VCPU_SREG_CS);
-			vcpu->arch.emulate_ctxt.ds_base =
-					get_segment_base(vcpu, VCPU_SREG_DS);
-			vcpu->arch.emulate_ctxt.es_base =
-					get_segment_base(vcpu, VCPU_SREG_ES);
-			vcpu->arch.emulate_ctxt.ss_base =
-					get_segment_base(vcpu, VCPU_SREG_SS);
-		}
-
-		vcpu->arch.emulate_ctxt.gs_base =
-					get_segment_base(vcpu, VCPU_SREG_GS);
-		vcpu->arch.emulate_ctxt.fs_base =
-					get_segment_base(vcpu, VCPU_SREG_FS);
-
 		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
 		/* Reject the instructions other than VMCALL/VMMCALL when
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 38926b7da64..18ca25c2d4a 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -522,6 +522,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
 	register_address_increment(c, &c->eip, rel);
 }
 
+static void set_seg_override(struct decode_cache *c, int seg)
+{
+	c->has_seg_override = true;
+	c->seg_override = seg;
+}
+
+static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
+{
+	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
+		return 0;
+
+	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
+}
+
+static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
+				       struct decode_cache *c)
+{
+	if (!c->has_seg_override)
+		return 0;
+
+	return seg_base(ctxt, c->seg_override);
+}
+
+static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
+{
+	return seg_base(ctxt, VCPU_SREG_ES);
+}
+
+static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
+{
+	return seg_base(ctxt, VCPU_SREG_SS);
+}
+
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 			      struct x86_emulate_ops *ops,
 			      unsigned long linear, u8 *dest)
@@ -735,8 +768,8 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		}
 		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
 		    (c->modrm_rm == 6 && c->modrm_mod != 0))
-			if (!c->override_base)
-				c->override_base = &ctxt->ss_base;
+			if (!c->has_seg_override)
+				set_seg_override(c, VCPU_SREG_SS);
 		c->modrm_ea = (u16)c->modrm_ea;
 	} else {
 		/* 32/64-bit ModR/M decode. */
@@ -807,6 +840,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 
 	memset(c, 0, sizeof(struct decode_cache));
 	c->eip = ctxt->vcpu->arch.rip;
+	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 
 	switch (mode) {
@@ -845,23 +879,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 				/* switch between 2/4 bytes */
 				c->ad_bytes = def_ad_bytes ^ 6;
 			break;
+		case 0x26:	/* ES override */
 		case 0x2e:	/* CS override */
-			c->override_base = &ctxt->cs_base;
-			break;
+		case 0x36:	/* SS override */
 		case 0x3e:	/* DS override */
-			c->override_base = &ctxt->ds_base;
-			break;
-		case 0x26:	/* ES override */
-			c->override_base = &ctxt->es_base;
+			set_seg_override(c, (c->b >> 3) & 3);
 			break;
 		case 0x64:	/* FS override */
-			c->override_base = &ctxt->fs_base;
-			break;
 		case 0x65:	/* GS override */
-			c->override_base = &ctxt->gs_base;
-			break;
-		case 0x36:	/* SS override */
-			c->override_base = &ctxt->ss_base;
+			set_seg_override(c, c->b & 7);
 			break;
 		case 0x40 ... 0x4f: /* REX */
 			if (mode != X86EMUL_MODE_PROT64)
@@ -933,15 +959,11 @@ done_prefixes:
 	if (rc)
 		goto done;
 
-	if (!c->override_base)
-		c->override_base = &ctxt->ds_base;
-	if (mode == X86EMUL_MODE_PROT64 &&
-	    c->override_base != &ctxt->fs_base &&
-	    c->override_base != &ctxt->gs_base)
-		c->override_base = NULL;
+	if (!c->has_seg_override)
+		set_seg_override(c, VCPU_SREG_DS);
 
-	if (c->override_base && !(!c->twobyte && c->b == 0x8d))
-		c->modrm_ea += *c->override_base;
+	if (!(!c->twobyte && c->b == 0x8d))
+		c->modrm_ea += seg_override_base(ctxt, c);
 
 	if (c->ad_bytes != 8)
 		c->modrm_ea = (u32)c->modrm_ea;
@@ -1043,7 +1065,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 	c->dst.bytes = c->op_bytes;
 	c->dst.val = c->src.val;
 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
-	c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
+	c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
 					       c->regs[VCPU_REGS_RSP]);
 }
 
@@ -1053,7 +1075,7 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	rc = ops->read_std(register_address(c, ctxt->ss_base,
+	rc = ops->read_std(register_address(c, ss_base(ctxt),
 					    c->regs[VCPU_REGS_RSP]),
 			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
 	if (rc != 0)
@@ -1375,11 +1397,11 @@ special_insn:
 		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
 					   -c->op_bytes);
 		c->dst.ptr = (void *) register_address(
-			c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+			c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
+		if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
 			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
 			c->op_bytes, ctxt->vcpu)) != 0)
 			goto done;
@@ -1405,7 +1427,7 @@ special_insn:
 				c->rep_prefix ?
 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
 				(ctxt->eflags & EFLG_DF),
-				register_address(c, ctxt->es_base,
+				register_address(c, es_base(ctxt),
 						 c->regs[VCPU_REGS_RDI]),
 				c->rep_prefix,
 				c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1421,9 +1443,8 @@ special_insn:
 				c->rep_prefix ?
 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
 				(ctxt->eflags & EFLG_DF),
-				register_address(c, c->override_base ?
-							*c->override_base :
-							ctxt->ds_base,
+					 register_address(c,
+					  seg_override_base(ctxt, c),
 						 c->regs[VCPU_REGS_RSI]),
 				c->rep_prefix,
 				c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1559,11 +1580,10 @@ special_insn:
 		c->dst.type = OP_MEM;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)register_address(c,
-						   ctxt->es_base,
+						   es_base(ctxt),
 						   c->regs[VCPU_REGS_RDI]);
 		if ((rc = ops->read_emulated(register_address(c,
-		      c->override_base ? *c->override_base :
-					ctxt->ds_base,
+					   seg_override_base(ctxt, c),
 					c->regs[VCPU_REGS_RSI]),
 					&c->dst.val,
 					c->dst.bytes, ctxt->vcpu)) != 0)
@@ -1579,8 +1599,7 @@ special_insn:
 		c->src.type = OP_NONE; /* Disable writeback. */
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->src.ptr = (unsigned long *)register_address(c,
-				c->override_base ? *c->override_base :
-						   ctxt->ds_base,
+				       seg_override_base(ctxt, c),
 						   c->regs[VCPU_REGS_RSI]);
 		if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
 						&c->src.val,
@@ -1591,7 +1610,7 @@ special_insn:
 		c->dst.type = OP_NONE; /* Disable writeback. */
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)register_address(c,
-						   ctxt->es_base,
+						   es_base(ctxt),
 						   c->regs[VCPU_REGS_RDI]);
 		if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
 						&c->dst.val,
@@ -1615,7 +1634,7 @@ special_insn:
 		c->dst.type = OP_MEM;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)register_address(c,
-						   ctxt->es_base,
+						   es_base(ctxt),
 						   c->regs[VCPU_REGS_RDI]);
 		c->dst.val = c->regs[VCPU_REGS_RAX];
 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
@@ -1627,8 +1646,7 @@ special_insn:
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
 		if ((rc = ops->read_emulated(register_address(c,
-				c->override_base ? *c->override_base :
-						   ctxt->ds_base,
+						 seg_override_base(ctxt, c),
 						 c->regs[VCPU_REGS_RSI]),
 						 &c->dst.val,
 						 c->dst.bytes,
-- 
cgit v1.2.3


From 6ada8cca79cb971f5da7d1756f4f9292e3ef1e03 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 22 Jun 2008 16:45:24 +0300
Subject: KVM: MMU: When debug is enabled, make it a run-time parameter

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5ebb2788bd7..5994645dcee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -66,7 +66,8 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
 #endif
 
 #if defined(MMU_DEBUG) || defined(AUDIT)
-static int dbg = 1;
+static int dbg = 0;
+module_param(dbg, bool, 0644);
 #endif
 
 #ifndef MMU_DEBUG
-- 
cgit v1.2.3


From db475c39eca0f2e44953d96e768d7ce808ab85bd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 22 Jun 2008 16:46:22 +0300
Subject: KVM: MMU: Fix printk format

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5994645dcee..1fd8e3b58cc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1116,7 +1116,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		mark_page_dirty(vcpu->kvm, gfn);
 
 	pgprintk("%s: setting spte %llx\n", __func__, spte);
-	pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
+	pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
 		 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
 		 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
 	set_shadow_pte(shadow_pte, spte);
-- 
cgit v1.2.3


From 65267ea1b3e768dc54b63cd7fad520d89c27d350 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Wed, 18 Jun 2008 14:43:38 +0800
Subject: KVM: VMX: Fix a wrong usage of vmcs_config

The function ept_update_paging_mode_cr0() writes to
CPU_BASED_VM_EXEC_CONTROL based on vmcs_config.cpu_based_exec_ctrl. That's
wrong because the variable may not be consistent with the current content
of the CPU_BASED_VM_EXEC_CONTROL VMCS field.

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1bb99465720..6a3a4038f3b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1441,7 +1441,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 	if (!(cr0 & X86_CR0_PG)) {
 		/* From paging/starting to nonpaging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-			     vmcs_config.cpu_based_exec_ctrl |
+			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
 			     (CPU_BASED_CR3_LOAD_EXITING |
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
@@ -1451,7 +1451,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-			     vmcs_config.cpu_based_exec_ctrl &
+			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
 			     ~(CPU_BASED_CR3_LOAD_EXITING |
 			       CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
-- 
cgit v1.2.3


From efa67e0d1f51842393606034051d805ab9948abd Mon Sep 17 00:00:00 2001
From: Chris Lalancette <clalance@redhat.com>
Date: Fri, 20 Jun 2008 09:51:30 +0200
Subject: KVM: VMX: Fake emulate Intel perfctr MSRs

Older linux guests (in this case, 2.6.9) can attempt to
access the performance counter MSRs without a fixup section, and injecting
a GPF kills the guest.  Work around by allowing the guest to write those MSRs.

Tested by me on RHEL-4 i386 and x86_64 guests, as well as F-9 guests.

Signed-off-by: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6a3a4038f3b..d493a97e788 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -920,6 +920,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		break;
 	case MSR_IA32_TIME_STAMP_COUNTER:
 		guest_write_tsc(data);
+		break;
+	case MSR_P6_PERFCTR0:
+	case MSR_P6_PERFCTR1:
+	case MSR_P6_EVNTSEL0:
+	case MSR_P6_EVNTSEL1:
+		/*
+		 * Just discard all writes to the performance counters; this
+		 * should keep both older linux and windows 64-bit guests
+		 * happy
+		 */
+		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
+
 		break;
 	default:
 		vmx_load_host_state(vmx);
-- 
cgit v1.2.3


From f8b78fa3d406f3a2dc038e2b47749013a9295994 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 23 Jun 2008 12:04:25 -0300
Subject: KVM: move slots_lock acquisition down to vapic_exit

There is no need to grab slots_lock if the vapic_page will not
be touched.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f726ba79fd3..55906e4c467 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2787,8 +2787,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	if (!apic || !apic->vapic_addr)
 		return;
 
+	down_read(&vcpu->kvm->slots_lock);
 	kvm_release_page_dirty(apic->vapic_page);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+	up_read(&vcpu->kvm->slots_lock);
 }
 
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -2944,9 +2946,7 @@ out:
 
 	post_kvm_run_save(vcpu, kvm_run);
 
-	down_read(&vcpu->kvm->slots_lock);
 	vapic_exit(vcpu);
-	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
-- 
cgit v1.2.3


From dfdded7c41e5b68c79a9f8a942d41f56bc265ba4 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Fri, 27 Jun 2008 15:05:34 +0200
Subject: KVM: Fix memory leak on guest exit

This patch fixes a memory leak: we want to free the physmem when destroying
the VM.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/kvm-s390.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4585c8ac2b0..b802ce6f675 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -194,6 +194,7 @@ out_nokvm:
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	debug_unregister(kvm->arch.dbf);
+	kvm_free_physmem(kvm);
 	free_page((unsigned long)(kvm->arch.sca));
 	kfree(kvm);
 	module_put(THIS_MODULE);
-- 
cgit v1.2.3


From 4da29e909ea8087de09e27476f91f51a070cabe8 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 27 Jun 2008 15:05:38 +0200
Subject: KVM: s390: Set guest storage limit and offset to sane values

Some machines do not accept 16EB as the guest storage limit. Let's change
the default for the guest storage limit to a sane value. We should also set
the guest_origin to what userspace thinks it is. This allows guests
starting at an address != 0.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/kvm-s390.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b802ce6f675..cdab57c5bc7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -247,11 +247,16 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gbea = 1;
 }
 
+/* The current code can have up to 256 pages for virtio */
+#define VIRTIODESCSPACE (256ul * 4096ul)
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
-	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
-	vcpu->arch.sie_block->gmsor = 0x000000000000;
+	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
+				      vcpu->kvm->arch.guest_origin +
+				      VIRTIODESCSPACE - 1ul;
+	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-- 
cgit v1.2.3


From 180c12fb22bd17c7187ae1bce023d24a42b2980c Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 27 Jun 2008 15:05:40 +0200
Subject: KVM: s390: rename private structures

While doing some tests with our lcrash implementation I have seen a
naming conflict with prefix_info in kvm_host.h vs. addrconf.h

To avoid future conflicts, let's rename the private definitions in
asm/kvm_host.h by adding the kvm_s390 prefix.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/interrupt.c | 32 ++++++++++++++++----------------
 arch/s390/kvm/kvm-s390.c  |  3 ++-
 arch/s390/kvm/priv.c      |  2 +-
 arch/s390/kvm/sigp.c      | 20 ++++++++++----------
 4 files changed, 29 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 84a7fed4cd4..11230b0db95 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -31,7 +31,7 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 }
 
 static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
-				      struct interrupt_info *inti)
+				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
 	case KVM_S390_INT_EMERGENCY:
@@ -91,7 +91,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 }
 
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
-				      struct interrupt_info *inti)
+				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
 	case KVM_S390_INT_EMERGENCY:
@@ -111,7 +111,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 }
 
 static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-				   struct interrupt_info *inti)
+				   struct kvm_s390_interrupt_info *inti)
 {
 	const unsigned short table[] = { 2, 4, 4, 6 };
 	int rc, exception = 0;
@@ -290,9 +290,9 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct local_interrupt *li = &vcpu->arch.local_int;
-	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct interrupt_info  *inti;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct kvm_s390_interrupt_info  *inti;
 	int rc = 0;
 
 	if (atomic_read(&li->active)) {
@@ -408,9 +408,9 @@ void kvm_s390_idle_wakeup(unsigned long data)
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
-	struct local_interrupt *li = &vcpu->arch.local_int;
-	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
-	struct interrupt_info  *n, *inti = NULL;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 	int deliver;
 
 	__reset_intercept_indicators(vcpu);
@@ -465,8 +465,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 {
-	struct local_interrupt *li = &vcpu->arch.local_int;
-	struct interrupt_info *inti;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_interrupt_info *inti;
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
@@ -487,9 +487,9 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 int kvm_s390_inject_vm(struct kvm *kvm,
 		       struct kvm_s390_interrupt *s390int)
 {
-	struct local_interrupt *li;
-	struct float_interrupt *fi;
-	struct interrupt_info *inti;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info *inti;
 	int sigcpu;
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -544,8 +544,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 			 struct kvm_s390_interrupt *s390int)
 {
-	struct local_interrupt *li;
-	struct interrupt_info *inti;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_interrupt_info *inti;
 
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index cdab57c5bc7..399acf3f64d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -275,7 +275,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	if (!vcpu)
 		goto out_nomem;
 
-	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
+					get_zeroed_page(GFP_KERNEL);
 
 	if (!vcpu->arch.sie_block)
 		goto out_free_cpu;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c02286c6a93..2e2d2ffb6a0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -199,7 +199,7 @@ out:
 
 static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 {
-	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	int cpus = 0;
 	int n;
 
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 0a236acfb5f..5a556114eaa 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -45,7 +45,7 @@
 
 static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
 {
-	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	int rc;
 
 	if (cpu_addr >= KVM_MAX_VCPUS)
@@ -71,9 +71,9 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
 
 static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
-	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
-	struct local_interrupt *li;
-	struct interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_interrupt_info *inti;
 	int rc;
 
 	if (cpu_addr >= KVM_MAX_VCPUS)
@@ -108,9 +108,9 @@ unlock:
 
 static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
 {
-	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
-	struct local_interrupt *li;
-	struct interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_interrupt_info *inti;
 	int rc;
 
 	if (cpu_addr >= KVM_MAX_VCPUS)
@@ -169,9 +169,9 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 			     u64 *reg)
 {
-	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
-	struct local_interrupt *li;
-	struct interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_interrupt_info *inti;
 	int rc;
 	u8 tmp;
 
-- 
cgit v1.2.3


From 0da1db75a2feca54564add30828bab658982481c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 2 Jul 2008 16:02:11 +0200
Subject: KVM: SVM: fix suspend/resume support

On suspend the svm_hardware_disable function is called, which frees all
svm_data variables. On resume they are not re-allocated. This patch moves
the deallocation of svm_data from the hardware_disable function to the
hardware_unsetup function, which is not called on suspend.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 238e8f3afaf..858e2970223 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -272,19 +272,11 @@ static int has_svm(void)
 
 static void svm_hardware_disable(void *garbage)
 {
-	struct svm_cpu_data *svm_data
-		= per_cpu(svm_data, raw_smp_processor_id());
-
-	if (svm_data) {
-		uint64_t efer;
+	uint64_t efer;
 
-		wrmsrl(MSR_VM_HSAVE_PA, 0);
-		rdmsrl(MSR_EFER, efer);
-		wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
-		per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-		__free_page(svm_data->save_area);
-		kfree(svm_data);
-	}
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
 }
 
 static void svm_hardware_enable(void *garbage)
@@ -323,6 +315,19 @@ static void svm_hardware_enable(void *garbage)
 	       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
 }
 
+static void svm_cpu_uninit(int cpu)
+{
+	struct svm_cpu_data *svm_data
+		= per_cpu(svm_data, raw_smp_processor_id());
+
+	if (!svm_data)
+		return;
+
+	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+	__free_page(svm_data->save_area);
+	kfree(svm_data);
+}
+
 static int svm_cpu_init(int cpu)
 {
 	struct svm_cpu_data *svm_data;
@@ -460,6 +465,11 @@ err:
 
 static __exit void svm_hardware_unsetup(void)
 {
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		svm_cpu_uninit(cpu);
+
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
 	iopm_base = 0;
 }
-- 
cgit v1.2.3


From 7e37c2998a5a0b00134f6227167694b710f57ac0 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 1 Jul 2008 01:19:19 +0300
Subject: x86: KVM guest: make kvm_smp_prepare_boot_cpu() static

This patch makes the needlessly global kvm_smp_prepare_boot_cpu() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kernel/kvmclock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 87edf1ceb1d..d02def06ca9 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void)
 #endif
 
 #ifdef CONFIG_SMP
-void __init kvm_smp_prepare_boot_cpu(void)
+static void __init kvm_smp_prepare_boot_cpu(void)
 {
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	native_smp_prepare_boot_cpu();
-- 
cgit v1.2.3


From 5a4c92880493945678315a6df810f7a21f55b985 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 3 Jul 2008 18:33:02 -0300
Subject: KVM: mmu_shrink: kvm_mmu_zap_page requires slots_lock to be held

kvm_mmu_zap_page() needs slots lock held (rmap_remove->gfn_to_memslot,
for example).

Since kvm_lock spinlock is held in mmu_shrink(), do a non-blocking
down_read_trylock().

Untested.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1fd8e3b58cc..ff7cf632175 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1987,6 +1987,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int npages;
 
+		if (!down_read_trylock(&kvm->slots_lock))
+			continue;
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -1999,6 +2001,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 		nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
+		up_read(&kvm->slots_lock);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
-- 
cgit v1.2.3


From 4e1096d27f3d095735c1c69c7b0a26a06a0d454e Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Sun, 6 Jul 2008 19:16:51 +0800
Subject: KVM: VMX: Add ept_sync_context in flush_tlb

Fix a potential issue caused by kvm_mmu_slot_remove_write_access(). The
old behavior doesn't sync the EPT TLB with the modified EPT entries, which
results in inconsistent contents between the EPT TLB and the EPT table.

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d493a97e788..fff3b490976 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 }
 
 static int init_rmode(struct kvm *kvm);
+static u64 construct_eptp(unsigned long root_hpa);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1422,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	vpid_sync_vcpu_all(to_vmx(vcpu));
+	if (vm_need_ept())
+		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From ac9f6dc0db0b5582ebf8bb720d7c41c3d2159013 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 6 Jul 2008 15:48:31 +0300
Subject: KVM: Apply the kernel sigmask to vcpus blocked due to being
 uninitialized

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 55906e4c467..89fc8565ede 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2958,15 +2958,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	vcpu_load(vcpu);
 
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
-		vcpu_put(vcpu);
-		return -EAGAIN;
+		r = -EAGAIN;
+		goto out;
 	}
 
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
 	/* re-sync apic's tpr */
 	if (!irqchip_in_kernel(vcpu->kvm))
 		kvm_set_cr8(vcpu, kvm_run->cr8);
-- 
cgit v1.2.3


From 19fdfa0d133ae216e9d1c69a8333fe63fcf8e584 Mon Sep 17 00:00:00 2001
From: Mohammed Gamal <m.gamal005@gmail.com>
Date: Sun, 6 Jul 2008 16:51:26 +0300
Subject: KVM: x86 emulator: Fix HLT instruction

This patch fixes an issue encountered with the HLT instruction
under FreeDOS's HIMEM XMS Driver.

The HLT instruction jumped directly to the done label and
skipped updating the EIP value, therefore causing the guest
to spin endlessly on the same instruction.

The patch changes the instruction so that it writes back
the updated EIP value.

Signed-off-by: Mohammed Gamal <m.gamal005@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 18ca25c2d4a..8bc63f62fbb 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1731,7 +1731,7 @@ special_insn:
 		break;
 	case 0xf4:              /* hlt */
 		ctxt->vcpu->arch.halt_request = 1;
-		goto done;
+		break;
 	case 0xf5:	/* cmc */
 		/* complement carry flag from eflags reg */
 		ctxt->eflags ^= EFLG_CF;
-- 
cgit v1.2.3


From c65bbfa1d693d375da51f9c8aa9fb26f09fa19ed Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Sun, 6 Jul 2008 17:15:07 +0300
Subject: KVM: check injected pic irq within valid pic irqs

Check that an injected pic irq is between 0 and 15.

Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8259.c | 6 ++++--
 arch/x86/kvm/irq.h   | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 5857f59ad4a..c31164e8aa4 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
 
-	pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
-	pic_update_irq(s);
+	if (irq >= 0 && irq < PIC_NUM_PINS) {
+		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+		pic_update_irq(s);
+	}
 }
 
 /*
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2a15be2275c..7ca47cbb48b 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -30,6 +30,8 @@
 #include "ioapic.h"
 #include "lapic.h"
 
+#define PIC_NUM_PINS 16
+
 struct kvm;
 struct kvm_vcpu;
 
-- 
cgit v1.2.3


From d6e88aec07aa8f6c7e4024f5734ec659fd7c5a40 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Thu, 10 Jul 2008 16:53:33 +0300
Subject: KVM: Prefix some x86 low level function with kvm_, to avoid namespace
 issues

Fixes compilation with CONFIG_VMI enabled.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/svm.c | 12 ++++++------
 arch/x86/kvm/vmx.c | 24 ++++++++++++------------
 arch/x86/kvm/x86.c | 18 +++++++++---------
 3 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 858e2970223..b756e876dce 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1710,9 +1710,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	sync_lapic_to_cr8(vcpu);
 
 	save_host_msrs(vcpu);
-	fs_selector = read_fs();
-	gs_selector = read_gs();
-	ldt_selector = read_ldt();
+	fs_selector = kvm_read_fs();
+	gs_selector = kvm_read_gs();
+	ldt_selector = kvm_read_ldt();
 	svm->host_cr2 = kvm_read_cr2();
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
@@ -1845,9 +1845,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	write_dr7(svm->host_dr7);
 	kvm_write_cr2(svm->host_cr2);
 
-	load_fs(fs_selector);
-	load_gs(gs_selector);
-	load_ldt(ldt_selector);
+	kvm_load_fs(fs_selector);
+	kvm_load_gs(gs_selector);
+	kvm_load_ldt(ldt_selector);
 	load_host_msrs(vcpu);
 
 	reload_tss(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fff3b490976..0cac6370171 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -484,7 +484,7 @@ static void reload_tss(void)
 	struct descriptor_table gdt;
 	struct desc_struct *descs;
 
-	get_gdt(&gdt);
+	kvm_get_gdt(&gdt);
 	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
@@ -540,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
 	 * allow segment selectors with cpl > 0 or ti == 1.
 	 */
-	vmx->host_state.ldt_sel = read_ldt();
+	vmx->host_state.ldt_sel = kvm_read_ldt();
 	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
-	vmx->host_state.fs_sel = read_fs();
+	vmx->host_state.fs_sel = kvm_read_fs();
 	if (!(vmx->host_state.fs_sel & 7)) {
 		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
 		vmx->host_state.fs_reload_needed = 0;
@@ -550,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 		vmcs_write16(HOST_FS_SELECTOR, 0);
 		vmx->host_state.fs_reload_needed = 1;
 	}
-	vmx->host_state.gs_sel = read_gs();
+	vmx->host_state.gs_sel = kvm_read_gs();
 	if (!(vmx->host_state.gs_sel & 7))
 		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
 	else {
@@ -586,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
 	if (vmx->host_state.fs_reload_needed)
-		load_fs(vmx->host_state.fs_sel);
+		kvm_load_fs(vmx->host_state.fs_sel);
 	if (vmx->host_state.gs_ldt_reload_needed) {
-		load_ldt(vmx->host_state.ldt_sel);
+		kvm_load_ldt(vmx->host_state.ldt_sel);
 		/*
 		 * If we have to reload gs, we must take care to
 		 * preserve our gs base.
 		 */
 		local_irq_save(flags);
-		load_gs(vmx->host_state.gs_sel);
+		kvm_load_gs(vmx->host_state.gs_sel);
 #ifdef CONFIG_X86_64
 		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
@@ -654,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
 		 * processors.
 		 */
-		vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
-		get_gdt(&dt);
+		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+		kvm_get_gdt(&dt);
 		vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -1943,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-	vmcs_write16(HOST_FS_SELECTOR, read_fs());    /* 22.2.4 */
-	vmcs_write16(HOST_GS_SELECTOR, read_gs());    /* 22.2.4 */
+	vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs());    /* 22.2.4 */
+	vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs());    /* 22.2.4 */
 	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
 	rdmsrl(MSR_FS_BASE, a);
@@ -1958,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-	get_idt(&dt);
+	kvm_get_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
 
 	asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 89fc8565ede..b131f3c0cf6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3767,14 +3767,14 @@ void fx_init(struct kvm_vcpu *vcpu)
 	 * allocate ram with GFP_KERNEL.
 	 */
 	if (!used_math())
-		fx_save(&vcpu->arch.host_fx_image);
+		kvm_fx_save(&vcpu->arch.host_fx_image);
 
 	/* Initialize guest FPU by resetting ours and saving into guest's */
 	preempt_disable();
-	fx_save(&vcpu->arch.host_fx_image);
-	fx_finit();
-	fx_save(&vcpu->arch.guest_fx_image);
-	fx_restore(&vcpu->arch.host_fx_image);
+	kvm_fx_save(&vcpu->arch.host_fx_image);
+	kvm_fx_finit();
+	kvm_fx_save(&vcpu->arch.guest_fx_image);
+	kvm_fx_restore(&vcpu->arch.host_fx_image);
 	preempt_enable();
 
 	vcpu->arch.cr0 |= X86_CR0_ET;
@@ -3791,8 +3791,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 1;
-	fx_save(&vcpu->arch.host_fx_image);
-	fx_restore(&vcpu->arch.guest_fx_image);
+	kvm_fx_save(&vcpu->arch.host_fx_image);
+	kvm_fx_restore(&vcpu->arch.guest_fx_image);
 }
 EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
 
@@ -3802,8 +3802,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
-	fx_save(&vcpu->arch.guest_fx_image);
-	fx_restore(&vcpu->arch.host_fx_image);
+	kvm_fx_save(&vcpu->arch.guest_fx_image);
+	kvm_fx_restore(&vcpu->arch.host_fx_image);
 	++vcpu->stat.fpu_reload;
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
-- 
cgit v1.2.3


From 34d4cb8fca1f2a31be152b74797e6cd160ec9de6 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 10 Jul 2008 20:49:31 -0300
Subject: KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction

Flush the shadow mmu before removing regions to avoid stale entries.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/kvm-ia64.c   | 3 +++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 5 +++++
 4 files changed, 16 insertions(+)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 9408b30576d..2672f4d278a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1455,6 +1455,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
 
 long kvm_arch_dev_ioctl(struct file *filp,
 		unsigned int ioctl, unsigned long arg)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b850d249702..53826a5f6c0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -170,6 +170,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
+
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 399acf3f64d..1782cbcd282 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -675,6 +675,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b131f3c0cf6..9f1cdb011cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4032,6 +4032,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+	kvm_mmu_zap_all(kvm);
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-- 
cgit v1.2.3


From 376c53c2b30d4a1955240f59f4ecd959aa118f92 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 10 Jul 2008 20:54:29 -0300
Subject: KVM: MMU: improve invalid shadow root page handling

Harden kvm_mmu_zap_page() against invalid root pages that
had been shadowed from memslots that are gone.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff7cf632175..7f57da66382 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 	kvm_mmu_page_unlink_children(kvm, sp);
 	if (!sp->root_count) {
-		if (!sp->role.metaphysical)
+		if (!sp->role.metaphysical && !sp->role.invalid)
 			unaccount_shadowed(kvm, sp->gfn);
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
 	} else {
+		int invalid = sp->role.invalid;
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
 		sp->role.invalid = 1;
 		kvm_reload_remote_mmus(kvm);
+		if (!sp->role.metaphysical && !invalid)
+			unaccount_shadowed(kvm, sp->gfn);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
 }
-- 
cgit v1.2.3


From 2a7c5b8b550b1fb1db9eb490420132e637f5dcb4 Mon Sep 17 00:00:00 2001
From: Glauber Costa <gcosta@redhat.com>
Date: Thu, 10 Jul 2008 17:08:15 -0300
Subject: KVM: x86 emulator: emulate clflush

If the guest issues a clflush on an MMIO address, the instruction
can trap into the hypervisor. Currently, we do not decode clflush
properly, causing the guest to hang. This patch fixes this by emulating
clflush (opcode 0f ae).

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 8bc63f62fbb..f2f90468f8b 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -219,7 +219,7 @@ static u16 twobyte_table[256] = {
 	/* 0xA0 - 0xA7 */
 	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
 	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
 	/* 0xB0 - 0xB7 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
 	    DstMem | SrcReg | ModRM | BitOp,
@@ -1947,6 +1947,8 @@ twobyte_insn:
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xae:              /* clflush */
+		break;
 	case 0xb0 ... 0xb1:	/* cmpxchg */
 		/*
 		 * Save real source value, then compare EAX against
-- 
cgit v1.2.3


From 722c05f2192070bac0208b2c16ce13929b32d92f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 13 Jul 2008 11:33:54 +0300
Subject: KVM: MMU: Fix potential race setting upper shadow ptes on nonpae
 hosts

The direct mapped shadow code (used for real mode and two dimensional paging)
sets upper-level ptes using direct assignment rather than calling
set_shadow_pte().  A nonpae host will split this into two writes, which opens
up a race if another vcpu accesses the same memory area.

Fix by calling set_shadow_pte() instead of assigning directly.

Noticed by Izik Eidus.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7f57da66382..b0e4ddca6c1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1189,9 +1189,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 			}
 
-			table[index] = __pa(new_table->spt)
-				| PT_PRESENT_MASK | PT_WRITABLE_MASK
-				| shadow_user_mask | shadow_x_mask;
+			set_shadow_pte(&table[index],
+				       __pa(new_table->spt)
+				       | PT_PRESENT_MASK | PT_WRITABLE_MASK
+				       | shadow_user_mask | shadow_x_mask);
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
-- 
cgit v1.2.3


From 3450004a8cec8bab246372a1cabb9c2483b1e6c3 Mon Sep 17 00:00:00 2001
From: Dmitri Vorobiev <dmitri.vorobiev@movial.fi>
Date: Tue, 15 Jul 2008 19:57:30 +0300
Subject: [MIPS] PCI: Make the pcibios_max_latency variable static

The pcibios_max_latency variable is needlessly defined global, and this
patch makes it static.

Build-tested using malta_defconfig.

Signed-off-by: Dmitri Vorobiev <dmitri.vorobiev@movial.fi>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index d7d6cb063d2..77bd5b68dc4 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -204,7 +204,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask)
  *  If we set up a device for bus mastering, we need to check the latency
  *  timer as certain crappy BIOSes forget to set it properly.
  */
-unsigned int pcibios_max_latency = 255;
+static unsigned int pcibios_max_latency = 255;
 
 void pcibios_set_master(struct pci_dev *dev)
 {
-- 
cgit v1.2.3


From f028b8605613ade67fda554e30d367911d6c7222 Mon Sep 17 00:00:00 2001
From: Dmitri Vorobiev <dmitri.vorobiev@movial.fi>
Date: Tue, 15 Jul 2008 19:57:31 +0300
Subject: [MIPS] Fix missing prototypes in asm/fpu.h

While building the Malta defconfig, sparse spat the following
warnings:

>>>>>>>>>>>>>>>>>>
arch/mips/math-emu/kernel_linkage.c:31:6: warning: symbol
'fpu_emulator_init_fpu' was not declared. Should it be static?

arch/mips/math-emu/kernel_linkage.c:54:5: warning: symbol
'fpu_emulator_save_context' was not declared. Should it be
static?

arch/mips/math-emu/kernel_linkage.c:68:5: warning: symbol
'fpu_emulator_restore_context' was not declared. Should it be
static?
>>>>>>>>>>>>>>>>>>

This patch fixes these warnings by adding the proper prototypes
to the include/asm-mips/fpu.h header, and actually using this
header in the sparse-spotted source file.

Build-tested with Malta defconfig.

Signed-off-by: Dmitri Vorobiev <dmitri.vorobiev@movial.fi>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/math-emu/kernel_linkage.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index ed49ef01ac5..52e6c58c8de 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -24,6 +24,7 @@
 #include <asm/signal.h>
 #include <asm/uaccess.h>
 
+#include <asm/fpu.h>
 #include <asm/fpu_emulator.h>
 
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
-- 
cgit v1.2.3


From 36e5c21de51e83bfa17c1e7334050edd2eda3d47 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Wed, 16 Jul 2008 14:06:15 +0200
Subject: [MIPS] IP22, IP28: Fix merge bug

Instead of one SGI_HAS_HAL2 for IP22 and one for IP28, IP28 got two of
them... Let's give IP22 some ALSA sound, too.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

[MIPS] IP22, IP28: Fix merge bug

Instead of one SGI_HAS_HAL2 for IP22 and one for IP28, IP28 got two of
them... Let's give IP22 some ALSA sound, too.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d21df5f1b1f..30edc395dce 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -330,6 +330,7 @@ config SGI_IP22
 	select SGI_HAS_DS1286
 	select SGI_HAS_I8042
 	select SGI_HAS_INDYDOG
+	select SGI_HAS_HAL2
 	select SGI_HAS_SEEQ
 	select SGI_HAS_WD93
 	select SGI_HAS_ZILOG
@@ -386,7 +387,6 @@ config SGI_IP28
 	select SGI_HAS_I8042
 	select SGI_HAS_INDYDOG
 	select SGI_HAS_HAL2
-	select SGI_HAS_HAL2
 	select SGI_HAS_SEEQ
 	select SGI_HAS_WD93
 	select SGI_HAS_ZILOG
-- 
cgit v1.2.3


From 5a334fa9240411121f5dda9605fc7fd98429e8c5 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Wed, 16 Jul 2008 15:18:54 +0200
Subject: [MIPS] IP22: Use common SGI button driver

Use the Indy/O2 button driver.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip22/ip22-platform.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index fc6df96305e..60141235ec4 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -188,8 +188,7 @@ static int __init sgi_button_devinit(void)
 	if (ip22_is_fullhouse())
 		return 0; /* full house has no volume buttons */
 
-	return IS_ERR(platform_device_register_simple("sgiindybtns",
-						      -1, NULL, 0));
+	return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0));
 }
 
 device_initcall(sgi_button_devinit);
-- 
cgit v1.2.3


From 36a0a3cd45b49ceff78ac28efef1cbeec413d8c2 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Wed, 16 Jul 2008 15:18:58 +0200
Subject: [MIPS] IP32: Use common SGI button driver

Use the Indy/O2 button driver.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip32/ip32-platform.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c
index 2ee401ba0b2..3d63721e0e8 100644
--- a/arch/mips/sgi-ip32/ip32-platform.c
+++ b/arch/mips/sgi-ip32/ip32-platform.c
@@ -85,18 +85,7 @@ device_initcall(sgio2audio_devinit);
 
 static __init int sgio2btns_devinit(void)
 {
-	struct platform_device *pd;
-	int ret;
-
-	pd = platform_device_alloc("sgio2btns", -1);
-	if (!pd)
-		return -ENOMEM;
-
-	ret = platform_device_add(pd);
-	if (ret)
-		platform_device_put(pd);
-
-	return ret;
+	return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0));
 }
 
 device_initcall(sgio2btns_devinit);
-- 
cgit v1.2.3


From 73b4390fb23456964201abda79f1210fe337d01a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 16 Jul 2008 16:12:25 +0100
Subject: [MIPS] Routerboard 532: Support for base system

Signed-off-by: Phil Sutter <n0-1@freewrt.org>
Signed-off-by: Florian Fainelli <florian.fainelli@telecomint.eu>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                 |   20 +-
 arch/mips/Makefile                |    7 +
 arch/mips/configs/rb532_defconfig | 1314 +++++++++++++++++++++++++++++++++++++
 arch/mips/pci/Makefile            |    1 +
 arch/mips/pci/fixup-rc32434.c     |   69 ++
 arch/mips/pci/ops-rc32434.c       |  207 ++++++
 arch/mips/pci/pci-rc32434.c       |  221 +++++++
 arch/mips/rb532/Makefile          |    7 +
 arch/mips/rb532/devices.c         |  331 ++++++++++
 arch/mips/rb532/gpio.c            |  220 +++++++
 arch/mips/rb532/irq.c             |  209 ++++++
 arch/mips/rb532/prom.c            |  158 +++++
 arch/mips/rb532/serial.c          |   53 ++
 arch/mips/rb532/setup.c           |   79 +++
 arch/mips/rb532/time.c            |   67 ++
 15 files changed, 2962 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/configs/rb532_defconfig
 create mode 100644 arch/mips/pci/fixup-rc32434.c
 create mode 100644 arch/mips/pci/ops-rc32434.c
 create mode 100644 arch/mips/pci/pci-rc32434.c
 create mode 100644 arch/mips/rb532/Makefile
 create mode 100644 arch/mips/rb532/devices.c
 create mode 100644 arch/mips/rb532/gpio.c
 create mode 100644 arch/mips/rb532/irq.c
 create mode 100644 arch/mips/rb532/prom.c
 create mode 100644 arch/mips/rb532/serial.c
 create mode 100644 arch/mips/rb532/setup.c
 create mode 100644 arch/mips/rb532/time.c

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 30edc395dce..b9c754f4070 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -558,6 +558,24 @@ config MACH_TX39XX
 config MACH_TX49XX
 	bool "Toshiba TX49 series based machines"
 
+config MIKROTIK_RB532
+	bool "Mikrotik RB532 boards"
+	select CEVT_R4K
+	select CSRC_R4K
+	select DMA_NONCOHERENT
+	select GENERIC_HARDIRQS_NO__DO_IRQ
+	select HW_HAS_PCI
+	select IRQ_CPU
+	select SYS_HAS_CPU_MIPS32_R1
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SWAP_IO_SPACE
+	select BOOT_RAW
+	select GENERIC_GPIO
+	help
+	  Support the Mikrotik(tm) RouterBoard 532 series,
+	  based on the IDT RC32434 SoC.
+
 config WR_PPMC
 	bool "Wind River PPMC board"
 	select CEVT_R4K
@@ -899,7 +917,7 @@ config BOOT_ELF32
 
 config MIPS_L1_CACHE_SHIFT
 	int
-	default "4" if MACH_DECSTATION
+	default "4" if MACH_DECSTATION || MIKROTIK_RB532
 	default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM
 	default "4" if PMC_MSP4200_EVAL
 	default "5"
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 356453322b4..9aab51caf16 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -559,6 +559,13 @@ load-$(CONFIG_MACH_TX49XX)	+= 0xffffffff80100000
 #
 core-$(CONFIG_TOSHIBA_JMR3927)	+= arch/mips/txx9/jmr3927/
 
+#
+# Routerboard 532 board
+#
+core-$(CONFIG_MIKROTIK_RB532)	+= arch/mips/rb532/
+cflags-$(CONFIG_MIKROTIK_RB532) += -Iinclude/asm-mips/mach-rc32434
+load-$(CONFIG_MIKROTIK_RB532)	+= 0xffffffff80101000
+
 #
 # Toshiba RBTX4927 board or
 # Toshiba RBTX4937 board
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
new file mode 100644
index 00000000000..f28dc32974e
--- /dev/null
+++ b/arch/mips/configs/rb532_defconfig
@@ -0,0 +1,1314 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.25
+# Mon Apr 28 12:24:17 2008
+#
+CONFIG_MIPS=y
+
+#
+# Machine selection
+#
+# CONFIG_MACH_ALCHEMY is not set
+# CONFIG_BASLER_EXCITE is not set
+# CONFIG_BCM47XX is not set
+# CONFIG_MIPS_COBALT is not set
+# CONFIG_MACH_DECSTATION is not set
+# CONFIG_MACH_JAZZ is not set
+# CONFIG_LASAT is not set
+# CONFIG_LEMOTE_FULONG is not set
+# CONFIG_MIPS_ATLAS is not set
+# CONFIG_MIPS_MALTA is not set
+# CONFIG_MIPS_SEAD is not set
+# CONFIG_MIPS_SIM is not set
+# CONFIG_MARKEINS is not set
+# CONFIG_MACH_VR41XX is not set
+# CONFIG_PNX8550_JBS is not set
+# CONFIG_PNX8550_STB810 is not set
+# CONFIG_PMC_MSP is not set
+# CONFIG_PMC_YOSEMITE is not set
+# CONFIG_SGI_IP22 is not set
+# CONFIG_SGI_IP27 is not set
+# CONFIG_SGI_IP28 is not set
+# CONFIG_SGI_IP32 is not set
+# CONFIG_SIBYTE_CRHINE is not set
+# CONFIG_SIBYTE_CARMEL is not set
+# CONFIG_SIBYTE_CRHONE is not set
+# CONFIG_SIBYTE_RHONE is not set
+# CONFIG_SIBYTE_SWARM is not set
+# CONFIG_SIBYTE_LITTLESUR is not set
+# CONFIG_SIBYTE_SENTOSA is not set
+# CONFIG_SIBYTE_BIGSUR is not set
+# CONFIG_SNI_RM is not set
+# CONFIG_TOSHIBA_JMR3927 is not set
+CONFIG_MIKROTIK_RB532=y
+# CONFIG_TOSHIBA_RBTX4927 is not set
+# CONFIG_TOSHIBA_RBTX4938 is not set
+# CONFIG_WR_PPMC is not set
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_SUPPORTS_OPROFILE=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_BOOT_RAW=y
+CONFIG_CEVT_R4K=y
+CONFIG_CSRC_R4K=y
+CONFIG_DMA_NONCOHERENT=y
+CONFIG_DMA_NEED_PCI_MAP_STATE=y
+# CONFIG_HOTPLUG_CPU is not set
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_GPIO=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y
+CONFIG_IRQ_CPU=y
+CONFIG_SWAP_IO_SPACE=y
+CONFIG_MIPS_L1_CACHE_SHIFT=4
+
+#
+# CPU selection
+#
+# CONFIG_CPU_LOONGSON2 is not set
+CONFIG_CPU_MIPS32_R1=y
+# CONFIG_CPU_MIPS32_R2 is not set
+# CONFIG_CPU_MIPS64_R1 is not set
+# CONFIG_CPU_MIPS64_R2 is not set
+# CONFIG_CPU_R3000 is not set
+# CONFIG_CPU_TX39XX is not set
+# CONFIG_CPU_VR41XX is not set
+# CONFIG_CPU_R4300 is not set
+# CONFIG_CPU_R4X00 is not set
+# CONFIG_CPU_TX49XX is not set
+# CONFIG_CPU_R5000 is not set
+# CONFIG_CPU_R5432 is not set
+# CONFIG_CPU_R6000 is not set
+# CONFIG_CPU_NEVADA is not set
+# CONFIG_CPU_R8000 is not set
+# CONFIG_CPU_R10000 is not set
+# CONFIG_CPU_RM7000 is not set
+# CONFIG_CPU_RM9000 is not set
+# CONFIG_CPU_SB1 is not set
+CONFIG_SYS_HAS_CPU_MIPS32_R1=y
+CONFIG_CPU_MIPS32=y
+CONFIG_CPU_MIPSR1=y
+CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
+CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
+
+#
+# Kernel type
+#
+CONFIG_32BIT=y
+# CONFIG_64BIT is not set
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_CPU_HAS_PREFETCH=y
+CONFIG_MIPS_MT_DISABLED=y
+# CONFIG_MIPS_MT_SMP is not set
+# CONFIG_MIPS_MT_SMTC is not set
+CONFIG_CPU_HAS_LLSC=y
+CONFIG_CPU_HAS_SYNC=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_CPU_SUPPORTS_HIGHMEM=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+# CONFIG_HZ_48 is not set
+CONFIG_HZ_100=y
+# CONFIG_HZ_128 is not set
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_256 is not set
+# CONFIG_HZ_1000 is not set
+# CONFIG_HZ_1024 is not set
+CONFIG_SYS_SUPPORTS_ARBIT_HZ=y
+CONFIG_HZ=100
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_KEXEC is not set
+# CONFIG_SECCOMP is not set
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_KALLSYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+# CONFIG_ELF_CORE is not set
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_KPROBES is not set
+# CONFIG_HAVE_KRETPROBES is not set
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+CONFIG_IOSCHED_DEADLINE=y
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_DEADLINE=y
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="deadline"
+CONFIG_CLASSIC_RCU=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, ISA, TC)
+#
+CONFIG_HW_HAS_PCI=y
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+CONFIG_PCI_LEGACY=y
+CONFIG_MMU=y
+# CONFIG_PCCARD is not set
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_TRAD_SIGNALS=y
+
+#
+# Power management options
+#
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+# CONFIG_PM is not set
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=m
+CONFIG_INET_TCP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=m
+CONFIG_TCP_CONG_CUBIC=m
+CONFIG_TCP_CONG_WESTWOOD=m
+CONFIG_TCP_CONG_HTCP=m
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_VEGAS=y
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+# CONFIG_DEFAULT_BIC is not set
+# CONFIG_DEFAULT_CUBIC is not set
+# CONFIG_DEFAULT_HTCP is not set
+CONFIG_DEFAULT_VEGAS=y
+# CONFIG_DEFAULT_WESTWOOD is not set
+# CONFIG_DEFAULT_RENO is not set
+CONFIG_DEFAULT_TCP_CONG="vegas"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_NETFILTER_ADVANCED=y
+# CONFIG_BRIDGE_NETFILTER is not set
+
+#
+# Core Netfilter Configuration
+#
+# CONFIG_NETFILTER_NETLINK_QUEUE is not set
+# CONFIG_NETFILTER_NETLINK_LOG is not set
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CT_ACCT=y
+CONFIG_NF_CONNTRACK_MARK=y
+# CONFIG_NF_CONNTRACK_EVENTS is not set
+# CONFIG_NF_CT_PROTO_DCCP is not set
+# CONFIG_NF_CT_PROTO_SCTP is not set
+# CONFIG_NF_CT_PROTO_UDPLITE is not set
+# CONFIG_NF_CONNTRACK_AMANDA is not set
+CONFIG_NF_CONNTRACK_FTP=m
+# CONFIG_NF_CONNTRACK_H323 is not set
+CONFIG_NF_CONNTRACK_IRC=m
+# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
+# CONFIG_NF_CONNTRACK_PPTP is not set
+# CONFIG_NF_CONNTRACK_SANE is not set
+# CONFIG_NF_CONNTRACK_SIP is not set
+CONFIG_NF_CONNTRACK_TFTP=m
+# CONFIG_NF_CT_NETLINK is not set
+CONFIG_NETFILTER_XTABLES=y
+# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
+# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
+# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
+# CONFIG_NETFILTER_XT_TARGET_MARK is not set
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set
+# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
+# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set
+# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+# CONFIG_NETFILTER_XT_MATCH_DSCP is not set
+# CONFIG_NETFILTER_XT_MATCH_ESP is not set
+# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
+# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set
+# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+# CONFIG_NETFILTER_XT_MATCH_MAC is not set
+# CONFIG_NETFILTER_XT_MATCH_MARK is not set
+# CONFIG_NETFILTER_XT_MATCH_OWNER is not set
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
+# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
+# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
+# CONFIG_NETFILTER_XT_MATCH_STRING is not set
+# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
+# CONFIG_NETFILTER_XT_MATCH_TIME is not set
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_CONNTRACK_PROC_COMPAT=y
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_AH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+# CONFIG_IP_NF_TARGET_LOG is not set
+# CONFIG_IP_NF_TARGET_ULOG is not set
+CONFIG_NF_NAT=y
+CONFIG_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+# CONFIG_IP_NF_TARGET_REDIRECT is not set
+# CONFIG_IP_NF_TARGET_NETMAP is not set
+# CONFIG_NF_NAT_SNMP_BASIC is not set
+CONFIG_NF_NAT_FTP=m
+CONFIG_NF_NAT_IRC=m
+CONFIG_NF_NAT_TFTP=m
+# CONFIG_NF_NAT_AMANDA is not set
+# CONFIG_NF_NAT_PPTP is not set
+# CONFIG_NF_NAT_H323 is not set
+# CONFIG_NF_NAT_SIP is not set
+CONFIG_IP_NF_MANGLE=y
+# CONFIG_IP_NF_TARGET_ECN is not set
+# CONFIG_IP_NF_TARGET_TTL is not set
+# CONFIG_IP_NF_TARGET_CLUSTERIP is not set
+CONFIG_IP_NF_RAW=m
+# CONFIG_IP_NF_ARPTABLES is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=y
+# CONFIG_DECNET is not set
+CONFIG_LLC=y
+CONFIG_LLC2=m
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+# CONFIG_NET_SCH_HTB is not set
+# CONFIG_NET_SCH_HFSC is not set
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RR=m
+# CONFIG_NET_SCH_RED is not set
+# CONFIG_NET_SCH_SFQ is not set
+# CONFIG_NET_SCH_TEQL is not set
+# CONFIG_NET_SCH_TBF is not set
+# CONFIG_NET_SCH_GRED is not set
+# CONFIG_NET_SCH_DSMARK is not set
+CONFIG_NET_SCH_NETEM=m
+# CONFIG_NET_SCH_INGRESS is not set
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+# CONFIG_NET_CLS_FLOW is not set
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+# CONFIG_NET_ACT_NAT is not set
+CONFIG_NET_ACT_PEDIT=m
+# CONFIG_NET_ACT_SIMP is not set
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_SCH_FIFO=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_HAMRADIO=y
+
+#
+# Packet Radio protocols
+#
+# CONFIG_AX25 is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_FIB_RULES=y
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+CONFIG_WIRELESS_EXT=y
+# CONFIG_MAC80211 is not set
+# CONFIG_IEEE80211 is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_INTEL_VR_NOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+CONFIG_MTD_BLOCK2MTD=y
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_VERIFY_WRITE=y
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_CAFE is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+CONFIG_MTD_NAND_PLATFORM=y
+# CONFIG_MTD_ONENAND is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+# CONFIG_BLK_DEV_SD is not set
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC94XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_ARCMSR is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_MVSAS is not set
+# CONFIG_SCSI_STEX is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_QLA_ISCSI is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_SRP is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+# CONFIG_SATA_PMP is not set
+# CONFIG_SATA_AHCI is not set
+# CONFIG_SATA_SIL24 is not set
+CONFIG_ATA_SFF=y
+# CONFIG_SATA_SVW is not set
+# CONFIG_ATA_PIIX is not set
+# CONFIG_SATA_MV is not set
+# CONFIG_SATA_NV is not set
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+# CONFIG_SATA_SIL is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+# CONFIG_SATA_VIA is not set
+# CONFIG_SATA_VITESSE is not set
+# CONFIG_SATA_INIC162X is not set
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_IT8213 is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NINJA32 is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_NS87415 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+CONFIG_PATA_RB532=y
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+# CONFIG_PATA_PLATFORM is not set
+# CONFIG_MD is not set
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_FIREWIRE is not set
+# CONFIG_IEEE1394 is not set
+# CONFIG_I2O is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
+CONFIG_IFB=m
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_ARCNET is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+CONFIG_KORINA=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_DM9000 is not set
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_TC35815 is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_R6040 is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=y
+# CONFIG_VIA_RHINE_MMIO is not set
+CONFIG_VIA_RHINE_NAPI=y
+# CONFIG_SC92031 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+# CONFIG_TR is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+CONFIG_WLAN_80211=y
+# CONFIG_IPW2100 is not set
+# CONFIG_IPW2200 is not set
+# CONFIG_LIBERTAS is not set
+# CONFIG_HERMES is not set
+CONFIG_ATMEL=m
+# CONFIG_PCI_ATMEL is not set
+# CONFIG_PRISM54 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_HOSTAP is not set
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+# CONFIG_PPP_SYNC_TTY is not set
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_MPPE is not set
+CONFIG_PPPOE=m
+CONFIG_PPPOL2TP=m
+# CONFIG_SLIP is not set
+CONFIG_SLHC=m
+# CONFIG_NET_FC is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_NOZOMI is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_DEVPORT=y
+# CONFIG_I2C is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+
+#
+# Multimedia devices
+#
+CONFIG_VIDEO_DEV=m
+CONFIG_VIDEO_V4L2_COMMON=m
+CONFIG_VIDEO_ALLOW_V4L1=y
+CONFIG_VIDEO_V4L1_COMPAT=y
+CONFIG_VIDEO_V4L2=m
+CONFIG_VIDEO_V4L1=m
+CONFIG_VIDEO_CAPTURE_DRIVERS=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
+
+#
+# Encoders/decoders and other helper chips
+#
+
+#
+# Audio decoders
+#
+
+#
+# Video decoders
+#
+
+#
+# Video and audio decoders
+#
+
+#
+# MPEG video encoders
+#
+# CONFIG_VIDEO_CX2341X is not set
+
+#
+# Video encoders
+#
+
+#
+# Video improvement chips
+#
+# CONFIG_VIDEO_VIVI is not set
+# CONFIG_VIDEO_CPIA is not set
+# CONFIG_VIDEO_STRADIS is not set
+# CONFIG_SOC_CAMERA is not set
+# CONFIG_RADIO_ADAPTERS is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_DRM is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+# CONFIG_HID is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
+# CONFIG_USB is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+# CONFIG_USB_GADGET is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+
+#
+# LED drivers
+#
+# CONFIG_LEDS_GPIO is not set
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+# CONFIG_INFINIBAND is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4DEV_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_CONFIGFS_FS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+CONFIG_JFFS2_SUMMARY=y
+# CONFIG_JFFS2_FS_XATTR is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_JFFS2_CMODE_NONE is not set
+CONFIG_JFFS2_CMODE_PRIORITY=y
+# CONFIG_JFFS2_CMODE_SIZE is not set
+# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+# CONFIG_NFS_FS is not set
+# CONFIG_NFSD is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_SAMPLES is not set
+CONFIG_CMDLINE=""
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=m
+CONFIG_CRYPTO_AEAD=m
+CONFIG_CRYPTO_BLKCIPHER=m
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+CONFIG_CRYPTO_TEST=m
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+# CONFIG_CRYPTO_HW is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+CONFIG_CRC_CCITT=m
+CONFIG_CRC16=m
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 57e34cafa49..15e01aec37f 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_TOSHIBA_RBTX4938)	+= fixup-rbtx4938.o
 obj-$(CONFIG_VICTOR_MPC30X)	+= fixup-mpc30x.o
 obj-$(CONFIG_ZAO_CAPCELLA)	+= fixup-capcella.o
 obj-$(CONFIG_WR_PPMC)		+= fixup-wrppmc.o
+obj-$(CONFIG_MIKROTIK_RB532)	+= pci-rc32434.o ops-rc32434.o fixup-rc32434.o
diff --git a/arch/mips/pci/fixup-rc32434.c b/arch/mips/pci/fixup-rc32434.c
new file mode 100644
index 00000000000..75b90dcb7a0
--- /dev/null
+++ b/arch/mips/pci/fixup-rc32434.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2001 MontaVista Software Inc.
+ * Author: MontaVista Software, Inc.
+ *         	stevel@mvista.com or source@mvista.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/mach-rc32434/rc32434.h>
+
+static int __devinitdata irq_map[2][12] = {	/* indexed [bus number][slot] */
+	{0, 0, 2, 3, 2, 3, 0, 0, 0, 0, 0, 1},	/* bus 0, slots 0-11 */
+	{0, 0, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}	/* bus 1, slots 0-11 */
+};
+
+int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	unsigned int busnr = dev->bus->number, devnr = PCI_SLOT(dev->devfn);
+
+	/* irq_map only covers buses 0-1, slots 0-11; the rest use offset 0. */
+	if (busnr >= 2 || devnr >= 12)
+		return GROUP4_IRQ_BASE + 4;
+	return irq_map[busnr][devnr] + GROUP4_IRQ_BASE + 4;
+}
+
+static void rc32434_pci_early_fixup(struct pci_dev *dev)
+{
+	if (dev->bus->number != 0 || PCI_SLOT(dev->devfn) != 6)
+		return;		/* only slot 6 on bus 0 is touched */
+
+	/* disable prefetched memory range, then program the cache line size */
+	pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, 0);
+	pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, 0x10);
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 4);
+}
+
+/*
+ * Matches any vendor/device ID: covers both IDT and VIA devices on the board
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, rc32434_pci_early_fixup);
+
+/* Platform hook run at pci_enable_device() time; nothing to set up here. */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c
new file mode 100644
index 00000000000..d1f8fa210ca
--- /dev/null
+++ b/arch/mips/pci/ops-rc32434.c
@@ -0,0 +1,207 @@
+/*
+ *  BRIEF MODULE DESCRIPTION
+ *     pci_ops for IDT EB434 board
+ *
+ *  Copyright 2004 IDT Inc. (rischelp@idt.com)
+ *  Copyright 2006 Felix Fietkau <nbd@openwrt.org>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include <asm/cpu.h>
+#include <asm/mach-rc32434/rc32434.h>
+#include <asm/mach-rc32434/pci.h>
+
+#define PCI_ACCESS_READ  0	/* access_type: load *data from pcicfgd */
+#define PCI_ACCESS_WRITE 1	/* access_type: store *data to pcicfgd */
+
+/* Program pcicfga: bit 31 | bus << 16 | slot << 11 | func << 8 | off. */
+#define PCI_CFG_SET(bus, slot, func, off) \
+	(rc32434_pci->pcicfga = (0x80000000 | \
+				((bus) << 16) | ((slot)<<11) | \
+				((func)<<8) | (off)))
+
+/*
+ * Run one 32-bit config cycle: program the address register, then write
+ * *data to, or read *data from, the data register.  Always returns 0.
+ */
+static inline int config_access(unsigned char access_type,
+				struct pci_bus *bus, unsigned int devfn,
+				unsigned char where, u32 *data)
+{
+	/* Setup address */
+	PCI_CFG_SET(bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+	rc32434_sync();
+
+	if (access_type != PCI_ACCESS_WRITE)
+		*data = rc32434_pci->pcicfgd;
+	else
+		rc32434_pci->pcicfgd = *data;
+	rc32434_sync();
+
+	return 0;
+}
+
+
+/*
+ * We can't address 8 and 16 bit words directly.  Instead we have to
+ * read/write a 32bit word and mask/modify the data we actually want.
+ */
+static int read_config_byte(struct pci_bus *bus, unsigned int devfn,
+			    int where, u8 *val)
+{
+	u32 dword;
+	int rc = config_access(PCI_ACCESS_READ, bus, devfn, where, &dword);
+
+	/* (where & 3) selects the byte lane inside the dword, 8 bits each. */
+	*val = (dword >> ((where & 3) * 8)) & 0xff;
+	return rc;
+}
+
+static int read_config_word(struct pci_bus *bus, unsigned int devfn,
+			    int where, u16 *val)
+{
+	u32 dword;
+	int rc = config_access(PCI_ACCESS_READ, bus, devfn, where, &dword);
+
+	/* Shift the addressed 16 bits down to bit 0 and mask them out. */
+	*val = (dword >> ((where & 3) * 8)) & 0xffff;
+	return rc;
+}
+
+/*
+ * Read a dword.  A PCI_VENDOR_ID read that returns all 0s, all 1s or one
+ * half-word of 1s is retried after sleeping 2, 4 and 8 ms.
+ */
+static int read_config_dword(struct pci_bus *bus, unsigned int devfn,
+			     int where, u32 *val)
+{
+	int ret, delay = 1;
+
+	/*
+	 * Don't scan too far, else there will be errors with plugged in
+	 * daughterboard (rb564).
+	 */
+	if (bus->number == 0 && PCI_SLOT(devfn) > 21)
+		return 0;
+
+retry:
+	ret = config_access(PCI_ACCESS_READ, bus, devfn, where, val);
+
+	/*
+	 * Certain devices react delayed at device scan time, this gives them
+	 * time to settle.  Test the data read (*val); ret is only a status.
+	 */
+	if (where == PCI_VENDOR_ID && delay <= 4 &&
+	    (*val == 0xffffffff || *val == 0x00000000 ||
+	     *val == 0x0000ffff || *val == 0xffff0000)) {
+		delay *= 2;
+		msleep(delay);
+		goto retry;
+	}
+
+	return ret;
+}
+
+/* Read-modify-write the dword containing 'where' to update a single byte. */
+static int
+write_config_byte(struct pci_bus *bus, unsigned int devfn, int where, u8 val)
+{
+	/* u32 operands: shifting an int by 24 could hit the sign bit. */
+	unsigned int shift = (where & 3) << 3;
+	u32 data = 0;
+
+	if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return -1;
+
+	data = (data & ~((u32)0xff << shift)) | ((u32)val << shift);
+	if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return -1;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+/* Read-modify-write the dword containing 'where' to update 16 bits of it. */
+static int
+write_config_word(struct pci_bus *bus, unsigned int devfn, int where, u16 val)
+{
+	/* u32 operands: shifting an int by 16 or more could hit the sign bit. */
+	unsigned int shift = (where & 3) << 3;
+	u32 data = 0;
+
+	if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return -1;
+
+	data = (data & ~((u32)0xffff << shift)) | ((u32)val << shift);
+
+	if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return -1;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+/* Write a whole dword; the value is handed straight to config_access(). */
+static int
+write_config_dword(struct pci_bus *bus, unsigned int devfn, int where,
+		   u32 val)
+{
+	int failed = config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val);
+
+	return failed ? -1 : PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops read hook: dispatch on access width (1, 2, otherwise dword). */
+static int pci_config_read(struct pci_bus *bus, unsigned int devfn,
+			   int where, int size, u32 *val)
+{
+	/* NOTE(review): narrow reads store through a cast of val. */
+	if (size == 1)
+		return read_config_byte(bus, devfn, where, (u8 *) val);
+	if (size == 2)
+		return read_config_word(bus, devfn, where, (u16 *) val);
+
+	return read_config_dword(bus, devfn, where, val);
+}
+
+/* pci_ops write hook: dispatch on access width (1, 2, otherwise dword). */
+static int pci_config_write(struct pci_bus *bus, unsigned int devfn,
+			    int where, int size, u32 val)
+{
+	if (size == 1)
+		return write_config_byte(bus, devfn, where, (u8) val);
+	if (size == 2)
+		return write_config_word(bus, devfn, where, (u16) val);
+
+	/* Any other size is treated as a full dword write. */
+	return write_config_dword(bus, devfn, where, val);
+}
+
+struct pci_ops rc32434_pci_ops = {	/* size-dispatching config accessors */
+	.read = pci_config_read,
+	.write = pci_config_write,
+};
diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c
new file mode 100644
index 00000000000..1c2821e2f49
--- /dev/null
+++ b/arch/mips/pci/pci-rc32434.c
@@ -0,0 +1,221 @@
+/*
+ *  BRIEF MODULE DESCRIPTION
+ *     PCI initialization for IDT EB434 board
+ *
+ *  Copyright 2004 IDT Inc. (rischelp@idt.com)
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/mach-rc32434/rc32434.h>
+#include <asm/mach-rc32434/pci.h>
+
+#define PCI_ACCESS_READ  0
+#define PCI_ACCESS_WRITE 1
+
+/* define an unsigned array for the PCI registers */
+static unsigned int korina_cnfg_regs[25] = {
+	KORINA_CNFG1, KORINA_CNFG2, KORINA_CNFG3, KORINA_CNFG4,
+	KORINA_CNFG5, KORINA_CNFG6, KORINA_CNFG7, KORINA_CNFG8,
+	KORINA_CNFG9, KORINA_CNFG10, KORINA_CNFG11, KORINA_CNFG12,
+	KORINA_CNFG13, KORINA_CNFG14, KORINA_CNFG15, KORINA_CNFG16,
+	KORINA_CNFG17, KORINA_CNFG18, KORINA_CNFG19, KORINA_CNFG20,
+	KORINA_CNFG21, KORINA_CNFG22, KORINA_CNFG23, KORINA_CNFG24
+};
+static struct resource rc32434_res_pci_mem1;
+static struct resource rc32434_res_pci_mem2;
+
+static struct resource rc32434_res_pci_mem1 = {
+	.name = "PCI MEM1",
+	.start = 0x50000000,
+	.end = 0x5FFFFFFF,
+	.flags = IORESOURCE_MEM,
+	.parent = &rc32434_res_pci_mem1,
+	.sibling = NULL,
+	.child = &rc32434_res_pci_mem2
+};
+
+static struct resource rc32434_res_pci_mem2 = {
+	.name = "PCI Mem2",
+	.start = 0x60000000,
+	.end = 0x6FFFFFFF,
+	.flags = IORESOURCE_MEM,
+	.parent = &rc32434_res_pci_mem1,
+	.sibling = NULL,
+	.child = NULL
+};
+
+static struct resource rc32434_res_pci_io1 = {
+	.name = "PCI I/O1",
+	.start = 0x18800000,
+	.end = 0x188FFFFF,
+	.flags = IORESOURCE_IO,
+};
+
+extern struct pci_ops rc32434_pci_ops;
+
+#define PCI_MEM1_START	PCI_ADDR_START
+#define PCI_MEM1_END	(PCI_ADDR_START + CPUTOPCI_MEM_WIN - 1)
+#define PCI_MEM2_START	(PCI_ADDR_START + CPUTOPCI_MEM_WIN)
+#define PCI_MEM2_END	(PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN)  - 1)
+#define PCI_IO1_START	(PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN))
+#define PCI_IO1_END 							\
+	(PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN - 1)
+#define PCI_IO2_START							\
+	(PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN)
+#define PCI_IO2_END 							\
+	(PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + (2 * CPUTOPCI_IO_WIN) - 1)
+
+struct pci_controller rc32434_controller2;
+
+struct pci_controller rc32434_controller = {
+	.pci_ops = &rc32434_pci_ops,
+	.mem_resource = &rc32434_res_pci_mem1,
+	.io_resource = &rc32434_res_pci_io1,
+	.mem_offset = 0,
+	.io_offset = 0,
+
+};
+
+#ifdef __MIPSEB__
+#define PCI_ENDIAN_FLAG PCILBAC_sb_m
+#else
+#define PCI_ENDIAN_FLAG 0
+#endif
+
+/* Program the RC32434 PCI bridge; returns -1 unless it is in a host mode. */
+static int __init rc32434_pcibridge_init(void)
+{
+	unsigned int pcicvalue, pcicdata = 0;
+	unsigned int dummyread, pcicntlval;
+	int loopCount;
+	unsigned int pci_config_addr;
+
+	pcicvalue = rc32434_pci->pcic;
+	pcicvalue = (pcicvalue >> PCIM_SHFT) & PCIM_BIT_LEN;
+	if (!((pcicvalue == PCIM_H_EA) ||
+	      (pcicvalue == PCIM_H_IA_FIX) ||
+	      (pcicvalue == PCIM_H_IA_RR))) {
+		pr_err("PCI init error!!!\n");
+		/* Not in Host Mode, return ERROR */
+		return -1;
+	}
+	/* Enables the Idle Grant mode, Arbiter Parking */
+	pcicdata |= (PCI_CTL_IGM | PCI_CTL_EAP | PCI_CTL_EN);
+	rc32434_pci->pcic = pcicdata;	/* Enable the PCI bus Interface */
+	/* Spin until PCI_STAT_RIP is no longer set in the status register */
+	for (;;) {
+		pcicdata = rc32434_pci->pcis;
+		if (!(pcicdata & PCI_STAT_RIP))
+			break;
+	}
+	/* Zero out the PCI status & PCI Status Mask */
+	rc32434_pci->pcis = 0;
+	rc32434_pci->pcism = 0xFFFFFFFF;
+	/* Zero out the PCI decoupled registers */
+	rc32434_pci->pcidac = 0;	/*
+					 * disable PCI decoupled accesses at
+					 * initialization
+					 */
+	rc32434_pci->pcidas = 0;	/* clear the status */
+	rc32434_pci->pcidasm = 0x0000007F;	/* Mask all the interrupts */
+	/* Mask PCI Messaging Interrupts */
+	rc32434_pci_msg->pciiic = 0;
+	rc32434_pci_msg->pciiim = 0xFFFFFFFF;
+	rc32434_pci_msg->pciioic = 0;
+	rc32434_pci_msg->pciioim = 0;
+
+	/* Setup PCILBA0 as Memory Window */
+	rc32434_pci->pcilba[0].address = (unsigned int) (PCI_ADDR_START);
+
+	/* setup the PCI map address as same as the local address */
+
+	rc32434_pci->pcilba[0].mapping = (unsigned int) (PCI_ADDR_START);
+
+
+	/* PCILBA0 control (256MB), then setup PCILBA1 as MEM */
+	rc32434_pci->pcilba[0].control =
+	    (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG);
+	dummyread = rc32434_pci->pcilba[0].control;	/* flush the CPU write Buffers */
+	rc32434_pci->pcilba[1].address = 0x60000000;
+	rc32434_pci->pcilba[1].mapping = 0x60000000;
+
+	/* PCILBA1 control (256MB), then setup PCILBA2 as IO Window */
+	rc32434_pci->pcilba[1].control =
+	    (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG);
+	dummyread = rc32434_pci->pcilba[1].control;	/* flush the CPU write Buffers */
+	rc32434_pci->pcilba[2].address = 0x18C00000;
+	rc32434_pci->pcilba[2].mapping = 0x18FFFFFF;
+
+	/* PCILBA2 control (4MB) */
+	rc32434_pci->pcilba[2].control =
+	    (((SIZE_4MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG);
+	dummyread = rc32434_pci->pcilba[2].control;	/* flush the CPU write Buffers */
+
+	/* Setup PCILBA3 as IO Window */
+	rc32434_pci->pcilba[3].address = 0x18800000;
+	rc32434_pci->pcilba[3].mapping = 0x18800000;
+	rc32434_pci->pcilba[3].control =
+	    ((((SIZE_1MB & 0x1ff) << PCI_LBAC_SIZE_BIT) | PCI_LBAC_MSI) |
+	     PCI_ENDIAN_FLAG);
+	dummyread = rc32434_pci->pcilba[3].control;	/* flush the CPU write Buffers */
+
+	pci_config_addr = (unsigned int) (0x80000004);
+	for (loopCount = 0; loopCount < 24; loopCount++) {
+		rc32434_pci->pcicfga = pci_config_addr;
+		dummyread = rc32434_pci->pcicfga;
+		rc32434_pci->pcicfgd = korina_cnfg_regs[loopCount];
+		dummyread = rc32434_pci->pcicfgd;
+		pci_config_addr += 4;
+	}
+	rc32434_pci->pcitc =
+	    (unsigned int) ((PCITC_RTIMER_VAL & 0xff) << PCI_TC_RTIMER_BIT) |
+	    ((PCITC_DTIMER_VAL & 0xff) << PCI_TC_DTIMER_BIT);
+
+	pcicntlval = rc32434_pci->pcic;
+	pcicntlval &= ~PCI_CTL_TNR;
+	rc32434_pci->pcic = pcicntlval;
+	pcicntlval = rc32434_pci->pcic;
+
+	return 0;
+}
+
+/* arch initcall: bring up the bridge, then register the PCI controller. */
+static int __init rc32434_pci_init(void)
+{
+	pr_info("PCI: Initializing PCI\n");
+	ioport_resource.start = rc32434_res_pci_io1.start;
+	ioport_resource.end = rc32434_res_pci_io1.end;
+
+	/* the bridge init fails when the chip is not in a PCI host mode */
+	if (rc32434_pcibridge_init())
+		return -ENODEV;
+	register_pci_controller(&rc32434_controller);
+	rc32434_sync();
+	return 0;
+}
+
+arch_initcall(rc32434_pci_init);
diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile
new file mode 100644
index 00000000000..8f0b6b6a162
--- /dev/null
+++ b/arch/mips/rb532/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the RB532 board specific parts of the kernel
+#
+
+obj-y	 += irq.o time.o setup.o serial.o prom.o gpio.o devices.o
+
+EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
new file mode 100644
index 00000000000..44fb0a62877
--- /dev/null
+++ b/arch/mips/rb532/devices.c
@@ -0,0 +1,331 @@
+/*
+ *  RouterBoard 500 Platform devices
+ *
+ *  Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
+ *  Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+
+#include <asm/bootinfo.h>
+
+#include <asm/mach-rc32434/rc32434.h>
+#include <asm/mach-rc32434/dma.h>
+#include <asm/mach-rc32434/dma_v.h>
+#include <asm/mach-rc32434/eth.h>
+#include <asm/mach-rc32434/rb.h>
+#include <asm/mach-rc32434/integ.h>
+#include <asm/mach-rc32434/gpio.h>
+
+#define ETH0_DMA_RX_IRQ   	(GROUP1_IRQ_BASE + 0)
+#define ETH0_DMA_TX_IRQ   	(GROUP1_IRQ_BASE + 1)
+#define ETH0_RX_OVR_IRQ   	(GROUP3_IRQ_BASE + 9)
+#define ETH0_TX_UND_IRQ   	(GROUP3_IRQ_BASE + 10)
+
+#define ETH0_RX_DMA_ADDR  (DMA0_BASE_ADDR + 0 * DMA_CHAN_OFFSET)
+#define ETH0_TX_DMA_ADDR  (DMA0_BASE_ADDR + 1 * DMA_CHAN_OFFSET)
+
+/* NAND definitions */
+#define GPIO_RDY (1 << 0x08)
+#define GPIO_WPX (1 << 0x09)
+#define GPIO_ALE (1 << 0x0a)
+#define GPIO_CLE (1 << 0x0b)
+
+extern char *board_type;
+
+static struct resource korina_dev0_res[] = {
+	{
+		.name = "korina_regs",
+		.start = ETH0_BASE_ADDR,
+		.end = ETH0_BASE_ADDR + sizeof(struct eth_regs),
+		.flags = IORESOURCE_MEM,
+	 }, {
+		.name = "korina_rx",
+		.start = ETH0_DMA_RX_IRQ,
+		.end = ETH0_DMA_RX_IRQ,
+		.flags = IORESOURCE_IRQ
+	}, {
+		.name = "korina_tx",
+		.start = ETH0_DMA_TX_IRQ,
+		.end = ETH0_DMA_TX_IRQ,
+		.flags = IORESOURCE_IRQ
+	}, {
+		.name = "korina_ovr",
+		.start = ETH0_RX_OVR_IRQ,
+		.end = ETH0_RX_OVR_IRQ,
+		.flags = IORESOURCE_IRQ
+	}, {
+		.name = "korina_und",
+		.start = ETH0_TX_UND_IRQ,
+		.end = ETH0_TX_UND_IRQ,
+		.flags = IORESOURCE_IRQ
+	}, {
+		.name = "korina_dma_rx",
+		.start = ETH0_RX_DMA_ADDR,
+		.end = ETH0_RX_DMA_ADDR + DMA_CHAN_OFFSET - 1,
+		.flags = IORESOURCE_MEM,
+	 }, {
+		.name = "korina_dma_tx",
+		.start = ETH0_TX_DMA_ADDR,
+		.end = ETH0_TX_DMA_ADDR + DMA_CHAN_OFFSET - 1,
+		.flags = IORESOURCE_MEM,
+	 }
+};
+
+static struct korina_device korina_dev0_data = {
+	.name = "korina0",
+	.mac = {0xde, 0xca, 0xff, 0xc0, 0xff, 0xee}
+};
+
+static struct platform_device korina_dev0 = {
+	.id = 0,
+	.name = "korina",
+	.dev.platform_data = &korina_dev0_data,
+	.resource = korina_dev0_res,
+	.num_resources = ARRAY_SIZE(korina_dev0_res),
+};
+
+#define CF_GPIO_NUM 13
+
+static struct resource cf_slot0_res[] = {
+	{
+		.name = "cf_membase",
+		.flags = IORESOURCE_MEM
+	}, {
+		.name = "cf_irq",
+		.start = (8 + 4 * 32 + CF_GPIO_NUM),	/* 149 */
+		.end = (8 + 4 * 32 + CF_GPIO_NUM),
+		.flags = IORESOURCE_IRQ
+	}
+};
+
+static struct cf_device cf_slot0_data = {
+	.gpio_pin = CF_GPIO_NUM	/* same pin the cf_irq resource is built from */
+};
+
+static struct platform_device cf_slot0 = {
+	.id = 0,
+	.name = "pata-rb532-cf",
+	.dev.platform_data = &cf_slot0_data,
+	.resource = cf_slot0_res,
+	.num_resources = ARRAY_SIZE(cf_slot0_res),
+};
+
+/* Resources and device for NAND */
+static int rb532_dev_ready(struct mtd_info *mtd)
+{
+	return readl(IDT434_REG_BASE + GPIOD) & GPIO_RDY;
+}
+
+static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+	struct nand_chip *chip = mtd->priv;
+	unsigned char orbits, nandbits;
+
+	if (ctrl & NAND_CTRL_CHANGE) {
+		orbits = (ctrl & NAND_CLE) << 1;
+		orbits |= (ctrl & NAND_ALE) >> 1;
+
+		nandbits = (~ctrl & NAND_CLE) << 1;
+		nandbits |= (~ctrl & NAND_ALE) >> 1;
+
+		set_latch_u5(orbits, nandbits);
+	}
+	if (cmd != NAND_CMD_NONE)
+		writeb(cmd, chip->IO_ADDR_W);
+}
+
+static struct resource nand_slot0_res[] = {
+	[0] = {
+		.name = "nand_membase",
+		.flags = IORESOURCE_MEM
+	}
+};
+
+static struct platform_nand_data rb532_nand_data = {
+	.ctrl.dev_ready = rb532_dev_ready,
+	.ctrl.cmd_ctrl	= rb532_cmd_ctrl,
+};
+
+static struct platform_device nand_slot0 = {
+	.name = "gen_nand",
+	.id = -1,
+	.resource = nand_slot0_res,
+	.num_resources = ARRAY_SIZE(nand_slot0_res),
+	.dev.platform_data = &rb532_nand_data,
+};
+
+static struct mtd_partition rb532_partition_info[] = {
+	{
+		.name = "Routerboard NAND boot",
+		.offset = 0,
+		.size = 4 * 1024 * 1024,
+	}, {
+		.name = "rootfs",
+		.offset = MTDPART_OFS_NXTBLK,
+		.size = MTDPART_SIZ_FULL,
+	}
+};
+
+static struct platform_device rb532_led = {
+	.name = "rb532-led",
+	.id = 0,
+};
+
+static struct gpio_keys_button rb532_gpio_btn[] = {
+	{
+		.gpio = 1,
+		.code = BTN_0,
+		.desc = "S1",
+		.active_low = 1,
+	}
+};
+
+static struct gpio_keys_platform_data rb532_gpio_btn_data = {
+	.buttons = rb532_gpio_btn,
+	.nbuttons = ARRAY_SIZE(rb532_gpio_btn),
+};
+
+static struct platform_device rb532_button = {
+	.name 	= "gpio-keys",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &rb532_gpio_btn_data,
+	}
+};
+
+static struct resource rb532_wdt_res[] = {
+	{
+		.name = "rb532_wdt_res",
+		.start = INTEG0_BASE_ADDR,
+		.end = INTEG0_BASE_ADDR + sizeof(struct integ),
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device rb532_wdt = {
+	.name 		= "rc32434_wdt",
+	.id 		= -1,
+	.resource 	= rb532_wdt_res,
+	.num_resources	= ARRAY_SIZE(rb532_wdt_res),
+};
+
+static struct platform_device *rb532_devs[] = {
+	&korina_dev0,
+	&nand_slot0,
+	&cf_slot0,
+	&rb532_led,
+	&rb532_button,
+	&rb532_wdt
+};
+
+/*
+ * Parse "xx:xx:xx:xx:xx:xx" into korina_dev0_data.mac.
+ * The previous address is kept unless all six octets parse.
+ */
+static void __init parse_mac_addr(char *macstr)
+{
+	unsigned char mac[6];
+	int i, j, digit;
+
+	for (i = 0; i < 6; i++) {
+		mac[i] = 0;
+		/* check digits first so a short string stops at its NUL */
+		for (j = 0; j < 2; j++, macstr++) {
+			if (!isxdigit(*macstr))
+				return;
+			digit = isdigit(*macstr) ? *macstr - '0' :
+				toupper(*macstr) - 'A' + 10;
+			mac[i] = mac[i] * 16 + digit;
+		}
+		if (i != 5 && *macstr++ != ':')
+			return;
+	}
+
+	/* commit only a complete address, never a partially parsed one */
+	memcpy(korina_dev0_data.mac, mac, sizeof(mac));
+}
+
+
+/* DEVICE CONTROLLER 1 */
+#define CFG_DC_DEV1 	((void *)0xb8010010)
+#define CFG_DC_DEV2 	((void *)0xb8010020)
+#define CFG_DC_DEVBASE    0x0
+#define CFG_DC_DEVMASK    0x4
+#define CFG_DC_DEVC       0x8
+#define CFG_DC_DEVTC      0xC
+
+/* NAND definitions */
+#define NAND_CHIP_DELAY	25
+
+static void __init rb532_nand_setup(void)
+{
+	switch (mips_machtype) {
+	case MACH_MIKROTIK_RB532A:
+		set_latch_u5(LO_FOFF | LO_CEX,
+				LO_ULED | LO_ALE | LO_CLE | LO_WPX);
+		break;
+	default:
+		set_latch_u5(LO_WPX | LO_FOFF | LO_CEX,
+				LO_ULED | LO_ALE | LO_CLE);
+		break;
+	}
+
+	/* Setup NAND specific settings */
+	rb532_nand_data.chip.nr_chips = 1;
+	rb532_nand_data.chip.nr_partitions = ARRAY_SIZE(rb532_partition_info);
+	rb532_nand_data.chip.partitions = rb532_partition_info;
+	rb532_nand_data.chip.chip_delay = NAND_CHIP_DELAY;
+	rb532_nand_data.chip.options = NAND_NO_AUTOINCR;
+}
+
+
+static int __init plat_setup_devices(void)
+{
+	/* Look for the CF card reader; cf_slot0 is entry 2 of rb532_devs[] */
+	if (!readl(CFG_DC_DEV1 + CFG_DC_DEVMASK))
+		rb532_devs[2] = NULL;
+	else {
+		cf_slot0_res[0].start = readl(CFG_DC_DEV1 + CFG_DC_DEVBASE);
+		cf_slot0_res[0].end = cf_slot0_res[0].start + 0x1000;
+	}
+
+	/* Read the NAND resources from the device controller */
+	nand_slot0_res[0].start = readl(CFG_DC_DEV2 + CFG_DC_DEVBASE);
+	nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000;
+
+	/* Initialise the NAND device */
+	rb532_nand_setup();
+
+	/* NOTE(review): confirm platform_add_devices() tolerates the NULL slot */
+	return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
+}
+
+static int __init setup_kmac(char *s)
+{
+	printk(KERN_INFO "korina mac = %s\n", s);
+	parse_mac_addr(s);
+	return 1;	/* __setup(): non-zero marks the option as handled */
+}
+
+__setup("kmac=", setup_kmac);
+
+arch_initcall(plat_setup_devices);
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
new file mode 100644
index 00000000000..b2fe82dba0a
--- /dev/null
+++ b/arch/mips/rb532/gpio.c
@@ -0,0 +1,220 @@
+/*
+ *  Miscellaneous functions for IDT EB434 board
+ *
+ *  Copyright 2004 IDT Inc. (rischelp@idt.com)
+ *  Copyright 2006 Phil Sutter <n0-1@freewrt.org>
+ *  Copyright 2007 Florian Fainelli <florian@openwrt.org>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+#include <asm/addrspace.h>
+
+#include <asm/mach-rc32434/rb.h>
+
+struct rb532_gpio_reg __iomem *rb532_gpio_reg0;
+EXPORT_SYMBOL(rb532_gpio_reg0);
+
+struct mpmc_device dev3;
+
+static struct resource rb532_gpio_reg0_res[] = {
+	{
+		.name 	= "gpio_reg0",
+		.start 	= (u32)(IDT434_REG_BASE + GPIOBASE),
+		.end 	= (u32)(IDT434_REG_BASE + GPIOBASE + sizeof(struct rb532_gpio_reg)),
+		.flags 	= IORESOURCE_MEM,
+	}
+};
+
+static struct resource rb532_dev3_ctl_res[] = {
+	{
+		.name	= "dev3_ctl",
+		.start	= (u32)(IDT434_REG_BASE + DEV3BASE),
+		.end	= (u32)(IDT434_REG_BASE + DEV3BASE + sizeof(struct dev_reg)),
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+/* Store the low 'len' bits of 'val' at bit offset 'bit' of a register. */
+void set_434_reg(unsigned reg_offs, unsigned bit, unsigned len, unsigned val)
+{
+	unsigned long flags;	/* spin_lock_irqsave() takes an unsigned long */
+	unsigned data, i;
+
+	spin_lock_irqsave(&dev3.lock, flags);
+	data = readl(IDT434_REG_BASE + reg_offs);
+	for (i = 0; i != len; ++i) {
+		if (val & (1U << i))
+			data |= (1U << (i + bit));
+		else
+			data &= ~(1U << (i + bit));
+	}
+	writel(data, (IDT434_REG_BASE + reg_offs));
+
+	spin_unlock_irqrestore(&dev3.lock, flags);
+}
+EXPORT_SYMBOL(set_434_reg);
+
+unsigned get_434_reg(unsigned reg_offs)
+{
+	return readl(IDT434_REG_BASE + reg_offs);
+}
+EXPORT_SYMBOL(get_434_reg);
+
+/* Update the cached latch state (set or_mask, clear nand_mask), write it. */
+void set_latch_u5(unsigned char or_mask, unsigned char nand_mask)
+{
+	unsigned long flags;	/* spin_lock_irqsave() takes an unsigned long */
+
+	spin_lock_irqsave(&dev3.lock, flags);
+	dev3.state = (dev3.state | or_mask) & ~nand_mask;
+	/* NOTE(review): assumes dev3.base maps the latch itself -- confirm */
+	writeb(dev3.state, dev3.base);
+	spin_unlock_irqrestore(&dev3.lock, flags);
+}
+EXPORT_SYMBOL(set_latch_u5);
+
+unsigned char get_latch_u5(void)
+{
+	return dev3.state;
+}
+EXPORT_SYMBOL(get_latch_u5);
+
+int rb532_gpio_get_value(unsigned gpio)
+{
+	return readl(&rb532_gpio_reg0->gpiod) & (1 << gpio);
+}
+EXPORT_SYMBOL(rb532_gpio_get_value);
+
+void rb532_gpio_set_value(unsigned gpio, int value)
+{
+	unsigned tmp;
+
+	tmp = readl(&rb532_gpio_reg0->gpiod) & ~(1 << gpio);
+	if (value)
+		tmp |= 1 << gpio;
+
+	writel(tmp, (void *)&rb532_gpio_reg0->gpiod);
+}
+EXPORT_SYMBOL(rb532_gpio_set_value);
+
+int rb532_gpio_direction_input(unsigned gpio)
+{
+	writel(readl(&rb532_gpio_reg0->gpiocfg) & ~(1 << gpio),
+	       (void *)&rb532_gpio_reg0->gpiocfg);
+
+	return 0;
+}
+EXPORT_SYMBOL(rb532_gpio_direction_input);
+
+int rb532_gpio_direction_output(unsigned gpio, int value)
+{
+	gpio_set_value(gpio, value);
+	writel(readl(&rb532_gpio_reg0->gpiocfg) | (1 << gpio),
+	       (void *)&rb532_gpio_reg0->gpiocfg);
+
+	return 0;
+}
+EXPORT_SYMBOL(rb532_gpio_direction_output);
+
+void rb532_gpio_set_int_level(unsigned gpio, int value)
+{
+	unsigned tmp;
+
+	tmp = readl(&rb532_gpio_reg0->gpioilevel) & ~(1 << gpio);
+	if (value)
+		tmp |= 1 << gpio;
+	writel(tmp, (void *)&rb532_gpio_reg0->gpioilevel);
+}
+EXPORT_SYMBOL(rb532_gpio_set_int_level);
+
+int rb532_gpio_get_int_level(unsigned gpio)
+{
+	return readl(&rb532_gpio_reg0->gpioilevel) & (1 << gpio);
+}
+EXPORT_SYMBOL(rb532_gpio_get_int_level);
+
+/* Set or clear (value == 0) the interrupt status bit of 'gpio'. */
+void rb532_gpio_set_int_status(unsigned gpio, int value)
+{
+	unsigned tmp = readl(&rb532_gpio_reg0->gpioistat) & ~(1 << gpio);
+
+	if (value)
+		tmp |= 1 << gpio;
+	writel(tmp, (void *)&rb532_gpio_reg0->gpioistat);
+}
+EXPORT_SYMBOL(rb532_gpio_set_int_status);
+
+int rb532_gpio_get_int_status(unsigned gpio)
+{
+	return readl(&rb532_gpio_reg0->gpioistat) & (1 << gpio);
+}
+EXPORT_SYMBOL(rb532_gpio_get_int_status);
+
+/* Set or clear (value == 0) the bit of 'gpio' in the function register. */
+void rb532_gpio_set_func(unsigned gpio, int value)
+{
+	unsigned tmp = readl(&rb532_gpio_reg0->gpiofunc) & ~(1 << gpio);
+
+	if (value)
+		tmp |= 1 << gpio;
+	writel(tmp, (void *)&rb532_gpio_reg0->gpiofunc);
+}
+EXPORT_SYMBOL(rb532_gpio_set_func);
+
+int rb532_gpio_get_func(unsigned gpio)
+{
+	return readl(&rb532_gpio_reg0->gpiofunc) & (1 << gpio);
+}
+EXPORT_SYMBOL(rb532_gpio_get_func);
+
+int __init rb532_gpio_init(void)
+{
+	rb532_gpio_reg0 = ioremap_nocache(rb532_gpio_reg0_res[0].start,
+				rb532_gpio_reg0_res[0].end -
+				rb532_gpio_reg0_res[0].start);
+
+	if (!rb532_gpio_reg0) {
+		printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
+		return -ENXIO;
+	}
+
+	dev3.base = ioremap_nocache(rb532_dev3_ctl_res[0].start,
+				rb532_dev3_ctl_res[0].end -
+				rb532_dev3_ctl_res[0].start);
+
+	if (!dev3.base) {
+		printk(KERN_ERR "rb532: cannot remap device controller 3\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+arch_initcall(rb532_gpio_init);
diff --git a/arch/mips/rb532/irq.c b/arch/mips/rb532/irq.c
new file mode 100644
index 00000000000..c0d0f950caf
--- /dev/null
+++ b/arch/mips/rb532/irq.c
@@ -0,0 +1,209 @@
+/*
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright 2002 MontaVista Software Inc.
+ * Author: MontaVista Software, Inc.
+ *              stevel@mvista.com or source@mvista.com
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/delay.h>
+
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+#include <asm/mipsregs.h>
+#include <asm/system.h>
+
+#include <asm/mach-rc32434/rc32434.h>
+
+struct intr_group {
+	u32 mask;	/* mask of valid bits in pending/mask registers */
+	volatile u32 *base_addr;
+};
+
+#define RC32434_NR_IRQS  (GROUP4_IRQ_BASE + 32)
+
+#if (NR_IRQS < RC32434_NR_IRQS)
+#error Too little irqs defined. Did you override <asm/irq.h> ?
+#endif
+
+static const struct intr_group intr_group[NUM_INTR_GROUPS] = {
+	{
+		.mask	= 0x0000efff,
+		.base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 0 * IC_GROUP_OFFSET)},
+	{
+		.mask	= 0x00001fff,
+		.base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 1 * IC_GROUP_OFFSET)},
+	{
+		.mask	= 0x00000007,
+		.base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 2 * IC_GROUP_OFFSET)},
+	{
+		.mask	= 0x0003ffff,
+		.base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 3 * IC_GROUP_OFFSET)},
+	{
+		.mask	= 0xffffffff,
+		.base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 4 * IC_GROUP_OFFSET)}
+};
+
+#define READ_PEND(base) (*(base))
+#define READ_MASK(base) (*(base + 2))
+#define WRITE_MASK(base, val) (*(base + 2) = (val))
+
+static inline int irq_to_group(unsigned int irq_nr)
+{
+	return (irq_nr - GROUP0_IRQ_BASE) >> 5;
+}
+
+static inline int group_to_ip(unsigned int group)
+{
+	return group + 2;
+}
+
+static inline void enable_local_irq(unsigned int ip)
+{
+	int ipnum = 0x100 << ip;
+
+	set_c0_status(ipnum);
+}
+
+static inline void disable_local_irq(unsigned int ip)
+{
+	int ipnum = 0x100 << ip;
+
+	clear_c0_status(ipnum);
+}
+
+static inline void ack_local_irq(unsigned int ip)
+{
+	int ipnum = 0x100 << ip;
+
+	clear_c0_cause(ipnum);
+}
+
+static void rb532_enable_irq(unsigned int irq_nr)
+{
+	int ip = irq_nr - GROUP0_IRQ_BASE;
+	unsigned int group, intr_bit;
+	volatile unsigned int *addr;
+
+	if (ip < 0)
+		enable_local_irq(irq_nr);
+	else {
+		group = ip >> 5;
+
+		ip &= (1 << 5) - 1;
+		intr_bit = 1 << ip;
+
+		enable_local_irq(group_to_ip(group));
+
+		addr = intr_group[group].base_addr;
+		WRITE_MASK(addr, READ_MASK(addr) & ~intr_bit);
+	}
+}
+
+static void rb532_disable_irq(unsigned int irq_nr)
+{
+	int ip = irq_nr - GROUP0_IRQ_BASE;
+	unsigned int group, intr_bit, mask;
+	volatile unsigned int *addr;
+
+	if (ip < 0) {
+		disable_local_irq(irq_nr);
+	} else {
+		group = ip >> 5;
+
+		ip &= (1 << 5) - 1;
+		intr_bit = 1 << ip;
+		addr = intr_group[group].base_addr;
+		mask = READ_MASK(addr);
+		mask |= intr_bit;
+		WRITE_MASK(addr, mask);
+
+		/*
+		 * if there are no more interrupts enabled in this
+		 * group, disable corresponding IP
+		 */
+		if (mask == intr_group[group].mask)
+			disable_local_irq(group_to_ip(group));
+	}
+}
+
+static void rb532_mask_and_ack_irq(unsigned int irq_nr)
+{
+	rb532_disable_irq(irq_nr);
+	ack_local_irq(group_to_ip(irq_to_group(irq_nr)));
+}
+
+static struct irq_chip rc32434_irq_type = {
+	.name		= "RB532",
+	.ack		= rb532_disable_irq,
+	.mask		= rb532_disable_irq,
+	.mask_ack	= rb532_mask_and_ack_irq,
+	.unmask		= rb532_enable_irq,
+};
+
+void __init arch_init_irq(void)
+{
+	int i;
+
+	pr_info("Initializing IRQ's: %d out of %d\n", RC32434_NR_IRQS, NR_IRQS);
+
+	for (i = 0; i < RC32434_NR_IRQS; i++)
+		set_irq_chip_and_handler(i,  &rc32434_irq_type,
+					handle_level_irq);
+}
+
+/* Main Interrupt dispatcher */
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int ip, pend, group;
+	volatile unsigned int *addr;
+	unsigned int cp0_cause = read_c0_cause() & read_c0_status();
+
+	if (cp0_cause & CAUSEF_IP7) {
+		do_IRQ(7);
+	} else {
+		ip = (cp0_cause & 0x7c00);	/* IP2..IP6, one per group */
+		if (ip) {
+			group = 21 + (fls(ip) - 32);	/* highest set IP -> group */
+			addr = intr_group[group].base_addr;
+			pend = READ_PEND(addr);
+			pend &= ~READ_MASK(addr);	/* only unmasked interrupts */
+			if (!pend)	/* fls(0) == 0 would give a bogus IRQ */
+				return;
+			pend = 39 + (fls(pend) - 32);
+			do_IRQ((group << 5) + pend);
+		}
+	}
+}
diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c
new file mode 100644
index 00000000000..1bc0af8febf
--- /dev/null
+++ b/arch/mips/rb532/prom.c
@@ -0,0 +1,158 @@
+/*
+ *  RouterBoard 500 specific prom routines
+ *
+ *  Copyright (C) 2003, Peter Sadik <peter.sadik@idt.com>
+ *  Copyright (C) 2005-2006, P.Christeas <p_christ@hol.gr>
+ *  Copyright (C) 2007, Gabor Juhos <juhosg@openwrt.org>
+ *			Felix Fietkau <nbd@openwrt.org>
+ *			Florian Fainelli <florian@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the
+ *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA  02110-1301, USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/blkdev.h>
+
+#include <asm/bootinfo.h>
+#include <asm/mach-rc32434/ddr.h>
+#include <asm/mach-rc32434/prom.h>
+
+extern void __init setup_serial_port(void);
+
+unsigned int idt_cpu_freq = 132000000;
+EXPORT_SYMBOL(idt_cpu_freq);
+unsigned int gpio_bootup_state;
+EXPORT_SYMBOL(gpio_bootup_state);
+
+static struct resource ddr_reg[] = {
+	{
+		.name = "ddr-reg",
+		.start = DDR0_PHYS_ADDR,
+		.end = DDR0_PHYS_ADDR + sizeof(struct ddr_ram),
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+void __init prom_free_prom_memory(void)
+{
+	/* No prom memory to free */
+}
+
+static inline int match_tag(char *arg, const char *tag)
+{
+	return strncmp(arg, tag, strlen(tag)) == 0;
+}
+
+/* Parse the decimal number that follows 'tag' at the start of 'arg'. */
+static inline unsigned long tag2ul(char *arg, const char *tag)
+{
+	char *num = arg + strlen(tag);	/* first character after the tag */
+
+	return simple_strtoul(num, NULL, 10);
+}
+
+/*
+ * Build the kernel command line from the firmware argument vector.
+ * Every append is bounded by CL_SIZE, so an oversized argument list is
+ * truncated instead of overrunning the on-stack buffer.
+ */
+void __init prom_setup_cmdline(void)
+{
+	char cmd_line[CL_SIZE];
+	char *board;
+	int prom_argc;
+	char **prom_argv, **prom_envp;
+	int i;
+
+	prom_argc = fw_arg0;
+	prom_argv = (char **) fw_arg1;
+	prom_envp = (char **) fw_arg2;
+
+	cmd_line[0] = '\0';
+		/* Note: it is common that parameters start
+		 * at argv[1] and not argv[0],
+		 * however, our elf loader starts at [0] */
+	for (i = 0; i < prom_argc; i++) {
+		if (match_tag(prom_argv[i], FREQ_TAG)) {
+			idt_cpu_freq = tag2ul(prom_argv[i], FREQ_TAG);
+			continue;
+		}
+#ifdef IGNORE_CMDLINE_MEM
+		/* parses out the "mem=xx" arg */
+		if (match_tag(prom_argv[i], MEM_TAG))
+			continue;
+#endif
+		if (i > 0)
+			strlcat(cmd_line, " ", CL_SIZE);
+		if (match_tag(prom_argv[i], BOARD_TAG)) {
+			board = prom_argv[i] + strlen(BOARD_TAG);
+
+			if (match_tag(board, BOARD_RB532A))
+				mips_machtype = MACH_MIKROTIK_RB532A;
+			else
+				mips_machtype = MACH_MIKROTIK_RB532;
+		}
+
+		if (match_tag(prom_argv[i], GPIO_TAG))
+			gpio_bootup_state = tag2ul(prom_argv[i], GPIO_TAG);
+
+		strlcat(cmd_line, prom_argv[i], CL_SIZE);
+	}
+	strlcat(cmd_line, " ", CL_SIZE);
+
+	if (arcs_cmdline[0] != '\0') {
+		strlcat(cmd_line, " ", CL_SIZE);
+		strlcat(cmd_line, arcs_cmdline, CL_SIZE);
+	}
+	if (gpio_bootup_state & 0x02)
+		strlcat(cmd_line, GPIO_INIT_NOBUTTON, CL_SIZE);
+	else
+		strlcat(cmd_line, GPIO_INIT_BUTTON, CL_SIZE);
+
+	strcpy(arcs_cmdline, cmd_line);	/* at most CL_SIZE - 1 chars + NUL */
+}
+
+void __init prom_init(void)
+{
+	struct ddr_ram __iomem *ddr;
+	phys_t memsize;
+	phys_t ddrbase;
+
+	ddr = ioremap_nocache(ddr_reg[0].start,
+			ddr_reg[0].end - ddr_reg[0].start);
+
+	if (!ddr) {
+		printk(KERN_ERR "Unable to remap DDR register\n");
+		return;
+	}
+	/* NOTE(review): these use the fields' addresses, not values -- confirm */
+	ddrbase = (phys_t)&ddr->ddrbase;
+	memsize = (phys_t)&ddr->ddrmask;
+	memsize = 0 - memsize;
+
+	prom_setup_cmdline();
+
+	/* give all RAM to boot allocator,
+	 * except for the first 0x400 and the last 0x200 bytes */
+	add_memory_region(ddrbase + 0x400, memsize - 0x600, BOOT_MEM_RAM);
+}
diff --git a/arch/mips/rb532/serial.c b/arch/mips/rb532/serial.c
new file mode 100644
index 00000000000..1a05b5ddee0
--- /dev/null
+++ b/arch/mips/rb532/serial.c
@@ -0,0 +1,53 @@
+/*
+ *  BRIEF MODULE DESCRIPTION
+ *     Serial port initialisation.
+ *
+ *  Copyright 2004 IDT Inc. (rischelp@idt.com)
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/serial_8250.h>
+
+#include <asm/serial.h>
+#include <asm/mach-rc32434/rc32434.h>
+
+extern unsigned int idt_cpu_freq;
+
+static struct uart_port rb532_uart = {
+	.type = PORT_16550A,
+	.line = 0,
+	.irq = RC32434_UART0_IRQ,
+	.iotype = UPIO_MEM,
+	.membase = (char *)KSEG1ADDR(RC32434_UART0_BASE),
+	.regshift = 2
+};
+
+int __init setup_serial_port(void)
+{
+	rb532_uart.uartclk = idt_cpu_freq;
+
+	return early_serial_setup(&rb532_uart);
+}
+arch_initcall(setup_serial_port);
diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c
new file mode 100644
index 00000000000..7aafa95ac20
--- /dev/null
+++ b/arch/mips/rb532/setup.c
@@ -0,0 +1,79 @@
+/*
+ * setup.c - boot time setup code
+ */
+
+#include <linux/init.h>
+
+#include <asm/bootinfo.h>
+#include <asm/reboot.h>
+#include <asm/time.h>
+#include <linux/ioport.h>
+
+#include <asm/mach-rc32434/rc32434.h>
+#include <asm/mach-rc32434/pci.h>
+
+struct pci_reg __iomem *pci_reg;
+EXPORT_SYMBOL(pci_reg);
+
+static struct resource pci0_res[] = {
+	{
+		.name = "pci_reg0",
+		.start = PCI0_BASE_ADDR,
+		.end = PCI0_BASE_ADDR + sizeof(struct pci_reg),
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+static void rb_machine_restart(char *command)
+{
+	/* just jump to the reset vector */
+	writel(0x80000001, (void *)KSEG1ADDR(RC32434_REG_BASE + RC32434_RST));
+	((void (*)(void)) KSEG1ADDR(0x1FC00000u))();
+}
+
+static void rb_machine_halt(void)
+{
+	for (;;)
+		continue;
+}
+
+void __init plat_mem_setup(void)
+{
+	u32 val;
+
+	_machine_restart = rb_machine_restart;
+	_machine_halt = rb_machine_halt;
+	pm_power_off = rb_machine_halt;
+
+	set_io_port_base(KSEG1);
+
+	pci_reg = ioremap_nocache(pci0_res[0].start,
+				pci0_res[0].end - pci0_res[0].start);
+	if (!pci_reg) {
+		printk(KERN_ERR "Could not remap PCI registers\n");
+		return;
+	}
+
+	val = __raw_readl(&pci_reg->pcic);
+	val &= 0xFFFFFF7;
+	__raw_writel(val, (void *)&pci_reg->pcic);
+
+#ifdef CONFIG_PCI
+	/* Enable PCI interrupts in EPLD Mask register */
+	*epld_mask = 0x0;
+	*(epld_mask + 1) = 0x0;
+#endif
+	write_c0_wired(0);
+}
+
+const char *get_system_type(void)
+{
+	switch (mips_machtype) {
+	case MACH_MIKROTIK_RB532A:
+		return "Mikrotik RB532A";
+		break;
+	default:
+		return "Mikrotik RB532";
+		break;
+	}
+}
diff --git a/arch/mips/rb532/time.c b/arch/mips/rb532/time.c
new file mode 100644
index 00000000000..db74edf8cef
--- /dev/null
+++ b/arch/mips/rb532/time.c
@@ -0,0 +1,67 @@
+/*
+ * Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ *  Setting up the clock on the MIPS boards.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/irq.h>
+#include <linux/timex.h>
+
+#include <asm/mipsregs.h>
+#include <asm/debug.h>
+#include <asm/time.h>
+#include <asm/mach-rc32434/rc32434.h>
+
+extern unsigned int idt_cpu_freq;
+
+/*
+ * Figure out the r4k offset, the amount to increment the compare
+ * register for each time tick. There is no RTC available.
+ *
+ * The RC32434 counts at half the CPU *core* speed.
+ */
+static unsigned long __init cal_r4koff(void)
+{
+	mips_hpt_frequency = idt_cpu_freq * IDT_CLOCK_MULT / 2;
+
+	return mips_hpt_frequency / HZ;
+}
+
+void __init plat_time_init(void)
+{
+	unsigned int est_freq, flags;
+	unsigned long r4k_offset;
+
+	local_irq_save(flags);
+
+	printk(KERN_INFO "calculating r4koff... ");
+	r4k_offset = cal_r4koff();
+	printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset);
+
+	est_freq = 2 * r4k_offset * HZ;
+	est_freq += 5000;	/* round */
+	est_freq -= est_freq % 10000;
+	printk(KERN_INFO "CPU frequency %d.%02d MHz\n", est_freq / 1000000,
+	       (est_freq % 1000000) * 100 / 1000000);
+	local_irq_restore(flags);
+}
-- 
cgit v1.2.3


From 8d795f2a5cf73338a467ac82bdeb73225e987c45 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Fri, 18 Jul 2008 00:43:48 +0900
Subject: [MIPS] TXx9: Miscellaneous build fixes

* Fix build if only RBTX4927 or RBTX4938 was selected.
* Move gpio helpers to generic part.
* Select SOC_TX4938 for RBTX4927/37 board.
* Fix parent of rbtx4938_fpga_resource.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/txx9/Kconfig          |  2 ++
 arch/mips/txx9/generic/setup.c  | 20 ++++++++++++++++++++
 arch/mips/txx9/rbtx4938/setup.c | 14 +-------------
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig
index b92a134ef12..6de4c5aa92b 100644
--- a/arch/mips/txx9/Kconfig
+++ b/arch/mips/txx9/Kconfig
@@ -7,6 +7,8 @@ config TOSHIBA_RBTX4927
 	bool "Toshiba RBTX49[23]7 board"
 	depends on MACH_TX49XX
 	select SOC_TX4927
+	# TX4937 is a subset of TX4938
+	select SOC_TX4938
 	help
 	  This Toshiba board is based on the TX4927 processor. Say Y here to
 	  support this machine type
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 5afc5d5cab0..8caef07701b 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -94,6 +94,22 @@ void clk_put(struct clk *clk)
 }
 EXPORT_SYMBOL(clk_put);
 
+/* GPIO support */
+
+#ifdef CONFIG_GENERIC_GPIO
+int gpio_to_irq(unsigned gpio)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(gpio_to_irq);
+
+int irq_to_gpio(unsigned irq)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(irq_to_gpio);
+#endif
+
 extern struct txx9_board_vec jmr3927_vec;
 extern struct txx9_board_vec rbtx4927_vec;
 extern struct txx9_board_vec rbtx4937_vec;
@@ -126,15 +142,19 @@ void __init prom_init(void)
 #endif
 #ifdef CONFIG_CPU_TX49XX
 	switch (TX4938_REV_PCODE()) {
+#ifdef CONFIG_TOSHIBA_RBTX4927
 	case 0x4927:
 		txx9_board_vec = &rbtx4927_vec;
 		break;
 	case 0x4937:
 		txx9_board_vec = &rbtx4937_vec;
 		break;
+#endif
+#ifdef CONFIG_TOSHIBA_RBTX4938
 	case 0x4938:
 		txx9_board_vec = &rbtx4938_vec;
 		break;
+#endif
 	}
 #endif
 
diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c
index aaa987ae0f8..c2da92396b7 100644
--- a/arch/mips/txx9/rbtx4938/setup.c
+++ b/arch/mips/txx9/rbtx4938/setup.c
@@ -457,7 +457,7 @@ static void __init rbtx4938_mem_setup(void)
 	rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR);
 	rbtx4938_fpga_resource.end = CPHYSADDR(RBTX4938_FPGA_REG_ADDR) + 0xffff;
 	rbtx4938_fpga_resource.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-	if (request_resource(&iomem_resource, &rbtx4938_fpga_resource))
+	if (request_resource(&txx9_ce_res[2], &rbtx4938_fpga_resource))
 		printk("request resource for fpga failed\n");
 
 	_machine_restart = rbtx4938_machine_restart;
@@ -488,18 +488,6 @@ static int __init rbtx4938_ne_init(void)
 	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
 }
 
-/* GPIO support */
-
-int gpio_to_irq(unsigned gpio)
-{
-	return -EINVAL;
-}
-
-int irq_to_gpio(unsigned irq)
-{
-	return -EINVAL;
-}
-
 static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock);
 
 static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset,
-- 
cgit v1.2.3


From fc22c3571c86cc36f4eb29336ce40c04a666ee98 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 16 Jul 2008 19:25:40 +0300
Subject: [MIPS] mips/sgi-ip22/ip28-berr.c: fix the build

Commit 3e6ea3b0d7a93550a93a265e732413d3a5aaf0d2 (linux-mips.org) /
52f4f6bbcff5510f662a002ec1219660ea25af62 (kernel.org)
([MIPS] Use kernel-supplied ARRAY_SIZE() macro.)
causes the following compile error:

<--  snip  -->

...
  CC      arch/mips/sgi-ip22/ip28-berr.o
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c: In function 'ip28_be_interrupt':
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: error: subscripted value is neither array nor pointer
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: error: subscripted value is neither array nor pointer
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: warning: type defaults to 'int' in declaration of 'type name'
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: error: subscripted value is neither array nor pointer
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: error: subscripted value is neither array nor pointer
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: warning: type defaults to 'int' in declaration of 'type name'
make[2]: *** [arch/mips/sgi-ip22/ip28-berr.o] Error 1

<--  snip  -->

Using ARRAY_SIZE in these places in arch/mips/sgi-ip22/ip28-berr.c was
bogus, and therefore gets reverted by this patch.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip22/ip28-berr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index fee7a2e0e53..30e12e2ec4b 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -412,7 +412,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs)
 	 * Now we have an asynchronous bus error, speculatively or DMA caused.
 	 * Need to search all DMA descriptors for the error address.
 	 */
-	for (i = 0; i < ARRAY_SIZE(hpc3); ++i) {
+	for (i = 0; i < sizeof(hpc3)/sizeof(struct hpc3_stat); ++i) {
 		struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i;
 		if ((cpu_err_stat & CPU_ERRMASK) &&
 		    (cpu_err_addr == hp->ndptr || cpu_err_addr == hp->cbp))
@@ -421,7 +421,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs)
 		    (gio_err_addr == hp->ndptr || gio_err_addr == hp->cbp))
 			break;
 	}
-	if (i < ARRAY_SIZE(hpc3)) {
+	if (i < sizeof(hpc3)/sizeof(struct hpc3_stat)) {
 		struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i;
 		printk(KERN_ERR "at DMA addresses: HPC3 @ %08lx:"
 		       " ctl %08x, ndp %08x, cbp %08x\n",
-- 
cgit v1.2.3


From b5d5accc7a2eb41f43ef346f3b258ba2f6342a1c Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Fri, 18 Jul 2008 23:03:15 +0900
Subject: [MIPS] Cobalt: Fix I/O port resource range

LCD and buttons don't use I/O port space.

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cobalt/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index dd23beb8604..b5164422724 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -81,8 +81,8 @@ void __init plat_mem_setup(void)
 
 	set_io_port_base(CKSEG1ADDR(GT_DEF_PCI0_IO_BASE));
 
-	/* I/O port resource must include LCD/buttons */
-	ioport_resource.end = 0x0fffffff;
+	/* I/O port resource */
+	ioport_resource.end = 0x01ffffff;
 
 	/* These resources have been reserved by VIA SuperI/O chip. */
 	for (i = 0; i < ARRAY_SIZE(cobalt_reserved_resources); i++)
-- 
cgit v1.2.3


From 255033a9bb900a06c9a7798908ce12557d24fb66 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 19 Jul 2008 01:51:41 +0900
Subject: [MIPS] TXx9: Cleanups for 64-bit support

* Unify (and fix) mem_tx4938.c and mem_tx4927.c
* Simplify prom_init
* Kill volatiles and unused definitions for tx4927.h and tx4938.h

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/txx9/generic/Makefile     |   2 +-
 arch/mips/txx9/generic/mem_tx4927.c |  94 +++++----------------------
 arch/mips/txx9/generic/mem_tx4938.c | 124 ------------------------------------
 arch/mips/txx9/rbtx4927/prom.c      |   6 +-
 arch/mips/txx9/rbtx4938/prom.c      |   6 +-
 arch/mips/txx9/rbtx4938/setup.c     |  11 ++--
 6 files changed, 24 insertions(+), 219 deletions(-)
 delete mode 100644 arch/mips/txx9/generic/mem_tx4938.c

(limited to 'arch')

diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile
index 668fdaad644..ab274ede9a7 100644
--- a/arch/mips/txx9/generic/Makefile
+++ b/arch/mips/txx9/generic/Makefile
@@ -5,7 +5,7 @@
 obj-y	+= setup.o
 obj-$(CONFIG_PCI)	+= pci.o
 obj-$(CONFIG_SOC_TX4927)	+= mem_tx4927.o irq_tx4927.o
-obj-$(CONFIG_SOC_TX4938)	+= mem_tx4938.o irq_tx4938.o
+obj-$(CONFIG_SOC_TX4938)	+= mem_tx4927.o irq_tx4938.o
 obj-$(CONFIG_TOSHIBA_FPCIB0)	+= smsc_fdc37m81x.o
 obj-$(CONFIG_KGDB)	+= dbgio.o
 
diff --git a/arch/mips/txx9/generic/mem_tx4927.c b/arch/mips/txx9/generic/mem_tx4927.c
index 12dfc377bf2..ef6ea6e9787 100644
--- a/arch/mips/txx9/generic/mem_tx4927.c
+++ b/arch/mips/txx9/generic/mem_tx4927.c
@@ -1,5 +1,5 @@
 /*
- * linux/arch/mips/tx4927/common/tx4927_prom.c
+ * linux/arch/mips/txx9/generic/mem_tx4927.c
  *
  * common tx4927 memory interface
  *
@@ -32,8 +32,9 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/io.h>
+#include <asm/txx9/tx4927.h>
 
-static unsigned int __init tx4927_process_sdccr(unsigned long addr)
+static unsigned int __init tx4927_process_sdccr(u64 __iomem *addr)
 {
 	u64 val;
 	unsigned int sdccr_ce;
@@ -45,97 +46,32 @@ static unsigned int __init tx4927_process_sdccr(unsigned long addr)
 	unsigned int rs = 0;
 	unsigned int cs = 0;
 	unsigned int mw = 0;
-	unsigned int msize = 0;
 
-	val = __raw_readq((void __iomem *)addr);
+	val = __raw_readq(addr);
 
 	/* MVMCP -- need #defs for these bits masks */
 	sdccr_ce = ((val & (1 << 10)) >> 10);
 	sdccr_bs = ((val & (1 << 8)) >> 8);
 	sdccr_rs = ((val & (3 << 5)) >> 5);
-	sdccr_cs = ((val & (3 << 2)) >> 2);
+	sdccr_cs = ((val & (7 << 2)) >> 2);
 	sdccr_mw = ((val & (1 << 0)) >> 0);
 
 	if (sdccr_ce) {
-		switch (sdccr_bs) {
-		case 0:{
-				bs = 2;
-				break;
-			}
-		case 1:{
-				bs = 4;
-				break;
-			}
-		}
-		switch (sdccr_rs) {
-		case 0:{
-				rs = 2048;
-				break;
-			}
-		case 1:{
-				rs = 4096;
-				break;
-			}
-		case 2:{
-				rs = 8192;
-				break;
-			}
-		case 3:{
-				rs = 0;
-				break;
-			}
-		}
-		switch (sdccr_cs) {
-		case 0:{
-				cs = 256;
-				break;
-			}
-		case 1:{
-				cs = 512;
-				break;
-			}
-		case 2:{
-				cs = 1024;
-				break;
-			}
-		case 3:{
-				cs = 2048;
-				break;
-			}
-		}
-		switch (sdccr_mw) {
-		case 0:{
-				mw = 8;
-				break;
-			}	/* 8 bytes = 64 bits */
-		case 1:{
-				mw = 4;
-				break;
-			}	/* 4 bytes = 32 bits */
-		}
+		bs = 2 << sdccr_bs;
+		rs = 2048 << sdccr_rs;
+		cs = 256 << sdccr_cs;
+		mw = 8 >> sdccr_mw;
 	}
 
-	/*            bytes per chip     MB per chip      num chips */
-	msize = (((rs * cs * mw) / (1024 * 1024)) * bs);
-
-	return (msize);
+	return rs * cs * mw * bs;
 }
 
-
 unsigned int __init tx4927_get_mem_size(void)
 {
-	unsigned int c0;
-	unsigned int c1;
-	unsigned int c2;
-	unsigned int c3;
-	unsigned int total;
-
-	/* MVMCP -- need #defs for these registers */
-	c0 = tx4927_process_sdccr(0xff1f8000);
-	c1 = tx4927_process_sdccr(0xff1f8008);
-	c2 = tx4927_process_sdccr(0xff1f8010);
-	c3 = tx4927_process_sdccr(0xff1f8018);
-	total = c0 + c1 + c2 + c3;
+	unsigned int total = 0;
+	int i;
 
-	return (total);
+	for (i = 0; i < ARRAY_SIZE(tx4927_sdramcptr->cr); i++)
+		total += tx4927_process_sdccr(&tx4927_sdramcptr->cr[i]);
+	return total;
 }
diff --git a/arch/mips/txx9/generic/mem_tx4938.c b/arch/mips/txx9/generic/mem_tx4938.c
deleted file mode 100644
index 20baeaeba4c..00000000000
--- a/arch/mips/txx9/generic/mem_tx4938.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * linux/arch/mips/tx4938/common/prom.c
- *
- * common tx4938 memory interface
- * Copyright (C) 2000-2001 Toshiba Corporation
- *
- * 2003-2005 (c) MontaVista Software, Inc. This file is licensed under the
- * terms of the GNU General Public License version 2. This program is
- * licensed "as is" without any warranty of any kind, whether express
- * or implied.
- *
- * Support for TX4938 in 2.6 - Manish Lachwani (mlachwani@mvista.com)
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/io.h>
-
-static unsigned int __init
-tx4938_process_sdccr(u64 * addr)
-{
-	u64 val;
-	unsigned int sdccr_ce;
-	unsigned int sdccr_rs;
-	unsigned int sdccr_cs;
-	unsigned int sdccr_mw;
-	unsigned int rs = 0;
-	unsigned int cs = 0;
-	unsigned int mw = 0;
-	unsigned int bc = 4;
-	unsigned int msize = 0;
-
-	val = ____raw_readq((void __iomem *)addr);
-
-	/* MVMCP -- need #defs for these bits masks */
-	sdccr_ce = ((val & (1 << 10)) >> 10);
-	sdccr_rs = ((val & (3 << 5)) >> 5);
-	sdccr_cs = ((val & (7 << 2)) >> 2);
-	sdccr_mw = ((val & (1 << 0)) >> 0);
-
-	if (sdccr_ce) {
-		switch (sdccr_rs) {
-		case 0:{
-				rs = 2048;
-				break;
-			}
-		case 1:{
-				rs = 4096;
-				break;
-			}
-		case 2:{
-				rs = 8192;
-				break;
-			}
-		default:{
-				rs = 0;
-				break;
-			}
-		}
-		switch (sdccr_cs) {
-		case 0:{
-				cs = 256;
-				break;
-			}
-		case 1:{
-				cs = 512;
-				break;
-			}
-		case 2:{
-				cs = 1024;
-				break;
-			}
-		case 3:{
-				cs = 2048;
-				break;
-			}
-		case 4:{
-				cs = 4096;
-				break;
-			}
-		default:{
-				cs = 0;
-				break;
-			}
-		}
-		switch (sdccr_mw) {
-		case 0:{
-				mw = 8;
-				break;
-			}	/* 8 bytes = 64 bits */
-		case 1:{
-				mw = 4;
-				break;
-			}	/* 4 bytes = 32 bits */
-		}
-	}
-
-	/*           bytes per chip    MB per chip          bank count */
-	msize = (((rs * cs * mw) / (1024 * 1024)) * (bc));
-
-	/* MVMCP -- bc hard coded to 4 from table 9.3.1     */
-	/*          boad supports bc=2 but no way to detect */
-
-	return (msize);
-}
-
-unsigned int __init
-tx4938_get_mem_size(void)
-{
-	unsigned int c0;
-	unsigned int c1;
-	unsigned int c2;
-	unsigned int c3;
-	unsigned int total;
-
-	/* MVMCP -- need #defs for these registers */
-	c0 = tx4938_process_sdccr((u64 *) 0xff1f8000);
-	c1 = tx4938_process_sdccr((u64 *) 0xff1f8008);
-	c2 = tx4938_process_sdccr((u64 *) 0xff1f8010);
-	c3 = tx4938_process_sdccr((u64 *) 0xff1f8018);
-	total = c0 + c1 + c2 + c3;
-
-	return (total);
-}
diff --git a/arch/mips/txx9/rbtx4927/prom.c b/arch/mips/txx9/rbtx4927/prom.c
index 942e627d2dc..5c0de54ebdd 100644
--- a/arch/mips/txx9/rbtx4927/prom.c
+++ b/arch/mips/txx9/rbtx4927/prom.c
@@ -36,10 +36,6 @@
 
 void __init rbtx4927_prom_init(void)
 {
-	extern int tx4927_get_mem_size(void);
-	int msize;
-
 	prom_init_cmdline();
-	msize = tx4927_get_mem_size();
-	add_memory_region(0, msize << 20, BOOT_MEM_RAM);
+	add_memory_region(0, tx4927_get_mem_size(), BOOT_MEM_RAM);
 }
diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c
index fbb37458ddb..ee189519ce5 100644
--- a/arch/mips/txx9/rbtx4938/prom.c
+++ b/arch/mips/txx9/rbtx4938/prom.c
@@ -18,12 +18,8 @@
 
 void __init rbtx4938_prom_init(void)
 {
-	extern int tx4938_get_mem_size(void);
-	int msize;
 #ifndef CONFIG_TX4938_NAND_BOOT
 	prom_init_cmdline();
 #endif
-
-	msize = tx4938_get_mem_size();
-	add_memory_region(0, msize << 20, BOOT_MEM_RAM);
+	add_memory_region(0, tx4938_get_mem_size(), BOOT_MEM_RAM);
 }
diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c
index c2da92396b7..c1e076c7b2d 100644
--- a/arch/mips/txx9/rbtx4938/setup.c
+++ b/arch/mips/txx9/rbtx4938/setup.c
@@ -310,7 +310,7 @@ void __init tx4938_board_setup(void)
 
 	printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str);
 	for (i = 0; i < 4; i++) {
-		unsigned long long cr = tx4938_sdramcptr->cr[i];
+		u64 cr = TX4938_SDRAMC_CR(i);
 		unsigned long ram_base, ram_size;
 		if (!((unsigned long)cr & 0x00000400))
 			continue;	/* disabled */
@@ -318,20 +318,21 @@ void __init tx4938_board_setup(void)
 		ram_size = ((unsigned long)(cr >> 33) + 1) << 21;
 		if (ram_base >= 0x20000000)
 			continue;	/* high memory (ignore) */
-		printk(" CR%d:%016Lx", i, cr);
+		printk(KERN_CONT " CR%d:%016llx", i, cr);
 		tx4938_sdram_resource[i].name = "SDRAM";
 		tx4938_sdram_resource[i].start = ram_base;
 		tx4938_sdram_resource[i].end = ram_base + ram_size - 1;
 		tx4938_sdram_resource[i].flags = IORESOURCE_MEM;
 		request_resource(&iomem_resource, &tx4938_sdram_resource[i]);
 	}
-	printk(" TR:%09Lx\n", tx4938_sdramcptr->tr);
+	printk(KERN_CONT " TR:%09llx\n", ____raw_readq(&tx4938_sdramcptr->tr));
 
 	/* SRAM */
-	if (tx4938_sramcptr->cr & 1) {
+	if (____raw_readq(&tx4938_sramcptr->cr) & 1) {
 		unsigned int size = 0x800;
 		unsigned long base =
-			(tx4938_sramcptr->cr >> (39-11)) & ~(size - 1);
+			(____raw_readq(&tx4938_sramcptr->cr) >> (39-11))
+			& ~(size - 1);
 		tx4938_sram_resource.name = "SRAM";
 		tx4938_sram_resource.start = base;
 		tx4938_sram_resource.end = base + size - 1;
-- 
cgit v1.2.3


From 94a4c32939dede9328c6e4face335eb8441fc18d Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 19 Jul 2008 01:51:47 +0900
Subject: [MIPS] TXx9: Add 64-bit support

SYS_SUPPORTS_64BIT_KERNEL is enabled for RBTX4927/RBTX4938, but
actually it has been broken for a long time (or from the beginning).
Now it should work.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/txx9/generic/Makefile       |   4 +-
 arch/mips/txx9/generic/irq_tx4927.c   |   2 +-
 arch/mips/txx9/generic/irq_tx4938.c   |   2 +-
 arch/mips/txx9/generic/setup.c        |  16 ++-
 arch/mips/txx9/generic/setup_tx4927.c | 194 +++++++++++++++++++++++++
 arch/mips/txx9/generic/setup_tx4938.c | 259 ++++++++++++++++++++++++++++++++++
 arch/mips/txx9/jmr3927/setup.c        |   8 --
 arch/mips/txx9/rbtx4927/irq.c         |  12 +-
 arch/mips/txx9/rbtx4927/setup.c       |  89 ++++--------
 arch/mips/txx9/rbtx4938/setup.c       | 218 ++--------------------------
 10 files changed, 510 insertions(+), 294 deletions(-)
 create mode 100644 arch/mips/txx9/generic/setup_tx4927.c
 create mode 100644 arch/mips/txx9/generic/setup_tx4938.c

(limited to 'arch')

diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile
index ab274ede9a7..9c120771e65 100644
--- a/arch/mips/txx9/generic/Makefile
+++ b/arch/mips/txx9/generic/Makefile
@@ -4,8 +4,8 @@
 
 obj-y	+= setup.o
 obj-$(CONFIG_PCI)	+= pci.o
-obj-$(CONFIG_SOC_TX4927)	+= mem_tx4927.o irq_tx4927.o
-obj-$(CONFIG_SOC_TX4938)	+= mem_tx4927.o irq_tx4938.o
+obj-$(CONFIG_SOC_TX4927)	+= mem_tx4927.o setup_tx4927.o irq_tx4927.o
+obj-$(CONFIG_SOC_TX4938)	+= mem_tx4927.o setup_tx4938.o irq_tx4938.o
 obj-$(CONFIG_TOSHIBA_FPCIB0)	+= smsc_fdc37m81x.o
 obj-$(CONFIG_KGDB)	+= dbgio.o
 
diff --git a/arch/mips/txx9/generic/irq_tx4927.c b/arch/mips/txx9/generic/irq_tx4927.c
index 6377bd8a905..cbea1fdde82 100644
--- a/arch/mips/txx9/generic/irq_tx4927.c
+++ b/arch/mips/txx9/generic/irq_tx4927.c
@@ -31,7 +31,7 @@
 void __init tx4927_irq_init(void)
 {
 	mips_cpu_irq_init();
-	txx9_irq_init(TX4927_IRC_REG);
+	txx9_irq_init(TX4927_IRC_REG & 0xfffffffffULL);
 	set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4927_IRC_INT,
 				handle_simple_irq);
 }
diff --git a/arch/mips/txx9/generic/irq_tx4938.c b/arch/mips/txx9/generic/irq_tx4938.c
index 5fc86c9c9d2..6eac684bf19 100644
--- a/arch/mips/txx9/generic/irq_tx4938.c
+++ b/arch/mips/txx9/generic/irq_tx4938.c
@@ -19,7 +19,7 @@
 void __init tx4938_irq_init(void)
 {
 	mips_cpu_irq_init();
-	txx9_irq_init(TX4938_IRC_REG);
+	txx9_irq_init(TX4938_IRC_REG & 0xfffffffffULL);
 	set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4938_IRC_INT,
 				handle_simple_irq);
 }
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 8caef07701b..3715a8f5ea4 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -30,6 +30,7 @@ struct resource txx9_ce_res[8];
 static char txx9_ce_res_name[8][4];	/* "CEn" */
 
 /* pcode, internal register */
+unsigned int txx9_pcode;
 char txx9_pcode_str[8];
 static struct resource txx9_reg_res = {
 	.name = txx9_pcode_str,
@@ -59,15 +60,16 @@ unsigned int txx9_master_clock;
 unsigned int txx9_cpu_clock;
 unsigned int txx9_gbus_clock;
 
+int txx9_ccfg_toeon __initdata = 1;
 
 /* Minimum CLK support */
 
 struct clk *clk_get(struct device *dev, const char *id)
 {
 	if (!strcmp(id, "spi-baseclk"))
-		return (struct clk *)(txx9_gbus_clock / 2 / 4);
+		return (struct clk *)((unsigned long)txx9_gbus_clock / 2 / 4);
 	if (!strcmp(id, "imbus_clk"))
-		return (struct clk *)(txx9_gbus_clock / 2);
+		return (struct clk *)((unsigned long)txx9_gbus_clock / 2);
 	return ERR_PTR(-ENOENT);
 }
 EXPORT_SYMBOL(clk_get);
@@ -123,6 +125,12 @@ void __init prom_init_cmdline(void)
 	int argc = (int)fw_arg0;
 	char **argv = (char **)fw_arg1;
 	int i;			/* Always ignore the "-c" at argv[0] */
+#ifdef CONFIG_64BIT
+	char *fixed_argv[32];
+	for (i = 0; i < argc; i++)
+		fixed_argv[i] = (char *)(long)(*((__s32 *)argv + i));
+	argv = fixed_argv;
+#endif
 
 	/* ignore all built-in args if any f/w args given */
 	if (argc > 1)
@@ -180,6 +188,10 @@ char * __init prom_getcmdline(void)
 /* wrappers */
 void __init plat_mem_setup(void)
 {
+	ioport_resource.start = 0;
+	ioport_resource.end = ~0UL;	/* no limit */
+	iomem_resource.start = 0;
+	iomem_resource.end = ~0UL;	/* no limit */
 	txx9_board_vec->mem_setup();
 }
 
diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c
new file mode 100644
index 00000000000..89d6e28add9
--- /dev/null
+++ b/arch/mips/txx9/generic/setup_tx4927.c
@@ -0,0 +1,194 @@
+/*
+ * TX4927 setup routines
+ * Based on linux/arch/mips/txx9/rbtx4938/setup.c,
+ *	    and RBTX49xx patch from CELF patch archive.
+ *
+ * 2003-2005 (c) MontaVista Software, Inc.
+ * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/serial_core.h>
+#include <linux/param.h>
+#include <asm/txx9irq.h>
+#include <asm/txx9tmr.h>
+#include <asm/txx9pio.h>
+#include <asm/txx9/generic.h>
+#include <asm/txx9/tx4927.h>
+
+void __init tx4927_wdr_init(void)
+{
+	/* clear WatchDogReset (W1C) */
+	tx4927_ccfg_set(TX4927_CCFG_WDRST);
+	/* do reset on watchdog */
+	tx4927_ccfg_set(TX4927_CCFG_WR);
+}
+
+static struct resource tx4927_sdram_resource[4];
+
+void __init tx4927_setup(void)
+{
+	int i;
+	__u32 divmode;
+	int cpuclk = 0;
+	u64 ccfg;
+
+	txx9_reg_res_init(TX4927_REV_PCODE(), TX4927_REG_BASE,
+			  TX4927_REG_SIZE);
+
+	/* SDRAMC,EBUSC are configured by PROM */
+	for (i = 0; i < 8; i++) {
+		if (!(TX4927_EBUSC_CR(i) & 0x8))
+			continue;	/* disabled */
+		txx9_ce_res[i].start = (unsigned long)TX4927_EBUSC_BA(i);
+		txx9_ce_res[i].end =
+			txx9_ce_res[i].start + TX4927_EBUSC_SIZE(i) - 1;
+		request_resource(&iomem_resource, &txx9_ce_res[i]);
+	}
+
+	/* clocks */
+	ccfg = ____raw_readq(&tx4927_ccfgptr->ccfg);
+	if (txx9_master_clock) {
+		/* calculate gbus_clock and cpu_clock from master_clock */
+		divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK;
+		switch (divmode) {
+		case TX4927_CCFG_DIVMODE_8:
+		case TX4927_CCFG_DIVMODE_10:
+		case TX4927_CCFG_DIVMODE_12:
+		case TX4927_CCFG_DIVMODE_16:
+			txx9_gbus_clock = txx9_master_clock * 4; break;
+		default:
+			txx9_gbus_clock = txx9_master_clock;
+		}
+		switch (divmode) {
+		case TX4927_CCFG_DIVMODE_2:
+		case TX4927_CCFG_DIVMODE_8:
+			cpuclk = txx9_gbus_clock * 2; break;
+		case TX4927_CCFG_DIVMODE_2_5:
+		case TX4927_CCFG_DIVMODE_10:
+			cpuclk = txx9_gbus_clock * 5 / 2; break;
+		case TX4927_CCFG_DIVMODE_3:
+		case TX4927_CCFG_DIVMODE_12:
+			cpuclk = txx9_gbus_clock * 3; break;
+		case TX4927_CCFG_DIVMODE_4:
+		case TX4927_CCFG_DIVMODE_16:
+			cpuclk = txx9_gbus_clock * 4; break;
+		}
+		txx9_cpu_clock = cpuclk;
+	} else {
+		if (txx9_cpu_clock == 0)
+			txx9_cpu_clock = 200000000;	/* 200MHz */
+		/* calculate gbus_clock and master_clock from cpu_clock */
+		cpuclk = txx9_cpu_clock;
+		divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK;
+		switch (divmode) {
+		case TX4927_CCFG_DIVMODE_2:
+		case TX4927_CCFG_DIVMODE_8:
+			txx9_gbus_clock = cpuclk / 2; break;
+		case TX4927_CCFG_DIVMODE_2_5:
+		case TX4927_CCFG_DIVMODE_10:
+			txx9_gbus_clock = cpuclk * 2 / 5; break;
+		case TX4927_CCFG_DIVMODE_3:
+		case TX4927_CCFG_DIVMODE_12:
+			txx9_gbus_clock = cpuclk / 3; break;
+		case TX4927_CCFG_DIVMODE_4:
+		case TX4927_CCFG_DIVMODE_16:
+			txx9_gbus_clock = cpuclk / 4; break;
+		}
+		switch (divmode) {
+		case TX4927_CCFG_DIVMODE_8:
+		case TX4927_CCFG_DIVMODE_10:
+		case TX4927_CCFG_DIVMODE_12:
+		case TX4927_CCFG_DIVMODE_16:
+			txx9_master_clock = txx9_gbus_clock / 4; break;
+		default:
+			txx9_master_clock = txx9_gbus_clock;
+		}
+	}
+	/* change default value so that udelay/mdelay take a reasonable time */
+	loops_per_jiffy = txx9_cpu_clock / HZ / 2;
+
+	/* CCFG */
+	tx4927_wdr_init();
+	/* clear BusErrorOnWrite flag (W1C) */
+	tx4927_ccfg_set(TX4927_CCFG_BEOW);
+	/* enable Timeout BusError */
+	if (txx9_ccfg_toeon)
+		tx4927_ccfg_set(TX4927_CCFG_TOE);
+
+	/* DMA selection */
+	txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_DMASEL_ALL);
+
+	/* Use external clock for external arbiter */
+	if (!(____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_PCIARB))
+		txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_PCICLKEN_ALL);
+
+	printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n",
+	       txx9_pcode_str,
+	       (cpuclk + 500000) / 1000000,
+	       (txx9_master_clock + 500000) / 1000000,
+	       (__u32)____raw_readq(&tx4927_ccfgptr->crir),
+	       (unsigned long long)____raw_readq(&tx4927_ccfgptr->ccfg),
+	       (unsigned long long)____raw_readq(&tx4927_ccfgptr->pcfg));
+
+	printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str);
+	for (i = 0; i < 4; i++) {
+		__u64 cr = TX4927_SDRAMC_CR(i);
+		unsigned long base, size;
+		if (!((__u32)cr & 0x00000400))
+			continue;	/* disabled */
+		base = (unsigned long)(cr >> 49) << 21;
+		size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21;
+		printk(" CR%d:%016llx", i, (unsigned long long)cr);
+		tx4927_sdram_resource[i].name = "SDRAM";
+		tx4927_sdram_resource[i].start = base;
+		tx4927_sdram_resource[i].end = base + size - 1;
+		tx4927_sdram_resource[i].flags = IORESOURCE_MEM;
+		request_resource(&iomem_resource, &tx4927_sdram_resource[i]);
+	}
+	printk(" TR:%09llx\n",
+	       (unsigned long long)____raw_readq(&tx4927_sdramcptr->tr));
+
+	/* TMR */
+	/* disable all timers */
+	for (i = 0; i < TX4927_NR_TMR; i++)
+		txx9_tmr_init(TX4927_TMR_REG(i) & 0xfffffffffULL);
+
+	/* PIO */
+	txx9_gpio_init(TX4927_PIO_REG & 0xfffffffffULL, 0, TX4927_NUM_PIO);
+	__raw_writel(0, &tx4927_pioptr->maskcpu);
+	__raw_writel(0, &tx4927_pioptr->maskext);
+}
+
+void __init tx4927_time_init(unsigned int tmrnr)
+{
+	if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS)
+		txx9_clockevent_init(TX4927_TMR_REG(tmrnr) & 0xfffffffffULL,
+				     TXX9_IRQ_BASE + TX4927_IR_TMR(tmrnr),
+				     TXX9_IMCLK);
+}
+
+void __init tx4927_setup_serial(void)
+{
+#ifdef CONFIG_SERIAL_TXX9
+	int i;
+	struct uart_port req;
+
+	for (i = 0; i < 2; i++) {
+		memset(&req, 0, sizeof(req));
+		req.line = i;
+		req.iotype = UPIO_MEM;
+		req.membase = (unsigned char __iomem *)TX4927_SIO_REG(i);
+		req.mapbase = TX4927_SIO_REG(i) & 0xfffffffffULL;
+		req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i);
+		req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+		req.uartclk = TXX9_IMCLK;
+		early_serial_txx9_setup(&req);
+	}
+#endif /* CONFIG_SERIAL_TXX9 */
+}
diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c
new file mode 100644
index 00000000000..317378d8579
--- /dev/null
+++ b/arch/mips/txx9/generic/setup_tx4938.c
@@ -0,0 +1,259 @@
+/*
+ * TX4938/4937 setup routines
+ * Based on linux/arch/mips/txx9/rbtx4938/setup.c,
+ *	    and RBTX49xx patch from CELF patch archive.
+ *
+ * 2003-2005 (c) MontaVista Software, Inc.
+ * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/serial_core.h>
+#include <linux/param.h>
+#include <asm/txx9irq.h>
+#include <asm/txx9tmr.h>
+#include <asm/txx9pio.h>
+#include <asm/txx9/generic.h>
+#include <asm/txx9/tx4938.h>
+
+void __init tx4938_wdr_init(void)
+{
+	/* clear WatchDogReset (W1C) */
+	tx4938_ccfg_set(TX4938_CCFG_WDRST);
+	/* do reset on watchdog */
+	tx4938_ccfg_set(TX4938_CCFG_WR);
+}
+
+static struct resource tx4938_sdram_resource[4];
+static struct resource tx4938_sram_resource;
+
+#define TX4938_SRAM_SIZE 0x800
+
+void __init tx4938_setup(void)
+{
+	int i;
+	__u32 divmode;
+	int cpuclk = 0;
+	u64 ccfg;
+
+	txx9_reg_res_init(TX4938_REV_PCODE(), TX4938_REG_BASE,
+			  TX4938_REG_SIZE);
+
+	/* SDRAMC,EBUSC are configured by PROM */
+	for (i = 0; i < 8; i++) {
+		if (!(TX4938_EBUSC_CR(i) & 0x8))
+			continue;	/* disabled */
+		txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i);
+		txx9_ce_res[i].end =
+			txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1;
+		request_resource(&iomem_resource, &txx9_ce_res[i]);
+	}
+
+	/* clocks */
+	ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg);
+	if (txx9_master_clock) {
+		/* calculate gbus_clock and cpu_clock from master_clock */
+		divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK;
+		switch (divmode) {
+		case TX4938_CCFG_DIVMODE_8:
+		case TX4938_CCFG_DIVMODE_10:
+		case TX4938_CCFG_DIVMODE_12:
+		case TX4938_CCFG_DIVMODE_16:
+		case TX4938_CCFG_DIVMODE_18:
+			txx9_gbus_clock = txx9_master_clock * 4; break;
+		default:
+			txx9_gbus_clock = txx9_master_clock;
+		}
+		switch (divmode) {
+		case TX4938_CCFG_DIVMODE_2:
+		case TX4938_CCFG_DIVMODE_8:
+			cpuclk = txx9_gbus_clock * 2; break;
+		case TX4938_CCFG_DIVMODE_2_5:
+		case TX4938_CCFG_DIVMODE_10:
+			cpuclk = txx9_gbus_clock * 5 / 2; break;
+		case TX4938_CCFG_DIVMODE_3:
+		case TX4938_CCFG_DIVMODE_12:
+			cpuclk = txx9_gbus_clock * 3; break;
+		case TX4938_CCFG_DIVMODE_4:
+		case TX4938_CCFG_DIVMODE_16:
+			cpuclk = txx9_gbus_clock * 4; break;
+		case TX4938_CCFG_DIVMODE_4_5:
+		case TX4938_CCFG_DIVMODE_18:
+			cpuclk = txx9_gbus_clock * 9 / 2; break;
+		}
+		txx9_cpu_clock = cpuclk;
+	} else {
+		if (txx9_cpu_clock == 0)
+			txx9_cpu_clock = 300000000;	/* 300MHz */
+		/* calculate gbus_clock and master_clock from cpu_clock */
+		cpuclk = txx9_cpu_clock;
+		divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK;
+		switch (divmode) {
+		case TX4938_CCFG_DIVMODE_2:
+		case TX4938_CCFG_DIVMODE_8:
+			txx9_gbus_clock = cpuclk / 2; break;
+		case TX4938_CCFG_DIVMODE_2_5:
+		case TX4938_CCFG_DIVMODE_10:
+			txx9_gbus_clock = cpuclk * 2 / 5; break;
+		case TX4938_CCFG_DIVMODE_3:
+		case TX4938_CCFG_DIVMODE_12:
+			txx9_gbus_clock = cpuclk / 3; break;
+		case TX4938_CCFG_DIVMODE_4:
+		case TX4938_CCFG_DIVMODE_16:
+			txx9_gbus_clock = cpuclk / 4; break;
+		case TX4938_CCFG_DIVMODE_4_5:
+		case TX4938_CCFG_DIVMODE_18:
+			txx9_gbus_clock = cpuclk * 2 / 9; break;
+		}
+		switch (divmode) {
+		case TX4938_CCFG_DIVMODE_8:
+		case TX4938_CCFG_DIVMODE_10:
+		case TX4938_CCFG_DIVMODE_12:
+		case TX4938_CCFG_DIVMODE_16:
+		case TX4938_CCFG_DIVMODE_18:
+			txx9_master_clock = txx9_gbus_clock / 4; break;
+		default:
+			txx9_master_clock = txx9_gbus_clock;
+		}
+	}
+	/* change default value so that udelay/mdelay take reasonable time */
+	loops_per_jiffy = txx9_cpu_clock / HZ / 2;
+
+	/* CCFG */
+	tx4938_wdr_init();
+	/* clear BusErrorOnWrite flag (W1C) */
+	tx4938_ccfg_set(TX4938_CCFG_BEOW);
+	/* enable Timeout BusError */
+	if (txx9_ccfg_toeon)
+		tx4938_ccfg_set(TX4938_CCFG_TOE);
+
+	/* DMA selection */
+	txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL);
+
+	/* Use external clock for external arbiter */
+	if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB))
+		txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL);
+
+	printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n",
+	       txx9_pcode_str,
+	       (cpuclk + 500000) / 1000000,
+	       (txx9_master_clock + 500000) / 1000000,
+	       (__u32)____raw_readq(&tx4938_ccfgptr->crir),
+	       (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg),
+	       (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg));
+
+	printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str);
+	for (i = 0; i < 4; i++) {
+		__u64 cr = TX4938_SDRAMC_CR(i);
+		unsigned long base, size;
+		if (!((__u32)cr & 0x00000400))
+			continue;	/* disabled */
+		base = (unsigned long)(cr >> 49) << 21;
+		size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21;
+		printk(" CR%d:%016llx", i, (unsigned long long)cr);
+		tx4938_sdram_resource[i].name = "SDRAM";
+		tx4938_sdram_resource[i].start = base;
+		tx4938_sdram_resource[i].end = base + size - 1;
+		tx4938_sdram_resource[i].flags = IORESOURCE_MEM;
+		request_resource(&iomem_resource, &tx4938_sdram_resource[i]);
+	}
+	printk(" TR:%09llx\n",
+	       (unsigned long long)____raw_readq(&tx4938_sdramcptr->tr));
+
+	/* SRAM */
+	if (txx9_pcode == 0x4938 && ____raw_readq(&tx4938_sramcptr->cr) & 1) {
+		unsigned int size = TX4938_SRAM_SIZE;
+		tx4938_sram_resource.name = "SRAM";
+		tx4938_sram_resource.start =
+			(____raw_readq(&tx4938_sramcptr->cr) >> (39-11))
+			& ~(size - 1);
+		tx4938_sram_resource.end =
+			tx4938_sram_resource.start + TX4938_SRAM_SIZE - 1;
+		tx4938_sram_resource.flags = IORESOURCE_MEM;
+		request_resource(&iomem_resource, &tx4938_sram_resource);
+	}
+
+	/* TMR */
+	/* disable all timers */
+	for (i = 0; i < TX4938_NR_TMR; i++)
+		txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL);
+
+	/* DMA */
+	for (i = 0; i < 2; i++)
+		____raw_writeq(TX4938_DMA_MCR_MSTEN,
+			       (void __iomem *)(TX4938_DMA_REG(i) + 0x50));
+
+	/* PIO */
+	txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO);
+	__raw_writel(0, &tx4938_pioptr->maskcpu);
+	__raw_writel(0, &tx4938_pioptr->maskext);
+
+	if (txx9_pcode == 0x4938) {
+		__u64 pcfg = ____raw_readq(&tx4938_ccfgptr->pcfg);
+		/* set PCIC1 reset */
+		txx9_set64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST);
+		if (pcfg & (TX4938_PCFG_ETH0_SEL | TX4938_PCFG_ETH1_SEL)) {
+			mdelay(1);	/* at least 128 cpu clock */
+			/* clear PCIC1 reset */
+			txx9_clear64(&tx4938_ccfgptr->clkctr,
+				     TX4938_CLKCTR_PCIC1RST);
+		} else {
+			printk(KERN_INFO "%s: stop PCIC1\n", txx9_pcode_str);
+			/* stop PCIC1 */
+			txx9_set64(&tx4938_ccfgptr->clkctr,
+				   TX4938_CLKCTR_PCIC1CKD);
+		}
+		if (!(pcfg & TX4938_PCFG_ETH0_SEL)) {
+			printk(KERN_INFO "%s: stop ETH0\n", txx9_pcode_str);
+			txx9_set64(&tx4938_ccfgptr->clkctr,
+				   TX4938_CLKCTR_ETH0RST);
+			txx9_set64(&tx4938_ccfgptr->clkctr,
+				   TX4938_CLKCTR_ETH0CKD);
+		}
+		if (!(pcfg & TX4938_PCFG_ETH1_SEL)) {
+			printk(KERN_INFO "%s: stop ETH1\n", txx9_pcode_str);
+			txx9_set64(&tx4938_ccfgptr->clkctr,
+				   TX4938_CLKCTR_ETH1RST);
+			txx9_set64(&tx4938_ccfgptr->clkctr,
+				   TX4938_CLKCTR_ETH1CKD);
+		}
+	}
+}
+
+void __init tx4938_time_init(unsigned int tmrnr)
+{
+	if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS)
+		txx9_clockevent_init(TX4938_TMR_REG(tmrnr) & 0xfffffffffULL,
+				     TXX9_IRQ_BASE + TX4938_IR_TMR(tmrnr),
+				     TXX9_IMCLK);
+}
+
+void __init tx4938_setup_serial(void)
+{
+#ifdef CONFIG_SERIAL_TXX9
+	int i;
+	struct uart_port req;
+	unsigned int ch_mask = 0;
+
+	if (__raw_readq(&tx4938_ccfgptr->pcfg) & TX4938_PCFG_ETH0_SEL)
+		ch_mask |= 1 << 1; /* disable SIO1 by PCFG setting */
+	for (i = 0; i < 2; i++) {
+		if ((1 << i) & ch_mask)
+			continue;
+		memset(&req, 0, sizeof(req));
+		req.line = i;
+		req.iotype = UPIO_MEM;
+		req.membase = (unsigned char __iomem *)TX4938_SIO_REG(i);
+		req.mapbase = TX4938_SIO_REG(i) & 0xfffffffffULL;
+		req.irq = TXX9_IRQ_BASE + TX4938_IR_SIO(i);
+		req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+		req.uartclk = TXX9_IMCLK;
+		early_serial_txx9_setup(&req);
+	}
+#endif /* CONFIG_SERIAL_TXX9 */
+}
diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c
index 5e35ef73c5a..03647ebe413 100644
--- a/arch/mips/txx9/jmr3927/setup.c
+++ b/arch/mips/txx9/jmr3927/setup.c
@@ -105,14 +105,6 @@ static void __init jmr3927_mem_setup(void)
 	_machine_halt = jmr3927_machine_halt;
 	pm_power_off = jmr3927_machine_power_off;
 
-	/*
-	 * IO/MEM resources.
-	 */
-	ioport_resource.start = 0;
-	ioport_resource.end = 0xffffffff;
-	iomem_resource.start = 0;
-	iomem_resource.end = 0xffffffff;
-
 	/* Reboot on panic */
 	panic_timeout = 180;
 
diff --git a/arch/mips/txx9/rbtx4927/irq.c b/arch/mips/txx9/rbtx4927/irq.c
index 70f13211bc2..cd748a93032 100644
--- a/arch/mips/txx9/rbtx4927/irq.c
+++ b/arch/mips/txx9/rbtx4927/irq.c
@@ -126,14 +126,12 @@ static struct irq_chip toshiba_rbtx4927_irq_ioc_type = {
 	.mask_ack = toshiba_rbtx4927_irq_ioc_disable,
 	.unmask = toshiba_rbtx4927_irq_ioc_enable,
 };
-#define TOSHIBA_RBTX4927_IOC_INTR_ENAB (void __iomem *)0xbc002000UL
-#define TOSHIBA_RBTX4927_IOC_INTR_STAT (void __iomem *)0xbc002006UL
 
 static int toshiba_rbtx4927_irq_nested(int sw_irq)
 {
 	u8 level3;
 
-	level3 = readb(TOSHIBA_RBTX4927_IOC_INTR_STAT) & 0x1f;
+	level3 = readb(rbtx4927_imstat_addr) & 0x1f;
 	if (level3)
 		sw_irq = RBTX4927_IRQ_IOC + fls(level3) - 1;
 	return (sw_irq);
@@ -154,18 +152,18 @@ static void toshiba_rbtx4927_irq_ioc_enable(unsigned int irq)
 {
 	unsigned char v;
 
-	v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB);
+	v = readb(rbtx4927_imask_addr);
 	v |= (1 << (irq - RBTX4927_IRQ_IOC));
-	writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB);
+	writeb(v, rbtx4927_imask_addr);
 }
 
 static void toshiba_rbtx4927_irq_ioc_disable(unsigned int irq)
 {
 	unsigned char v;
 
-	v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB);
+	v = readb(rbtx4927_imask_addr);
 	v &= ~(1 << (irq - RBTX4927_IRQ_IOC));
-	writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB);
+	writeb(v, rbtx4927_imask_addr);
 	mmiowb();
 }
 
diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c
index 1657fd935da..3da20ea3e55 100644
--- a/arch/mips/txx9/rbtx4927/setup.c
+++ b/arch/mips/txx9/rbtx4927/setup.c
@@ -53,17 +53,10 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/reboot.h>
-#include <asm/time.h>
-#include <asm/txx9tmr.h>
 #include <asm/txx9/generic.h>
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4927.h>
 #include <asm/txx9/tx4938.h>	/* for TX4937 */
-#ifdef CONFIG_SERIAL_TXX9
-#include <linux/serial_core.h>
-#endif
-
-static int tx4927_ccfg_toeon = 1;
 
 #ifdef CONFIG_PCI
 static void __init tx4927_pci_setup(void)
@@ -184,14 +177,14 @@ static void toshiba_rbtx4927_restart(char *command)
 	printk(KERN_NOTICE "System Rebooting...\n");
 
 	/* enable the s/w reset register */
-	writeb(RBTX4927_SW_RESET_ENABLE_SET, RBTX4927_SW_RESET_ENABLE);
+	writeb(1, rbtx4927_softresetlock_addr);
 
 	/* wait for enable to be seen */
-	while ((readb(RBTX4927_SW_RESET_ENABLE) &
-		RBTX4927_SW_RESET_ENABLE_SET) == 0x00);
+	while (!(readb(rbtx4927_softresetlock_addr) & 1))
+		;
 
 	/* do a s/w reset */
-	writeb(RBTX4927_SW_RESET_DO_SET, RBTX4927_SW_RESET_DO);
+	writeb(1, rbtx4927_softreset_addr);
 
 	/* do something passive while waiting for reset */
 	local_irq_disable();
@@ -213,9 +206,11 @@ static void toshiba_rbtx4927_power_off(void)
 	/* no return */
 }
 
+static void __init rbtx4927_clock_init(void);
+static void __init rbtx4937_clock_init(void);
+
 static void __init rbtx4927_mem_setup(void)
 {
-	int i;
 	u32 cp0_config;
 	char *argptr;
 
@@ -227,16 +222,18 @@ static void __init rbtx4927_mem_setup(void)
 	cp0_config = cp0_config & ~(TX49_CONF_IC | TX49_CONF_DC);
 	write_c0_config(cp0_config);
 
-	ioport_resource.end = 0xffffffff;
-	iomem_resource.end = 0xffffffff;
+	if (TX4927_REV_PCODE() == 0x4927) {
+		rbtx4927_clock_init();
+		tx4927_setup();
+	} else {
+		rbtx4937_clock_init();
+		tx4938_setup();
+	}
 
 	_machine_restart = toshiba_rbtx4927_restart;
 	_machine_halt = toshiba_rbtx4927_halt;
 	pm_power_off = toshiba_rbtx4927_power_off;
 
-	for (i = 0; i < TX4927_NR_TMR; i++)
-		txx9_tmr_init(TX4927_TMR_REG(0) & 0xfffffffffULL);
-
 #ifdef CONFIG_PCI
 	txx9_alloc_pci_controller(&txx9_primary_pcic,
 				  RBTX4927_PCIMEM, RBTX4927_PCIMEM_SIZE,
@@ -245,36 +242,13 @@ static void __init rbtx4927_mem_setup(void)
 	set_io_port_base(KSEG1 + RBTX4927_ISA_IO_OFFSET);
 #endif
 
-	/* CCFG */
-	/* do reset on watchdog */
-	tx4927_ccfg_set(TX4927_CCFG_WR);
-	/* enable Timeout BusError */
-	if (tx4927_ccfg_toeon)
-		tx4927_ccfg_set(TX4927_CCFG_TOE);
-
-#ifdef CONFIG_SERIAL_TXX9
-	{
-		extern int early_serial_txx9_setup(struct uart_port *port);
-		struct uart_port req;
-		for(i = 0; i < 2; i++) {
-			memset(&req, 0, sizeof(req));
-			req.line = i;
-			req.iotype = UPIO_MEM;
-			req.membase = (char *)(0xff1ff300 + i * 0x100);
-			req.mapbase = 0xff1ff300 + i * 0x100;
-			req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i);
-			req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
-			req.uartclk = 50000000;
-			early_serial_txx9_setup(&req);
-		}
-	}
+	tx4927_setup_serial();
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
         argptr = prom_getcmdline();
         if (strstr(argptr, "console=") == NULL) {
                 strcat(argptr, " console=ttyS0,38400");
         }
 #endif
-#endif
 
 #ifdef CONFIG_ROOT_NFS
         argptr = prom_getcmdline();
@@ -291,19 +265,7 @@ static void __init rbtx4927_mem_setup(void)
 #endif
 }
 
-static void __init rbtx49x7_common_time_init(void)
-{
-	/* change default value to udelay/mdelay take reasonable time */
-	loops_per_jiffy = txx9_cpu_clock / HZ / 2;
-
-	mips_hpt_frequency = txx9_cpu_clock / 2;
-	if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS)
-		txx9_clockevent_init(TX4927_TMR_REG(0) & 0xfffffffffULL,
-				     TXX9_IRQ_BASE + 17,
-				     50000000);
-}
-
-static void __init rbtx4927_time_init(void)
+static void __init rbtx4927_clock_init(void)
 {
 	/*
 	 * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz.
@@ -325,11 +287,9 @@ static void __init rbtx4927_time_init(void)
 	default:
 		txx9_cpu_clock = 200000000;	/* 200MHz */
 	}
-
-	rbtx49x7_common_time_init();
 }
 
-static void __init rbtx4937_time_init(void)
+static void __init rbtx4937_clock_init(void)
 {
 	/*
 	 * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz.
@@ -357,15 +317,18 @@ static void __init rbtx4937_time_init(void)
 	default:
 		txx9_cpu_clock = 333333333;	/* 333MHz */
 	}
+}
 
-	rbtx49x7_common_time_init();
+static void __init rbtx4927_time_init(void)
+{
+	tx4927_time_init(0);
 }
 
 static int __init toshiba_rbtx4927_rtc_init(void)
 {
-	static struct resource __initdata res = {
-		.start	= 0x1c010000,
-		.end	= 0x1c010000 + 0x800 - 1,
+	struct resource res = {
+		.start	= RBTX4927_BRAMRTC_BASE - IO_BASE,
+		.end	= RBTX4927_BRAMRTC_BASE - IO_BASE + 0x800 - 1,
 		.flags	= IORESOURCE_MEM,
 	};
 	struct platform_device *dev =
@@ -375,7 +338,7 @@ static int __init toshiba_rbtx4927_rtc_init(void)
 
 static int __init rbtx4927_ne_init(void)
 {
-	static struct resource __initdata res[] = {
+	struct resource res[] = {
 		{
 			.start	= RBTX4927_RTL_8019_BASE,
 			.end	= RBTX4927_RTL_8019_BASE + 0x20 - 1,
@@ -434,7 +397,7 @@ struct txx9_board_vec rbtx4937_vec __initdata = {
 	.prom_init = rbtx4927_prom_init,
 	.mem_setup = rbtx4927_mem_setup,
 	.irq_setup = rbtx4927_irq_setup,
-	.time_init = rbtx4937_time_init,
+	.time_init = rbtx4927_time_init,
 	.device_init = rbtx4927_device_init,
 	.arch_init = rbtx4937_arch_init,
 #ifdef CONFIG_PCI
diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c
index c1e076c7b2d..6c2b99bb8af 100644
--- a/arch/mips/txx9/rbtx4938/setup.c
+++ b/arch/mips/txx9/rbtx4938/setup.c
@@ -20,21 +20,14 @@
 #include <linux/gpio.h>
 
 #include <asm/reboot.h>
-#include <asm/time.h>
-#include <asm/txx9tmr.h>
 #include <asm/io.h>
 #include <asm/txx9/generic.h>
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4938.h>
-#ifdef CONFIG_SERIAL_TXX9
-#include <linux/serial_core.h>
-#endif
 #include <linux/spi/spi.h>
 #include <asm/txx9/spi.h>
 #include <asm/txx9pio.h>
 
-static int tx4938_ccfg_toeon = 1;
-
 static void rbtx4938_machine_halt(void)
 {
         printk(KERN_NOTICE "System Halted\n");
@@ -182,189 +175,10 @@ static void __init rbtx4938_spi_setup(void)
 }
 
 static struct resource rbtx4938_fpga_resource;
-static struct resource tx4938_sdram_resource[4];
-static struct resource tx4938_sram_resource;
-
-void __init tx4938_board_setup(void)
-{
-	int i;
-	unsigned long divmode;
-	int cpuclk = 0;
-	unsigned long pcode = TX4938_REV_PCODE();
-
-	ioport_resource.start = 0;
-	ioport_resource.end = 0xffffffff;
-	iomem_resource.start = 0;
-	iomem_resource.end = 0xffffffff;	/* expand to 4GB */
-
-	txx9_reg_res_init(pcode, TX4938_REG_BASE,
-			  TX4938_REG_SIZE);
-	/* SDRAMC,EBUSC are configured by PROM */
-	for (i = 0; i < 8; i++) {
-		if (!(TX4938_EBUSC_CR(i) & 0x8))
-			continue;	/* disabled */
-		txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i);
-		txx9_ce_res[i].end =
-			txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1;
-		request_resource(&iomem_resource, &txx9_ce_res[i]);
-	}
-
-	/* clocks */
-	if (txx9_master_clock) {
-		u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg);
-		/* calculate gbus_clock and cpu_clock_freq from master_clock */
-		divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK;
-		switch (divmode) {
-		case TX4938_CCFG_DIVMODE_8:
-		case TX4938_CCFG_DIVMODE_10:
-		case TX4938_CCFG_DIVMODE_12:
-		case TX4938_CCFG_DIVMODE_16:
-		case TX4938_CCFG_DIVMODE_18:
-			txx9_gbus_clock = txx9_master_clock * 4; break;
-		default:
-			txx9_gbus_clock = txx9_master_clock;
-		}
-		switch (divmode) {
-		case TX4938_CCFG_DIVMODE_2:
-		case TX4938_CCFG_DIVMODE_8:
-			cpuclk = txx9_gbus_clock * 2; break;
-		case TX4938_CCFG_DIVMODE_2_5:
-		case TX4938_CCFG_DIVMODE_10:
-			cpuclk = txx9_gbus_clock * 5 / 2; break;
-		case TX4938_CCFG_DIVMODE_3:
-		case TX4938_CCFG_DIVMODE_12:
-			cpuclk = txx9_gbus_clock * 3; break;
-		case TX4938_CCFG_DIVMODE_4:
-		case TX4938_CCFG_DIVMODE_16:
-			cpuclk = txx9_gbus_clock * 4; break;
-		case TX4938_CCFG_DIVMODE_4_5:
-		case TX4938_CCFG_DIVMODE_18:
-			cpuclk = txx9_gbus_clock * 9 / 2; break;
-		}
-		txx9_cpu_clock = cpuclk;
-	} else {
-		u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg);
-		if (txx9_cpu_clock == 0) {
-			txx9_cpu_clock = 300000000;	/* 300MHz */
-		}
-		/* calculate gbus_clock and master_clock from cpu_clock_freq */
-		cpuclk = txx9_cpu_clock;
-		divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK;
-		switch (divmode) {
-		case TX4938_CCFG_DIVMODE_2:
-		case TX4938_CCFG_DIVMODE_8:
-			txx9_gbus_clock = cpuclk / 2; break;
-		case TX4938_CCFG_DIVMODE_2_5:
-		case TX4938_CCFG_DIVMODE_10:
-			txx9_gbus_clock = cpuclk * 2 / 5; break;
-		case TX4938_CCFG_DIVMODE_3:
-		case TX4938_CCFG_DIVMODE_12:
-			txx9_gbus_clock = cpuclk / 3; break;
-		case TX4938_CCFG_DIVMODE_4:
-		case TX4938_CCFG_DIVMODE_16:
-			txx9_gbus_clock = cpuclk / 4; break;
-		case TX4938_CCFG_DIVMODE_4_5:
-		case TX4938_CCFG_DIVMODE_18:
-			txx9_gbus_clock = cpuclk * 2 / 9; break;
-		}
-		switch (divmode) {
-		case TX4938_CCFG_DIVMODE_8:
-		case TX4938_CCFG_DIVMODE_10:
-		case TX4938_CCFG_DIVMODE_12:
-		case TX4938_CCFG_DIVMODE_16:
-		case TX4938_CCFG_DIVMODE_18:
-			txx9_master_clock = txx9_gbus_clock / 4; break;
-		default:
-			txx9_master_clock = txx9_gbus_clock;
-		}
-	}
-	/* change default value to udelay/mdelay take reasonable time */
-	loops_per_jiffy = txx9_cpu_clock / HZ / 2;
-
-	/* CCFG */
-	/* clear WatchDogReset,BusErrorOnWrite flag (W1C) */
-	tx4938_ccfg_set(TX4938_CCFG_WDRST | TX4938_CCFG_BEOW);
-	/* do reset on watchdog */
-	tx4938_ccfg_set(TX4938_CCFG_WR);
-	/* clear PCIC1 reset */
-	txx9_clear64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST);
-
-	/* enable Timeout BusError */
-	if (tx4938_ccfg_toeon)
-		tx4938_ccfg_set(TX4938_CCFG_TOE);
-
-	/* DMA selection */
-	txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL);
-
-	/* Use external clock for external arbiter */
-	if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB))
-		txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL);
-
-	printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n",
-	       txx9_pcode_str,
-	       (cpuclk + 500000) / 1000000,
-	       (txx9_master_clock + 500000) / 1000000,
-	       (__u32)____raw_readq(&tx4938_ccfgptr->crir),
-	       (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg),
-	       (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg));
-
-	printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str);
-	for (i = 0; i < 4; i++) {
-		u64 cr = TX4938_SDRAMC_CR(i);
-		unsigned long ram_base, ram_size;
-		if (!((unsigned long)cr & 0x00000400))
-			continue;	/* disabled */
-		ram_base = (unsigned long)(cr >> 49) << 21;
-		ram_size = ((unsigned long)(cr >> 33) + 1) << 21;
-		if (ram_base >= 0x20000000)
-			continue;	/* high memory (ignore) */
-		printk(KERN_CONT " CR%d:%016llx", i, cr);
-		tx4938_sdram_resource[i].name = "SDRAM";
-		tx4938_sdram_resource[i].start = ram_base;
-		tx4938_sdram_resource[i].end = ram_base + ram_size - 1;
-		tx4938_sdram_resource[i].flags = IORESOURCE_MEM;
-		request_resource(&iomem_resource, &tx4938_sdram_resource[i]);
-	}
-	printk(KERN_CONT " TR:%09llx\n", ____raw_readq(&tx4938_sdramcptr->tr));
-
-	/* SRAM */
-	if (____raw_readq(&tx4938_sramcptr->cr) & 1) {
-		unsigned int size = 0x800;
-		unsigned long base =
-			(____raw_readq(&tx4938_sramcptr->cr) >> (39-11))
-			& ~(size - 1);
-		tx4938_sram_resource.name = "SRAM";
-		tx4938_sram_resource.start = base;
-		tx4938_sram_resource.end = base + size - 1;
-		tx4938_sram_resource.flags = IORESOURCE_MEM;
-		request_resource(&iomem_resource, &tx4938_sram_resource);
-	}
-
-	/* TMR */
-	for (i = 0; i < TX4938_NR_TMR; i++)
-		txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL);
-
-	/* enable DMA */
-	for (i = 0; i < 2; i++)
-		____raw_writeq(TX4938_DMA_MCR_MSTEN,
-			       (void __iomem *)(TX4938_DMA_REG(i) + 0x50));
-
-	/* PIO */
-	__raw_writel(0, &tx4938_pioptr->maskcpu);
-	__raw_writel(0, &tx4938_pioptr->maskext);
-
-#ifdef CONFIG_PCI
-	txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0);
-#endif
-}
 
 static void __init rbtx4938_time_init(void)
 {
-	mips_hpt_frequency = txx9_cpu_clock / 2;
-	if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS)
-		txx9_clockevent_init(TX4938_TMR_REG(0) & 0xfffffffffULL,
-				     TXX9_IRQ_BASE + TX4938_IR_TMR(0),
-				     txx9_gbus_clock / 2);
+	tx4938_time_init(0);
 }
 
 static void __init rbtx4938_mem_setup(void)
@@ -372,39 +186,24 @@ static void __init rbtx4938_mem_setup(void)
 	unsigned long long pcfg;
 	char *argptr;
 
-	iomem_resource.end = 0xffffffff;	/* 4GB */
-
 	if (txx9_master_clock == 0)
 		txx9_master_clock = 25000000; /* 25MHz */
-	tx4938_board_setup();
-#ifndef CONFIG_PCI
+
+	tx4938_setup();
+
+#ifdef CONFIG_PCI
+	txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0);
+#else
 	set_io_port_base(RBTX4938_ETHER_BASE);
 #endif
 
-#ifdef CONFIG_SERIAL_TXX9
-	{
-		extern int early_serial_txx9_setup(struct uart_port *port);
-		int i;
-		struct uart_port req;
-		for(i = 0; i < 2; i++) {
-			memset(&req, 0, sizeof(req));
-			req.line = i;
-			req.iotype = UPIO_MEM;
-			req.membase = (char *)(0xff1ff300 + i * 0x100);
-			req.mapbase = 0xff1ff300 + i * 0x100;
-			req.irq = RBTX4938_IRQ_IRC_SIO(i);
-			req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
-			req.uartclk = 50000000;
-			early_serial_txx9_setup(&req);
-		}
-	}
+	tx4938_setup_serial();
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
         argptr = prom_getcmdline();
         if (strstr(argptr, "console=") == NULL) {
                 strcat(argptr, " console=ttyS0,38400");
         }
 #endif
-#endif
 
 #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61
 	printk("PIOSEL: disabling both ata and nand selection\n");
@@ -568,7 +367,6 @@ static int __init rbtx4938_spi_init(void)
 
 static void __init rbtx4938_arch_init(void)
 {
-	txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16);
 	gpiochip_add(&rbtx4938_spi_gpio_chip);
 	rbtx4938_pci_setup();
 	rbtx4938_spi_init();
-- 
cgit v1.2.3


From e0eb730757665d7e8ec0e79d9042a9311f3edb7e Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Sat, 19 Jul 2008 01:51:52 +0900
Subject: [MIPS] TXx9: Fix some sparse warnings

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/txx9/generic/setup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 3715a8f5ea4..8c60c78b9a9 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -19,7 +19,9 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/gpio.h>
 #include <asm/bootinfo.h>
+#include <asm/time.h>
 #include <asm/txx9/generic.h>
 #ifdef CONFIG_CPU_TX49XX
 #include <asm/txx9/tx4938.h>
-- 
cgit v1.2.3


From 4914ad4a9f2d484a68422700ba8493db73c7c411 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sun, 20 Jul 2008 11:34:39 +0100
Subject: [MIPS] 32-bit compat: Delete unused sys_truncate64 and
 sys_ftruncate64.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/linux32.c | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 65af3cc90ab..c266211ed65 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -129,23 +129,6 @@ out:
 	return error;
 }
 
-
-asmlinkage int sys_truncate64(const char __user *path, unsigned int high,
-			      unsigned int low)
-{
-	if ((int)high < 0)
-		return -EINVAL;
-	return sys_truncate(path, ((long) high << 32) | low);
-}
-
-asmlinkage int sys_ftruncate64(unsigned int fd, unsigned int high,
-			       unsigned int low)
-{
-	if ((int)high < 0)
-		return -EINVAL;
-	return sys_ftruncate(fd, ((long) high << 32) | low);
-}
-
 /*
  * sys_execve() executes a new program.
  */
-- 
cgit v1.2.3


From 8213bbf9c1c0009872a3278aa7a83ec8f3508195 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sun, 20 Jul 2008 13:16:46 +0100
Subject: [MIPS] Rename MIPS sys_pipe syscall entry point to something
 MIPS-specific.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/scall32-o32.S | 2 +-
 arch/mips/kernel/scall64-64.S  | 2 +-
 arch/mips/kernel/scall64-n32.S | 2 +-
 arch/mips/kernel/scall64-o32.S | 2 +-
 arch/mips/kernel/syscall.c     | 9 ++++++++-
 5 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index c058c0b61a2..fc4fd4d705e 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -354,7 +354,7 @@ einval:	li	v0, -EINVAL
 	sys	sys_mkdir		2
 	sys	sys_rmdir		1	/* 4040 */
 	sys	sys_dup			1
-	sys	sys_pipe		0
+	sys	sysm_pipe		0
 	sys	sys_times		1
 	sys	sys_ni_syscall		0
 	sys	sys_brk			1	/* 4045 */
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index dc597b600c6..2b73fd1e452 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -219,7 +219,7 @@ sys_call_table:
 	PTR	sys_readv
 	PTR	sys_writev
 	PTR	sys_access			/* 5020 */
-	PTR	sys_pipe
+	PTR	sysm_pipe
 	PTR	sys_select
 	PTR	sys_sched_yield
 	PTR	sys_mremap
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 12940eca789..2654e75d2fe 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -141,7 +141,7 @@ EXPORT(sysn32_call_table)
 	PTR	compat_sys_readv
 	PTR	compat_sys_writev
 	PTR	sys_access			/* 6020 */
-	PTR	sys_pipe
+	PTR	sysm_pipe
 	PTR	compat_sys_select
 	PTR	sys_sched_yield
 	PTR	sys_mremap
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9a275efb4f0..76167bea5a7 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -247,7 +247,7 @@ sys_call_table:
 	PTR	sys_mkdir
 	PTR	sys_rmdir			/* 4040 */
 	PTR	sys_dup
-	PTR	sys_pipe
+	PTR	sysm_pipe
 	PTR	compat_sys_times
 	PTR	sys_ni_syscall
 	PTR	sys_brk				/* 4045 */
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index af1bdc89748..3523c8d12ed 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -40,7 +40,14 @@
 #include <asm/sysmips.h>
 #include <asm/uaccess.h>
 
-asmlinkage int sys_pipe(nabi_no_regargs volatile struct pt_regs regs)
+/*
+ * For historic reasons the pipe(2) syscall on MIPS has an unusual calling
+ * convention.  It returns results in registers $v0 / $v1 which means there
+ * is no need for it to verify the validity of a userspace pointer
+ * argument.  Historically that used to be expensive in Linux.  These days
+ * the performance advantage is negligible.
+ */
+asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs)
 {
 	int fd[2];
 	int error, res;
-- 
cgit v1.2.3


From 15648f154a8faea97cbe931e189cf0a57fd066f4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Wed, 16 Jul 2008 21:52:25 +0100
Subject: simserial: Fix up for ldisc changes

Noted by Tony Luck although I've done the patches differently and also
removed some other bogus oddments.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/hp/sim/simserial.c | 46 +++-----------------------------------------
 1 file changed, 3 insertions(+), 43 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 23cafc80d2a..24b1ad5334c 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -193,18 +193,6 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
  * -------------------------------------------------------------------
  */
 
-#if 0
-/*
- * not really used in our situation so keep them commented out for now
- */
-static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */
-static void do_serial_bh(void)
-{
-	run_task_queue(&tq_serial);
-	printk(KERN_ERR "do_serial_bh: called\n");
-}
-#endif
-
 static void do_softint(struct work_struct *private_)
 {
 	printk(KERN_ERR "simserial: do_softint called\n");
@@ -351,11 +339,7 @@ static void rs_flush_buffer(struct tty_struct *tty)
 	info->xmit.head = info->xmit.tail = 0;
 	local_irq_restore(flags);
 
-	wake_up_interruptible(&tty->write_wait);
-
-	if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
-	    tty->ldisc.write_wakeup)
-		(tty->ldisc.write_wakeup)(tty);
+	tty_wakeup(tty);
 }
 
 /*
@@ -404,12 +388,6 @@ static void rs_unthrottle(struct tty_struct * tty)
 	printk(KERN_INFO "simrs_unthrottle called\n");
 }
 
-/*
- * rs_break() --- routine which turns the break handling on or off
- */
-static void rs_break(struct tty_struct *tty, int break_state)
-{
-}
 
 static int rs_ioctl(struct tty_struct *tty, struct file * file,
 		    unsigned int cmd, unsigned long arg)
@@ -422,14 +400,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
 	}
 
 	switch (cmd) {
-		case TIOCMGET:
-			printk(KERN_INFO "rs_ioctl: TIOCMGET called\n");
-			return -EINVAL;
-		case TIOCMBIS:
-		case TIOCMBIC:
-		case TIOCMSET:
-			printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n");
-			return -EINVAL;
 		case TIOCGSERIAL:
 			printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
 			return 0;
@@ -488,14 +458,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
 
 static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
 {
-	unsigned int cflag = tty->termios->c_cflag;
-
-	if (   (cflag == old_termios->c_cflag)
-	    && (   RELEVANT_IFLAG(tty->termios->c_iflag)
-		== RELEVANT_IFLAG(old_termios->c_iflag)))
-	  return;
-
-
 	/* Handle turning off CRTSCTS */
 	if ((old_termios->c_cflag & CRTSCTS) &&
 	    !(tty->termios->c_cflag & CRTSCTS)) {
@@ -623,9 +585,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 	 * the line discipline to only process XON/XOFF characters.
 	 */
 	shutdown(info);
-	if (tty->ops->flush_buffer)
-		tty->ops->flush_buffer(tty);
-	if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty);
+	rs_flush_buffer(tty);
+	tty_ldisc_flush(tty);
 	info->event = 0;
 	info->tty = NULL;
 	if (info->blocked_open) {
@@ -955,7 +916,6 @@ static const struct tty_operations hp_ops = {
 	.stop = rs_stop,
 	.start = rs_start,
 	.hangup = rs_hangup,
-	.break_ctl = rs_break,
 	.wait_until_sent = rs_wait_until_sent,
 	.read_proc = rs_read_proc,
 };
-- 
cgit v1.2.3


From 34492b5834ede63d896c93ccba9a4657a8435dc2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 16 Jul 2008 21:54:01 +0100
Subject: MN10300: Fix MN10300's serial port driver to get at its tty_struct

Fix MN10300's serial port driver to get at its tty_struct, as this moved
from struct uart_info into struct tty_port in an earlier patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/mn10300-serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index b9c268c6b2f..8b054e7a8ae 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -392,7 +392,7 @@ static int mask_test_and_clear(volatile u8 *ptr, u8 mask)
 static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 {
 	struct uart_icount *icount = &port->uart.icount;
-	struct tty_struct *tty = port->uart.info->tty;
+	struct tty_struct *tty = port->uart.info->port.tty;
 	unsigned ix;
 	int count;
 	u8 st, ch, push, status, overrun;
-- 
cgit v1.2.3


From f30828a6745281edda735f642b5f814e1123ecd3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:08 +0200
Subject: m68k: remove CVS keywords

This patch removes CVS keywords that weren't updated for a long time
from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/sun3/idprom.c       | 2 +-
 arch/m68k/sun3/prom/Makefile  | 1 -
 arch/m68k/sun3/prom/console.c | 2 +-
 arch/m68k/sun3/prom/init.c    | 2 +-
 arch/m68k/sun3/prom/misc.c    | 2 +-
 arch/m68k/sun3/prom/printf.c  | 2 +-
 6 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c
index dca6ab6a4ed..6c5336d62fa 100644
--- a/arch/m68k/sun3/idprom.c
+++ b/arch/m68k/sun3/idprom.c
@@ -1,4 +1,4 @@
-/* $Id: idprom.c,v 1.22 1996/11/13 05:09:25 davem Exp $
+/*
  * idprom.c: Routines to load the idprom into kernel addresses and
  *           interpret the data contained within.
  *
diff --git a/arch/m68k/sun3/prom/Makefile b/arch/m68k/sun3/prom/Makefile
index 6e48ae2a717..da7eac06bca 100644
--- a/arch/m68k/sun3/prom/Makefile
+++ b/arch/m68k/sun3/prom/Makefile
@@ -1,4 +1,3 @@
-# $Id: Makefile,v 1.5 1995/11/25 00:59:48 davem Exp $
 # Makefile for the Sun Boot PROM interface library under
 # Linux.
 #
diff --git a/arch/m68k/sun3/prom/console.c b/arch/m68k/sun3/prom/console.c
index 52c1427863d..5812560b70f 100644
--- a/arch/m68k/sun3/prom/console.c
+++ b/arch/m68k/sun3/prom/console.c
@@ -1,4 +1,4 @@
-/* $Id: console.c,v 1.10 1996/12/18 06:46:54 tridge Exp $
+/*
  * console.c: Routines that deal with sending and receiving IO
  *            to/from the current console device using the PROM.
  *
diff --git a/arch/m68k/sun3/prom/init.c b/arch/m68k/sun3/prom/init.c
index 202adfcc316..5f85681af5a 100644
--- a/arch/m68k/sun3/prom/init.c
+++ b/arch/m68k/sun3/prom/init.c
@@ -1,4 +1,4 @@
-/* $Id: init.c,v 1.9 1996/12/18 06:46:55 tridge Exp $
+/*
  * init.c:  Initialize internal variables used by the PROM
  *          library functions.
  *
diff --git a/arch/m68k/sun3/prom/misc.c b/arch/m68k/sun3/prom/misc.c
index b88716f2c68..3d60e1337f7 100644
--- a/arch/m68k/sun3/prom/misc.c
+++ b/arch/m68k/sun3/prom/misc.c
@@ -1,4 +1,4 @@
-/* $Id: misc.c,v 1.15 1997/05/14 20:45:00 davem Exp $
+/*
  * misc.c:  Miscellaneous prom functions that don't belong
  *          anywhere else.
  *
diff --git a/arch/m68k/sun3/prom/printf.c b/arch/m68k/sun3/prom/printf.c
index e7bfde377b5..c8cf98d97f2 100644
--- a/arch/m68k/sun3/prom/printf.c
+++ b/arch/m68k/sun3/prom/printf.c
@@ -1,4 +1,4 @@
-/* $Id: printf.c,v 1.5 1996/04/04 16:31:07 tridge Exp $
+/*
  * printf.c:  Internal prom library printf facility.
  *
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
-- 
cgit v1.2.3


From 97d26e73d729c8d967bc5eb9086321956c444dd4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 17 Jul 2008 21:16:11 +0200
Subject: m68k: vmlinux-std/sun3.lds.S cleanup - use PAGE_SIZE macro

This patch includes the page.h header in the linker scripts, which
allows us to use the PAGE_SIZE macro instead of numeric constants.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/kernel/vmlinux-std.lds  | 3 ++-
 arch/m68k/kernel/vmlinux-sun3.lds | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 7537cc5e615..99b0784c055 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -1,6 +1,7 @@
 /* ld script to make m68k Linux kernel */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
 
 OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k")
 OUTPUT_ARCH(m68k)
@@ -41,7 +42,7 @@ SECTIONS
   _edata = .;			/* End of data section */
 
   /* will be freed after init */
-  . = ALIGN(4096);		/* Init code and data */
+  . = ALIGN(PAGE_SIZE);		/* Init code and data */
   __init_begin = .;
   .init.text : {
 	_sinittext = .;
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index cdc313e7c29..8a4919e4d36 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -1,6 +1,7 @@
 /* ld script to make m68k Linux kernel */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
 
 OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k")
 OUTPUT_ARCH(m68k)
@@ -34,7 +35,7 @@ SECTIONS
   _edata = .;
 
   /* will be freed after init */
-  . = ALIGN(8192);	/* Init code and data */
+  . = ALIGN(PAGE_SIZE);	/* Init code and data */
 __init_begin = .;
 	.init.text : {
 		_sinittext = .;
@@ -61,12 +62,12 @@ __init_begin = .;
 	}
 	SECURITY_INIT
 #ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(8192);
+	. = ALIGN(PAGE_SIZE);
 	__initramfs_start = .;
 	.init.ramfs : { *(.init.ramfs) }
 	__initramfs_end = .;
 #endif
-	. = ALIGN(8192);
+	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 	.data.init.task : { *(.data.init_task) }
 
-- 
cgit v1.2.3


From edfd92f67eec1bdd905dd7841416eaf945a5b92f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 17 Jul 2008 21:16:12 +0200
Subject: m68k: Allow no CPU/platform type for allnoconfig

Allow no CPU/platform type for allnoconfig
  - Provide a dummy value for FPSTATESIZE if no CPU type was selected
  - Provide a dummy value for NR_IRQS if no platform type was selected
  - Warn the user if no CPU or platform type was selected

Note: you still cannot build an allnoconfig kernel, as CONFIG_SWAP=n doesn't
build and we cannot easily fix that
(http://groups.google.com/group/linux.kernel/browse_thread/thread/d430c78b07e1827b)

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/kernel/setup.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index a9fb83a8c18..b1f39e4cedb 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c
@@ -26,6 +26,7 @@
 
 #include <asm/bootinfo.h>
 #include <asm/setup.h>
+#include <asm/fpu.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
@@ -40,6 +41,11 @@
 #include <asm/dvma.h>
 #endif
 
+#if !FPSTATESIZE || !NR_IRQS
+#warning No CPU/platform type selected, your kernel will not work!
+#warning Are you building an allnoconfig kernel?
+#endif
+
 unsigned long m68k_machtype;
 EXPORT_SYMBOL(m68k_machtype);
 unsigned long m68k_cputype;
-- 
cgit v1.2.3


From a0c14d28df8fcf939a8efd9332ace164e9f931fb Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Thu, 17 Jul 2008 21:16:13 +0200
Subject: Stringify support commas

> This is a no-no for those archs that still use -traditional.
 > > I dunno if this is a problem for you at the moment and the
 > > right fix is anyway to nuke -traditional.
 > >
 > >     Sam

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/fpsp040/Makefile  | 1 -
 arch/m68k/ifpsp060/Makefile | 1 -
 arch/m68k/kernel/Makefile   | 2 --
 arch/m68k/lib/Makefile      | 2 --
 arch/m68k/math-emu/Makefile | 2 --
 5 files changed, 8 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile
index 0214d2f6f8b..9506d883ace 100644
--- a/arch/m68k/fpsp040/Makefile
+++ b/arch/m68k/fpsp040/Makefile
@@ -10,7 +10,6 @@ obj-y    := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \
 	    x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \
 	    x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o
 
-EXTRA_AFLAGS := -traditional
 EXTRA_LDFLAGS := -x
 
 $(OS_OBJS): fpsp.h
diff --git a/arch/m68k/ifpsp060/Makefile b/arch/m68k/ifpsp060/Makefile
index 2fe8472cb5e..43b43504945 100644
--- a/arch/m68k/ifpsp060/Makefile
+++ b/arch/m68k/ifpsp060/Makefile
@@ -6,5 +6,4 @@
 
 obj-y := fskeleton.o iskeleton.o os.o
 
-EXTRA_AFLAGS := -traditional
 EXTRA_LDFLAGS := -x
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 7a62a718143..3a7f6222550 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -16,5 +16,3 @@ devres-y = ../../../kernel/irq/devres.o
 
 obj-$(CONFIG_PCI)	+= bios32.o
 obj-y$(CONFIG_MMU_SUN3) += dma.o	# no, it's not a typo
-
-EXTRA_AFLAGS := -traditional
diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile
index a18af095cd7..af9abf8d9d9 100644
--- a/arch/m68k/lib/Makefile
+++ b/arch/m68k/lib/Makefile
@@ -2,7 +2,5 @@
 # Makefile for m68k-specific library files..
 #
 
-EXTRA_AFLAGS := -traditional
-
 lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
 	   checksum.o string.o uaccess.o
diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile
index 53994040181..a0935bf9836 100644
--- a/arch/m68k/math-emu/Makefile
+++ b/arch/m68k/math-emu/Makefile
@@ -2,8 +2,6 @@
 # Makefile for the linux kernel.
 #
 
-EXTRA_AFLAGS := -traditional
-
 #EXTRA_AFLAGS += -DFPU_EMU_DEBUG
 #EXTRA_CFLAGS += -DFPU_EMU_DEBUG
 
-- 
cgit v1.2.3


From b739912efc02f80cc4dc5eaef07e5bc7eafee1b0 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:14 +0200
Subject: m68k: make multi_defconfig the default defconfig

It seems to match the intention behind multi_defconfig to make it the
default defconfig.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index b15173f28a2..8133dbc4496 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -13,7 +13,7 @@
 # Copyright (C) 1994 by Hamish Macdonald
 #
 
-KBUILD_DEFCONFIG := amiga_defconfig
+KBUILD_DEFCONFIG := multi_defconfig
 
 # override top level makefile
 AS += -m68020
-- 
cgit v1.2.3


From d33b4432e634246eef00ef4d425939c253f70dd6 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:15 +0200
Subject: m68k: remove AP1000 code

Unless I'm missing something, this is code for a sparc machine (one that
even the sparc code no longer supports) which got carried over to m68k
when these files were copied.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/sun3/prom/console.c | 4 ----
 arch/m68k/sun3/prom/init.c    | 9 ---------
 arch/m68k/sun3/prom/printf.c  | 5 -----
 3 files changed, 18 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/sun3/prom/console.c b/arch/m68k/sun3/prom/console.c
index 5812560b70f..2bcb6e4bfe5 100644
--- a/arch/m68k/sun3/prom/console.c
+++ b/arch/m68k/sun3/prom/console.c
@@ -104,8 +104,6 @@ prom_query_input_device()
 				return PROMDEV_ITTYB;
 		}
 		return PROMDEV_I_UNK;
-	case PROM_AP1000:
-		return PROMDEV_I_UNK;
 	};
 }
 #endif
@@ -166,8 +164,6 @@ prom_query_output_device()
 			};
 		}
 		break;
-	case PROM_AP1000:
-		return PROMDEV_I_UNK;
 	};
 	return PROMDEV_O_UNK;
 }
diff --git a/arch/m68k/sun3/prom/init.c b/arch/m68k/sun3/prom/init.c
index 5f85681af5a..d8e6349336b 100644
--- a/arch/m68k/sun3/prom/init.c
+++ b/arch/m68k/sun3/prom/init.c
@@ -31,11 +31,6 @@ extern void prom_ranges_init(void);
 
 void __init prom_init(struct linux_romvec *rp)
 {
-#ifdef CONFIG_AP1000
-	extern struct linux_romvec *ap_prom_init(void);
-	rp = ap_prom_init();
-#endif
-
 	romvec = rp;
 #ifndef CONFIG_SUN3
 	switch(romvec->pv_romvers) {
@@ -53,10 +48,6 @@ void __init prom_init(struct linux_romvec *rp)
 		prom_printf("PROMLIB: Sun IEEE Prom not supported yet\n");
 		prom_halt();
 		break;
-	case 42: /* why not :-) */
-		prom_vers = PROM_AP1000;
-		break;
-
 	default:
 		prom_printf("PROMLIB: Bad PROM version %d\n",
 			    romvec->pv_romvers);
diff --git a/arch/m68k/sun3/prom/printf.c b/arch/m68k/sun3/prom/printf.c
index c8cf98d97f2..df85018f487 100644
--- a/arch/m68k/sun3/prom/printf.c
+++ b/arch/m68k/sun3/prom/printf.c
@@ -37,10 +37,6 @@ prom_printf(char *fmt, ...)
 
 	bptr = ppbuf;
 
-#ifdef CONFIG_AP1000
-        ap_write(1,bptr,strlen(bptr));
-#else
-
 #ifdef CONFIG_KGDB
 	if (kgdb_initialized) {
 		printk("kgdb_initialized = %d\n", kgdb_initialized);
@@ -53,7 +49,6 @@ prom_printf(char *fmt, ...)
 
 		prom_putchar(ch);
 	}
-#endif
 #endif
 	va_end(args);
 	return;
-- 
cgit v1.2.3


From 8468afc039f03837066132be14cdd9e5fa726f0b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:21 +0200
Subject: export amiga_vblank

This patch fixes the following build error:

<--  snip  -->

..
  Building modules, stage 2.
  MODPOST 1203 modules
ERROR: "amiga_vblank" [drivers/video/amifb.ko] undefined!
..
make[2]: *** [__modpost] Error 1

<--  snip  -->

Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/amiga/config.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 50f5daab46b..dd0734e54bb 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -51,6 +51,8 @@ unsigned long amiga_chipset;
 EXPORT_SYMBOL(amiga_chipset);
 
 unsigned char amiga_vblank;
+EXPORT_SYMBOL(amiga_vblank);
+
 unsigned char amiga_psfreq;
 
 struct amiga_hw_present amiga_hw_present;
-- 
cgit v1.2.3


From 0795dbcc4c4c93a929463957993c04cf5fec346c Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:23 +0200
Subject: m68k/amiga/: possible cleanups

This patch contains the following possible cleanups:
- amiints.c: add a proper prototype for amiga_init_IRQ() in
             include/asm-m68k/amigaints.h
- make the following needlessly global code static:
  - config.c: amiga_model
  - config.c: amiga_psfreq
  - config.c: amiga_serial_console_write()
- #if 0 the following unused functions:
  - config.c: amiga_serial_puts()
  - config.c: amiga_serial_console_wait_key()
  - config.c: amiga_serial_gets()
- remove the following unused variable:
  - config.c: amiga_masterclock

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/amiga/config.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index dd0734e54bb..df679d96b1c 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -36,14 +36,11 @@
 #include <asm/machdep.h>
 #include <asm/io.h>
 
-unsigned long amiga_model;
-EXPORT_SYMBOL(amiga_model);
+static unsigned long amiga_model;
 
 unsigned long amiga_eclock;
 EXPORT_SYMBOL(amiga_eclock);
 
-unsigned long amiga_masterclock;
-
 unsigned long amiga_colorclock;
 EXPORT_SYMBOL(amiga_colorclock);
 
@@ -53,7 +50,7 @@ EXPORT_SYMBOL(amiga_chipset);
 unsigned char amiga_vblank;
 EXPORT_SYMBOL(amiga_vblank);
 
-unsigned char amiga_psfreq;
+static unsigned char amiga_psfreq;
 
 struct amiga_hw_present amiga_hw_present;
 EXPORT_SYMBOL(amiga_hw_present);
@@ -94,8 +91,6 @@ static char *amiga_models[] __initdata = {
 static char amiga_model_name[13] = "Amiga ";
 
 static void amiga_sched_init(irq_handler_t handler);
-/* amiga specific irq functions */
-extern void amiga_init_IRQ(void);
 static void amiga_get_model(char *model);
 static int amiga_get_hardware_list(char *buffer);
 /* amiga specific timer functions */
@@ -109,8 +104,6 @@ static void amiga_reset(void);
 extern void amiga_init_sound(void);
 static void amiga_mem_console_write(struct console *co, const char *b,
 				    unsigned int count);
-void amiga_serial_console_write(struct console *co, const char *s,
-				unsigned int count);
 #ifdef CONFIG_HEARTBEAT
 static void amiga_heartbeat(int on);
 #endif
@@ -420,8 +413,7 @@ void __init config_amiga(void)
 	mach_heartbeat = amiga_heartbeat;
 #endif
 
-	/* Fill in the clock values (based on the 700 kHz E-Clock) */
-	amiga_masterclock = 40*amiga_eclock;	/* 28 MHz */
+	/* Fill in the clock value (based on the 700 kHz E-Clock) */
 	amiga_colorclock = 5*amiga_eclock;	/* 3.5 MHz */
 
 	/* clear all DMA bits */
@@ -819,8 +811,8 @@ static void amiga_serial_putc(char c)
 		;
 }
 
-void amiga_serial_console_write(struct console *co, const char *s,
-				unsigned int count)
+static void amiga_serial_console_write(struct console *co, const char *s,
+				       unsigned int count)
 {
 	while (count--) {
 		if (*s == '\n')
@@ -829,7 +821,7 @@ void amiga_serial_console_write(struct console *co, const char *s,
 	}
 }
 
-#ifdef CONFIG_SERIAL_CONSOLE
+#if 0
 void amiga_serial_puts(const char *s)
 {
 	amiga_serial_console_write(NULL, s, strlen(s));
-- 
cgit v1.2.3


From 5575d0a3c9676b2886adad67dd4b2ac126a49f1f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:24 +0200
Subject: m68k/atari/debug.c: possible cleanups

This patch contains the following possible cleanups:
- make the following needlessly global functions (always) static:
  - atari_mfp_console_write()
  - atari_scc_console_write()
  - atari_midi_console_write()
  - atari_init_mfp_port()
  - atari_init_scc_port()
  - atari_init_midi_port()
- #if 0 the following unused functions:
  - atari_mfp_console_wait_key()
  - atari_scc_console_wait_key()
  - atari_midi_console_wait_key()
- remove the following unused variables:
  - atari_MFP_init_done
  - atari_SCC_init_done

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/atari/debug.c | 37 +++++++------------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c
index 043ddbc61c7..702b15ccfab 100644
--- a/arch/m68k/atari/debug.c
+++ b/arch/m68k/atari/debug.c
@@ -20,14 +20,6 @@
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
 
-/* Flag that Modem1 port is already initialized and used */
-int atari_MFP_init_done;
-EXPORT_SYMBOL(atari_MFP_init_done);
-
-/* Flag that Modem1 port is already initialized and used */
-int atari_SCC_init_done;
-EXPORT_SYMBOL(atari_SCC_init_done);
-
 /* Can be set somewhere, if a SCC master reset has already be done and should
  * not be repeated; used by kgdb */
 int atari_SCC_reset_done;
@@ -47,8 +39,8 @@ static inline void ata_mfp_out(char c)
 	mfp.usart_dta = c;
 }
 
-void atari_mfp_console_write(struct console *co, const char *str,
-			     unsigned int count)
+static void atari_mfp_console_write(struct console *co, const char *str,
+				    unsigned int count)
 {
 	while (count--) {
 		if (*str == '\n')
@@ -66,8 +58,8 @@ static inline void ata_scc_out(char c)
 	scc.cha_b_data = c;
 }
 
-void atari_scc_console_write(struct console *co, const char *str,
-			     unsigned int count)
+static void atari_scc_console_write(struct console *co, const char *str,
+				    unsigned int count)
 {
 	while (count--) {
 		if (*str == '\n')
@@ -83,8 +75,8 @@ static inline void ata_midi_out(char c)
 	acia.mid_data = c;
 }
 
-void atari_midi_console_write(struct console *co, const char *str,
-			      unsigned int count)
+static void atari_midi_console_write(struct console *co, const char *str,
+				     unsigned int count)
 {
 	while (count--) {
 		if (*str == '\n')
@@ -136,7 +128,7 @@ static void atari_par_console_write(struct console *co, const char *str,
 	}
 }
 
-#ifdef CONFIG_SERIAL_CONSOLE
+#if 0
 int atari_mfp_console_wait_key(struct console *co)
 {
 	while (!(mfp.rcv_stat & 0x80))	/* wait for rx buf filled */
@@ -166,11 +158,7 @@ int atari_midi_console_wait_key(struct console *co)
  * SCC serial ports. They're used by the debugging interface, kgdb, and the
  * serial console code.
  */
-#ifndef CONFIG_SERIAL_CONSOLE
 static void __init atari_init_mfp_port(int cflag)
-#else
-void atari_init_mfp_port(int cflag)
-#endif
 {
 	/*
 	 * timer values for 1200...115200 bps; > 38400 select 110, 134, or 150
@@ -193,8 +181,6 @@ void atari_init_mfp_port(int cflag)
 	mfp.tim_dt_d = baud_table[baud];
 	mfp.tim_ct_cd |= 0x01;		/* start timer D, 1:4 */
 	mfp.trn_stat |= 0x01;		/* enable TX */
-
-	atari_MFP_init_done = 1;
 }
 
 #define SCC_WRITE(reg, val)				\
@@ -214,11 +200,7 @@ void atari_init_mfp_port(int cflag)
 			MFPDELAY();			\
 	} while (0)
 
-#ifndef CONFIG_SERIAL_CONSOLE
 static void __init atari_init_scc_port(int cflag)
-#else
-void atari_init_scc_port(int cflag)
-#endif
 {
 	extern int atari_SCC_reset_done;
 	static int clksrc_table[9] =
@@ -277,14 +259,9 @@ void atari_init_scc_port(int cflag)
 	SCC_WRITE(5, reg5 | 8);
 
 	atari_SCC_reset_done = 1;
-	atari_SCC_init_done = 1;
 }
 
-#ifndef CONFIG_SERIAL_CONSOLE
 static void __init atari_init_midi_port(int cflag)
-#else
-void atari_init_midi_port(int cflag)
-#endif
 {
 	int baud = cflag & CBAUD;
 	int csize = ((cflag & CSIZE) == CS8) ? 0x10 : 0x00;
-- 
cgit v1.2.3


From 8dfbdf4abad6e5a7bbd097bf7e2c0ec41e0c54b4 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:25 +0200
Subject: m68k/mac/: possible cleanups

This patch contains the following possible cleanups:
- make the following needlessly global code (always) static:
  - baboon.c: struct baboon
  - baboon.c: baboon_irq()
  - config.c: mac_orig_videoaddr
  - config.c: mac_identify()
  - config.c: mac_report_hardware()
  - config.c: mac_debug_console_write()
  - config.c: mac_sccb_console_write()
  - config.c: mac_scca_console_write()
  - config.c: mac_init_scc_port()
  - oss.c: oss_irq()
  - oss.c: oss_nubus_irq()
  - psc.c: psc_debug_dump()
  - psc.c: psc_dma_die_die_die()
  - via.c: rbv_clear
- remove the unused bootparse.c
- #if 0 the following unused functions:
  - config.c: mac_debugging_short()
  - config.c: mac_debugging_long()
- remove the following unused code:
  - config.c: mac_bisize
  - config.c: mac_env
  - config.c: mac_SCC_init_done
  - config.c: mac_SCC_reset_done
  - config.c: mac_init_scca_port()
  - config.c: mac_init_sccb_port()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/mac/Makefile    |   2 +-
 arch/m68k/mac/baboon.c    |  26 +++++-----
 arch/m68k/mac/bootparse.c | 122 ----------------------------------------------
 arch/m68k/mac/config.c    |  11 ++---
 arch/m68k/mac/debug.c     |  41 ++++------------
 arch/m68k/mac/oss.c       |   8 +--
 arch/m68k/mac/psc.c       |   4 +-
 arch/m68k/mac/via.c       |   2 +-
 8 files changed, 35 insertions(+), 181 deletions(-)
 delete mode 100644 arch/m68k/mac/bootparse.c

(limited to 'arch')

diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile
index 1d265ba365a..daebd80bdef 100644
--- a/arch/m68k/mac/Makefile
+++ b/arch/m68k/mac/Makefile
@@ -2,5 +2,5 @@
 # Makefile for Linux arch/m68k/mac source directory
 #
 
-obj-y		:= config.o bootparse.o macints.o iop.o via.o oss.o psc.o \
+obj-y		:= config.o macints.o iop.o via.o oss.o psc.o \
 			baboon.o macboing.o debug.o misc.o
diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c
index 673a1085984..dae9c982aa8 100644
--- a/arch/m68k/mac/baboon.c
+++ b/arch/m68k/mac/baboon.c
@@ -23,9 +23,7 @@
 /* #define DEBUG_IRQS */
 
 int baboon_present;
-volatile struct baboon *baboon;
-
-irqreturn_t baboon_irq(int, void *);
+static volatile struct baboon *baboon;
 
 #if 0
 extern int macide_ack_intr(struct ata_channel *);
@@ -49,21 +47,11 @@ void __init baboon_init(void)
 	printk("Baboon detected at %p\n", baboon);
 }
 
-/*
- * Register the Baboon interrupt dispatcher on nubus slot $C.
- */
-
-void __init baboon_register_interrupts(void)
-{
-	request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST,
-		    "baboon", (void *) baboon);
-}
-
 /*
  * Baboon interrupt handler. This works a lot like a VIA.
  */
 
-irqreturn_t baboon_irq(int irq, void *dev_id)
+static irqreturn_t baboon_irq(int irq, void *dev_id)
 {
 	int irq_bit, irq_num;
 	unsigned char events;
@@ -95,6 +83,16 @@ irqreturn_t baboon_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+/*
+ * Register the Baboon interrupt dispatcher on nubus slot $C.
+ */
+
+void __init baboon_register_interrupts(void)
+{
+	request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST,
+		    "baboon", (void *) baboon);
+}
+
 void baboon_irq_enable(int irq) {
 #ifdef DEBUG_IRQUSE
 	printk("baboon_irq_enable(%d)\n", irq);
diff --git a/arch/m68k/mac/bootparse.c b/arch/m68k/mac/bootparse.c
deleted file mode 100644
index 36d22360982..00000000000
--- a/arch/m68k/mac/bootparse.c
+++ /dev/null
@@ -1,122 +0,0 @@
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <asm/irq.h>
-#include <asm/setup.h>
-#include <asm/bootinfo.h>
-#include <asm/macintosh.h>
-
-/*
- *	Booter vars
- */
-
-int boothowto;
-int _boothowto;
-
-/*
- *	Called early to parse the environment (passed to us from the booter)
- *	into a bootinfo struct. Will die as soon as we have our own booter
- */
-
-#define atol(x)	simple_strtoul(x,NULL,0)
-
-void parse_booter(char *env)
-{
-	char *name;
-	char *value;
-#if 0
-	while(0 && *env)
-#else
-	while(*env)
-#endif
-	{
-		name=env;
-		value=name;
-		while(*value!='='&&*value)
-			value++;
-		if(*value=='=')
-			*value++=0;
-		env=value;
-		while(*env)
-			env++;
-		env++;
-#if 0
-		if(strcmp(name,"VIDEO_ADDR")==0)
-			mac_mch.videoaddr=atol(value);
-		if(strcmp(name,"ROW_BYTES")==0)
-			mac_mch.videorow=atol(value);
-		if(strcmp(name,"SCREEN_DEPTH")==0)
-			mac_mch.videodepth=atol(value);
-		if(strcmp(name,"DIMENSIONS")==0)
-			mac_mch.dimensions=atol(value);
-#endif
-		if(strcmp(name,"BOOTTIME")==0)
-			mac_bi_data.boottime=atol(value);
-		if(strcmp(name,"GMTBIAS")==0)
-			mac_bi_data.gmtbias=atol(value);
-		if(strcmp(name,"BOOTERVER")==0)
-			mac_bi_data.bootver=atol(value);
-		if(strcmp(name,"MACOS_VIDEO")==0)
-			mac_bi_data.videological=atol(value);
-		if(strcmp(name,"MACOS_SCC")==0)
-			mac_bi_data.sccbase=atol(value);
-		if(strcmp(name,"MACHINEID")==0)
-			mac_bi_data.id=atol(value);
-		if(strcmp(name,"MEMSIZE")==0)
-			mac_bi_data.memsize=atol(value);
-		if(strcmp(name,"SERIAL_MODEM_FLAGS")==0)
-			mac_bi_data.serialmf=atol(value);
-		if(strcmp(name,"SERIAL_MODEM_HSKICLK")==0)
-			mac_bi_data.serialhsk=atol(value);
-		if(strcmp(name,"SERIAL_MODEM_GPICLK")==0)
-			mac_bi_data.serialgpi=atol(value);
-		if(strcmp(name,"SERIAL_PRINT_FLAGS")==0)
-			mac_bi_data.printmf=atol(value);
-		if(strcmp(name,"SERIAL_PRINT_HSKICLK")==0)
-			mac_bi_data.printhsk=atol(value);
-		if(strcmp(name,"SERIAL_PRINT_GPICLK")==0)
-			mac_bi_data.printgpi=atol(value);
-		if(strcmp(name,"PROCESSOR")==0)
-			mac_bi_data.cpuid=atol(value);
-		if(strcmp(name,"ROMBASE")==0)
-			mac_bi_data.rombase=atol(value);
-		if(strcmp(name,"TIMEDBRA")==0)
-			mac_bi_data.timedbra=atol(value);
-		if(strcmp(name,"ADBDELAY")==0)
-			mac_bi_data.adbdelay=atol(value);
-	}
-#if 0	/* XXX: TODO with m68k_mach_* */
-	/* Fill in the base stuff */
-	boot_info.machtype=MACH_MAC;
-	/* Read this from the macinfo we got ! */
-/*	boot_info.cputype=CPU_68020|FPUB_68881;*/
-/*	boot_info.memory[0].addr=0;*/
-/*	boot_info.memory[0].size=((mac_bi_data.id>>7)&31)<<20;*/
-	boot_info.num_memory=1;		/* On a MacII */
-	boot_info.ramdisk_size=0;	/* For now */
-	*boot_info.command_line=0;
-#endif
- }
-
-
-void print_booter(char *env)
-{
-	char *name;
-	char *value;
-	while(*env)
-	{
-		name=env;
-		value=name;
-		while(*value!='='&&*value)
-			value++;
-		if(*value=='=')
-			*value++=0;
-		env=value;
-		while(*env)
-			env++;
-		env++;
-		printk("%s=%s\n", name,value);
-	}
- }
-
-
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index ad3e3bacae3..c45e18449f3 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -46,7 +46,6 @@
 /* Mac bootinfo struct */
 
 struct mac_booter_data mac_bi_data;
-int mac_bisize = sizeof mac_bi_data;
 
 /* New m68k bootinfo stuff and videobase */
 
@@ -55,10 +54,8 @@ extern struct mem_info m68k_memory[NUM_MEMINFO];
 
 extern struct mem_info m68k_ramdisk;
 
-void *mac_env;					/* Loaded by the boot asm */
-
 /* The phys. video addr. - might be bogus on some machines */
-unsigned long mac_orig_videoaddr;
+static unsigned long mac_orig_videoaddr;
 
 /* Mac specific timer functions */
 extern unsigned long mac_gettimeoffset(void);
@@ -79,6 +76,8 @@ extern void mac_mksound(unsigned int, unsigned int);
 extern void nubus_sweep_video(void);
 
 static void mac_get_model(char *str);
+static void mac_identify(void);
+static void mac_report_hardware(void);
 
 static void __init mac_sched_init(irq_handler_t vector)
 {
@@ -765,7 +764,7 @@ static struct mac_model mac_data_table[] = {
 	}
 };
 
-void __init mac_identify(void)
+static void __init mac_identify(void)
 {
 	struct mac_model *m;
 
@@ -821,7 +820,7 @@ void __init mac_identify(void)
 	baboon_init();
 }
 
-void __init mac_report_hardware(void)
+static void __init mac_report_hardware(void)
 {
 	printk(KERN_INFO "Apple Macintosh %s\n", macintosh_config->name);
 }
diff --git a/arch/m68k/mac/debug.c b/arch/m68k/mac/debug.c
index e8a57138b4a..2165740786a 100644
--- a/arch/m68k/mac/debug.c
+++ b/arch/m68k/mac/debug.c
@@ -51,6 +51,8 @@ extern void mac_serial_print(const char *);
 static int peng, line;
 #endif
 
+#if 0
+
 void mac_debugging_short(int pos, short num)
 {
 #ifdef DEBUG_SCREEN
@@ -125,6 +127,8 @@ void mac_debugging_long(int pos, long addr)
 #endif
 }
 
+#endif  /*  0  */
+
 #ifdef DEBUG_SERIAL
 /*
  * TODO: serial debug code
@@ -142,12 +146,6 @@ struct mac_SCC {
 
 # define scc (*((volatile struct mac_SCC*)mac_bi_data.sccbase))
 
-/* Flag that serial port is already initialized and used */
-int mac_SCC_init_done;
-/* Can be set somewhere, if a SCC master reset has already be done and should
- * not be repeated; used by kgdb */
-int mac_SCC_reset_done;
-
 static int scc_port = -1;
 
 static struct console mac_console_driver = {
@@ -171,8 +169,8 @@ static struct console mac_console_driver = {
  * this driver if Mac.
  */
 
-void mac_debug_console_write(struct console *co, const char *str,
-			     unsigned int count)
+static void mac_debug_console_write(struct console *co, const char *str,
+				    unsigned int count)
 {
 	mac_serial_print(str);
 }
@@ -209,8 +207,8 @@ static inline void mac_scca_out(char c)
 	scc.cha_a_data = c;
 }
 
-void mac_sccb_console_write(struct console *co, const char *str,
-			    unsigned int count)
+static void mac_sccb_console_write(struct console *co, const char *str,
+				   unsigned int count)
 {
 	while (count--) {
 		if (*str == '\n')
@@ -219,8 +217,8 @@ void mac_sccb_console_write(struct console *co, const char *str,
 	}
 }
 
-void mac_scca_console_write(struct console *co, const char *str,
-			    unsigned int count)
+static void mac_scca_console_write(struct console *co, const char *str,
+				   unsigned int count)
 {
 	while (count--) {
 		if (*str == '\n')
@@ -265,14 +263,8 @@ void mac_scca_console_write(struct console *co, const char *str,
 		    barrier();				\
 	} while(0)
 
-#ifndef CONFIG_SERIAL_CONSOLE
 static void __init mac_init_scc_port(int cflag, int port)
-#else
-void mac_init_scc_port(int cflag, int port)
-#endif
 {
-	extern int mac_SCC_reset_done;
-
 	/*
 	 * baud rates: 1200, 1800, 2400, 4800, 9600, 19.2k, 38.4k, 57.6k, 115.2k
 	 */
@@ -340,22 +332,9 @@ void mac_init_scc_port(int cflag, int port)
 		SCCA_WRITE(3, reg3 | 1);
 		SCCA_WRITE(5, reg5 | 8);
 	}
-
-	mac_SCC_reset_done = 1;
-	mac_SCC_init_done = 1;
 }
 #endif /* DEBUG_SERIAL */
 
-void mac_init_scca_port(int cflag)
-{
-	mac_init_scc_port(cflag, 0);
-}
-
-void mac_init_sccb_port(int cflag)
-{
-	mac_init_scc_port(cflag, 1);
-}
-
 static int __init mac_debug_setup(char *arg)
 {
 	if (!MACH_IS_MAC)
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 3c943d2ec57..43d83e054b8 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -30,8 +30,8 @@
 int oss_present;
 volatile struct mac_oss *oss;
 
-irqreturn_t oss_irq(int, void *);
-irqreturn_t oss_nubus_irq(int, void *);
+static irqreturn_t oss_irq(int, void *);
+static irqreturn_t oss_nubus_irq(int, void *);
 
 extern irqreturn_t via1_irq(int, void *);
 extern irqreturn_t mac_scc_dispatch(int, void *);
@@ -92,7 +92,7 @@ void __init oss_nubus_init(void)
  * and SCSI; everything else is routed to its own autovector IRQ.
  */
 
-irqreturn_t oss_irq(int irq, void *dev_id)
+static irqreturn_t oss_irq(int irq, void *dev_id)
 {
 	int events;
 
@@ -126,7 +126,7 @@ irqreturn_t oss_irq(int irq, void *dev_id)
  * Unlike the VIA/RBV this is on its own autovector interrupt level.
  */
 
-irqreturn_t oss_nubus_irq(int irq, void *dev_id)
+static irqreturn_t oss_nubus_irq(int irq, void *dev_id)
 {
 	int events, irq_bit, i;
 
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index d66f723b17c..f84a4dd64f9 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -36,7 +36,7 @@ irqreturn_t psc_irq(int, void *);
  * Debugging dump, used in various places to see what's going on.
  */
 
-void psc_debug_dump(void)
+static void psc_debug_dump(void)
 {
 	int	i;
 
@@ -55,7 +55,7 @@ void psc_debug_dump(void)
  * expanded to cover what I think are the other 7 channels.
  */
 
-void psc_dma_die_die_die(void)
+static void psc_dma_die_die_die(void)
 {
 	int i;
 
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index fa485df4160..f3b27d04a31 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -45,7 +45,7 @@ volatile long *via_memory_bogon=(long *)&via_memory_bogon;
 int rbv_present;
 int via_alt_mapping;
 EXPORT_SYMBOL(via_alt_mapping);
-__u8 rbv_clear;
+static __u8 rbv_clear;
 
 /*
  * Globals for accessing the VIA chip registers without having to
-- 
cgit v1.2.3


From 22deb527ce5d13e07652f81a53032aa0214ea8c3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:26 +0200
Subject: m68k/q40/config.c: make functions static

This patch makes the following needlessly global functions static:
- q40_reset()
- q40_halt()
- q40_disable_irqs()
- q40_gettimeoffset()
- q40_hwclk()
- q40_get_ss()
- q40_set_clock_mmss()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/q40/config.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 476e18eca75..be9de2f3dc4 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -41,14 +41,12 @@ static void q40_get_model(char *model);
 static int  q40_get_hardware_list(char *buffer);
 extern void q40_sched_init(irq_handler_t handler);
 
-extern unsigned long q40_gettimeoffset(void);
-extern int q40_hwclk(int, struct rtc_time *);
-extern unsigned int q40_get_ss(void);
-extern int q40_set_clock_mmss(unsigned long);
+static unsigned long q40_gettimeoffset(void);
+static int q40_hwclk(int, struct rtc_time *);
+static unsigned int q40_get_ss(void);
+static int q40_set_clock_mmss(unsigned long);
 static int q40_get_rtc_pll(struct rtc_pll_info *pll);
 static int q40_set_rtc_pll(struct rtc_pll_info *pll);
-extern void q40_reset(void);
-void q40_halt(void);
 extern void q40_waitbut(void);
 void q40_set_vectors(void);
 
@@ -127,7 +125,7 @@ static void q40_heartbeat(int on)
 }
 #endif
 
-void q40_reset(void)
+static void q40_reset(void)
 {
         halted = 1;
         printk("\n\n*******************************************\n"
@@ -137,7 +135,8 @@ void q40_reset(void)
 	while (1)
 		;
 }
-void q40_halt(void)
+
+static void q40_halt(void)
 {
         halted = 1;
         printk("\n\n*******************\n"
@@ -165,7 +164,8 @@ static unsigned int serports[] =
 {
 	0x3f8,0x2f8,0x3e8,0x2e8,0
 };
-void q40_disable_irqs(void)
+
+static void q40_disable_irqs(void)
 {
 	unsigned i, j;
 
@@ -227,7 +227,7 @@ static inline unsigned char bin2bcd(unsigned char b)
 }
 
 
-unsigned long q40_gettimeoffset(void)
+static unsigned long q40_gettimeoffset(void)
 {
 	return 5000 * (ql_ticks != 0);
 }
@@ -248,7 +248,7 @@ unsigned long q40_gettimeoffset(void)
  * };
  */
 
-int q40_hwclk(int op, struct rtc_time *t)
+static int q40_hwclk(int op, struct rtc_time *t)
 {
 	if (op) {
 		/* Write.... */
@@ -285,7 +285,7 @@ int q40_hwclk(int op, struct rtc_time *t)
 	return 0;
 }
 
-unsigned int q40_get_ss(void)
+static unsigned int q40_get_ss(void)
 {
 	return bcd2bin(Q40_RTC_SECS);
 }
@@ -295,7 +295,7 @@ unsigned int q40_get_ss(void)
  * clock is out by > 30 minutes.  Logic lifted from atari code.
  */
 
-int q40_set_clock_mmss(unsigned long nowtime)
+static int q40_set_clock_mmss(unsigned long nowtime)
 {
 	int retval = 0;
 	short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
-- 
cgit v1.2.3


From 07b8125949de66b6552966de8d4280c3a8620359 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:27 +0200
Subject: m68k/sun3/: possible cleanups

This patch contains the following possible cleanups:
- make the following needlessly global code static:
  - config.c: sun3_bootmem_alloc()
  - config.c: sun3_sched_init()
  - dvma.c: dvma_page()
  - idprom.c: struct Sun_Machines[]
  - mmu_emu.c: struct ctx_alloc[]
  - sun3dvma.c: iommu_use[]
  - sun3ints.c: led_pattern[]
- remove the unused sbus.c

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/sun3/Makefile   |  2 +-
 arch/m68k/sun3/config.c   |  7 ++++---
 arch/m68k/sun3/dvma.c     |  2 +-
 arch/m68k/sun3/idprom.c   |  2 +-
 arch/m68k/sun3/mmu_emu.c  |  2 +-
 arch/m68k/sun3/sbus.c     | 27 ---------------------------
 arch/m68k/sun3/sun3dvma.c |  2 +-
 arch/m68k/sun3/sun3ints.c |  2 +-
 8 files changed, 10 insertions(+), 36 deletions(-)
 delete mode 100644 arch/m68k/sun3/sbus.c

(limited to 'arch')

diff --git a/arch/m68k/sun3/Makefile b/arch/m68k/sun3/Makefile
index be1a8470d63..38ba0e0ceda 100644
--- a/arch/m68k/sun3/Makefile
+++ b/arch/m68k/sun3/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Linux arch/m68k/sun3 source directory
 #
 
-obj-y	:= sun3ints.o sun3dvma.o sbus.o idprom.o
+obj-y	:= sun3ints.o sun3dvma.o idprom.o
 
 obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index c0fbd278fbb..732087d0735 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -36,7 +36,7 @@ extern char _text, _end;
 char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
 
 extern unsigned long sun3_gettimeoffset(void);
-extern void sun3_sched_init(irq_handler_t handler);
+static void sun3_sched_init(irq_handler_t handler);
 extern void sun3_get_model (char* model);
 extern void idprom_init (void);
 extern int sun3_hwclk(int set, struct rtc_time *t);
@@ -114,7 +114,8 @@ static void sun3_halt (void)
 
 /* sun3 bootmem allocation */
 
-void __init sun3_bootmem_alloc(unsigned long memory_start, unsigned long memory_end)
+static void __init sun3_bootmem_alloc(unsigned long memory_start,
+				      unsigned long memory_end)
 {
 	unsigned long start_page;
 
@@ -164,7 +165,7 @@ void __init config_sun3(void)
 	sun3_bootmem_alloc(memory_start, memory_end);
 }
 
-void __init sun3_sched_init(irq_handler_t timer_routine)
+static void __init sun3_sched_init(irq_handler_t timer_routine)
 {
 	sun3_disable_interrupts();
         intersil_clock->cmd_reg=(INTERSIL_RUN|INTERSIL_INT_DISABLE|INTERSIL_24H_MODE);
diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c
index d2b3093f240..d522eaab455 100644
--- a/arch/m68k/sun3/dvma.c
+++ b/arch/m68k/sun3/dvma.c
@@ -19,7 +19,7 @@
 
 static unsigned long ptelist[120];
 
-inline unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr)
+static unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr)
 {
 	unsigned long pte;
 	unsigned long j;
diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c
index 6c5336d62fa..c86ac37d198 100644
--- a/arch/m68k/sun3/idprom.c
+++ b/arch/m68k/sun3/idprom.c
@@ -25,7 +25,7 @@ static struct idprom idprom_buffer;
  * of the Sparc CPU and have a meaningful IDPROM machtype value that we
  * know about.  See asm-sparc/machines.h for empirical constants.
  */
-struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = {
+static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = {
 /* First, Sun3's */
     { .name = "Sun 3/160 Series",	.id_machtype = (SM_SUN3 | SM_3_160) },
     { .name = "Sun 3/50",		.id_machtype = (SM_SUN3 | SM_3_50) },
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index fb0f6a20cc3..60f9d4500d7 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -55,7 +55,7 @@ unsigned char pmeg_ctx[PMEGS_NUM];
 
 /* pointers to the mm structs for each task in each
    context. 0xffffffff is a marker for kernel context */
-struct mm_struct *ctx_alloc[CONTEXTS_NUM] = {
+static struct mm_struct *ctx_alloc[CONTEXTS_NUM] = {
     [0] = (struct mm_struct *)0xffffffff
 };
 
diff --git a/arch/m68k/sun3/sbus.c b/arch/m68k/sun3/sbus.c
deleted file mode 100644
index babdbfa3cda..00000000000
--- a/arch/m68k/sun3/sbus.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * SBus helper functions
- *
- * Sun3 don't have a sbus, but many of the used devices are also
- * used on Sparc machines with sbus. To avoid having a lot of
- * duplicate code, we provide necessary glue stuff to make using
- * of the sbus driver code possible.
- *
- * (C) 1999 Thomas Bogendoerfer (tsbogend@alpha.franken.de)
- */
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-#include <linux/init.h>
-
-int __init sbus_init(void)
-{
-	return 0;
-}
-
-void *sparc_alloc_io (u32 address, void *virtual, int len, char *name,
-                      u32 bus_type, int rdonly)
-{
-	return (void *)address;
-}
-
-subsys_initcall(sbus_init);
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index 8709677fa02..f9277e8b415 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -29,7 +29,7 @@ static inline void dvma_unmap_iommu(unsigned long a, int b)
 extern void sun3_dvma_init(void);
 #endif
 
-unsigned long iommu_use[IOMMU_TOTAL_ENTRIES];
+static unsigned long iommu_use[IOMMU_TOTAL_ENTRIES];
 
 #define dvma_index(baddr) ((baddr - DVMA_START) >> DVMA_PAGE_SHIFT)
 
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index cf93481adb1..7364cd67455 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -30,7 +30,7 @@ void sun3_enable_interrupts(void)
 	sun3_enable_irq(0);
 }
 
-int led_pattern[8] = {
+static int led_pattern[8] = {
        ~(0x80), ~(0x01),
        ~(0x40), ~(0x02),
        ~(0x20), ~(0x04),
-- 
cgit v1.2.3


From 635c0a217425f6f37422b85bcc88a7af9efc457c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 17 Jul 2008 21:16:29 +0200
Subject: m68k/apollo: Add missing call to apollo_parse_bootinfo()

Add the missing call to apollo_parse_bootinfo(), which had been lost from a
big Apollo support patch by Peter De Schrijver in 1999.

Thanks to Adrian Bunk for noticing!

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/kernel/setup.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index b1f39e4cedb..ea1e44da19b 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c
@@ -122,6 +122,7 @@ extern int bvme6000_parse_bootinfo(const struct bi_record *);
 extern int mvme16x_parse_bootinfo(const struct bi_record *);
 extern int mvme147_parse_bootinfo(const struct bi_record *);
 extern int hp300_parse_bootinfo(const struct bi_record *);
+extern int apollo_parse_bootinfo(const struct bi_record *);
 
 extern void config_amiga(void);
 extern void config_atari(void);
@@ -189,6 +190,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record)
 				unknown = mvme147_parse_bootinfo(record);
 			else if (MACH_IS_HP300)
 				unknown = hp300_parse_bootinfo(record);
+			else if (MACH_IS_APOLLO)
+				unknown = apollo_parse_bootinfo(record);
 			else
 				unknown = 1;
 		}
-- 
cgit v1.2.3


From 62bc654e794feb5242c31a59dcc36bab64f7d917 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:30 +0200
Subject: m68k/Mac: remove the unused ADB_KEYBOARD option

When the driver was removed back in 2002 the option was forgotten.

Reported-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/Kconfig | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 55ea52fe6ac..8012ff7d751 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -578,18 +578,6 @@ config MAC_HID
 	depends on INPUT_ADBHID
 	default y
 
-config ADB_KEYBOARD
-	bool "Support for ADB keyboard (old driver)"
-	depends on MAC && !INPUT_ADBHID
-	help
-	  This option allows you to use an ADB keyboard attached to your
-	  machine. Note that this disables any other (ie. PS/2) keyboard
-	  support, even if your machine is physically capable of using both at
-	  the same time.
-
-	  If you use an ADB keyboard (4 pin connector), say Y here.
-	  If you use a PS/2 keyboard (6 pin connector), say N here.
-
 config HPDCA
 	tristate "HP DCA serial support"
 	depends on DIO && SERIAL_8250
-- 
cgit v1.2.3


From 7ccaee5cadd7a771773bbb878e139697511ebdde Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 17 Jul 2008 21:16:31 +0200
Subject: m68k/Atari: remove the dead ATARI_SCC{,_DMA} options

It seems the driver was removed back in kernel 2.3 but the options were
forgotten.

Reported-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/Kconfig | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 8012ff7d751..8c5e1de68fc 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -490,28 +490,6 @@ config ATARI_MFPSER
 	  Note for Falcon users: You also have an MFP port, it's just not
 	  wired to the outside... But you could use the port under Linux.
 
-config ATARI_SCC
-	tristate "Atari SCC serial support"
-	depends on ATARI
-	---help---
-	  If you have serial ports based on a Zilog SCC chip (Modem2, Serial2,
-	  LAN) and like to use them under Linux, say Y. All built-in SCC's are
-	  supported (TT, MegaSTE, Falcon), and also the ST-ESCC. If you have
-	  two connectors for channel A (Serial2 and LAN), they are visible as
-	  two separate devices.
-
-	  To compile this driver as a module, choose M here.
-
-config ATARI_SCC_DMA
-	bool "Atari SCC serial DMA support"
-	depends on ATARI_SCC
-	help
-	  This enables DMA support for receiving data on channel A of the SCC.
-	  If you have a TT you may say Y here and read
-	  drivers/char/atari_SCC.README. All other users should say N here,
-	  because only the TT has SCC-DMA, even if your machine keeps claiming
-	  so at boot time.
-
 config ATARI_MIDI
 	tristate "Atari MIDI serial support"
 	depends on ATARI
@@ -628,7 +606,7 @@ config DN_SERIAL
 
 config SERIAL_CONSOLE
 	bool "Support for serial port console"
-	depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_SCC=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL)
+	depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL)
 	---help---
 	  If you say Y here, it will be possible to use a serial port as the
 	  system console (the system console is the device which receives all
-- 
cgit v1.2.3


From 7b6b948fc0d60a704c15b1cd72345a98e759dd62 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 17 Jul 2008 21:16:33 +0200
Subject: arch/m68k/mm/motorola.c: Eliminate NULL test and memset after
 alloc_bootmem

As noted by Akinobu Mita in patch b1fceac2b9e04d278316b2faddf276015fc06e3b,
alloc_bootmem and related functions never return NULL and always return a
zeroed region of memory.  Thus a NULL test or memset after calls to these
functions is unnecessary.

This was fixed using the following semantic patch.
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression E;
statement S;
@@

E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...)
.. when != E
(
- BUG_ON (E == NULL);
|
- if (E == NULL) S
)

@@
expression E,E1;
@@

E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...)
.. when != E
- memset(E,0,E1);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/mm/motorola.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 30d34f28502..226795bdf35 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -285,7 +285,6 @@ void __init paging_init(void)
 	 * to a couple of allocated pages
 	 */
 	empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
-	memset(empty_zero_page, 0, PAGE_SIZE);
 
 	/*
 	 * Set up SFC/DFC registers
-- 
cgit v1.2.3


From 93026e217b46b70f9719caf69e716fa3bbe1d20c Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 17 Jul 2008 21:16:34 +0200
Subject: arch/m68k/mm/sun3mmu.c: Eliminate NULL test and memset after
 alloc_bootmem

As noted by Akinobu Mita in patch b1fceac2b9e04d278316b2faddf276015fc06e3b,
alloc_bootmem and related functions never return NULL and always return a
zeroed region of memory.  Thus a NULL test or memset after calls to these
functions is unnecessary.

This was fixed using the following semantic patch.
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression E;
statement S;
@@

E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...)
.. when != E
(
- BUG_ON (E == NULL);
|
- if (E == NULL) S
)

@@
expression E,E1;
@@

E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...)
.. when != E
- memset(E,0,E1);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/mm/sun3mmu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index 6a6513aa1ce..edceefc1887 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -53,7 +53,6 @@ void __init paging_init(void)
 	wp_works_ok = 0;
 #endif
 	empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
-	memset(empty_zero_page, 0, PAGE_SIZE);
 
 	address = PAGE_OFFSET;
 	pg_dir = swapper_pg_dir;
-- 
cgit v1.2.3


From 9175fc06aee79c349790672178d3fd7507d75c86 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 21 Jul 2008 01:38:14 -0700
Subject: x86: use setup_clear_cpu_cap() when disabling the lapic

... so we don't need to call clear_cpu_cap() again in early_identify_cpu(),
and can rely on cleared_cpu_caps as is done in other places.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic_32.c       | 5 +----
 arch/x86/kernel/apic_64.c       | 2 +-
 arch/x86/kernel/cpu/common_64.c | 4 ----
 3 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a437d027f20..e9a00e5074b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1214,9 +1214,6 @@ int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
-	if (disable_apic)
-		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
 
@@ -1700,7 +1697,7 @@ early_param("lapic", parse_lapic);
 static int __init parse_nolapic(char *arg)
 {
 	disable_apic = 1;
-	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	setup_clear_cpu_cap(X86_FEATURE_APIC);
 	return 0;
 }
 early_param("nolapic", parse_nolapic);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 1e3d32e27c1..16e586cacbd 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -1337,7 +1337,7 @@ early_param("apic", apic_set_verbosity);
 static __init int setup_disableapic(char *str)
 {
 	disable_apic = 1;
-	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	setup_clear_cpu_cap(X86_FEATURE_APIC);
 	return 0;
 }
 early_param("disableapic", setup_disableapic);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 7b8cc72feb4..0485cf64452 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -324,10 +324,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
 
 	validate_pat_support(c);
-
-	/* early_param could clear that, but recall get it set again */
-	if (disable_apic)
-		clear_cpu_cap(c, X86_FEATURE_APIC);
 }
 
 /*
-- 
cgit v1.2.3


From 7edf8891ad7aef5f4e97991fed6fb0e605e96ea0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 21 Jul 2008 01:39:03 -0700
Subject: x86: remove extra calling to get ext cpuid level

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common_64.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 0485cf64452..daee611f014 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -305,7 +305,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
-	c->extended_cpuid_level = cpuid_eax(0x80000000);
 	if (c->extended_cpuid_level >= 0x80000007)
 		c->x86_power = cpuid_edx(0x80000007);
 
-- 
cgit v1.2.3


From cfc1b9a6a683c835a20d5b565ade55baf639f72f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 21 Jul 2008 21:35:38 +0200
Subject: x86: convert Dprintk to pr_debug

There are a couple of places where (P)Dprintk is used which is an old
compile time enabled printk wrapper. Convert it to the generic
pr_debug().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c            |  6 ++--
 arch/x86/kernel/cpu/perfctr-watchdog.c |  4 +--
 arch/x86/kernel/setup_percpu.c         |  6 ++--
 arch/x86/kernel/smpboot.c              | 52 +++++++++++++++++-----------------
 arch/x86/mm/numa_64.c                  |  4 ---
 arch/x86/pci/early.c                   | 16 +++++------
 6 files changed, 41 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f489d7a9be9..fa88a1d7129 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1021,7 +1021,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
 #endif
 	set_bit(MP_ISA_BUS, mp_bus_not_pci);
-	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+	pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
 
 #ifdef CONFIG_X86_ES7000
 	/*
@@ -1127,8 +1127,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		return gsi;
 	}
 	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
-			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+		pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n",
+			 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
 #ifdef CONFIG_X86_32
 		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
 #else
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6d4bdc02388..de7439f82b9 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -250,7 +250,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
 
 	do_div(count, nmi_hz);
 	if(descr)
-		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+		pr_debug("setting %s to -0x%08Lx\n", descr, count);
 	wrmsrl(perfctr_msr, 0 - count);
 }
 
@@ -261,7 +261,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
 
 	do_div(count, nmi_hz);
 	if(descr)
-		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+		pr_debug("setting %s to -0x%08Lx\n", descr, count);
 	wrmsr(perfctr_msr, (u32)(-count), 0);
 }
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index cac68430d31..f7745f94c00 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -227,8 +227,8 @@ static void __init setup_node_to_cpumask_map(void)
 	/* allocate the map */
 	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
 
-	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
-		map, nr_node_ids);
+	pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
+		 map, nr_node_ids);
 
 	/* node_to_cpumask() will now work */
 	node_to_cpumask_map = map;
@@ -248,7 +248,7 @@ void __cpuinit numa_set_node(int cpu, int node)
 		per_cpu(x86_cpu_to_node_map, cpu) = node;
 
 	else
-		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+		pr_debug("Setting node for non-present cpu %d\n", cpu);
 }
 
 void __cpuinit numa_clear_node(int cpu)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 27640196eb7..4b53a647bc0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -216,7 +216,7 @@ static void __cpuinit smp_callin(void)
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
 					phys_id, cpuid);
 	}
-	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+	pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
 
 	/*
 	 * STARTUP IPIs are fragile beasts as they might sometimes
@@ -251,7 +251,7 @@ static void __cpuinit smp_callin(void)
 	 * boards)
 	 */
 
-	Dprintk("CALLIN, before setup_local_APIC().\n");
+	pr_debug("CALLIN, before setup_local_APIC().\n");
 	smp_callin_clear_local_apic();
 	setup_local_APIC();
 	end_local_APIC_setup();
@@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void)
 	local_irq_enable();
 	calibrate_delay();
 	local_irq_disable();
-	Dprintk("Stack at about %p\n", &cpuid);
+	pr_debug("Stack at about %p\n", &cpuid);
 
 	/*
 	 * Save our processor parameters
@@ -513,7 +513,7 @@ static void impress_friends(void)
 	/*
 	 * Allow the user to impress friends.
 	 */
-	Dprintk("Before bogomips.\n");
+	pr_debug("Before bogomips.\n");
 	for_each_possible_cpu(cpu)
 		if (cpu_isset(cpu, cpu_callout_map))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
@@ -523,7 +523,7 @@ static void impress_friends(void)
 		bogosum/(500000/HZ),
 		(bogosum/(5000/HZ))%100);
 
-	Dprintk("Before bogocount - setting activated=1.\n");
+	pr_debug("Before bogocount - setting activated=1.\n");
 }
 
 static inline void __inquire_remote_apic(int apicid)
@@ -585,7 +585,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 	/* Kick the second */
 	apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
 
-	Dprintk("Waiting for send to finish...\n");
+	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
 
 	/*
@@ -596,7 +596,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 	if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
 		apic_write(APIC_ESR, 0);
 	accept_status = (apic_read(APIC_ESR) & 0xEF);
-	Dprintk("NMI sent.\n");
+	pr_debug("NMI sent.\n");
 
 	if (send_status)
 		printk(KERN_ERR "APIC never delivered???\n");
@@ -631,7 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 		apic_read(APIC_ESR);
 	}
 
-	Dprintk("Asserting INIT.\n");
+	pr_debug("Asserting INIT.\n");
 
 	/*
 	 * Turn INIT on target chip
@@ -644,12 +644,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	apic_write(APIC_ICR,
 		   APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
 
-	Dprintk("Waiting for send to finish...\n");
+	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
 
 	mdelay(10);
 
-	Dprintk("Deasserting INIT.\n");
+	pr_debug("Deasserting INIT.\n");
 
 	/* Target chip */
 	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
@@ -657,7 +657,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	/* Send IPI */
 	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
 
-	Dprintk("Waiting for send to finish...\n");
+	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
 
 	mb();
@@ -684,14 +684,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	/*
 	 * Run STARTUP IPI loop.
 	 */
-	Dprintk("#startup loops: %d.\n", num_starts);
+	pr_debug("#startup loops: %d.\n", num_starts);
 
 	for (j = 1; j <= num_starts; j++) {
-		Dprintk("Sending STARTUP #%d.\n", j);
+		pr_debug("Sending STARTUP #%d.\n", j);
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
-		Dprintk("After apic_write.\n");
+		pr_debug("After apic_write.\n");
 
 		/*
 		 * STARTUP IPI
@@ -709,9 +709,9 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 		 */
 		udelay(300);
 
-		Dprintk("Startup point 1.\n");
+		pr_debug("Startup point 1.\n");
 
-		Dprintk("Waiting for send to finish...\n");
+		pr_debug("Waiting for send to finish...\n");
 		send_status = safe_apic_wait_icr_idle();
 
 		/*
@@ -724,7 +724,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 		if (send_status || accept_status)
 			break;
 	}
-	Dprintk("After Startup.\n");
+	pr_debug("After Startup.\n");
 
 	if (send_status)
 		printk(KERN_ERR "APIC never delivered???\n");
@@ -875,7 +875,7 @@ do_rest:
 
 	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
 
-		Dprintk("Setting warm reset code and vector.\n");
+		pr_debug("Setting warm reset code and vector.\n");
 
 		store_NMI_vector(&nmi_high, &nmi_low);
 
@@ -896,9 +896,9 @@ do_rest:
 		/*
 		 * allow APs to start initializing.
 		 */
-		Dprintk("Before Callout %d.\n", cpu);
+		pr_debug("Before Callout %d.\n", cpu);
 		cpu_set(cpu, cpu_callout_map);
-		Dprintk("After Callout %d.\n", cpu);
+		pr_debug("After Callout %d.\n", cpu);
 
 		/*
 		 * Wait 5s total for a response
@@ -911,10 +911,10 @@ do_rest:
 
 		if (cpu_isset(cpu, cpu_callin_map)) {
 			/* number CPUs logically, starting from 1 (BSP is 0) */
-			Dprintk("OK.\n");
+			pr_debug("OK.\n");
 			printk(KERN_INFO "CPU%d: ", cpu);
 			print_cpu_info(&cpu_data(cpu));
-			Dprintk("CPU has booted.\n");
+			pr_debug("CPU has booted.\n");
 		} else {
 			boot_error = 1;
 			if (*((volatile unsigned char *)trampoline_base)
@@ -959,7 +959,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	WARN_ON(irqs_disabled());
 
-	Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
+	pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
 
 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
 	    !physid_isset(apicid, phys_cpu_present_map)) {
@@ -971,7 +971,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	 * Already booted CPU?
 	 */
 	if (cpu_isset(cpu, cpu_callin_map)) {
-		Dprintk("do_boot_cpu %d Already started\n", cpu);
+		pr_debug("do_boot_cpu %d Already started\n", cpu);
 		return -ENOSYS;
 	}
 
@@ -998,7 +998,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	err = do_boot_cpu(apicid, cpu);
 #endif
 	if (err) {
-		Dprintk("do_boot_cpu failed %d\n", err);
+		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
 	}
 
@@ -1202,7 +1202,7 @@ void __init native_smp_prepare_boot_cpu(void)
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
 {
-	Dprintk("Boot done.\n");
+	pr_debug("Boot done.\n");
 
 	impress_friends();
 	smp_checks();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index b432d578177..9782f42dd31 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,10 +20,6 @@
 #include <asm/acpi.h>
 #include <asm/k8.h>
 
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 858dbe3399f..86631ccbc25 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -7,15 +7,13 @@
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
 
-#define PDprintk(x...)
-
 u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
 {
 	u32 v;
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	v = inl(0xcfc);
 	if (v != 0xffffffff)
-		PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
+		pr_debug("%x reading 4 from %x: %x\n", slot, offset, v);
 	return v;
 }
 
@@ -24,7 +22,7 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
 	u8 v;
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	v = inb(0xcfc + (offset&3));
-	PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
+	pr_debug("%x reading 1 from %x: %x\n", slot, offset, v);
 	return v;
 }
 
@@ -33,28 +31,28 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
 	u16 v;
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	v = inw(0xcfc + (offset&2));
-	PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
+	pr_debug("%x reading 2 from %x: %x\n", slot, offset, v);
 	return v;
 }
 
 void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
 				    u32 val)
 {
-	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	pr_debug("%x writing to %x: %x\n", slot, offset, val);
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	outl(val, 0xcfc);
 }
 
 void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
 {
-	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	pr_debug("%x writing to %x: %x\n", slot, offset, val);
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	outb(val, 0xcfc + (offset&3));
 }
 
 void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
 {
-	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	pr_debug("%x writing to %x: %x\n", slot, offset, val);
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
 	outw(val, 0xcfc + (offset&2));
 }
@@ -71,7 +69,7 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func)
 	int j;
 	u32 val;
 
-	printk("PCI: %02x:%02x:%02x", bus, slot, func);
+	printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func);
 
 	for (i = 0; i < 256; i += 4) {
 		if (!(i & 0x0f))
-- 
cgit v1.2.3


From f2d0f1dea41fd6c7a347e71b505a155096643517 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 21 Jul 2008 13:04:08 -0700
Subject: x86: Fix help message for STRICT_DEVMEM config option

The message talked about "left on" when it meant to say disabled.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 85a87d2ac0c..092f019e033 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -8,7 +8,7 @@ source "lib/Kconfig.debug"
 config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	help
-	  If this option is left on, you allow userspace (root) access to all
+	  If this option is disabled, you allow userspace (root) access to all
 	  of memory, including kernel and userspace memory. Accidental
 	  access to this is obviously disastrous, but specific access can
 	  be used by people debugging the kernel. Note that with PAT support
-- 
cgit v1.2.3