Diffstat (limited to 'arch/ppc64')
29 files changed, 628 insertions, 4313 deletions
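The largest functional change in this patch is to arch/ppc64/boot/addRamDisk.c: instead of parsing a System.map file to locate _end, the tool now reads the vmlinux ELF header directly and rounds the first program header's p_memsz up to a 4k page boundary. The sketch below is illustrative only and not part of the patch; apart from the <elf.h> constants, the names are made up, and like the original tool it assumes a big-endian PPC64 ET_EXEC image read without any endian conversion.

#include <elf.h>
#include <string.h>

/* Round addr up to the next multiple of size (size must be a power of two),
 * mirroring the patch's _ALIGN_UP() macro. */
#define ALIGN_UP(addr, size) (((addr) + ((size) - 1)) & ~((size) - 1))

/* Return the first program header's p_memsz for a big-endian PPC64 ET_EXEC
 * image held in memory, or 0 if the buffer does not look like one.
 * No byte-swapping is done, so the fields must already match the host byte
 * order, just as the original tool assumes. */
static unsigned long ppc64_kernel_memsize(const void *image, size_t len)
{
	const Elf64_Ehdr *eh = image;
	const Elf64_Phdr *ph;

	if (len < sizeof(*eh) ||
	    memcmp(eh->e_ident, ELFMAG, SELFMAG) != 0 ||
	    eh->e_ident[EI_CLASS] != ELFCLASS64 ||
	    eh->e_ident[EI_DATA] != ELFDATA2MSB ||
	    eh->e_type != ET_EXEC || eh->e_machine != EM_PPC64)
		return 0;

	if (eh->e_phoff + sizeof(Elf64_Phdr) > len)
		return 0;

	ph = (const Elf64_Phdr *)((const char *)image + eh->e_phoff);
	return (unsigned long)ph->p_memsz;
}

With such a helper, the end-of-kernel offset that previously came from System.map is simply ALIGN_UP(ppc64_kernel_memsize(buf, buflen), 4096), which matches the new offset_end computation the patch performs on vmlinux.memsize.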
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index b987164fca4..c9d32db9d76 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -47,11 +47,16 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config PPC_STD_MMU + bool + default y + # We optimistically allocate largepages from the VM, so make the limit # large enough (16MB). This badly named config option is actually # max order + 1 config FORCE_MAX_ZONEORDER int + default "9" if PPC_64K_PAGES default "13" source "init/Kconfig" @@ -169,6 +174,16 @@ config KEXEC support. As of this writing the exact hardware interface is strongly in flux, so no good recommendation can be made. +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC64 + bool "Support for some Apple G5s" + depends on CPU_FREQ && PMAC_SMU && PPC64 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on Apple iMac G5, + and some of the more recent desktop G5 machines as well. + config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool @@ -282,6 +297,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NEED_MULTIPLE_NODES +config ARCH_MEMORY_PROBE + def_bool y + depends on MEMORY_HOTPLUG + # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and # between a node's start and end pfns, it may not @@ -294,6 +313,15 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES +config PPC_64K_PAGES + bool "64k page size" + help + This option changes the kernel logical page size to 64k. On machines + without processor support for 64k pages, the kernel will simulate + them by loading each individual 4k page on demand transparently, + while on hardware with such support, it will be used to map + normal application pages. + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug index f16a5030527..b258c9314a1 100644 --- a/arch/ppc64/Kconfig.debug +++ b/arch/ppc64/Kconfig.debug @@ -55,10 +55,6 @@ config XMON_DEFAULT xmon is normally disabled unless booted with 'xmon=on'. Use 'xmon=off' to disable xmon init during runtime. 
-config PPCDBG - bool "Include PPCDBG realtime debugging" - depends on DEBUG_KERNEL - config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" help diff --git a/arch/ppc64/boot/addRamDisk.c b/arch/ppc64/boot/addRamDisk.c index 7f2c0947339..c02a99952be 100644 --- a/arch/ppc64/boot/addRamDisk.c +++ b/arch/ppc64/boot/addRamDisk.c @@ -5,11 +5,59 @@ #include <sys/types.h> #include <sys/stat.h> #include <string.h> +#include <elf.h> #define ElfHeaderSize (64 * 1024) #define ElfPages (ElfHeaderSize / 4096) #define KERNELBASE (0xc000000000000000) +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +struct addr_range { + unsigned long long addr; + unsigned long memsize; + unsigned long offset; +}; + +static int check_elf64(void *p, int size, struct addr_range *r) +{ + Elf64_Ehdr *elf64 = p; + Elf64_Phdr *elf64ph; + + if (elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || elf64->e_machine != EM_PPC64) + return 0; + + if ((elf64->e_phoff + sizeof(Elf64_Phdr)) > size) + return 0; + + elf64ph = (Elf64_Phdr *) ((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + + r->memsize = (unsigned long)elf64ph->p_memsz; + r->offset = (unsigned long)elf64ph->p_offset; + r->addr = (unsigned long long)elf64ph->p_vaddr; + +#ifdef DEBUG + printf("PPC64 ELF file, ph:\n"); + printf("p_type 0x%08x\n", elf64ph->p_type); + printf("p_flags 0x%08x\n", elf64ph->p_flags); + printf("p_offset 0x%016llx\n", elf64ph->p_offset); + printf("p_vaddr 0x%016llx\n", elf64ph->p_vaddr); + printf("p_paddr 0x%016llx\n", elf64ph->p_paddr); + printf("p_filesz 0x%016llx\n", elf64ph->p_filesz); + printf("p_memsz 0x%016llx\n", elf64ph->p_memsz); + printf("p_align 0x%016llx\n", elf64ph->p_align); + printf("... 
skipping 0x%08lx bytes of ELF header\n", + (unsigned long)elf64ph->p_offset); +#endif + + return 64; +} void get4k(FILE *file, char *buf ) { unsigned j; @@ -34,97 +82,92 @@ void death(const char *msg, FILE *fdesc, const char *fname) int main(int argc, char **argv) { char inbuf[4096]; - FILE *ramDisk = NULL; - FILE *sysmap = NULL; - FILE *inputVmlinux = NULL; - FILE *outputVmlinux = NULL; - - unsigned i = 0; - unsigned long ramFileLen = 0; - unsigned long ramLen = 0; - unsigned long roundR = 0; - - unsigned long sysmapFileLen = 0; - unsigned long sysmapLen = 0; - unsigned long sysmapPages = 0; - char* ptr_end = NULL; - unsigned long offset_end = 0; - - unsigned long kernelLen = 0; - unsigned long actualKernelLen = 0; - unsigned long round = 0; - unsigned long roundedKernelLen = 0; - unsigned long ramStartOffs = 0; - unsigned long ramPages = 0; - unsigned long roundedKernelPages = 0; - unsigned long hvReleaseData = 0; + struct addr_range vmlinux; + FILE *ramDisk; + FILE *inputVmlinux; + FILE *outputVmlinux; + + char *rd_name, *lx_name, *out_name; + + size_t i; + unsigned long ramFileLen; + unsigned long ramLen; + unsigned long roundR; + unsigned long offset_end; + + unsigned long kernelLen; + unsigned long actualKernelLen; + unsigned long round; + unsigned long roundedKernelLen; + unsigned long ramStartOffs; + unsigned long ramPages; + unsigned long roundedKernelPages; + unsigned long hvReleaseData; u_int32_t eyeCatcher = 0xc8a5d9c4; - unsigned long naca = 0; - unsigned long xRamDisk = 0; - unsigned long xRamDiskSize = 0; - long padPages = 0; + unsigned long naca; + unsigned long xRamDisk; + unsigned long xRamDiskSize; + long padPages; if (argc < 2) { fprintf(stderr, "Name of RAM disk file missing.\n"); exit(1); } + rd_name = argv[1]; if (argc < 3) { - fprintf(stderr, "Name of System Map input file is missing.\n"); - exit(1); - } - - if (argc < 4) { fprintf(stderr, "Name of vmlinux file missing.\n"); exit(1); } + lx_name = argv[2]; - if (argc < 5) { + if (argc < 4) { fprintf(stderr, "Name of vmlinux output file missing.\n"); exit(1); } + out_name = argv[3]; - ramDisk = fopen(argv[1], "r"); + ramDisk = fopen(rd_name, "r"); if ( ! ramDisk ) { - fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", argv[1]); + fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", rd_name); exit(1); } - sysmap = fopen(argv[2], "r"); - if ( ! sysmap ) { - fprintf(stderr, "System Map file \"%s\" failed to open.\n", argv[2]); - exit(1); - } - - inputVmlinux = fopen(argv[3], "r"); + inputVmlinux = fopen(lx_name, "r"); if ( ! inputVmlinux ) { - fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", argv[3]); + fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", lx_name); exit(1); } - outputVmlinux = fopen(argv[4], "w+"); + outputVmlinux = fopen(out_name, "w+"); if ( ! 
outputVmlinux ) { - fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", argv[4]); + fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", out_name); exit(1); } - - - + + i = fread(inbuf, 1, sizeof(inbuf), inputVmlinux); + if (i != sizeof(inbuf)) { + fprintf(stderr, "can not read vmlinux file %s: %u\n", lx_name, i); + exit(1); + } + + i = check_elf64(inbuf, sizeof(inbuf), &vmlinux); + if (i == 0) { + fprintf(stderr, "You must have a linux kernel specified as argv[2]\n"); + exit(1); + } + /* Input Vmlinux file */ fseek(inputVmlinux, 0, SEEK_END); kernelLen = ftell(inputVmlinux); fseek(inputVmlinux, 0, SEEK_SET); - printf("kernel file size = %d\n", kernelLen); - if ( kernelLen == 0 ) { - fprintf(stderr, "You must have a linux kernel specified as argv[3]\n"); - exit(1); - } + printf("kernel file size = %lu\n", kernelLen); actualKernelLen = kernelLen - ElfHeaderSize; - printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); + printf("actual kernel length (minus ELF header) = %lu\n", actualKernelLen); round = actualKernelLen % 4096; roundedKernelLen = actualKernelLen; @@ -134,39 +177,7 @@ int main(int argc, char **argv) roundedKernelPages = roundedKernelLen / 4096; printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); - - - /* Input System Map file */ - /* (needs to be processed simply to determine if we need to add pad pages due to the static variables not being included in the vmlinux) */ - fseek(sysmap, 0, SEEK_END); - sysmapFileLen = ftell(sysmap); - fseek(sysmap, 0, SEEK_SET); - printf("%s file size = %ld/0x%lx \n", argv[2], sysmapFileLen, sysmapFileLen); - - sysmapLen = sysmapFileLen; - - roundR = 4096 - (sysmapLen % 4096); - if (roundR) { - printf("Rounding System Map file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); - sysmapLen += roundR; - } - printf("Rounded System Map size is %ld/0x%lx \n", sysmapLen, sysmapLen); - - /* Process the Sysmap file to determine where _end is */ - sysmapPages = sysmapLen / 4096; - /* read the whole file line by line, expect that it doesn't fail */ - while ( fgets(inbuf, 4096, sysmap) ) ; - /* search for _end in the last page of the system map */ - ptr_end = strstr(inbuf, " _end"); - if (!ptr_end) { - fprintf(stderr, "Unable to find _end in the sysmap file \n"); - fprintf(stderr, "inbuf: \n"); - fprintf(stderr, "%s \n", inbuf); - exit(1); - } - printf("Found _end in the last page of the sysmap - backing up 10 characters it looks like %s", ptr_end-10); - /* convert address of _end in system map to hex offset. 
*/ - offset_end = (unsigned int)strtol(ptr_end-10, NULL, 16); + offset_end = _ALIGN_UP(vmlinux.memsize, 4096); /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */ padPages = offset_end/4096 - roundedKernelPages; @@ -194,7 +205,7 @@ int main(int argc, char **argv) fseek(ramDisk, 0, SEEK_END); ramFileLen = ftell(ramDisk); fseek(ramDisk, 0, SEEK_SET); - printf("%s file size = %ld/0x%lx \n", argv[1], ramFileLen, ramFileLen); + printf("%s file size = %ld/0x%lx \n", rd_name, ramFileLen, ramFileLen); ramLen = ramFileLen; @@ -248,19 +259,19 @@ int main(int argc, char **argv) /* fseek to the hvReleaseData pointer */ fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET); if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) { - death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[4]); + death("Could not read hvReleaseData pointer\n", outputVmlinux, out_name); } hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */ - printf("hvReleaseData is at %08x\n", hvReleaseData); + printf("hvReleaseData is at %08lx\n", hvReleaseData); /* fseek to the hvReleaseData */ fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET); if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) { - death("Could not read hvReleaseData\n", outputVmlinux, argv[4]); + death("Could not read hvReleaseData\n", outputVmlinux, out_name); } /* Check hvReleaseData sanity */ if (memcmp(inbuf, &eyeCatcher, 4) != 0) { - death("hvReleaseData is invalid\n", outputVmlinux, argv[4]); + death("hvReleaseData is invalid\n", outputVmlinux, out_name); } /* Get the naca pointer */ naca = ntohl(*((u_int32_t*) &inbuf[0x0C])) - KERNELBASE; @@ -269,13 +280,13 @@ int main(int argc, char **argv) /* fseek to the naca */ fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) { - death("Could not read naca\n", outputVmlinux, argv[4]); + death("Could not read naca\n", outputVmlinux, out_name); } xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c])); xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14])); /* Make sure a RAM disk isn't already present */ if ((xRamDisk != 0) || (xRamDiskSize != 0)) { - death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[4]); + death("RAM disk is already attached to this kernel\n", outputVmlinux, out_name); } /* Fill in the values */ *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs); @@ -285,15 +296,15 @@ int main(int argc, char **argv) fflush(outputVmlinux); fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) { - death("Could not write naca\n", outputVmlinux, argv[4]); + death("Could not write naca\n", outputVmlinux, out_name); } - printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08x\n", + printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08lx\n", ramPages, ramStartOffs); /* Done */ fclose(outputVmlinux); /* Set permission to executable */ - chmod(argv[4], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); + chmod(out_name, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); return 0; } diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index c1dc876bcca..e0dde24a72c 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -203,8 +203,15 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) break; } - vmlinux.size = (unsigned long)elf64ph->p_filesz; - vmlinux.memsize = (unsigned long)elf64ph->p_memsz; + 
vmlinux.size = (unsigned long)elf64ph->p_filesz + + (unsigned long)elf64ph->p_offset; + /* We need to claim the memsize plus the file offset since gzip + * will expand the header (file offset), then the kernel, then + * possible rubbish we don't care about. But the kernel bss must + * be claimed (it will be zero'd by the kernel itself) + */ + vmlinux.memsize = (unsigned long)elf64ph->p_memsz + + (unsigned long)elf64ph->p_offset; printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); vmlinux.addr = try_claim(vmlinux.memsize); if (vmlinux.addr == 0) { diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index c441aebe764..58b19f10765 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -11,12 +11,11 @@ obj-y := misc.o prom.o endif -obj-y += irq.o idle.o dma.o \ - align.o pacaData.o \ - udbg.o ioctl32.o \ +obj-y += idle.o dma.o \ + align.o \ + udbg.o \ rtc.o \ - cpu_setup_power4.o \ - iommu.o sysfs.o vdso.o firmware.o + iommu.o vdso.o obj-y += vdso32/ vdso64/ pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o @@ -31,15 +30,10 @@ endif obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o -obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_MODULES) += module.o ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o endif -obj-$(CONFIG_PPC_RTAS) += rtas_pci.o -obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_BOOTX_TEXT) += btext.o @@ -52,8 +46,6 @@ obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o obj-$(CONFIG_KPROBES) += kprobes.o -CFLAGS_ioctl32.o += -Ifs/ - ifneq ($(CONFIG_PPC_MERGE),y) ifeq ($(CONFIG_PPC_ISERIES),y) arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index 504dee836d2..84ab5c18ef5 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c @@ -74,7 +74,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); /* paca */ @@ -93,6 +92,9 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_64K_PAGES + DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); +#endif #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S deleted file mode 100644 index 1fb673c511f..00000000000 --- a/arch/ppc64/kernel/cpu_setup_power4.S +++ /dev/null @@ -1,233 +0,0 @@ -/* - * This file contains low level CPU setup functions. - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - */ - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cache.h> - -_GLOBAL(__970_cpu_preinit) - /* - * Do nothing if not running in HV mode - */ - mfmsr r0 - rldicl. r0,r0,4,63 - beqlr - - /* - * Deal only with PPC970 and PPC970FX. - */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr -1: - - /* Make sure HID4:rm_ci is off before MMU is turned off, that large - * pages are enabled with HID4:61 and clear HID5:DCBZ_size and - * HID5:DCBZ32_ill - */ - li r0,0 - mfspr r3,SPRN_HID4 - rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ - rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */ - sync - mtspr SPRN_HID4,r3 - isync - sync - mfspr r3,SPRN_HID5 - rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */ - sync - mtspr SPRN_HID5,r3 - isync - sync - - /* Setup some basic HID1 features */ - mfspr r0,SPRN_HID1 - li r3,0x1200 /* enable i-fetch cacheability */ - sldi r3,r3,44 /* and prefetch */ - or r0,r0,r3 - mtspr SPRN_HID1,r0 - mtspr SPRN_HID1,r0 - isync - - /* Clear HIOR */ - li r0,0 - sync - mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ - isync - blr - -_GLOBAL(__setup_cpu_power4) - blr - -_GLOBAL(__setup_cpu_be) - /* Set large page sizes LP=0: 16MB, LP=1: 64KB */ - addi r3, 0, 0 - ori r3, r3, HID6_LB - sldi r3, r3, 32 - nor r3, r3, r3 - mfspr r4, SPRN_HID6 - and r4, r4, r3 - addi r3, 0, 0x02000 - sldi r3, r3, 32 - or r4, r4, r3 - mtspr SPRN_HID6, r4 - blr - -_GLOBAL(__setup_cpu_ppc970) - mfspr r0,SPRN_HID0 - li r11,5 /* clear DOZE and SLEEP */ - rldimi r0,r11,52,8 /* set NAP and DPM */ - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - sync - isync - blr - -/* Definitions for the table use to save CPU states */ -#define CS_HID0 0 -#define CS_HID1 8 -#define CS_HID4 16 -#define CS_HID5 24 -#define CS_SIZE 32 - - .data - .balign L1_CACHE_BYTES,0 -cpu_state_storage: - .space CS_SIZE - .balign L1_CACHE_BYTES,0 - .text - -/* Called in normal context to backup CPU 0 state. This - * does not include cache settings. This function is also - * called for machine sleep. This does not include the MMU - * setup, BATs, etc... but rather the "special" registers - * like HID0, HID1, HID4, etc... - */ -_GLOBAL(__save_cpu_setup) - /* Some CR fields are volatile, we back it up all */ - mfcr r7 - - /* Get storage ptr */ - LOADADDR(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bne 2f - -1: /* Save HID0,1,4 and 5 */ - mfspr r3,SPRN_HID0 - std r3,CS_HID0(r5) - mfspr r3,SPRN_HID1 - std r3,CS_HID1(r5) - mfspr r3,SPRN_HID4 - std r3,CS_HID4(r5) - mfspr r3,SPRN_HID5 - std r3,CS_HID5(r5) - -2: - mtcr r7 - blr - -/* Called with no MMU context (typically MSR:IR/DR off) to - * restore CPU state as backed up by the previous - * function. 
This does not include cache setting - */ -_GLOBAL(__restore_cpu_setup) - /* Get storage ptr (FIXME when using anton reloc as we - * are running with translation disabled here - */ - LOADADDR(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr - -1: /* Before accessing memory, we make sure rm_ci is clear */ - li r0,0 - mfspr r3,SPRN_HID4 - rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ - sync - mtspr SPRN_HID4,r3 - isync - sync - - /* Clear interrupt prefix */ - li r0,0 - sync - mtspr SPRN_HIOR,0 - isync - - /* Restore HID0 */ - ld r3,CS_HID0(r5) - sync - isync - mtspr SPRN_HID0,r3 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - sync - isync - - /* Restore HID1 */ - ld r3,CS_HID1(r5) - sync - isync - mtspr SPRN_HID1,r3 - mtspr SPRN_HID1,r3 - sync - isync - - /* Restore HID4 */ - ld r3,CS_HID4(r5) - sync - isync - mtspr SPRN_HID4,r3 - sync - isync - - /* Restore HID5 */ - ld r3,CS_HID5(r5) - sync - isync - mtspr SPRN_HID5,r3 - sync - isync - blr - diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c deleted file mode 100644 index 035d1b14a20..00000000000 --- a/arch/ppc64/kernel/eeh.c +++ /dev/null @@ -1,943 +0,0 @@ -/* - * eeh.c - * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/bootmem.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/notifier.h> -#include <linux/pci.h> -#include <linux/proc_fs.h> -#include <linux/rbtree.h> -#include <linux/seq_file.h> -#include <linux/spinlock.h> -#include <asm/eeh.h> -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/rtas.h> -#include <asm/atomic.h> -#include <asm/systemcfg.h> -#include <asm/ppc-pci.h> - -#undef DEBUG - -/** Overview: - * EEH, or "Extended Error Handling" is a PCI bridge technology for - * dealing with PCI bus errors that can't be dealt with within the - * usual PCI framework, except by check-stopping the CPU. Systems - * that are designed for high-availability/reliability cannot afford - * to crash due to a "mere" PCI error, thus the need for EEH. - * An EEH-capable bridge operates by converting a detected error - * into a "slot freeze", taking the PCI adapter off-line, making - * the slot behave, from the OS'es point of view, as if the slot - * were "empty": all reads return 0xff's and all writes are silently - * ignored. EEH slot isolation events can be triggered by parity - * errors on the address or data busses (e.g. during posted writes), - * which in turn might be caused by dust, vibration, humidity, - * radioactivity or plain-old failed hardware. 
- * - * Note, however, that one of the leading causes of EEH slot - * freeze events are buggy device drivers, buggy device microcode, - * or buggy device hardware. This is because any attempt by the - * device to bus-master data to a memory address that is not - * assigned to the device will trigger a slot freeze. (The idea - * is to prevent devices-gone-wild from corrupting system memory). - * Buggy hardware/drivers will have a miserable time co-existing - * with EEH. - * - * Ideally, a PCI device driver, when suspecting that an isolation - * event has occured (e.g. by reading 0xff's), will then ask EEH - * whether this is the case, and then take appropriate steps to - * reset the PCI slot, the PCI device, and then resume operations. - * However, until that day, the checking is done here, with the - * eeh_check_failure() routine embedded in the MMIO macros. If - * the slot is found to be isolated, an "EEH Event" is synthesized - * and sent out for processing. - */ - -/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ -#define BUID_HI(buid) ((buid) >> 32) -#define BUID_LO(buid) ((buid) & 0xffffffff) - -/* EEH event workqueue setup. */ -static DEFINE_SPINLOCK(eeh_eventlist_lock); -LIST_HEAD(eeh_eventlist); -static void eeh_event_handler(void *); -DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); - -static struct notifier_block *eeh_notifier_chain; - -/* - * If a device driver keeps reading an MMIO register in an interrupt - * handler after a slot isolation event has occurred, we assume it - * is broken and panic. This sets the threshold for how many read - * attempts we allow before panicking. - */ -#define EEH_MAX_FAILS 1000 -static atomic_t eeh_fail_count; - -/* RTAS tokens */ -static int ibm_set_eeh_option; -static int ibm_set_slot_reset; -static int ibm_read_slot_reset_state; -static int ibm_read_slot_reset_state2; -static int ibm_slot_error_detail; - -static int eeh_subsystem_enabled; - -/* Buffer for reporting slot-error-detail rtas calls */ -static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; -static DEFINE_SPINLOCK(slot_errbuf_lock); -static int eeh_error_buf_size; - -/* System monitoring statistics */ -static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); -static DEFINE_PER_CPU(unsigned long, false_positives); -static DEFINE_PER_CPU(unsigned long, ignored_failures); -static DEFINE_PER_CPU(unsigned long, slot_resets); - -/** - * The pci address cache subsystem. This subsystem places - * PCI device address resources into a red-black tree, sorted - * according to the address range, so that given only an i/o - * address, the corresponding PCI device can be **quickly** - * found. It is safe to perform an address lookup in an interrupt - * context; this ability is an important feature. - * - * Currently, the only customer of this code is the EEH subsystem; - * thus, this code has been somewhat tailored to suit EEH better. - * In particular, the cache does *not* hold the addresses of devices - * for which EEH is not enabled. - * - * (Implementation Note: The RB tree seems to be better/faster - * than any hash algo I could think of for this problem, even - * with the penalty of slow pointer chases for d-cache misses). 
- */ -struct pci_io_addr_range -{ - struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; - struct pci_dev *pcidev; - unsigned int flags; -}; - -static struct pci_io_addr_cache -{ - struct rb_root rb_root; - spinlock_t piar_lock; -} pci_io_addr_cache_root; - -static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) -{ - struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; - - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (addr < piar->addr_lo) { - n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->pcidev; - } - } - } - - return NULL; -} - -/** - * pci_get_device_by_addr - Get device, given only address - * @addr: mmio (PIO) phys address or i/o port number - * - * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset - * from zero (that is, they do *not* have pci_io_addr added in). - * It is safe to call this function within an interrupt. - */ -static struct pci_dev *pci_get_device_by_addr(unsigned long addr) -{ - struct pci_dev *dev; - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_get_device_by_addr(addr); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; -} - -#ifdef DEBUG -/* - * Handy-dandy debug print routine, does nothing more - * than print out the contents of our addr cache. - */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) -{ - struct rb_node *n; - int cnt = 0; - - n = rb_first(&cache->rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", - (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); - cnt++; - n = rb_next(n); - } -} -#endif - -/* Insert address range into the rb tree. */ -static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) -{ - struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct pci_io_addr_range *piar; - - /* Walk tree, find a place to insert into tree */ - while (*p) { - parent = *p; - piar = rb_entry(parent, struct pci_io_addr_range, rb_node); - if (alo < piar->addr_lo) { - p = &parent->rb_left; - } else if (ahi > piar->addr_hi) { - p = &parent->rb_right; - } else { - if (dev != piar->pcidev || - alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); - } - return piar; - } - } - piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); - if (!piar) - return NULL; - - piar->addr_lo = alo; - piar->addr_hi = ahi; - piar->pcidev = dev; - piar->flags = flags; - - rb_link_node(&piar->rb_node, parent, p); - rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); - - return piar; -} - -static void __pci_addr_cache_insert_device(struct pci_dev *dev) -{ - struct device_node *dn; - struct pci_dn *pdn; - int i; - int inserted = 0; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", - pci_name(dev)); - return; - } - - /* Skip any devices for which EEH is not enabled. 
*/ - pdn = dn->data; - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { -#ifdef DEBUG - printk(KERN_INFO "PCI: skip building address cache for=%s\n", - pci_name(dev)); -#endif - return; - } - - /* The cache holds a reference to the device... */ - pci_dev_get(dev); - - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); - - /* We are interested only bus addresses, not dma or other stuff */ - if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if (start == 0 || ~start == 0 || end == 0 || ~end == 0) - continue; - pci_addr_cache_insert(dev, start, end, flags); - inserted = 1; - } - - /* If there was nothing to add, the cache has no reference... */ - if (!inserted) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_insert_device - Add a device to the address cache - * @dev: PCI device whose I/O addresses we are interested in. - * - * In order to support the fast lookup of devices based on addresses, - * we maintain a cache of devices that can be quickly searched. - * This routine adds a device to that cache. - */ -void pci_addr_cache_insert_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) -{ - struct rb_node *n; - int removed = 0; - -restart: - n = rb_first(&pci_io_addr_cache_root.rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (piar->pcidev == dev) { - rb_erase(n, &pci_io_addr_cache_root.rb_root); - removed = 1; - kfree(piar); - goto restart; - } - n = rb_next(n); - } - - /* The cache no longer holds its reference to this device... */ - if (removed) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_remove_device - remove pci device from addr cache - * @dev: device to remove - * - * Remove a device from the addr-cache tree. - * This is potentially expensive, since it will walk - * the tree multiple times (once per resource). - * But so what; device removal doesn't need to be that fast. - */ -void pci_addr_cache_remove_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -/** - * pci_addr_cache_build - Build a cache of I/O addresses - * - * Build a cache of pci i/o addresses. This cache will be used to - * find the pci device that corresponds to a given address. - * This routine scans all pci busses to build the cache. - * Must be run late in boot process, after the pci controllers - * have been scaned for devices (after all device resources are known). - */ -void __init pci_addr_cache_build(void) -{ - struct pci_dev *dev = NULL; - - spin_lock_init(&pci_io_addr_cache_root.piar_lock); - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* Ignore PCI bridges ( XXX why ??) */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { - continue; - } - pci_addr_cache_insert_device(dev); - } - -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. 
*/ - pci_addr_cache_print(&pci_io_addr_cache_root); -#endif -} - -/* --------------------------------------------------------------- */ -/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ - -/** - * eeh_register_notifier - Register to find out about EEH events. - * @nb: notifier block to callback on events - */ -int eeh_register_notifier(struct notifier_block *nb) -{ - return notifier_chain_register(&eeh_notifier_chain, nb); -} - -/** - * eeh_unregister_notifier - Unregister to an EEH event notifier. - * @nb: notifier block to callback on events - */ -int eeh_unregister_notifier(struct notifier_block *nb) -{ - return notifier_chain_unregister(&eeh_notifier_chain, nb); -} - -/** - * read_slot_reset_state - Read the reset state of a device node's slot - * @dn: device node to read - * @rets: array to return results in - */ -static int read_slot_reset_state(struct device_node *dn, int rets[]) -{ - int token, outputs; - struct pci_dn *pdn = dn->data; - - if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { - token = ibm_read_slot_reset_state2; - outputs = 4; - } else { - token = ibm_read_slot_reset_state; - outputs = 3; - } - - return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr, - BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); -} - -/** - * eeh_panic - call panic() for an eeh event that cannot be handled. - * The philosophy of this routine is that it is better to panic and - * halt the OS than it is to risk possible data corruption by - * oblivious device drivers that don't know better. - * - * @dev pci device that had an eeh event - * @reset_state current reset state of the device slot - */ -static void eeh_panic(struct pci_dev *dev, int reset_state) -{ - /* - * XXX We should create a separate sysctl for this. - * - * Since the panic_on_oops sysctl is used to halt the system - * in light of potential corruption, we can use it here. - */ - if (panic_on_oops) - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, - pci_name(dev)); - else { - __get_cpu_var(ignored_failures)++; - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", - reset_state, pci_name(dev)); - } -} - -/** - * eeh_event_handler - dispatch EEH events. The detection of a frozen - * slot can occur inside an interrupt, where it can be hard to do - * anything about it. The goal of this routine is to pull these - * detection events out of the context of the interrupt handler, and - * re-dispatch them for processing at a later time in a normal context. - * - * @dummy - unused - */ -static void eeh_event_handler(void *dummy) -{ - unsigned long flags; - struct eeh_event *event; - - while (1) { - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - if (event == NULL) - break; - - printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " - "%s\n", event->reset_state, - pci_name(event->dev)); - - atomic_set(&eeh_fail_count, 0); - notifier_call_chain (&eeh_notifier_chain, - EEH_NOTIFY_FREEZE, event); - - __get_cpu_var(slot_resets)++; - - pci_dev_put(event->dev); - kfree(event); - } -} - -/** - * eeh_token_to_phys - convert EEH address token to phys address - * @token i/o token, should be address in the form 0xE.... 
- */ -static inline unsigned long eeh_token_to_phys(unsigned long token) -{ - pte_t *ptep; - unsigned long pa; - - ptep = find_linux_pte(init_mm.pgd, token); - if (!ptep) - return token; - pa = pte_pfn(*ptep) << PAGE_SHIFT; - - return pa | (token & (PAGE_SIZE-1)); -} - -/** - * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze - * @dn device node - * @dev pci device, if known - * - * Check for an EEH failure for the given device node. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze. This routine - * will query firmware for the EEH status. - * - * Returns 0 if there has not been an EEH error; otherwise returns - * a non-zero value and queues up a solt isolation event notification. - * - * It is safe to call this routine in an interrupt context. - */ -int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) -{ - int ret; - int rets[3]; - unsigned long flags; - int rc, reset_state; - struct eeh_event *event; - struct pci_dn *pdn; - - __get_cpu_var(total_mmio_ffs)++; - - if (!eeh_subsystem_enabled) - return 0; - - if (!dn) - return 0; - pdn = dn->data; - - /* Access to IO BARs might get this far and still not want checking. */ - if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { - return 0; - } - - if (!pdn->eeh_config_addr) { - return 0; - } - - /* - * If we already have a pending isolation event for this - * slot, we know it's bad already, we don't need to check... - */ - if (pdn->eeh_mode & EEH_MODE_ISOLATED) { - atomic_inc(&eeh_fail_count); - if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { - /* re-read the slot reset state */ - if (read_slot_reset_state(dn, rets) != 0) - rets[0] = -1; /* reset state unknown */ - eeh_panic(dev, rets[0]); - } - return 0; - } - - /* - * Now test for an EEH failure. This is VERY expensive. - * Note that the eeh_config_addr may be a parent device - * in the case of a device behind a bridge, or it may be - * function zero of a multi-function device. - * In any case they must share a common PHB. - */ - ret = read_slot_reset_state(dn, rets); - if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) { - __get_cpu_var(false_positives)++; - return 0; - } - - /* prevent repeated reports of this failure */ - pdn->eeh_mode |= EEH_MODE_ISOLATED; - - reset_state = rets[0]; - - spin_lock_irqsave(&slot_errbuf_lock, flags); - memset(slot_errbuf, 0, eeh_error_buf_size); - - rc = rtas_call(ibm_slot_error_detail, - 8, 1, NULL, pdn->eeh_config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), NULL, 0, - virt_to_phys(slot_errbuf), - eeh_error_buf_size, - 1 /* Temporary Error */); - - if (rc == 0) - log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); - spin_unlock_irqrestore(&slot_errbuf_lock, flags); - - printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", - rets[0], dn->name, dn->full_name); - event = kmalloc(sizeof(*event), GFP_ATOMIC); - if (event == NULL) { - eeh_panic(dev, reset_state); - return 1; - } - - event->dev = dev; - event->dn = dn; - event->reset_state = reset_state; - - /* We may or may not be called in an interrupt context */ - spin_lock_irqsave(&eeh_eventlist_lock, flags); - list_add(&event->list, &eeh_eventlist); - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - /* Most EEH events are due to device driver bugs. Having - * a stack trace will help the device-driver authors figure - * out what happened. So print that out. 
*/ - dump_stack(); - schedule_work(&eeh_event_wq); - - return 0; -} - -EXPORT_SYMBOL(eeh_dn_check_failure); - -/** - * eeh_check_failure - check if all 1's data is due to EEH slot freeze - * @token i/o token, should be address in the form 0xA.... - * @val value, should be all 1's (XXX why do we need this arg??) - * - * Check for an eeh failure at the given token address. - * Check for an EEH failure at the given token address. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine - * will query firmware for the EEH status. - * - * Note this routine is safe to call in an interrupt context. - */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) -{ - unsigned long addr; - struct pci_dev *dev; - struct device_node *dn; - - /* Finding the phys addr + pci device; this is pretty quick. */ - addr = eeh_token_to_phys((unsigned long __force) token); - dev = pci_get_device_by_addr(addr); - if (!dev) - return val; - - dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure (dn, dev); - - pci_dev_put(dev); - return val; -} - -EXPORT_SYMBOL(eeh_check_failure); - -struct eeh_early_enable_info { - unsigned int buid_hi; - unsigned int buid_lo; -}; - -/* Enable eeh for the given device node. */ -static void *early_enable_eeh(struct device_node *dn, void *data) -{ - struct eeh_early_enable_info *info = data; - int ret; - char *status = get_property(dn, "status", NULL); - u32 *class_code = (u32 *)get_property(dn, "class-code", NULL); - u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL); - u32 *device_id = (u32 *)get_property(dn, "device-id", NULL); - u32 *regs; - int enable; - struct pci_dn *pdn = dn->data; - - pdn->eeh_mode = 0; - - if (status && strcmp(status, "ok") != 0) - return NULL; /* ignore devices with bad status */ - - /* Ignore bad nodes. */ - if (!class_code || !vendor_id || !device_id) - return NULL; - - /* There is nothing to check on PCI to ISA bridges */ - if (dn->type && !strcmp(dn->type, "isa")) { - pdn->eeh_mode |= EEH_MODE_NOCHECK; - return NULL; - } - - /* - * Now decide if we are going to "Disable" EEH checking - * for this device. We still run with the EEH hardware active, - * but we won't be checking for ff's. This means a driver - * could return bad data (very bad!), an interrupt handler could - * hang waiting on status bits that won't change, etc. - * But there are a few cases like display devices that make sense. - */ - enable = 1; /* i.e. we will do checking */ - if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) - enable = 0; - - if (!enable) - pdn->eeh_mode |= EEH_MODE_NOCHECK; - - /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. */ - regs = (u32 *)get_property(dn, "reg", NULL); - if (regs) { - /* First register entry is addr (00BBSS00) */ - /* Try to enable eeh */ - ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - regs[0], info->buid_hi, info->buid_lo, - EEH_ENABLE); - if (ret == 0) { - eeh_subsystem_enabled = 1; - pdn->eeh_mode |= EEH_MODE_SUPPORTED; - pdn->eeh_config_addr = regs[0]; -#ifdef DEBUG - printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); -#endif - } else { - - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. */ - if (dn->parent && dn->parent->data - && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { - /* Parent supports EEH. 
*/ - pdn->eeh_mode |= EEH_MODE_SUPPORTED; - pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; - return NULL; - } - } - } else { - printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", - dn->full_name); - } - - return NULL; -} - -/* - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. - * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. - */ -void __init eeh_init(void) -{ - struct device_node *phb, *np; - struct eeh_early_enable_info info; - - np = of_find_node_by_path("/rtas"); - if (np == NULL) - return; - - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); - - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) - return; - - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - eeh_error_buf_size = 1024; - } - if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " - "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } - - /* Enable EEH for all adapters. Note that eeh requires buid's */ - for (phb = of_find_node_by_name(NULL, "pci"); phb; - phb = of_find_node_by_name(phb, "pci")) { - unsigned long buid; - struct pci_dn *pci; - - buid = get_phb_buid(phb); - if (buid == 0 || phb->data == NULL) - continue; - - pci = phb->data; - info.buid_lo = BUID_LO(buid); - info.buid_hi = BUID_HI(buid); - traverse_pci_devices(phb, early_enable_eeh, &info); - } - - if (eeh_subsystem_enabled) - printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else - printk(KERN_WARNING "EEH: No capable adapters found\n"); -} - -/** - * eeh_add_device_early - enable EEH for the indicated device_node - * @dn: device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. 
- */ -void eeh_add_device_early(struct device_node *dn) -{ - struct pci_controller *phb; - struct eeh_early_enable_info info; - - if (!dn || !dn->data) - return; - phb = PCI_DN(dn)->phb; - if (NULL == phb || 0 == phb->buid) { - printk(KERN_WARNING "EEH: Expected buid but found none\n"); - return; - } - - info.buid_hi = BUID_HI(phb->buid); - info.buid_lo = BUID_LO(phb->buid); - early_enable_eeh(dn, &info); -} -EXPORT_SYMBOL(eeh_add_device_early); - -/** - * eeh_add_device_late - perform EEH initialization for the indicated pci device - * @dev: pci device for which to set up EEH - * - * This routine must be used to complete EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - */ -void eeh_add_device_late(struct pci_dev *dev) -{ - if (!dev || !eeh_subsystem_enabled) - return; - -#ifdef DEBUG - printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev)); -#endif - - pci_addr_cache_insert_device (dev); -} -EXPORT_SYMBOL(eeh_add_device_late); - -/** - * eeh_remove_device - undo EEH setup for the indicated pci device - * @dev: pci device to be removed - * - * This routine should be when a device is removed from a running - * system (e.g. by hotplug or dlpar). - */ -void eeh_remove_device(struct pci_dev *dev) -{ - if (!dev || !eeh_subsystem_enabled) - return; - - /* Unregister the device with the EEH/PCI address search system */ -#ifdef DEBUG - printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); -#endif - pci_addr_cache_remove_device(dev); -} -EXPORT_SYMBOL(eeh_remove_device); - -static int proc_eeh_show(struct seq_file *m, void *v) -{ - unsigned int cpu; - unsigned long ffs = 0, positives = 0, failures = 0; - unsigned long resets = 0; - - for_each_cpu(cpu) { - ffs += per_cpu(total_mmio_ffs, cpu); - positives += per_cpu(false_positives, cpu); - failures += per_cpu(ignored_failures, cpu); - resets += per_cpu(slot_resets, cpu); - } - - if (0 == eeh_subsystem_enabled) { - seq_printf(m, "EEH Subsystem is globally disabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); - } else { - seq_printf(m, "EEH Subsystem is enabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%ld\n" - "eeh_false_positives=%ld\n" - "eeh_ignored_failures=%ld\n" - "eeh_slot_resets=%ld\n" - "eeh_fail_count=%d\n", - ffs, positives, failures, resets, - eeh_fail_count.counter); - } - - return 0; -} - -static int proc_eeh_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_eeh_show, NULL); -} - -static struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init eeh_init_proc(void) -{ - struct proc_dir_entry *e; - - if (systemcfg->platform & PLATFORM_PSERIES) { - e = create_proc_entry("ppc64/eeh", 0, NULL); - if (e) - e->proc_fops = &proc_eeh_operations; - } - - return 0; -} -__initcall(eeh_init_proc); diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c deleted file mode 100644 index d8432c0fb27..00000000000 --- a/arch/ppc64/kernel/firmware.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * arch/ppc64/kernel/firmware.c - * - * Extracted from cputable.c - * - * Copyright (C) 2001 Ben. 
Herrenschmidt (benh@kernel.crashing.org) - * - * Modifications for ppc64: - * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> - * Copyright (C) 2005 Stephen Rothwell, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> - -#include <asm/firmware.h> - -unsigned long ppc64_firmware_features; - -#ifdef CONFIG_PPC_PSERIES -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; -#endif diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index db1cf397be2..1c869ea72d2 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -28,7 +28,6 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/systemcfg.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/bug.h> @@ -195,11 +194,11 @@ exception_marker: #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 -#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 -#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 +#define EX_R3 64 +#define EX_LR 72 #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ @@ -419,17 +418,22 @@ data_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_DAR - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. 
branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -440,17 +444,22 @@ instruction_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) STD_EXCEPTION_PSERIES(0x600, alignment) @@ -509,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries) EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) /* + * We have some room here we use that to put + * the peries slb miss user trampoline code so it's reasonably + * away from slb_miss_user_common to avoid problems with rfid + * + * This is used for when the SLB miss handler has to go virtual, + * which doesn't happen for now anymore but will once we re-implement + * dynamic VSIDs for shared page tables + */ +#ifdef __DISABLED__ +slb_miss_user_pseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + clrrdi r12,r13,32 + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + rfid + b . /* prevent spec. execution */ +#endif /* __DISABLED__ */ + +/* * Vectors for the FWNMI option. Share common code. 
*/ .globl system_reset_fwnmi @@ -559,22 +600,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_iSeries data_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,SPRN_DAR - b .do_slb_miss + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) .globl instruction_access_slb_iSeries instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) - b .do_slb_miss + ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge .slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode + +#ifdef __DISABLED__ +slb_miss_user_iseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + EXCEPTION_PROLOG_ISERIES_2 + b slb_miss_user_common +#endif MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) @@ -809,6 +887,126 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . 
+ +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b .handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contains the saved SRR1; SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + .align 7 .globl hardware_interrupt_common .globl hardware_interrupt_entry @@ -1139,62 +1337,6 @@ _GLOBAL(do_stab_bolted) b . /* prevent speculative execution */ /* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(do_slb_miss) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate /* handle it */ - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - -/* * Space for CPU0's segment table.
* * On iSeries, the hypervisor must fill in at least one entry before @@ -1558,18 +1700,9 @@ _GLOBAL(__secondary_start) HMT_MEDIUM /* Set thread priority to MEDIUM */ ld r2,PACATOC(r13) - li r6,0 - stb r6,PACAPROCENABLED(r13) - -#ifndef CONFIG_PPC_ISERIES - /* Initialize the page table pointer register. */ - LOADADDR(r6,_SDR1) - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -#endif - /* Initialize the first segment table (or SLB) entry */ - ld r3,PACASTABVIRT(r13) /* get addr of segment table */ - bl .stab_initialize + + /* Do early setup for that CPU */ + bl .early_setup_secondary /* Initialize the kernel stack. Just a repeat for iSeries. */ LOADADDR(r3,current_set) @@ -1578,37 +1711,6 @@ _GLOBAL(__secondary_start) addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD std r1,PACAKSAVE(r13) - ld r3,PACASTABREAL(r13) /* get raddr of segment table */ - ori r4,r3,1 /* turn on valid bit */ - -#ifdef CONFIG_PPC_ISERIES - li r0,-1 /* hypervisor call */ - li r3,1 - sldi r3,r3,63 /* 0x8000000000000000 */ - ori r3,r3,4 /* 0x8000000000000004 */ - sc /* HvCall_setASR */ -#else - /* set the ASR */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: -#endif li r7,0 mtlr r7 @@ -1750,40 +1852,6 @@ _STATIC(start_here_multiplatform) mr r3,r31 bl .early_setup - /* set the ASR */ - ld r3,PACASTABREAL(r13) - ori r4,r3,1 /* turn on valid bit */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: - /* Set SDR1 (hash table pointer) */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - /* Test if bit 0 is set (LPAR bit) */ - andi. 
r3,r3,PLATFORM_LPAR - bne 98f /* branch if result is !0 */ - LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ - sub r6,r6,r26 - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -98: LOADADDR(r3,.start_here_common) SET_REG_TO_CONST(r4, MSR_KERNEL) mtspr SPRN_SRR0,r3 diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 8abd2ad9283..b879d3057ef 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -26,22 +26,18 @@ #include <asm/processor.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/systemcfg.h> #include <asm/machdep.h> +#include <asm/smp.h> extern void power4_idle(void); void default_idle(void) { - long oldval; unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); @@ -54,13 +50,12 @@ void default_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -76,7 +71,9 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } if (cpu_is_offline(smp_processor_id()) && diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c deleted file mode 100644 index ba4a899045c..00000000000 --- a/arch/ppc64/kernel/ioctl32.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Based on sparc64 ioctl32.c by: - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * - * ppc64 changes: - * - * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) - * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define INCLUDES -#include "compat_ioctl.c" -#include <linux/syscalls.h> - -#define CODE -#include "compat_ioctl.c" - -#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) - -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#include <linux/compat_ioctl.h> -#define DECLARES -#include "compat_ioctl.c" - -/* Little p (/dev/rtc, /dev/envctrl, etc.) 
*/ -COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */ -COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */ - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c deleted file mode 100644 index 87474584033..00000000000 --- a/arch/ppc64/kernel/irq.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * arch/ppc/kernel/irq.c - * - * Derived from arch/i386/kernel/irq.c - * Copyright (C) 1992 Linus Torvalds - * Adapted from arch/i386 by Gary Thomas - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Updated and modified by Cort Dougan (cort@cs.nmt.edu) - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras - * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/threads.h> -#include <linux/kernel_stat.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/config.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/proc_fs.h> -#include <linux/random.h> -#include <linux/kallsyms.h> -#include <linux/profile.h> -#include <linux/bitops.h> - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/cache.h> -#include <asm/prom.h> -#include <asm/ptrace.h> -#include <asm/iseries/it_lp_queue.h> -#include <asm/machdep.h> -#include <asm/paca.h> - -#ifdef CONFIG_SMP -extern void iSeries_smp_message_recv( struct pt_regs * ); -#endif - -extern irq_desc_t irq_desc[NR_IRQS]; -EXPORT_SYMBOL(irq_desc); - -int distribute_irqs = 1; -int __irq_offset_value; -int ppc_spurious_interrupts; -u64 ppc64_interrupt_controller; - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - irq_desc_t *desc; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for (j=0; j<NR_CPUS; j++) { - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); - } - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); - action = desc->action; - if (!action || !action->handler) - goto skip; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP - for (j = 0; j < NR_CPUS; j++) { - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); - } -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ - if (desc->handler) - seq_printf(p, " %s ", desc->handler->typename ); - else - seq_printf(p, " None "); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? 
"Level " : "Edge "); - seq_printf(p, " %s",action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == NR_IRQS) - seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) -{ - unsigned int irq; - static int warned; - - for_each_irq(irq) { - cpumask_t mask; - - if (irq_desc[irq].status & IRQ_PER_CPU) - continue; - - cpus_and(mask, irq_affinity[irq], map); - if (any_online_cpu(mask) == NR_CPUS) { - printk("Breaking affinity for irq %i\n", irq); - mask = map; - } - if (irq_desc[irq].handler->set_affinity) - irq_desc[irq].handler->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); - } - - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif - -extern int noirqdebug; - -/* - * Eventually, this should take an array of interrupts and an array size - * so it can dispatch multiple interrupts. - */ -void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq) -{ - int status; - struct irqaction *action; - int cpu = smp_processor_id(); - irq_desc_t *desc = get_irq_desc(irq); - irqreturn_t action_ret; -#ifdef CONFIG_IRQSTACKS - struct thread_info *curtp, *irqtp; -#endif - - kstat_cpu(cpu).irqs[irq]++; - - if (desc->status & IRQ_PER_CPU) { - /* no locking required for CPU-local interrupts: */ - ack_irq(irq); - action_ret = handle_IRQ_event(irq, regs, desc->action); - desc->handler->end(irq); - return; - } - - spin_lock(&desc->lock); - ack_irq(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - if (!action || !action->handler) { - ppc_spurious_interrupts++; - printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq); - /* We can't call disable_irq here, it would deadlock */ - if (!desc->depth) - desc->depth = 1; - desc->status |= IRQ_DISABLED; - /* This is not a real spurrious interrupt, we - * have to eoi it, so we jump to out - */ - mask_irq(irq); - goto out; - } - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. 
- */ - for (;;) { - spin_unlock(&desc->lock); - -#ifdef CONFIG_IRQSTACKS - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - if (curtp != irqtp) { - irqtp->task = curtp->task; - irqtp->flags = 0; - action_ret = call_handle_IRQ_event(irq, regs, action, irqtp); - irqtp->task = NULL; - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); - } else -#endif - action_ret = handle_IRQ_event(irq, regs, action); - - spin_lock(&desc->lock); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } -out: - desc->status &= ~IRQ_INPROGRESS; - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - if (desc->handler) { - if (desc->handler->end) - desc->handler->end(irq); - else if (desc->handler->enable) - desc->handler->enable(irq); - } - spin_unlock(&desc->lock); -} - -#ifdef CONFIG_PPC_ISERIES -void do_IRQ(struct pt_regs *regs) -{ - struct paca_struct *lpaca; - - irq_enter(); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 2KB free? */ - { - long sp; - - sp = __get_SP() & (THREAD_SIZE-1); - - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - - lpaca = get_paca(); -#ifdef CONFIG_SMP - if (lpaca->lppaca.int_dword.fields.ipi_cnt) { - lpaca->lppaca.int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(regs); - } -#endif /* CONFIG_SMP */ - if (hvlpevent_is_pending()) - process_hvlpevents(regs); - - irq_exit(); - - if (lpaca->lppaca.int_dword.fields.decr_int) { - lpaca->lppaca.int_dword.fields.decr_int = 0; - /* Signal a fake decrementer interrupt */ - timer_interrupt(regs); - } -} - -#else /* CONFIG_PPC_ISERIES */ - -void do_IRQ(struct pt_regs *regs) -{ - int irq; - - irq_enter(); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 2KB free? */ - { - long sp; - - sp = __get_SP() & (THREAD_SIZE-1); - - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - - irq = ppc_md.get_irq(regs); - - if (irq >= 0) - ppc_irq_dispatch_handler(regs, irq); - else - /* That's not SMP safe ... but who cares ? */ - ppc_spurious_interrupts++; - - irq_exit(); -} -#endif /* CONFIG_PPC_ISERIES */ - -void __init init_IRQ(void) -{ - static int once = 0; - - if (once) - return; - - once++; - - ppc_md.init_IRQ(); - irq_ctx_init(); -} - -#ifndef CONFIG_PPC_ISERIES -/* - * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. - */ - -#define UNDEFINED_IRQ 0xffffffff -unsigned int virt_irq_to_real_map[NR_IRQS]; - -/* - * Don't use virtual irqs 0, 1, 2 for devices. - * The pcnet32 driver considers interrupt numbers < 2 to be invalid, - * and 2 is the XICS IPI interrupt. - * We limit virtual irqs to 17 less than NR_IRQS so that when we - * offset them by 16 (to reserve the first 16 for ISA interrupts) - * we don't end up with an interrupt number >= NR_IRQS. 
- */ -#define MIN_VIRT_IRQ 3 -#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1) -#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1) - -void -virt_irq_init(void) -{ - int i; - for (i = 0; i < NR_IRQS; i++) - virt_irq_to_real_map[i] = UNDEFINED_IRQ; -} - -/* Create a mapping for a real_irq if it doesn't already exist. - * Return the virtual irq as a convenience. - */ -int virt_irq_create_mapping(unsigned int real_irq) -{ - unsigned int virq, first_virq; - static int warned; - - if (ppc64_interrupt_controller == IC_OPEN_PIC) - return real_irq; /* no mapping for openpic (for now) */ - - if (ppc64_interrupt_controller == IC_CELL_PIC) - return real_irq; /* no mapping for iic either */ - - /* don't map interrupts < MIN_VIRT_IRQ */ - if (real_irq < MIN_VIRT_IRQ) { - virt_irq_to_real_map[real_irq] = real_irq; - return real_irq; - } - - /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */ - virq = real_irq; - if (virq > MAX_VIRT_IRQ) - virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; - - /* search for this number or a free slot */ - first_virq = virq; - while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; - if (++virq > MAX_VIRT_IRQ) - virq = MIN_VIRT_IRQ; - if (virq == first_virq) - goto nospace; /* oops, no free slots */ - } - - virt_irq_to_real_map[virq] = real_irq; - return virq; - - nospace: - if (!warned) { - printk(KERN_CRIT "Interrupt table is full\n"); - printk(KERN_CRIT "Increase NR_IRQS (currently %d) " - "in your kernel sources and rebuild.\n", NR_IRQS); - warned = 1; - } - return NO_IRQ; -} - -/* - * In most cases will get a hit on the very first slot checked in the - * virt_irq_to_real_map. Only when there are a large number of - * IRQs will this be expensive. - */ -unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) -{ - unsigned int virq; - unsigned int first_virq; - - virq = real_irq; - - if (virq > MAX_VIRT_IRQ) - virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; - - first_virq = virq; - - do { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; - - virq++; - - if (virq >= MAX_VIRT_IRQ) - virq = 0; - - } while (first_virq != virq); - - return NO_IRQ; - -} - -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_IRQSTACKS -struct thread_info *softirq_ctx[NR_CPUS]; -struct thread_info *hardirq_ctx[NR_CPUS]; - -void irq_ctx_init(void) -{ - struct thread_info *tp; - int i; - - for_each_cpu(i) { - memset((void *)softirq_ctx[i], 0, THREAD_SIZE); - tp = softirq_ctx[i]; - tp->cpu = i; - tp->preempt_count = SOFTIRQ_OFFSET; - - memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); - tp = hardirq_ctx[i]; - tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; - } -} - -void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curtp, *irqtp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curtp = current_thread_info(); - irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - call_do_softirq(irqtp); - irqtp->task = NULL; - } - - local_irq_restore(flags); -} -EXPORT_SYMBOL(do_softirq); - -#endif /* CONFIG_IRQSTACKS */ - -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index ed876a5178a..511af54e623 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -30,19 +30,14 @@ #include <linux/config.h> #include <linux/kprobes.h> #include <linux/ptrace.h> -#include 
<linux/spinlock.h> #include <linux/preempt.h> #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> static DECLARE_MUTEX(kprobe_mutex); - -static struct kprobe *current_kprobe; -static unsigned long kprobe_status, kprobe_saved_msr; -static struct kprobe *kprobe_prev; -static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; -static struct pt_regs jprobe_saved_regs; +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); int __kprobes arch_prepare_kprobe(struct kprobe *p) { @@ -108,20 +103,28 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)p->ainsn.insn; } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; - kprobe_saved_msr_prev = kprobe_saved_msr; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } -static inline void restore_previous_kprobe(void) +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; - kprobe_saved_msr = kprobe_saved_msr_prev; + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_msr = regs->msr; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -145,19 +148,24 @@ static inline int kprobe_handler(struct pt_regs *regs) struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check we're not actually recursing */ if (kprobe_running()) { - /* We *are* holding lock here, so this is safe. - Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { kprobe_opcode_t insn = *p->ainsn.insn; - if (kprobe_status == KPROBE_HIT_SS && + if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { regs->msr &= ~MSR_SE; - regs->msr |= kprobe_saved_msr; - unlock_kprobes(); + regs->msr |= kcb->kprobe_saved_msr; goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -166,27 +174,24 @@ static inline int kprobe_handler(struct pt_regs *regs) * just single step on the instruction of the new probe * without calling any user handlers. */ - save_previous_kprobe(); - current_kprobe = p; - kprobe_saved_msr = regs->msr; + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_saved_msr = regs->msr; p->nmissed++; prepare_singlestep(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } else { - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } } - /* If it's not ours, can't be delete race, (we hold lock). 
*/ goto no_kprobe; } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (*addr != BREAKPOINT_INSTRUCTION) { /* * PowerPC has multiple variants of the "trap" @@ -209,24 +214,19 @@ static inline int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_msr = regs->msr; + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p, regs, kcb); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ return 1; ss_probe: prepare_singlestep(p, regs); - kprobe_status = KPROBE_HIT_SS; - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler(). - */ - preempt_disable(); + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -251,9 +251,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* @@ -292,12 +293,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->nip = orig_ret_address; - unlock_kprobes(); + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); + preempt_enable_no_resched(); /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -323,23 +326,26 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static inline int post_kprobe_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); - regs->msr |= kprobe_saved_msr; + resume_execution(cur, regs); + regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; } - unlock_kprobes(); + reset_current_kprobe(); out: preempt_enable_no_resched(); @@ -354,19 +360,20 @@ out: return 1; } -/* Interrupts disabled, kprobe_lock held. 
*/ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { - if (current_kprobe->fault_handler - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); regs->msr &= ~MSR_SE; - regs->msr |= kprobe_saved_msr; + regs->msr |= kcb->kprobe_saved_msr; - unlock_kprobes(); + reset_current_kprobe(); preempt_enable_no_resched(); } return 0; @@ -381,11 +388,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - /* - * Interrupts are not disabled here. We need to disable - * preemption, because kprobe_running() uses smp_processor_id(). - */ - preempt_disable(); switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) @@ -396,22 +398,25 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - preempt_enable_no_resched(); return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); @@ -431,12 +436,15 @@ void __kprobes jprobe_return_end(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* * FIXME - we should ideally be validating that we got here 'cos * of the "trap" in jprobe_return() above, before restoring the * saved regs... */ - memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + preempt_enable_no_resched(); return 1; } diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c deleted file mode 100644 index e86155770bb..00000000000 --- a/arch/ppc64/kernel/lparcfg.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * PowerPC64 LPAR Configuration Information Driver - * - * Dave Engebretsen engebret@us.ibm.com - * Copyright (c) 2003 Dave Engebretsen - * Will Schmidt willschm@us.ibm.com - * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. - * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. - * Nathan Lynch nathanl@austin.ibm.com - * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This driver creates a proc file at /proc/ppc64/lparcfg which contains - * keyword - value pairs that specify the configuration of the partition. 
- */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <asm/uaccess.h> -#include <asm/iseries/hv_lp_config.h> -#include <asm/lppaca.h> -#include <asm/hvcall.h> -#include <asm/firmware.h> -#include <asm/rtas.h> -#include <asm/system.h> -#include <asm/time.h> -#include <asm/iseries/it_exp_vpd_panel.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.6" -#define MODULE_NAME "lparcfg" - -/* #define LPARCFG_DEBUG */ - -/* find a better place for this function... */ -void log_plpar_hcall_return(unsigned long rc, char *tag) -{ - if (rc == 0) /* success, return */ - return; -/* check for null tag ? */ - if (rc == H_Hardware) - printk(KERN_INFO - "plpar-hcall (%s) failed with hardware fault\n", tag); - else if (rc == H_Function) - printk(KERN_INFO - "plpar-hcall (%s) failed; function not allowed\n", tag); - else if (rc == H_Authority) - printk(KERN_INFO - "plpar-hcall (%s) failed; not authorized to this function\n", - tag); - else if (rc == H_Parameter) - printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", - tag); - else - printk(KERN_INFO - "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", - tag, rc); - -} - -static struct proc_dir_entry *proc_ppc64_lparcfg; -#define LPARCFG_BUFF_SIZE 4096 - -#ifdef CONFIG_PPC_ISERIES - -/* - * For iSeries legacy systems, the PPA purr function is available from the - * emulated_time_base field in the paca. - */ -static unsigned long get_purr(void) -{ - unsigned long sum_purr = 0; - int cpu; - struct paca_struct *lpaca; - - for_each_cpu(cpu) { - lpaca = paca + cpu; - sum_purr += lpaca->lppaca.emulated_time_base; - -#ifdef PURR_DEBUG - printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n", - cpu, lpaca->lppaca.emulated_time_base); -#endif - } - return sum_purr; -} - -#define lparcfg_write NULL - -/* - * Methods used to fetch LPAR data when running on an iSeries platform. 
- */ -static int lparcfg_data(struct seq_file *m, void *v) -{ - unsigned long pool_id, lp_index; - int shared, entitled_capacity, max_entitled_capacity; - int processors, max_processors; - struct paca_struct *lpaca = get_paca(); - unsigned long purr = get_purr(); - - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); - - shared = (int)(lpaca->lppaca_ptr->shared_proc); - seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n", - e2a(xItExtVpdPanel.mfgID[2]), - e2a(xItExtVpdPanel.mfgID[3]), - e2a(xItExtVpdPanel.systemSerial[1]), - e2a(xItExtVpdPanel.systemSerial[2]), - e2a(xItExtVpdPanel.systemSerial[3]), - e2a(xItExtVpdPanel.systemSerial[4]), - e2a(xItExtVpdPanel.systemSerial[5])); - - seq_printf(m, "system_type=%c%c%c%c\n", - e2a(xItExtVpdPanel.machineType[0]), - e2a(xItExtVpdPanel.machineType[1]), - e2a(xItExtVpdPanel.machineType[2]), - e2a(xItExtVpdPanel.machineType[3])); - - lp_index = HvLpConfig_getLpIndex(); - seq_printf(m, "partition_id=%d\n", (int)lp_index); - - seq_printf(m, "system_active_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - seq_printf(m, "system_potential_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - processors = (int)HvLpConfig_getPhysicalProcessors(); - seq_printf(m, "partition_active_processors=%d\n", processors); - - max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); - seq_printf(m, "partition_potential_processors=%d\n", max_processors); - - if (shared) { - entitled_capacity = HvLpConfig_getSharedProcUnits(); - max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); - } else { - entitled_capacity = processors * 100; - max_entitled_capacity = max_processors * 100; - } - seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - max_entitled_capacity); - - if (shared) { - pool_id = HvLpConfig_getSharedPoolIndex(); - seq_printf(m, "pool=%d\n", (int)pool_id); - seq_printf(m, "pool_capacity=%d\n", - (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * - 100)); - seq_printf(m, "purr=%ld\n", purr); - } - - seq_printf(m, "shared_processor_mode=%d\n", shared); - - return 0; -} -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_PSERIES -/* - * Methods used to fetch LPAR data when running on a pSeries platform. - */ - -/* - * H_GET_PPP hcall returns info in 4 parms. - * entitled_capacity,unallocated_capacity, - * aggregation, resource_capability). - * - * R4 = Entitled Processor Capacity Percentage. - * R5 = Unallocated Processor Capacity Percentage. - * R6 (AABBCCDDEEFFGGHH). - * XXXX - reserved (0) - * XXXX - reserved (0) - * XXXX - Group Number - * XXXX - Pool Number. - * R7 (IIJJKKLLMMNNOOPP). - * XX - reserved. (0) - * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. - * XX - variable processor Capacity Weight - * XX - Unallocated Variable Processor Capacity Weight. - * XXXX - Active processors in Physical Processor Pool. - * XXXX - Processors active on platform. 
- */ -static unsigned int h_get_ppp(unsigned long *entitled, - unsigned long *unallocated, - unsigned long *aggregation, - unsigned long *resource) -{ - unsigned long rc; - rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated, - aggregation, resource); - - log_plpar_hcall_return(rc, "H_GET_PPP"); - - return rc; -} - -static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) -{ - unsigned long rc; - unsigned long dummy; - rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy); - - log_plpar_hcall_return(rc, "H_PIC"); -} - -static unsigned long get_purr(void); - -/* Track sum of all purrs across all processors. This is used to further */ -/* calculate usage values by different applications */ - -static unsigned long get_purr(void) -{ - unsigned long sum_purr = 0; - int cpu; - struct cpu_usage *cu; - - for_each_cpu(cpu) { - cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; - } - return sum_purr; -} - -#define SPLPAR_CHARACTERISTICS_TOKEN 20 -#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) - -/* - * parse_system_parameter_string() - * Retrieve the potential_processors, max_entitled_capacity and friends - * through the get-system-parameter rtas call. Replace keyword strings as - * necessary. - */ -static void parse_system_parameter_string(struct seq_file *m) -{ - int call_status; - - char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", - __FILE__, __FUNCTION__, __LINE__); - return; - } - - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - SPLPAR_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf)); - memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); - spin_unlock(&rtas_data_buf_lock); - - if (call_status != 0) { - printk(KERN_INFO - "%s %s Error calling get-system-parameter (0x%x)\n", - __FILE__, __FUNCTION__, call_status); - } else { - int splpar_strlen; - int idx, w_idx; - char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", - __FILE__, __FUNCTION__, __LINE__); - kfree(local_buffer); - return; - } -#ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter \n"); -#endif - splpar_strlen = local_buffer[0] * 16 + local_buffer[1]; - local_buffer += 2; /* step over strlen value */ - - memset(workbuffer, 0, SPLPAR_MAXLENGTH); - w_idx = 0; - idx = 0; - while ((*local_buffer) && (idx < splpar_strlen)) { - workbuffer[w_idx++] = local_buffer[idx++]; - if ((local_buffer[idx] == ',') - || (local_buffer[idx] == '\0')) { - workbuffer[w_idx] = '\0'; - if (w_idx) { - /* avoid the empty string */ - seq_printf(m, "%s\n", workbuffer); - } - memset(workbuffer, 0, SPLPAR_MAXLENGTH); - idx++; /* skip the comma */ - w_idx = 0; - } else if (local_buffer[idx] == '=') { - /* code here to replace workbuffer contents - with different keyword strings */ - if (0 == strcmp(workbuffer, "MaxEntCap")) { - strcpy(workbuffer, - "partition_max_entitled_capacity"); - w_idx = strlen(workbuffer); - } - if (0 == strcmp(workbuffer, "MaxPlatProcs")) { - strcpy(workbuffer, - "system_potential_processors"); - w_idx = strlen(workbuffer); - } - } - } - kfree(workbuffer); - local_buffer -= 2; /* back up over strlen value */ - } - kfree(local_buffer); -} - -static int lparcfg_count_active_processors(void); - -/* Return the number of processors in the system. 
- * This function reads through the device tree and counts - * the virtual processors, this does not include threads. - */ -static int lparcfg_count_active_processors(void) -{ - struct device_node *cpus_dn = NULL; - int count = 0; - - while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { -#ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p \n", cpus_dn); -#endif - count++; - } - return count; -} - -static int lparcfg_data(struct seq_file *m, void *v) -{ - int partition_potential_processors; - int partition_active_processors; - struct device_node *rootdn; - const char *model = ""; - const char *system_id = ""; - unsigned int *lp_index_ptr, lp_index = 0; - struct device_node *rtas_node; - int *lrdrp; - - rootdn = find_path_device("/"); - if (rootdn) { - model = get_property(rootdn, "model", NULL); - system_id = get_property(rootdn, "system-id", NULL); - lp_index_ptr = (unsigned int *) - get_property(rootdn, "ibm,partition-no", NULL); - if (lp_index_ptr) - lp_index = *lp_index_ptr; - } - - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); - - seq_printf(m, "serial_number=%s\n", system_id); - - seq_printf(m, "system_type=%s\n", model); - - seq_printf(m, "partition_id=%d\n", (int)lp_index); - - rtas_node = find_path_device("/rtas"); - lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL); - - if (lrdrp == NULL) { - partition_potential_processors = systemcfg->processorCount; - } else { - partition_potential_processors = *(lrdrp + 4); - } - - partition_active_processors = lparcfg_count_active_processors(); - - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - unsigned long h_entitled, h_unallocated; - unsigned long h_aggregation, h_resource; - unsigned long pool_idle_time, pool_procs; - unsigned long purr; - - h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, - &h_resource); - - seq_printf(m, "R4=0x%lx\n", h_entitled); - seq_printf(m, "R5=0x%lx\n", h_unallocated); - seq_printf(m, "R6=0x%lx\n", h_aggregation); - seq_printf(m, "R7=0x%lx\n", h_resource); - - purr = get_purr(); - - /* this call handles the ibm,get-system-parameter contents */ - parse_system_parameter_string(m); - - seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); - - seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); - - seq_printf(m, "system_active_processors=%ld\n", - (h_resource >> 0 * 8) & 0xffff); - - /* pool related entries are apropriate for shared configs */ - if (paca[0].lppaca.shared_proc) { - - h_pic(&pool_idle_time, &pool_procs); - - seq_printf(m, "pool=%ld\n", - (h_aggregation >> 0 * 8) & 0xffff); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%ld\n", - ((h_resource >> 2 * 8) & 0xffff) * 100); - - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%ld\n", - (h_resource >> 4 * 8) & 0xFF); - - seq_printf(m, "capacity_weight=%ld\n", - (h_resource >> 5 * 8) & 0xFF); - - seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); - - seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); - - seq_printf(m, "purr=%ld\n", purr); - - } else { /* non SPLPAR case */ - - seq_printf(m, "system_active_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "system_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - partition_potential_processors * 100); - - seq_printf(m, "partition_entitled_capacity=%d\n", - partition_active_processors * 100); - } - - 
seq_printf(m, "partition_active_processors=%d\n", - partition_active_processors); - - seq_printf(m, "partition_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc); - - return 0; -} - -/* - * Interface for changing system parameters (variable capacity weight - * and entitled capacity). Format of input is "param_name=value"; - * anything after value is ignored. Valid parameters at this time are - * "partition_entitled_capacity" and "capacity_weight". We use - * H_SET_PPP to alter parameters. - * - * This function should be invoked only on systems with - * FW_FEATURE_SPLPAR. - */ -static ssize_t lparcfg_write(struct file *file, const char __user * buf, - size_t count, loff_t * off) -{ - char *kbuf; - char *tmp; - u64 new_entitled, *new_entitled_ptr = &new_entitled; - u8 new_weight, *new_weight_ptr = &new_weight; - - unsigned long current_entitled; /* parameters for h_get_ppp */ - unsigned long dummy; - unsigned long resource; - u8 current_weight; - - ssize_t retval = -ENOMEM; - - kbuf = kmalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; - - retval = -EFAULT; - if (copy_from_user(kbuf, buf, count)) - goto out; - - retval = -EINVAL; - kbuf[count - 1] = '\0'; - tmp = strchr(kbuf, '='); - if (!tmp) - goto out; - - *tmp++ = '\0'; - - if (!strcmp(kbuf, "partition_entitled_capacity")) { - char *endp; - *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - goto out; - new_weight_ptr = ¤t_weight; - } else if (!strcmp(kbuf, "capacity_weight")) { - char *endp; - *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - goto out; - new_entitled_ptr = ¤t_entitled; - } else - goto out; - - /* Get our current parameters */ - retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); - if (retval) { - retval = -EIO; - goto out; - } - - current_weight = (resource >> 5 * 8) & 0xFF; - - pr_debug("%s: current_entitled = %lu, current_weight = %lu\n", - __FUNCTION__, current_entitled, current_weight); - - pr_debug("%s: new_entitled = %lu, new_weight = %lu\n", - __FUNCTION__, *new_entitled_ptr, *new_weight_ptr); - - retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, - *new_weight_ptr); - - if (retval == H_Success || retval == H_Constrained) { - retval = count; - } else if (retval == H_Busy) { - retval = -EBUSY; - } else if (retval == H_Hardware) { - retval = -EIO; - } else if (retval == H_Parameter) { - retval = -EINVAL; - } else { - printk(KERN_WARNING "%s: received unknown hv return code %ld", - __FUNCTION__, retval); - retval = -EIO; - } - - out: - kfree(kbuf); - return retval; -} - -#endif /* CONFIG_PPC_PSERIES */ - -static int lparcfg_open(struct inode *inode, struct file *file) -{ - return single_open(file, lparcfg_data, NULL); -} - -struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, - .read = seq_read, - .open = lparcfg_open, - .release = single_release, -}; - -int __init lparcfg_init(void) -{ - struct proc_dir_entry *ent; - mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; - - /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - lparcfg_fops.write = lparcfg_write; - mode |= S_IWUSR; - } - - ent = create_proc_entry("ppc64/lparcfg", mode, NULL); - if (ent) { - ent->proc_fops = &lparcfg_fops; - ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL); - if (!ent->data) { - printk(KERN_ERR - "Failed to allocate buffer for lparcfg\n"); - remove_proc_entry("lparcfg", ent->parent); - return -ENOMEM; - } - } else { - 
printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); - return -EIO; - } - - proc_ppc64_lparcfg = ent; - return 0; -} - -void __exit lparcfg_cleanup(void) -{ - if (proc_ppc64_lparcfg) { - if (proc_ppc64_lparcfg->data) { - kfree(proc_ppc64_lparcfg->data); - } - remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); - } -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c index ff8679f260f..07ea03598c0 100644 --- a/arch/ppc64/kernel/machine_kexec.c +++ b/arch/ppc64/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include <asm/mmu.h> #include <asm/sections.h> /* _end */ #include <asm/prom.h> +#include <asm/smp.h> #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 077507ffbab..492bca6137e 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -78,12 +78,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_IRQ_event) +_GLOBAL(call___do_IRQ) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r6) - mr r1,r6 - bl .handle_IRQ_event + stdu r1,THREAD_SIZE-112(r5) + mr r1,r5 + bl .__do_IRQ ld r1,0(r1) ld r0,16(r1) mtlr r0 @@ -560,7 +560,7 @@ _GLOBAL(real_readb) isync blr - /* +/* * Do an IO access in real mode */ _GLOBAL(real_writeb) @@ -593,6 +593,76 @@ _GLOBAL(real_writeb) #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits + * (including parity). On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits + * (including parity). On current CPUs they must be 0'd.
+ */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c index 4fb1a9f5060..c0fcd29918c 100644 --- a/arch/ppc64/kernel/nvram.c +++ b/arch/ppc64/kernel/nvram.c @@ -31,7 +31,6 @@ #include <asm/rtas.h> #include <asm/prom.h> #include <asm/machdep.h> -#include <asm/systemcfg.h> #undef DEBUG_NVRAM @@ -167,7 +166,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (systemcfg->platform != PLATFORM_POWERMAC) + if (_machine != PLATFORM_POWERMAC) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; @@ -450,7 +449,7 @@ static int nvram_setup_partition(void) * in our nvram, as Apple defined partitions use pretty much * all of the space */ - if (systemcfg->platform == PLATFORM_POWERMAC) + if (_machine == PLATFORM_POWERMAC) return -ENOSPC; /* see if we have an OS partition that meets our needs. diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c deleted file mode 100644 index 5e27e5a6a35..00000000000 --- a/arch/ppc64/kernel/pacaData.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/threads.h> -#include <linux/module.h> - -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/page.h> - -#include <asm/lppaca.h> -#include <asm/iseries/it_lp_queue.h> -#include <asm/paca.h> - -static union { - struct systemcfg data; - u8 page[PAGE_SIZE]; -} systemcfg_store __page_aligned; -struct systemcfg *systemcfg = &systemcfg_store.data; -EXPORT_SYMBOL(systemcfg); - - -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - -/* The Paca is an array with one entry per processor. Each contains an - * lppaca, which contains the information shared between the - * hypervisor and Linux. Each also contains an ItLpRegSave area which - * is used by the hypervisor to save registers. - * On systems with hardware multi-threading, there are two threads - * per processor. The Paca array must contain an entry for each thread. - * The VPD Areas will give a max logical processors = 2 * max physical - * processors. The processor VPD array needs one entry per physical - * processor (not thread). 
- */ -#define PACA_INIT_COMMON(number, start, asrr, asrv) \ - .lock_token = 0x8000, \ - .paca_index = (number), /* Paca Index */ \ - .default_decr = 0x00ff0000, /* Initial Decr */ \ - .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .stab_real = (asrr), /* Real pointer to segment table */ \ - .stab_addr = (asrv), /* Virt pointer to segment table */ \ - .cpu_start = (start), /* Processor start */ \ - .hw_cpu_id = 0xffff, \ - .lppaca = { \ - .desc = 0xd397d781, /* "LpPa" */ \ - .size = sizeof(struct lppaca), \ - .dyn_proc_status = 2, \ - .decr_val = 0x00ff0000, \ - .fpregs_in_use = 1, \ - .end_of_quantum = 0xfffffffffffffffful, \ - .slb_count = 64, \ - .vmxregs_in_use = 0, \ - }, \ - -#ifdef CONFIG_PPC_ISERIES -#define PACA_INIT_ISERIES(number) \ - .lppaca_ptr = &paca[number].lppaca, \ - .reg_save_ptr = &paca[number].reg_save, \ - .reg_save = { \ - .xDesc = 0xd397d9e2, /* "LpRS" */ \ - .xSize = sizeof(struct ItLpRegSave) \ - } - -#define PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 0, 0, 0) \ - PACA_INIT_ISERIES(number) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ - PACA_INIT_ISERIES(number) \ -} - -#else -#define PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 0, 0, 0) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ -} -#endif - -struct paca_struct paca[] = { - BOOTCPU_PACA_INIT(0), -#if NR_CPUS > 1 - PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), -#if NR_CPUS > 4 - PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), -#if NR_CPUS > 8 - PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), - PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), - PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), - PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), - PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), - PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), -#if NR_CPUS > 32 - PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), - PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), - PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), - PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), - PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), - PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), - PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), - PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), -#if NR_CPUS > 64 - PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), - PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), - PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), - PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), - PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), - PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), - PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), - PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), - PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), - PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), - PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), - PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), - PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), - PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), - PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), 
PACA_INIT(123), - PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), -#endif -#endif -#endif -#endif -#endif -}; -EXPORT_SYMBOL(paca); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 3d2106b022a..3cef1b8f57f 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -295,8 +295,8 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) } } -static struct pci_dev *of_create_pci_dev(struct device_node *node, - struct pci_bus *bus, int devfn) +struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn) { struct pci_dev *dev; const char *type; @@ -354,10 +354,9 @@ static struct pci_dev *of_create_pci_dev(struct device_node *node, return dev; } +EXPORT_SYMBOL(of_create_pci_dev); -static void of_scan_pci_bridge(struct device_node *node, struct pci_dev *dev); - -static void __devinit of_scan_bus(struct device_node *node, +void __devinit of_scan_bus(struct device_node *node, struct pci_bus *bus) { struct device_node *child = NULL; @@ -381,9 +380,10 @@ static void __devinit of_scan_bus(struct device_node *node, do_bus_setup(bus); } +EXPORT_SYMBOL(of_scan_bus); -static void __devinit of_scan_pci_bridge(struct device_node *node, - struct pci_dev *dev) +void __devinit of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev) { struct pci_bus *bus; u32 *busrange, *ranges; @@ -464,9 +464,10 @@ static void __devinit of_scan_pci_bridge(struct device_node *node, else if (mode == PCI_PROBE_NORMAL) pci_scan_child_bus(bus); } +EXPORT_SYMBOL(of_scan_pci_bridge); #endif /* CONFIG_PPC_MULTIPLATFORM */ -static void __devinit scan_phb(struct pci_controller *hose) +void __devinit scan_phb(struct pci_controller *hose) { struct pci_bus *bus; struct device_node *node = hose->arch_data; @@ -547,6 +548,11 @@ static int __init pcibios_init(void) if (ppc64_isabridge_dev != NULL) printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); +#ifdef CONFIG_PPC_MULTIPLATFORM + /* map in PCI I/O space */ + phbs_remap_io(); +#endif + printk("PCI: Probing PCI hardware done\n"); return 0; @@ -1276,12 +1282,9 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * G5 machines... So when something asks for bus 0 io base * (bus 0 is HT root), we return the AGP one instead. */ -#ifdef CONFIG_PPC_PMAC - if (systemcfg->platform == PLATFORM_POWERMAC && - machine_is_compatible("MacRISC4")) + if (machine_is_compatible("MacRISC4")) if (in_bus == 0) in_bus = 0xf0; -#endif /* CONFIG_PPC_PMAC */ /* That syscall isn't quite compatible with PCI domains, but it's * used on pre-domains setup. We return the first match diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c index 1a443a7ada4..12c4c9e9bbc 100644 --- a/arch/ppc64/kernel/pci_dn.c +++ b/arch/ppc64/kernel/pci_dn.c @@ -43,7 +43,7 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) u32 *regs; struct pci_dn *pdn; - if (phb->is_dynamic) + if (mem_init_done) pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); else pdn = alloc_bootmem(sizeof(*pdn)); @@ -120,6 +120,14 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. 
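The pci_dn.c hunk above switches the allocator test from phb->is_dynamic to mem_init_done, which is what lets pci_devs_phb_init_dynamic() serve both of the call sites this comment describes. A minimal sketch of the idiom, with an invented helper name, assuming the usual bootmem/slab interfaces of this kernel:

	#include <linux/slab.h>
	#include <linux/bootmem.h>

	extern int mem_init_done;	/* set once the normal page/slab allocators are usable */

	static void *alloc_early_or_late(size_t size)	/* hypothetical helper */
	{
		if (mem_init_done)
			return kmalloc(size, GFP_KERNEL);	/* runtime path, e.g. DLPAR PHB add */
		return alloc_bootmem(size);			/* early boot, before mem_init() */
	}

Keying on mem_init_done rather than on a per-PHB flag means the same traversal code works regardless of which PHB it is handed.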
+ */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,9 +209,14 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c deleted file mode 100644 index 24e955ee948..00000000000 --- a/arch/ppc64/kernel/proc_ppc64.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * arch/ppc64/kernel/proc_ppc64.c - * - * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/config.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/proc_fs.h> -#include <linux/slab.h> -#include <linux/kernel.h> - -#include <asm/systemcfg.h> -#include <asm/rtas.h> -#include <asm/uaccess.h> -#include <asm/prom.h> - -static loff_t page_map_seek( struct file *file, loff_t off, int whence); -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos); -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); - -static struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap -}; - -/* - * Create the ppc64 and ppc64/rtas directories early. This allows us to - * assume that they have been previously created in drivers. 
- */ -static int __init proc_ppc64_create(void) -{ - struct proc_dir_entry *root; - - root = proc_mkdir("ppc64", NULL); - if (!root) - return 1; - - if (!(systemcfg->platform & (PLATFORM_PSERIES | PLATFORM_CELL))) - return 0; - - if (!proc_mkdir("rtas", root)) - return 1; - - if (!proc_symlink("rtas", NULL, "ppc64/rtas")) - return 1; - - return 0; -} -core_initcall(proc_ppc64_create); - -static int __init proc_ppc64_init(void) -{ - struct proc_dir_entry *pde; - - pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL); - if (!pde) - return 1; - pde->nlink = 1; - pde->data = systemcfg; - pde->size = PAGE_SIZE; - pde->proc_fops = &page_map_fops; - - return 0; -} -__initcall(proc_ppc64_init); - -static loff_t page_map_seek( struct file *file, loff_t off, int whence) -{ - loff_t new; - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - - switch(whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - case 2: - new = dp->size + off; - break; - default: - return -EINVAL; - } - if ( new < 0 || new > dp->size ) - return -EINVAL; - return (file->f_pos = new); -} - -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); -} - -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) -{ - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - - vma->vm_flags |= VM_SHM | VM_LOCKED; - - if ((vma->vm_end - vma->vm_start) > dp->size) - return -EINVAL; - - remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT, - dp->size, vma->vm_page_prot); - return 0; -} - diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 97bfceb5353..fbad2c36078 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -31,6 +31,7 @@ #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/module.h> +#include <linux/module.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -46,7 +47,6 @@ #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> -#include <asm/ppcdebug.h> #include <asm/btext.h> #include <asm/sections.h> #include <asm/machdep.h> @@ -318,7 +318,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, } /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset) * used to extract the memory informations at boot before we can * unflatten the tree */ -static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; @@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, * This function can be used within scan_flattened_dt callback to get * access to properties */ -static void* __init get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) { unsigned long p = node; @@ 
-996,7 +996,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; unsigned long size; @@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - /* On LPAR, look for the first ibm,pft-size property for the hash table size - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { - u32 *pft_size; - pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL); - if (pft_size != NULL) { - /* pft_size[0] is the NUMA CEC cookie */ - ppc64_pft_size = pft_size[1]; - } - } - if (initial_boot_params && initial_boot_params->version >= 2) { /* version 2 of the kexec param format adds the phys cpuid * of booted proc. @@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, boot_cpuid = 0; } else { /* Check if it's the boot-cpu, set it's hw index in paca now */ - if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - u32 *prop = get_flat_dt_prop(node, "reg", NULL); + if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) + != NULL) { + u32 *prop = of_get_flat_dt_prop(node, "reg", NULL); set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); boot_cpuid_phys = get_hard_smp_processor_id(0); } @@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } /* Same goes for Apple's "altivec" property */ - prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); if (prop) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * this by looking at the size of the ibm,ppc-interrupt-server#s * property */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &size); cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; if (prop && ((size / sizeof(u32)) > 1)) @@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 0; /* get platform type */ - prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; - systemcfg->platform = *prop; + _machine = *prop; /* check if iommu is forced on or off */ - if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) iommu_is_off = 1; - if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) iommu_force_on = 1; - prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL); if (prop64) memory_limit = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, 
"linux,tce-alloc-start",NULL); if (prop64) tce_alloc_start = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); if (prop64) tce_alloc_end = *prop64; @@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u64 *basep, *entryp; - basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL); - entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL); + basep = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-base", NULL); + entryp = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-entry", NULL); + prop = (u32*)of_get_flat_dt_prop(node, + "linux,rtas-size", NULL); if (basep && entryp && prop) { rtas.base = *basep; rtas.entry = *entryp; @@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node, if (depth != 0) return 0; - prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); @@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; unsigned long l; @@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; @@ -1225,26 +1218,20 @@ void __init early_init_devtree(void *params) /* Setup flat device-tree pointer */ initial_boot_params = params; - /* By default, hash size is not set */ - ppc64_pft_size = 0; - /* Retreive various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen, NULL); /* Scan memory nodes and rebuild LMBs */ lmb_init(); - scan_flat_dt(early_init_dt_scan_root, NULL); - scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); - systemcfg->physicalMemorySize = lmb_phys_mem_size(); lmb_reserve(0, __pa(klimit)); - DBG("Phys. mem: %lx\n", systemcfg->physicalMemorySize); - /* Reserve LMB regions used by kernel, initrd, dt, etc... */ early_reserve_mem(); @@ -1253,26 +1240,8 @@ void __init early_init_devtree(void *params) /* Retreive hash table size from flattened tree plus other * CPU related informations (altivec support, boot CPU ID, ...) 
*/ - scan_flat_dt(early_init_dt_scan_cpus, NULL); - - /* If hash size wasn't obtained above, we calculate it now based on - * the total RAM size - */ - if (ppc64_pft_size == 0) { - unsigned long rnd_mem_size, pteg_count; - - /* round mem_size up to next power of 2 */ - rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); - if (rnd_mem_size < systemcfg->physicalMemorySize) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); - ppc64_pft_size = __ilog2(pteg_count << 7); - } - - DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); DBG(" <- early_init_devtree()\n"); } @@ -1781,7 +1750,7 @@ static int of_finish_dynamic_node(struct device_node *node, /* We don't support that function on PowerMac, at least * not yet */ - if (systemcfg->platform == PLATFORM_POWERMAC) + if (_machine == PLATFORM_POWERMAC) return -ENODEV; /* fix up new node's linux_phandle field */ @@ -1894,17 +1863,32 @@ get_property(struct device_node *np, const char *name, int *lenp) EXPORT_SYMBOL(get_property); /* - * Add a property to a node + * Add a property to a node. */ -void +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } #if 0 diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index a4bbca6dbb8..6375f40b23d 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -44,7 +44,6 @@ #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> -#include <asm/ppcdebug.h> #include <asm/btext.h> #include <asm/sections.h> #include <asm/machdep.h> @@ -1825,7 +1824,7 @@ static void __init fixup_device_tree(void) if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) == PROM_ERROR) return; - if (u3_rev != 0x35 && u3_rev != 0x37) + if (u3_rev < 0x35 || u3_rev > 0x39) return; /* does it need fixup ? */ if (prom_getproplen(i2c, "interrupts") > 0) @@ -1935,7 +1934,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long /* * On pSeries, inform the firmware about our capabilities */ - if (RELOC(of_platform) & PLATFORM_PSERIES) + if (RELOC(of_platform) == PLATFORM_PSERIES || + RELOC(of_platform) == PLATFORM_PSERIES_LPAR) prom_send_capabilities(); /* diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c deleted file mode 100644 index 3ad15c90fbb..00000000000 --- a/arch/ppc64/kernel/rtas_pci.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * arch/ppc64/kernel/rtas_pci.c - * - * Copyright (C) 2001 Dave Engebretsen, IBM Corporation - * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM - * - * RTAS specific routines for PCI. - * - * Based on code from pci.c, chrp_pci.c and pSeries_pci.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
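A side note on the prom_add_property() change above: it now takes devtree_lock, refuses duplicate names (returning -1, with 0 on success) and mirrors the new property into the node's procfs entry when one exists, so callers can check the result. A hedged sketch with an invented property name:

	#include <linux/kernel.h>
	#include <asm/prom.h>

	static u32 example_value = 1;				/* hypothetical payload */
	static struct property example_prop = {
		.name   = "linux,example-prop",			/* hypothetical name */
		.length = sizeof(example_value),
		.value  = (unsigned char *)&example_value,
	};

	static void add_example_property(struct device_node *np)
	{
		if (prom_add_property(np, &example_prop) != 0)	/* -1: already present */
			printk(KERN_WARNING "%s: linux,example-prop already exists\n",
			       np->full_name);
	}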
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/kernel.h> -#include <linux/threads.h> -#include <linux/pci.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> - -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <asm/iommu.h> -#include <asm/rtas.h> -#include <asm/mpic.h> -#include <asm/ppc-pci.h> - -/* RTAS tokens */ -static int read_pci_config; -static int write_pci_config; -static int ibm_read_pci_config; -static int ibm_write_pci_config; - -static int config_access_valid(struct pci_dn *dn, int where) -{ - if (where < 256) - return 1; - if (where < 4096 && dn->pci_ext_config_space) - return 1; - - return 0; -} - -static int of_device_available(struct device_node * dn) -{ - char * status; - - status = get_property(dn, "status", NULL); - - if (!status) - return 1; - - if (!strcmp(status, "okay")) - return 1; - - return 0; -} - -static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) -{ - int returnval = -1; - unsigned long buid, addr; - int ret; - struct pci_dn *pdn; - - if (!dn || !dn->data) - return PCIBIOS_DEVICE_NOT_FOUND; - pdn = dn->data; - if (!config_access_valid(pdn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); - buid = pdn->phb->buid; - if (buid) { - ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, - addr, buid >> 32, buid & 0xffffffff, size); - } else { - ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); - } - *val = returnval; - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dn_check_failure (dn, NULL)) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->data && PCI_DN(dn)->devfn == devfn - && of_device_available(dn)) - return rtas_read_config(dn, where, size, val); - - return PCIBIOS_DEVICE_NOT_FOUND; -} - -int rtas_write_config(struct device_node *dn, int where, int size, u32 val) -{ - unsigned long buid, addr; - int ret; - struct pci_dn *pdn; - - if (!dn || !dn->data) - return PCIBIOS_DEVICE_NOT_FOUND; - pdn = dn->data; - if (!config_access_valid(pdn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); - buid = pdn->phb->buid; - if (buid) { - ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); - } else { - ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); - } - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int 
rtas_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->data && PCI_DN(dn)->devfn == devfn - && of_device_available(dn)) - return rtas_write_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -struct pci_ops rtas_pci_ops = { - rtas_pci_read_config, - rtas_pci_write_config -}; - -int is_python(struct device_node *dev) -{ - char *model = (char *)get_property(dev, "model", NULL); - - if (model && strstr(model, "Python")) - return 1; - - return 0; -} - -static int get_phb_reg_prop(struct device_node *dev, - unsigned int addr_size_words, - struct reg_property64 *reg) -{ - unsigned int *ui_ptr = NULL, len; - - /* Found a PHB, now figure out where his registers are mapped. */ - ui_ptr = (unsigned int *)get_property(dev, "reg", &len); - if (ui_ptr == NULL) - return 1; - - if (addr_size_words == 1) { - reg->address = ((struct reg_property32 *)ui_ptr)->address; - reg->size = ((struct reg_property32 *)ui_ptr)->size; - } else { - *reg = *((struct reg_property64 *)ui_ptr); - } - - return 0; -} - -static void python_countermeasures(struct device_node *dev, - unsigned int addr_size_words) -{ - struct reg_property64 reg_struct; - void __iomem *chip_regs; - volatile u32 val; - - if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) - return; - - /* Python's register file is 1 MB in size. */ - chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); - - /* - * Firmware doesn't always clear this bit which is critical - * for good performance - Anton - */ - -#define PRG_CL_RESET_VALID 0x00010000 - - val = in_be32(chip_regs + 0xf6030); - if (val & PRG_CL_RESET_VALID) { - printk(KERN_INFO "Python workaround: "); - val &= ~PRG_CL_RESET_VALID; - out_be32(chip_regs + 0xf6030, val); - /* - * We must read it back for changes to - * take effect - */ - val = in_be32(chip_regs + 0xf6030); - printk("reg0: %x\n", val); - } - - iounmap(chip_regs); -} - -void __init init_pci_config_tokens (void) -{ - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); -} - -unsigned long __devinit get_phb_buid (struct device_node *phb) -{ - int addr_cells; - unsigned int *buid_vals; - unsigned int len; - unsigned long buid; - - if (ibm_read_pci_config == -1) return 0; - - /* PHB's will always be children of the root node, - * or so it is promised by the current firmware. 
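For the BUID assembly in get_phb_buid() that follows, a short worked example with invented values: given two address cells and a PHB "reg" property whose first cells are 0x00000008 0x00020000, the function returns 0x0000000800020000; with a single address cell the first cell is returned as-is.

	/* sketch of the two-cell case handled below */
	unsigned int buid_vals[2] = { 0x00000008, 0x00020000 };	/* hypothetical */
	unsigned long buid = (((unsigned long)buid_vals[0]) << 32UL) |
			     (((unsigned long)buid_vals[1]) & 0xffffffff);
	/* buid == 0x0000000800020000 */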
*/ - if (phb->parent == NULL) - return 0; - if (phb->parent->parent) - return 0; - - buid_vals = (unsigned int *) get_property(phb, "reg", &len); - if (buid_vals == NULL) - return 0; - - addr_cells = prom_n_addr_cells(phb); - if (addr_cells == 1) { - buid = (unsigned long) buid_vals[0]; - } else { - buid = (((unsigned long)buid_vals[0]) << 32UL) | - (((unsigned long)buid_vals[1]) & 0xffffffff); - } - return buid; -} - -static int phb_set_bus_ranges(struct device_node *dev, - struct pci_controller *phb) -{ - int *bus_range; - unsigned int len; - - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - return 1; - } - - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; - - return 0; -} - -static int __devinit setup_phb(struct device_node *dev, - struct pci_controller *phb, - unsigned int addr_size_words) -{ - pci_setup_pci_controller(phb); - - if (is_python(dev)) - python_countermeasures(dev, addr_size_words); - - if (phb_set_bus_ranges(dev, phb)) - return 1; - - phb->arch_data = dev; - phb->ops = &rtas_pci_ops; - phb->buid = get_phb_buid(dev); - - return 0; -} - -static void __devinit add_linux_pci_domain(struct device_node *dev, - struct pci_controller *phb, - struct property *of_prop) -{ - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(phb->global_number); - of_prop->value = (unsigned char *)&of_prop[1]; - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); - prom_add_property(dev, of_prop); -} - -static struct pci_controller * __init alloc_phb(struct device_node *dev, - unsigned int addr_size_words) -{ - struct pci_controller *phb; - struct property *of_prop; - - phb = alloc_bootmem(sizeof(struct pci_controller)); - if (phb == NULL) - return NULL; - - of_prop = alloc_bootmem(sizeof(struct property) + - sizeof(phb->global_number)); - if (!of_prop) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - add_linux_pci_domain(dev, phb, of_prop); - - return phb; -} - -static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) -{ - struct pci_controller *phb; - - phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), - GFP_KERNEL); - if (phb == NULL) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - phb->is_dynamic = 1; - - /* TODO: linux,pci-domain? 
*/ - - return phb; -} - -unsigned long __init find_and_init_phbs(void) -{ - struct device_node *node; - struct pci_controller *phb; - unsigned int root_size_cells = 0; - unsigned int index; - unsigned int *opprop = NULL; - struct device_node *root = of_find_node_by_path("/"); - - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - opprop = (unsigned int *)get_property(root, - "platform-open-pic", NULL); - } - - root_size_cells = prom_n_size_cells(root); - - index = 0; - - for (node = of_get_next_child(root, NULL); - node != NULL; - node = of_get_next_child(root, node)) { - if (node->type == NULL || strcmp(node->type, "pci") != 0) - continue; - - phb = alloc_phb(node, root_size_cells); - if (!phb) - continue; - - pci_process_bridge_OF_ranges(phb, node, 0); - pci_setup_phb_io(phb, index == 0); -#ifdef CONFIG_PPC_PSERIES - if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { - int addr = root_size_cells * (index + 2) - 1; - mpic_assign_isu(pSeries_mpic, index, opprop[addr]); - } -#endif - index++; - } - - of_node_put(root); - pci_devs_phb_init(); - - /* - * pci_probe_only and pci_assign_all_buses can be set via properties - * in chosen. - */ - if (of_chosen) { - int *prop; - - prop = (int *)get_property(of_chosen, "linux,pci-probe-only", - NULL); - if (prop) - pci_probe_only = *prop; - - prop = (int *)get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); - if (prop) - pci_assign_all_buses = *prop; - } - - return 0; -} - -struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) -{ - struct device_node *root = of_find_node_by_path("/"); - unsigned int root_size_cells = 0; - struct pci_controller *phb; - struct pci_bus *bus; - int primary; - - root_size_cells = prom_n_size_cells(root); - - primary = list_empty(&hose_list); - phb = alloc_phb_dynamic(dn, root_size_cells); - if (!phb) - return NULL; - - pci_process_bridge_OF_ranges(phb, dn, primary); - - pci_setup_phb_io_dynamic(phb, primary); - of_node_put(root); - - pci_devs_phb_init_dynamic(phb); - phb->last_busno = 0xff; - bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); - phb->bus = bus; - phb->last_busno = bus->subordinate; - - return phb; -} -EXPORT_SYMBOL(init_phb_dynamic); - -/* RPA-specific bits for removing PHBs */ -int pcibios_remove_root_bus(struct pci_controller *phb) -{ - struct pci_bus *b = phb->bus; - struct resource *res; - int rc, i; - - res = b->resource[0]; - if (!res->flags) { - printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, - b->name); - return 1; - } - - rc = unmap_bus_range(b); - if (rc) { - printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - if (release_resource(res)) { - printk(KERN_ERR "%s: failed to release IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - for (i = 1; i < 3; ++i) { - res = b->resource[i]; - if (!res->flags && i == 0) { - printk(KERN_ERR "%s: no MEM resource for PHB %s\n", - __FUNCTION__, b->name); - return 1; - } - if (res->flags && release_resource(res)) { - printk(KERN_ERR - "%s: failed to release IO %d on bus %s\n", - __FUNCTION__, i, b->name); - return 1; - } - } - - list_del(&phb->list_node); - if (phb->is_dynamic) - kfree(phb); - - return 0; -} -EXPORT_SYMBOL(pcibios_remove_root_bus); diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c deleted file mode 100644 index 215bf890030..00000000000 --- a/arch/ppc64/kernel/scanlog.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can 
redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> - * - * When ppc64 hardware fails the service processor dumps internal state - * of the system. After a reboot the operating system can access a dump - * of this data using this driver. A dump exists if the device-tree - * /chosen/ibm,scan-log-data property exists. - * - * This driver exports /proc/ppc64/scan-log-dump which can be read. - * The driver supports only sequential reads. - * - * The driver looks at a write to the driver for the single word "reset". - * If given, the driver will reset the scanlog so the platform can free it. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <asm/uaccess.h> -#include <asm/rtas.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.0" -#define MODULE_NAME "scanlog" - -/* Status returns from ibm,scan-log-dump */ -#define SCANLOG_COMPLETE 0 -#define SCANLOG_HWERROR -1 -#define SCANLOG_CONTINUE 1 - -#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) - -static int scanlog_debug; -static unsigned int ibm_scan_log_dump; /* RTAS token */ -static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ - -static ssize_t scanlog_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = file->f_dentry->d_inode; - struct proc_dir_entry *dp; - unsigned int *data; - int status; - unsigned long len, off; - unsigned int wait_time; - - dp = PDE(inode); - data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: read failed no data\n"); - return -EIO; - } - - if (count > RTAS_DATA_BUF_SIZE) - count = RTAS_DATA_BUF_SIZE; - - if (count < 1024) { - /* This is the min supported by this RTAS call. Rather - * than do all the buffering we insist the user code handle - * larger reads. As long as cp works... :) - */ - printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); - return -EINVAL; - } - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - for (;;) { - wait_time = 500; /* default wait if no data */ - spin_lock(&rtas_data_buf_lock); - memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, - (u32) __pa(rtas_data_buf), (u32) count); - memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); - spin_unlock(&rtas_data_buf_lock); - - DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", - status, data[0], data[1], data[2]); - switch (status) { - case SCANLOG_COMPLETE: - DEBUG("hit eof\n"); - return 0; - case SCANLOG_HWERROR: - DEBUG("hardware error reading scan log data\n"); - return -EIO; - case SCANLOG_CONTINUE: - /* We may or may not have data yet */ - len = data[1]; - off = data[2]; - if (len > 0) { - if (copy_to_user(buf, ((char *)data)+off, len)) - return -EFAULT; - return len; - } - /* Break to sleep default time */ - break; - default: - if (status > 9900 && status <= 9905) { - wait_time = rtas_extended_busy_delay_time(status); - } else { - printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); - return -EIO; - } - } - /* Apparently no data yet. Wait and try again. 
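The header comment of this deleted driver spells out the userspace contract: sequential reads of the dump, and a write of "reset" to let the platform free it. A sketch of a consumer, assuming the proc path created by scanlog_init() further down and reads of at least 1K as scanlog_read() requires:

	#include <stdio.h>

	int dump_scanlog(void)				/* hypothetical userspace helper */
	{
		char buf[4096];
		size_t n;
		FILE *f = fopen("/proc/ppc64/rtas/scan-log-dump", "rb");

		if (!f)
			return 1;
		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);	/* sequential reads only */
		fclose(f);
		/* afterwards, writing "reset" to the same file lets firmware free the dump */
		return 0;
	}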
*/ - msleep_interruptible(wait_time); - } - /*NOTREACHED*/ -} - -static ssize_t scanlog_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) -{ - char stkbuf[20]; - int status; - - if (count > 19) count = 19; - if (copy_from_user (stkbuf, buf, count)) { - return -EFAULT; - } - stkbuf[count] = 0; - - if (buf) { - if (strncmp(stkbuf, "reset", 5) == 0) { - DEBUG("reset scanlog\n"); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - DEBUG("rtas returns %d\n", status); - } else if (strncmp(stkbuf, "debugon", 7) == 0) { - printk(KERN_ERR "scanlog: debug on\n"); - scanlog_debug = 1; - } else if (strncmp(stkbuf, "debugoff", 8) == 0) { - printk(KERN_ERR "scanlog: debug off\n"); - scanlog_debug = 0; - } - } - return count; -} - -static int scanlog_open(struct inode * inode, struct file * file) -{ - struct proc_dir_entry *dp = PDE(inode); - unsigned int *data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: open failed no data\n"); - return -EIO; - } - - if (data[0] != 0) { - /* This imperfect test stops a second copy of the - * data (or a reset while data is being copied) - */ - return -EBUSY; - } - - data[0] = 0; /* re-init so we restart the scan */ - - return 0; -} - -static int scanlog_release(struct inode * inode, struct file * file) -{ - struct proc_dir_entry *dp = PDE(inode); - unsigned int *data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: release failed no data\n"); - return -EIO; - } - data[0] = 0; - - return 0; -} - -struct file_operations scanlog_fops = { - .owner = THIS_MODULE, - .read = scanlog_read, - .write = scanlog_write, - .open = scanlog_open, - .release = scanlog_release, -}; - -int __init scanlog_init(void) -{ - struct proc_dir_entry *ent; - - ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "scan-log-dump not implemented on this system\n"); - return -EIO; - } - - ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL); - if (ent) { - ent->proc_fops = &scanlog_fops; - /* Ideally we could allocate a buffer < 4G */ - ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!ent->data) { - printk(KERN_ERR "Failed to allocate a buffer\n"); - remove_proc_entry("scan-log-dump", ent->parent); - return -ENOMEM; - } - ((unsigned int *)ent->data)[0] = 0; - } else { - printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n"); - return -EIO; - } - proc_ppc64_scan_log_dump = ent; - - return 0; -} - -void __exit scanlog_cleanup(void) -{ - if (proc_ppc64_scan_log_dump) { - if (proc_ppc64_scan_log_dump->data) - kfree(proc_ppc64_scan_log_dump->data); - remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent); - } -} - -module_init(scanlog_init); -module_exit(scanlog_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c deleted file mode 100644 index 6654b350979..00000000000 --- a/arch/ppc64/kernel/sysfs.c +++ /dev/null @@ -1,383 +0,0 @@ -#include <linux/config.h> -#include <linux/sysdev.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/percpu.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <linux/cpumask.h> -#include <linux/notifier.h> - -#include <asm/current.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/firmware.h> -#include <asm/hvcall.h> -#include <asm/prom.h> -#include <asm/systemcfg.h> -#include <asm/paca.h> -#include <asm/lppaca.h> 
-#include <asm/machdep.h> - -static DEFINE_PER_CPU(struct cpu, cpu_devices); - -/* SMT stuff */ - -#ifdef CONFIG_PPC_MULTIPLATFORM -/* default to snooze disabled */ -DEFINE_PER_CPU(unsigned long, smt_snooze_delay); - -static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, - size_t count) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - ssize_t ret; - unsigned long snooze; - - ret = sscanf(buf, "%lu", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; - - return count; -} - -static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - - return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); -} - -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, - store_smt_snooze_delay); - -/* Only parse OF options if the matching cmdline option was not specified */ -static int smt_snooze_cmdline; - -static int __init smt_setup(void) -{ - struct device_node *options; - unsigned int *val; - unsigned int cpu; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; - - options = find_path_device("/options"); - if (!options) - return 1; - - val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", - NULL); - if (!smt_snooze_cmdline && val) { - for_each_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = *val; - } - - return 1; -} -__initcall(smt_setup); - -static int __init setup_smt_snooze_delay(char *str) -{ - unsigned int cpu; - int snooze; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; - - smt_snooze_cmdline = 1; - - if (get_option(&str, &snooze)) { - for_each_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - } - - return 1; -} -__setup("smt-snooze-delay=", setup_smt_snooze_delay); - -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -/* - * Enabling PMCs will slow partition context switch times so we only do - * it the first time we write to the PMCs. 
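The per-SPR sysfs attributes removed below are all generated by the SYSFS_PMCSETUP() macro together with run_on_cpu(). Expanded by hand for mmcr0 (a sketch of roughly what the preprocessor emits, attribute annotations dropped), the read/write pair is:

	static unsigned long read_mmcr0(unsigned long junk)
	{
		return mfspr(SPRN_MMCR0);
	}

	static unsigned long write_mmcr0(unsigned long val)
	{
		ppc64_enable_pmcs();			/* first write turns the PMCs on */
		mtspr(SPRN_MMCR0, val);
		return 0;
	}

	static ssize_t show_mmcr0(struct sys_device *dev, char *buf)
	{
		struct cpu *cpu = container_of(dev, struct cpu, sysdev);
		unsigned long val = run_on_cpu(cpu->sysdev.id, read_mmcr0, 0);

		return sprintf(buf, "%lx\n", val);
	}

	static ssize_t store_mmcr0(struct sys_device *dev, const char *buf, size_t count)
	{
		struct cpu *cpu = container_of(dev, struct cpu, sysdev);
		unsigned long val;
		int ret = sscanf(buf, "%lx", &val);

		if (ret != 1)
			return -EINVAL;
		run_on_cpu(cpu->sysdev.id, write_mmcr0, val);
		return count;
	}

run_on_cpu() pins the calling task to the target CPU for the duration of the SPR access, so the value shown always comes from the CPU whose sysdev the attribute hangs off.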
- */ - -static DEFINE_PER_CPU(char, pmcs_enabled); - -void ppc64_enable_pmcs(void) -{ - /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) - return; - - __get_cpu_var(pmcs_enabled) = 1; - - if (ppc_md.enable_pmcs) - ppc_md.enable_pmcs(); -} -EXPORT_SYMBOL(ppc64_enable_pmcs); - -/* XXX convert to rusty's on_one_cpu */ -static unsigned long run_on_cpu(unsigned long cpu, - unsigned long (*func)(unsigned long), - unsigned long arg) -{ - cpumask_t old_affinity = current->cpus_allowed; - unsigned long ret; - - /* should return -EINVAL to userspace */ - if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) - return 0; - - ret = func(arg); - - set_cpus_allowed(current, old_affinity); - - return ret; -} - -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ -static unsigned long read_##NAME(unsigned long junk) \ -{ \ - return mfspr(ADDRESS); \ -} \ -static unsigned long write_##NAME(unsigned long val) \ -{ \ - ppc64_enable_pmcs(); \ - mtspr(ADDRESS, val); \ - return 0; \ -} \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ - unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ - return sprintf(buf, "%lx\n", val); \ -} \ -static ssize_t __attribute_used__ \ - store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ - unsigned long val; \ - int ret = sscanf(buf, "%lx", &val); \ - if (ret != 1) \ - return -EINVAL; \ - run_on_cpu(cpu->sysdev.id, write_##NAME, val); \ - return count; \ -} - -SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); -SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); -SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -SYSFS_PMCSETUP(pmc1, SPRN_PMC1); -SYSFS_PMCSETUP(pmc2, SPRN_PMC2); -SYSFS_PMCSETUP(pmc3, SPRN_PMC3); -SYSFS_PMCSETUP(pmc4, SPRN_PMC4); -SYSFS_PMCSETUP(pmc5, SPRN_PMC5); -SYSFS_PMCSETUP(pmc6, SPRN_PMC6); -SYSFS_PMCSETUP(pmc7, SPRN_PMC7); -SYSFS_PMCSETUP(pmc8, SPRN_PMC8); -SYSFS_PMCSETUP(purr, SPRN_PURR); - -static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0); -static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1); -static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2); -static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3); -static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4); -static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5); -static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6); -static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7); -static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8); -static SYSDEV_ATTR(purr, 0600, show_purr, NULL); - -static void register_cpu_online(unsigned int cpu) -{ - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - -#ifndef CONFIG_PPC_ISERIES - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); -#endif - - /* PMC stuff */ - - sysdev_create_file(s, &attr_mmcr0); - sysdev_create_file(s, &attr_mmcr1); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); - - if (cur_cpu_spec->num_pmcs >= 1) - sysdev_create_file(s, &attr_pmc1); - if (cur_cpu_spec->num_pmcs >= 2) - sysdev_create_file(s, &attr_pmc2); - if (cur_cpu_spec->num_pmcs >= 3) - sysdev_create_file(s, &attr_pmc3); - if (cur_cpu_spec->num_pmcs >= 4) - sysdev_create_file(s, &attr_pmc4); - if (cur_cpu_spec->num_pmcs >= 5) - sysdev_create_file(s, &attr_pmc5); - if (cur_cpu_spec->num_pmcs >= 6) - sysdev_create_file(s, &attr_pmc6); - if 
(cur_cpu_spec->num_pmcs >= 7) - sysdev_create_file(s, &attr_pmc7); - if (cur_cpu_spec->num_pmcs >= 8) - sysdev_create_file(s, &attr_pmc8); - - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_purr); -} - -#ifdef CONFIG_HOTPLUG_CPU -static void unregister_cpu_online(unsigned int cpu) -{ - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - - BUG_ON(c->no_control); - -#ifndef CONFIG_PPC_ISERIES - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); -#endif - - /* PMC stuff */ - - sysdev_remove_file(s, &attr_mmcr0); - sysdev_remove_file(s, &attr_mmcr1); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); - - if (cur_cpu_spec->num_pmcs >= 1) - sysdev_remove_file(s, &attr_pmc1); - if (cur_cpu_spec->num_pmcs >= 2) - sysdev_remove_file(s, &attr_pmc2); - if (cur_cpu_spec->num_pmcs >= 3) - sysdev_remove_file(s, &attr_pmc3); - if (cur_cpu_spec->num_pmcs >= 4) - sysdev_remove_file(s, &attr_pmc4); - if (cur_cpu_spec->num_pmcs >= 5) - sysdev_remove_file(s, &attr_pmc5); - if (cur_cpu_spec->num_pmcs >= 6) - sysdev_remove_file(s, &attr_pmc6); - if (cur_cpu_spec->num_pmcs >= 7) - sysdev_remove_file(s, &attr_pmc7); - if (cur_cpu_spec->num_pmcs >= 8) - sysdev_remove_file(s, &attr_pmc8); - - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_purr); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -static int __devinit sysfs_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned int)(long)hcpu; - - switch (action) { - case CPU_ONLINE: - register_cpu_online(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - unregister_cpu_online(cpu); - break; -#endif - } - return NOTIFY_OK; -} - -static struct notifier_block __devinitdata sysfs_cpu_nb = { - .notifier_call = sysfs_cpu_notify, -}; - -/* NUMA stuff */ - -#ifdef CONFIG_NUMA -static struct node node_devices[MAX_NUMNODES]; - -static void register_nodes(void) -{ - int i; - - for (i = 0; i < MAX_NUMNODES; i++) { - if (node_online(i)) { - int p_node = parent_node(i); - struct node *parent = NULL; - - if (p_node != i) - parent = &node_devices[p_node]; - - register_node(&node_devices[i], i, parent); - } - } -} -#else -static void register_nodes(void) -{ - return; -} -#endif - -/* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); -} -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); - -static int __init topology_init(void) -{ - int cpu; - struct node *parent = NULL; - - register_nodes(); - - register_cpu_notifier(&sysfs_cpu_nb); - - for_each_cpu(cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - -#ifdef CONFIG_NUMA - /* The node to which a cpu belongs can't be known - * until the cpu is made present. - */ - parent = NULL; - if (cpu_present(cpu)) - parent = &node_devices[cpu_to_node(cpu)]; -#endif - /* - * For now, we just see if the system supports making - * the RTAS calls for CPU hotplug. But, there may be a - * more comprehensive way to do this for an individual - * CPU. For instance, the boot cpu might never be valid - * for hotplugging. 
- */ - if (!ppc_md.cpu_die) - c->no_control = 1; - - if (cpu_online(cpu) || (c->no_control == 0)) { - register_cpu(c, cpu, parent); - - sysdev_create_file(&c->sysdev, &attr_physical_id); - } - - if (cpu_online(cpu)) - register_cpu_online(cpu); - } - - return 0; -} -__initcall(topology_init); diff --git a/arch/ppc64/kernel/udbg.c b/arch/ppc64/kernel/udbg.c index d49c3613c8e..0d878e72fc4 100644 --- a/arch/ppc64/kernel/udbg.c +++ b/arch/ppc64/kernel/udbg.c @@ -10,12 +10,10 @@ */ #include <stdarg.h> -#define WANT_PPCDBG_TAB /* Only defined here */ #include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/console.h> -#include <asm/ppcdebug.h> #include <asm/processor.h> void (*udbg_putc)(unsigned char c); @@ -89,59 +87,6 @@ void udbg_printf(const char *fmt, ...) va_end(args); } -/* PPCDBG stuff */ - -u64 ppc64_debug_switch; - -/* Special print used by PPCDBG() macro */ -void udbg_ppcdbg(unsigned long debug_flags, const char *fmt, ...) -{ - unsigned long active_debugs = debug_flags & ppc64_debug_switch; - - if (active_debugs) { - va_list ap; - unsigned char buf[UDBG_BUFSIZE]; - unsigned long i, len = 0; - - for (i=0; i < PPCDBG_NUM_FLAGS; i++) { - if (((1U << i) & active_debugs) && - trace_names[i]) { - len += strlen(trace_names[i]); - udbg_puts(trace_names[i]); - break; - } - } - - snprintf(buf, UDBG_BUFSIZE, " [%s]: ", current->comm); - len += strlen(buf); - udbg_puts(buf); - - while (len < 18) { - udbg_puts(" "); - len++; - } - - va_start(ap, fmt); - vsnprintf(buf, UDBG_BUFSIZE, fmt, ap); - udbg_puts(buf); - va_end(ap); - } -} - -unsigned long udbg_ifdebug(unsigned long flags) -{ - return (flags & ppc64_debug_switch); -} - -/* - * Initialize the PPCDBG state. Called before relocation has been enabled. - */ -void __init ppcdbg_initialize(void) -{ - ppc64_debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | */ - /* PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */; -} - /* * Early boot console based on udbg */ diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c index 4aacf521e3e..1bbacac4498 100644 --- a/arch/ppc64/kernel/vdso.c +++ b/arch/ppc64/kernel/vdso.c @@ -34,6 +34,7 @@ #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/systemcfg.h> #include <asm/vdso.h> #undef DEBUG @@ -179,7 +180,7 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, * Last page is systemcfg. */ if ((vma->vm_end - address) <= PAGE_SIZE) - pg = virt_to_page(systemcfg); + pg = virt_to_page(_systemcfg); else pg = virt_to_page(vbase + offset); @@ -604,7 +605,7 @@ void __init vdso_init(void) get_page(pg); } - get_page(virt_to_page(systemcfg)); + get_page(virt_to_page(_systemcfg)); } int in_gate_area_no_task(unsigned long addr) |