156 files changed, 9029 insertions, 1148 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a433b7126d3..2cdc35ce804 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -491,7 +491,7 @@ config PPC601_SYNC_FIX
 	  If in doubt, say Y here.
 
 config TAU
-	bool "Thermal Management Support"
+	bool "On-chip CPU temperature sensor support"
 	depends on 6xx
 	help
 	  G3 and G4 processors have an on-chip temperature sensor called the
@@ -500,7 +500,7 @@ config TAU
 	  on-die temperature in /proc/cpuinfo if the cpu supports it.
 
 	  Unfortunately, on some chip revisions, this sensor is very inaccurate
-	  and in some cases, does not work at all, so don't assume the cpu
+	  and in many cases, does not work at all, so don't assume the cpu
 	  temp is actually what /proc/cpuinfo says it is.
 
 config TAU_INT
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 9254806f703..8d48e9e7162 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -110,11 +110,6 @@ config SERIAL_TEXT_DEBUG
 	depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \
 		PPC_GEN550 || PPC_MPC52xx
 
-config PPC_OCP
-	bool
-	depends on IBM_OCP || XILINX_OCP
-	default y
-
 choice
 	prompt "Early debugging (dangerous)"
 	bool
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 829e017b8a5..6ec84d37a33 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -129,13 +129,8 @@ core-y				+= arch/powerpc/kernel/ \
 				   arch/powerpc/lib/ \
 				   arch/powerpc/sysdev/ \
 				   arch/powerpc/platforms/
-core-$(CONFIG_PPC32)		+= arch/ppc/kernel/
-core-$(CONFIG_MATH_EMULATION)	+= arch/ppc/math-emu/
+core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
-core-$(CONFIG_APUS)		+= arch/ppc/amiga/
-drivers-$(CONFIG_8xx)		+= arch/ppc/8xx_io/
-drivers-$(CONFIG_4xx)		+= arch/ppc/4xx_io/
-drivers-$(CONFIG_CPM2)		+= arch/ppc/8260_io/
 
 drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
 
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 3c2acab6373..fe22e54ab2b 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc6
-# Wed Mar 15 16:19:48 2006
+# Linux kernel version: 2.6.16
+# Thu Mar 23 20:48:09 2006
 #
 CONFIG_PPC64=y
 CONFIG_64BIT=y
@@ -30,6 +30,7 @@ CONFIG_POWER4=y
 CONFIG_PPC_FPU=y
 CONFIG_ALTIVEC=y
 CONFIG_PPC_STD_MMU=y
+CONFIG_VIRT_CPU_ACCOUNTING=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 
@@ -51,7 +52,8 @@ CONFIG_SYSVIPC=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
 # CONFIG_CPUSETS is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -85,7 +87,7 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_OBSOLETE_MODPARM=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
+CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
 
 #
@@ -130,7 +132,8 @@ CONFIG_CELL_IIC=y
 #
 # Cell Broadband Engine options
 #
-CONFIG_SPU_FS=y
+CONFIG_SPU_FS=m
+CONFIG_SPUFS_MMAP=y
 
 #
 # Kernel options
@@ -144,7 +147,7 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
 CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
+CONFIG_BINFMT_MISC=m
 CONFIG_FORCE_MAX_ZONEORDER=13
 # CONFIG_IOMMU_VMERGE is not set
 CONFIG_KEXEC=y
@@ -155,13 +158,16 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
+# CONFIG_FLATMEM_MANUAL is not set
 # CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPARSEMEM_EXTREME=y
+# CONFIG_MEMORY_HOTPLUG is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
 # CONFIG_PPC_64K_PAGES is not set
 CONFIG_SCHED_SMT=y
 CONFIG_PROC_DEVICETREE=y
@@ -232,6 +238,7 @@ CONFIG_TCP_CONG_BIC=y
 # CONFIG_IP_VS is not set
 CONFIG_IPV6=y
 # CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
@@ -244,25 +251,7 @@ CONFIG_NETFILTER=y
 # Core Netfilter Configuration
 #
 # CONFIG_NETFILTER_NETLINK is not set
-CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+# CONFIG_NETFILTER_XTABLES is not set
 
 #
 # IP: Netfilter Configuration
@@ -278,51 +267,13 @@ CONFIG_IP_NF_IRC=m
 CONFIG_IP_NF_TFTP=m
 CONFIG_IP_NF_AMANDA=m
 # CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_H323 is not set
 CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_IPRANGE=m
-CONFIG_IP_NF_MATCH_MULTIPORT=m
-CONFIG_IP_NF_MATCH_TOS=m
-CONFIG_IP_NF_MATCH_RECENT=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_DSCP=m
-CONFIG_IP_NF_MATCH_AH_ESP=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_MATCH_OWNER=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_HASHLIMIT=m
-CONFIG_IP_NF_MATCH_POLICY=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_TARGET_TCPMSS=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_NAT_NEEDED=y
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_SAME=m
-CONFIG_IP_NF_NAT_SNMP_BASIC=m
-CONFIG_IP_NF_NAT_IRC=m
-CONFIG_IP_NF_NAT_FTP=m
-CONFIG_IP_NF_NAT_TFTP=m
-CONFIG_IP_NF_NAT_AMANDA=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_TOS=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_DSCP=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
 
 #
 # IPv6: Netfilter Configuration (EXPERIMENTAL)
 #
 # CONFIG_IP6_NF_QUEUE is not set
-# CONFIG_IP6_NF_IPTABLES is not set
 
 #
 # DCCP Configuration (EXPERIMENTAL)
@@ -355,7 +306,6 @@ CONFIG_IP_NF_ARP_MANGLE=m
 # QoS and/or fair queueing
 #
 # CONFIG_NET_SCHED is not set
-CONFIG_NET_CLS_ROUTE=y
 
 #
 # Network testing
@@ -408,7 +358,7 @@ CONFIG_FW_LOADER=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
-CONFIG_BLK_DEV_NBD=y
+# CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_SX8 is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
@@ -484,7 +434,23 @@ CONFIG_IDEDMA_AUTO=y
 #
 # Multi-device support (RAID and LVM)
 #
-# CONFIG_MD is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+# CONFIG_MD_RAID10 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_RAID6 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+# CONFIG_DM_MULTIPATH_EMC is not set
 
 #
 # Fusion MPT device support
@@ -548,7 +514,7 @@ CONFIG_MII=y
 # CONFIG_ACENIC is not set
 # CONFIG_DL2K is not set
 CONFIG_E1000=m
-# CONFIG_E1000_NAPI is not set
+CONFIG_E1000_NAPI=y
 # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
@@ -560,7 +526,7 @@ CONFIG_SKGE=m
 # CONFIG_SK98LIN is not set
 # CONFIG_TIGON3 is not set
 # CONFIG_BNX2 is not set
-CONFIG_SPIDER_NET=y
+CONFIG_SPIDER_NET=m
 # CONFIG_MV643XX_ETH is not set
 
 #
@@ -678,6 +644,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
+CONFIG_HVC_DRIVER=y
+CONFIG_HVC_RTAS=y
 
 #
 # IPMI
@@ -694,14 +662,13 @@ CONFIG_WATCHDOG=y
 # Watchdog Device Drivers
 #
 # CONFIG_SOFT_WATCHDOG is not set
-# CONFIG_WATCHDOG_RTAS is not set
+CONFIG_WATCHDOG_RTAS=y
 
 #
 # PCI-based Watchdog Cards
 #
 # CONFIG_PCIPCWATCHDOG is not set
 # CONFIG_WDTPCI is not set
-# CONFIG_RTC is not set
 CONFIG_GEN_RTC=y
 # CONFIG_GEN_RTC_X is not set
 # CONFIG_DTLK is not set
@@ -833,6 +800,7 @@ CONFIG_DUMMY_CONSOLE=y
 #
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_USB is not set
 
 #
@@ -852,7 +820,14 @@ CONFIG_USB_ARCH_HAS_OHCI=y
 #
 # InfiniBand support
 #
-# CONFIG_INFINIBAND is not set
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_MTHCA_DEBUG=y
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
 
 #
 # EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
@@ -1037,10 +1012,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=m
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
 
 #
 # Instrumentation Support
@@ -1058,7 +1029,7 @@ CONFIG_LOG_BUF_SHIFT=15
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_MUTEXES is not set
+CONFIG_DEBUG_MUTEXES=y
 # CONFIG_DEBUG_SPINLOCK is not set
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 # CONFIG_DEBUG_KOBJECT is not set
diff --git a/arch/powerpc/configs/mpc8540_ads_defconfig b/arch/powerpc/configs/mpc8540_ads_defconfig
index 2a8290ee15c..7f0780f1aa3 100644
--- a/arch/powerpc/configs/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/mpc8540_ads_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 
-# Sat Jan 14 15:57:54 2006
+# Linux kernel version: 2.6.16
+# Mon Mar 27 23:37:36 2006
 #
 # CONFIG_PPC64 is not set
 CONFIG_PPC32=y
@@ -9,6 +9,7 @@ CONFIG_PPC_MERGE=y
 CONFIG_MMU=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_PPC=y
 CONFIG_EARLY_PRINTK=y
@@ -18,6 +19,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_PPC_OF=y
 CONFIG_PPC_UDBG_16550=y
 # CONFIG_GENERIC_TBSYNC is not set
+CONFIG_DEFAULT_UIMAGE=y
 
 #
 # Processor support
@@ -42,7 +44,6 @@ CONFIG_SPE=y
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
@@ -58,6 +59,7 @@ CONFIG_SYSVIPC=y
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EMBEDDED=y
@@ -72,10 +74,6 @@ CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
 CONFIG_SLAB=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
@@ -90,6 +88,8 @@ CONFIG_BASE_SMALL=0
 # Block layer
 #
 # CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
 
 #
 # IO Schedulers
@@ -183,6 +183,7 @@ CONFIG_NET=y
 #
 # Networking options
 #
+# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
@@ -220,6 +221,11 @@ CONFIG_TCP_CONG_BIC=y
 # SCTP Configuration (EXPERIMENTAL)
 #
 # CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
 # CONFIG_VLAN_8021Q is not set
@@ -229,11 +235,6 @@ CONFIG_TCP_CONG_BIC=y
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
 # CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
@@ -487,6 +488,12 @@ CONFIG_GEN_RTC=y
 # CONFIG_I2C is not set
 
 #
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
 # Dallas's 1-wire bus
 #
 # CONFIG_W1 is not set
@@ -496,6 +503,7 @@ CONFIG_GEN_RTC=y
 #
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_F71805F is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
 #
@@ -503,10 +511,6 @@ CONFIG_HWMON=y
 #
 
 #
-# Multimedia Capabilities Port drivers
-#
-
-#
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
@@ -531,6 +535,7 @@ CONFIG_HWMON=y
 #
 # CONFIG_USB_ARCH_HAS_HCD is not set
 # CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
 
 #
 # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -551,7 +556,7 @@ CONFIG_HWMON=y
 #
 
 #
-# SN Devices
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
 #
 
 #
@@ -603,7 +608,6 @@ CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 # CONFIG_HUGETLB_PAGE is not set
 CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
 # CONFIG_CONFIGFS_FS is not set
 
 #
@@ -658,6 +662,7 @@ CONFIG_PARTITION_ADVANCED=y
 # CONFIG_SGI_PARTITION is not set
 # CONFIG_ULTRIX_PARTITION is not set
 # CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
 # CONFIG_EFI_PARTITION is not set
 
 #
@@ -695,6 +700,8 @@ CONFIG_DEBUG_MUTEXES=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_FS is not set
 # CONFIG_DEBUG_VM is not set
+# CONFIG_UNWIND_INFO is not set
+CONFIG_FORCED_INLINING=y
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_DEBUGGER is not set
 # CONFIG_BDI_SWITCH is not set
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 80e9fe2632b..0cc0995b81b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,12 +12,12 @@ endif
 
 obj-y				:= semaphore.o cputable.o ptrace.o syscalls.o \
 				   irq.o align.o signal_32.o pmc.o vdso.o \
-				   init_task.o process.o systbl.o
+				   init_task.o process.o systbl.o idle.o
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o cpu_setup_power4.o \
-				   firmware.o sysfs.o idle_64.o
+				   firmware.o sysfs.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
 obj-$(CONFIG_POWER4)		+= idle_power4.o
@@ -34,6 +34,11 @@ obj-$(CONFIG_IBMEBUS)           += ibmebus.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_6xx)		+= idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
+obj-$(CONFIG_TAU)		+= tau_6xx.o
+obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o
+obj32-$(CONFIG_MODULES)		+= module_32.o
+obj-$(CONFIG_E500)		+= perfmon_fsl_booke.o
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 
@@ -51,7 +56,6 @@ obj-$(CONFIG_PPC64)		+= misc_64.o dma_64.o iommu.o
 obj-$(CONFIG_PPC_MULTIPLATFORM)	+= prom_init.o
 obj-$(CONFIG_MODULES)		+= ppc_ksyms.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
-obj-$(CONFIG_6xx)		+= idle_6xx.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
@@ -77,6 +81,7 @@ smpobj-$(CONFIG_SMP)		+= smp.o
 
 endif
 
+obj-$(CONFIG_PPC32)		+= $(obj32-y)
 obj-$(CONFIG_PPC64)		+= $(obj64-y)
 
 extra-$(CONFIG_PPC_FPU)		+= fpu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 882889b1592..54b48f33005 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -105,8 +105,6 @@ int main(void)
 	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
 	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
 	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
-	DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
-
 	/* paca */
 	DEFINE(PACA_SIZE, sizeof(struct paca_struct));
 	DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
new file mode 100644
index 00000000000..55ed7716636
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -0,0 +1,474 @@
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__setup_cpu_603)
+	b	setup_common_caches
+_GLOBAL(__setup_cpu_604)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_604_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750)
+	mflr	r4
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750cx)
+	mflr	r4
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750cx
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_750fx)
+	mflr	r4
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750fx
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7400)
+	mflr	r4
+	bl	__init_fpu_registers
+	bl	setup_7400_workarounds
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_7410)
+	mflr	r4
+	bl	__init_fpu_registers
+	bl	setup_7410_workarounds
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	li	r3,0
+	mtspr	SPRN_L2CR2,r3
+	mtlr	r4
+	blr
+_GLOBAL(__setup_cpu_745x)
+	mflr	r4
+	bl	setup_common_caches
+	bl	setup_745x_specifics
+	mtlr	r4
+	blr
+
+/* Enable caches for 603's, 604, 750 & 7400 */
+setup_common_caches:
+	mfspr	r11,SPRN_HID0
+	andi.	r0,r11,HID0_DCE
+	ori	r11,r11,HID0_ICE|HID0_DCE
+	ori	r8,r11,HID0_ICFI
+	bne	1f			/* don't invalidate the D-cache */
+	ori	r8,r8,HID0_DCI		/* unless it wasn't enabled */
+1:	sync
+	mtspr	SPRN_HID0,r8		/* enable and invalidate caches */
+	sync
+	mtspr	SPRN_HID0,r11		/* enable caches */
+	sync
+	isync
+	blr
+
+/* 604, 604e, 604ev, ...
+ * Enable superscalar execution & branch history table
+ */
+setup_604_hid0:
+	mfspr	r11,SPRN_HID0
+	ori	r11,r11,HID0_SIED|HID0_BHTE
+	ori	r8,r11,HID0_BTCD
+	sync
+	mtspr	SPRN_HID0,r8	/* flush branch target address cache */
+	sync			/* on 604e/604r */
+	mtspr	SPRN_HID0,r11
+	sync
+	isync
+	blr
+
+/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
+ * erratas we work around here.
+ * Moto MPC710CE.pdf describes them, those are errata
+ * #3, #4 and #5
+ * Note that we assume the firmware didn't choose to
+ * apply other workarounds (there are other ones documented
+ * in the .pdf). It appear that Apple firmware only works
+ * around #3 and with the same fix we use. We may want to
+ * check if the CPU is using 60x bus mode in which case
+ * the workaround for errata #4 is useless. Also, we may
+ * want to explicitely clear HID0_NOPDST as this is not
+ * needed once we have applied workaround #5 (though it's
+ * not set by Apple's firmware at least).
+ */
+setup_7400_workarounds:
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31
+	cmpwi	0,r3,0x0207
+	ble	1f
+	blr
+setup_7410_workarounds:
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31
+	cmpwi	0,r3,0x0100
+	bnelr
+1:
+	mfspr	r11,SPRN_MSSSR0
+	/* Errata #3: Set L1OPQ_SIZE to 0x10 */
+	rlwinm	r11,r11,0,9,6
+	oris	r11,r11,0x0100
+	/* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */
+	oris	r11,r11,0x0002
+	/* Errata #5: Set DRLT_SIZE to 0x01 */
+	rlwinm	r11,r11,0,5,2
+	oris	r11,r11,0x0800
+	sync
+	mtspr	SPRN_MSSSR0,r11
+	sync
+	isync
+	blr
+
+/* 740/750/7400/7410
+ * Enable Store Gathering (SGE), Address Brodcast (ABE),
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Clear Instruction cache throttling (ICTC)
+ */
+setup_750_7400_hid0:
+	mfspr	r11,SPRN_HID0
+	ori	r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
+	oris	r11,r11,HID0_DPM@h
+BEGIN_FTR_SECTION
+	xori	r11,r11,HID0_BTIC
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+	xoris	r11,r11,HID0_DPM@h	/* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+	li	r3,HID0_SPD
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+ 	mtspr	SPRN_ICTC,r3		/* Instruction Cache Throttling off */
+	isync
+	mtspr	SPRN_HID0,r11
+	sync
+	isync
+	blr
+
+/* 750cx specific
+ * Looks like we have to disable NAP feature for some PLL settings...
+ * (waiting for confirmation)
+ */
+setup_750cx:
+	mfspr	r10, SPRN_HID1
+	rlwinm	r10,r10,4,28,31
+	cmpwi	cr0,r10,7
+	cmpwi	cr1,r10,9
+	cmpwi	cr2,r10,11
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr2+eq
+	bnelr
+	lwz	r6,CPU_SPEC_FEATURES(r5)
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7
+	stw	r6,CPU_SPEC_FEATURES(r5)
+	blr
+
+/* 750fx specific
+ */
+setup_750fx:
+	blr
+
+/* MPC 745x
+ * Enable Store Gathering (SGE), Branch Folding (FOLD)
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Ensure our data cache instructions really operate.
+ * Timebase has to be running or we wouldn't have made it here,
+ * just ensure we don't disable it.
+ * Clear Instruction cache throttling (ICTC)
+ * Enable L2 HW prefetch
+ */
+setup_745x_specifics:
+	/* We check for the presence of an L3 cache setup by
+	 * the firmware. If any, we disable NAP capability as
+	 * it's known to be bogus on rev 2.1 and earlier
+	 */
+	mfspr	r11,SPRN_L3CR
+	andis.	r11,r11,L3CR_L3E@h
+	beq	1f
+	lwz	r6,CPU_SPEC_FEATURES(r5)
+	andi.	r0,r6,CPU_FTR_L3_DISABLE_NAP
+	beq	1f
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7
+	stw	r6,CPU_SPEC_FEATURES(r5)
+1:
+	mfspr	r11,SPRN_HID0
+
+	/* All of the bits we have to set.....
+	 */
+	ori	r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE
+	ori	r11,r11,HID0_LRSTK | HID0_BTIC
+	oris	r11,r11,HID0_DPM@h
+BEGIN_FTR_SECTION
+	xori	r11,r11,HID0_BTIC
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+	xoris	r11,r11,HID0_DPM@h	/* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+
+	/* All of the bits we have to clear....
+	 */
+	li	r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+
+ 	mtspr	SPRN_ICTC,r3		/* Instruction Cache Throttling off */
+	isync
+	mtspr	SPRN_HID0,r11
+	sync
+	isync
+
+	/* Enable L2 HW prefetch, if L2 is enabled
+	 */
+	mfspr	r3,SPRN_L2CR
+	andis.	r3,r3,L2CR_L2E@h
+	beqlr
+	mfspr	r3,SPRN_MSSCR0
+	ori	r3,r3,3
+	sync
+	mtspr	SPRN_MSSCR0,r3
+	sync
+	isync
+	blr
+
+/*
+ * Initialize the FPU registers. This is needed to work around an errata
+ * in some 750 cpus where using a not yet initialized FPU register after
+ * power on reset may hang the CPU
+ */
+_GLOBAL(__init_fpu_registers)
+	mfmsr	r10
+	ori	r11,r10,MSR_FP
+	mtmsr	r11
+	isync
+	addis	r9,r3,empty_zero_page@ha
+	addi	r9,r9,empty_zero_page@l
+	REST_32FPRS(0,r9)
+	sync
+	mtmsr	r10
+	isync
+	blr
+
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0		0
+#define CS_HID1		4
+#define CS_HID2		8
+#define	CS_MSSCR0	12
+#define CS_MSSSR0	16
+#define CS_ICTRL	20
+#define CS_LDSTCR	24
+#define CS_LDSTDB	28
+#define CS_SIZE		32
+
+	.data
+	.balign	L1_CACHE_BYTES
+cpu_state_storage:
+	.space	CS_SIZE
+	.balign	L1_CACHE_BYTES,0
+	.text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, MSSCR0, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+	/* Some CR fields are volatile, we back it up all */
+	mfcr	r7
+
+	/* Get storage ptr */
+	lis	r5,cpu_state_storage@h
+	ori	r5,r5,cpu_state_storage@l
+
+	/* Save HID0 (common to all CONFIG_6xx cpus) */
+	mfspr	r3,SPRN_HID0
+	stw	r3,CS_HID0(r5)
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,0x8000	/* 7450 */
+	cmplwi	cr1,r3,0x000c	/* 7400 */
+	cmplwi	cr2,r3,0x800c	/* 7410 */
+	cmplwi	cr3,r3,0x8001	/* 7455 */
+	cmplwi	cr4,r3,0x8002	/* 7457 */
+	cmplwi	cr5,r3,0x8003	/* 7447A */
+	cmplwi	cr6,r3,0x7000	/* 750FX */
+	cmplwi	cr7,r3,0x8004	/* 7448 */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr5+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr7+eq
+	bne	1f
+	/* Backup 74xx specific regs */
+	mfspr	r4,SPRN_MSSCR0
+	stw	r4,CS_MSSCR0(r5)
+	mfspr	r4,SPRN_MSSSR0
+	stw	r4,CS_MSSSR0(r5)
+	beq	cr1,1f
+	/* Backup 745x specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+	mfspr	r4,SPRN_ICTRL
+	stw	r4,CS_ICTRL(r5)
+	mfspr	r4,SPRN_LDSTCR
+	stw	r4,CS_LDSTCR(r5)
+	mfspr	r4,SPRN_LDSTDB
+	stw	r4,CS_LDSTDB(r5)
+1:
+	bne	cr6,1f
+	/* Backup 750FX specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+	/* If rev 2.x, backup HID2 */
+	mfspr	r3,SPRN_PVR
+	andi.	r3,r3,0xff00
+	cmpwi	cr0,r3,0x0200
+	bne	1f
+	mfspr	r4,SPRN_HID2
+	stw	r4,CS_HID2(r5)
+1:
+	mtcr	r7
+	blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+	/* Some CR fields are volatile, we back it up all */
+	mfcr	r7
+
+	/* Get storage ptr */
+	lis	r5,(cpu_state_storage-KERNELBASE)@h
+	ori	r5,r5,cpu_state_storage@l
+
+	/* Restore HID0 */
+	lwz	r3,CS_HID0(r5)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,0x8000	/* 7450 */
+	cmplwi	cr1,r3,0x000c	/* 7400 */
+	cmplwi	cr2,r3,0x800c	/* 7410 */
+	cmplwi	cr3,r3,0x8001	/* 7455 */
+	cmplwi	cr4,r3,0x8002	/* 7457 */
+	cmplwi	cr5,r3,0x8003	/* 7447A */
+	cmplwi	cr6,r3,0x7000	/* 750FX */
+	cmplwi	cr7,r3,0x8004	/* 7448 */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr5+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr7+eq
+	bne	2f
+	/* Restore 74xx specific regs */
+	lwz	r4,CS_MSSCR0(r5)
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lwz	r4,CS_MSSSR0(r5)
+	sync
+	mtspr	SPRN_MSSSR0,r4
+	sync
+	isync
+	bne	cr2,1f
+	/* Clear 7410 L2CR2 */
+	li	r4,0
+	mtspr	SPRN_L2CR2,r4
+1:	beq	cr1,2f
+	/* Restore 745x specific registers */
+	lwz	r4,CS_HID1(r5)
+	sync
+	mtspr	SPRN_HID1,r4
+	isync
+	sync
+	lwz	r4,CS_ICTRL(r5)
+	sync
+	mtspr	SPRN_ICTRL,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTCR(r5)
+	sync
+	mtspr	SPRN_LDSTCR,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTDB(r5)
+	sync
+	mtspr	SPRN_LDSTDB,r4
+	isync
+	sync
+2:	bne	cr6,1f
+	/* Restore 750FX specific registers
+	 * that is restore HID2 on rev 2.x and PLL config & switch
+	 * to PLL 0 on all
+	 */
+	/* If rev 2.x, restore HID2 with low voltage bit cleared */
+	mfspr	r3,SPRN_PVR
+	andi.	r3,r3,0xff00
+	cmpwi	cr0,r3,0x0200
+	bne	4f
+	lwz	r4,CS_HID2(r5)
+	rlwinm	r4,r4,0,19,17
+	mtspr	SPRN_HID2,r4
+	sync
+4:
+	lwz	r4,CS_HID1(r5)
+	rlwinm  r5,r4,0,16,14
+	mtspr	SPRN_HID1,r5
+		/* Wait for PLL to stabilize */
+	mftbl	r5
+3:	mftbl	r6
+	sub	r6,r6,r5
+	cmplwi	cr0,r6,10000
+	ble	3b
+	/* Setup final PLL */
+	mtspr	SPRN_HID1,r4
+1:
+	mtcr	r7
+	blr
+
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4827ca1ec89..b3a97946722 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -135,10 +135,10 @@ transfer_to_handler:
 	mfspr	r11,SPRN_HID0
 	mtcr	r11
 BEGIN_FTR_SECTION
-	bt-	8,power_save_6xx_restore	/* Check DOZE */
+	bt-	8,4f			/* Check DOZE */
 END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
 BEGIN_FTR_SECTION
-	bt-	9,power_save_6xx_restore	/* Check NAP */
+	bt-	9,4f			/* Check NAP */
 END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #endif /* CONFIG_6xx */
 	.globl transfer_to_handler_cont
@@ -157,6 +157,10 @@ transfer_to_handler_cont:
 	SYNC
 	RFI				/* jump to handler, enable MMU */
 
+#ifdef CONFIG_6xx	
+4:	b	power_save_6xx_restore
+#endif
+
 /*
  * On kernel stack overflow, load up an initial stack pointer
  * and call StackOverflow(regs), which should not return.
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 1060155d84c..19ad5c6b181 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -617,6 +617,12 @@ _GLOBAL(enter_rtas)
 	mfsrr1	r10
 	std	r10,_SRR1(r1)
 
+	/* Temporary workaround to clear CR until RTAS can be modified to
+	 * ignore all bits.
+	 */
+	li	r0,0
+	mtcr	r0
+
 	/* There is no way it is acceptable to get here with interrupts enabled,
 	 * check it with the asm equivalent of WARN_ON
 	 */
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 4d37a3cb80f..0bfe9061720 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -14,7 +14,9 @@
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
 
 #include <asm/firmware.h>
 
-unsigned long ppc64_firmware_features;
+unsigned long powerpc_firmware_features;
+EXPORT_SYMBOL_GPL(powerpc_firmware_features);
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 35084f3a841..a5ae04a57c7 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -1544,7 +1544,11 @@ _STATIC(__boot_from_prom)
 	mr	r28,r6
 	mr	r27,r7
 
-	/* Align the stack to 16-byte boundary for broken yaboot */
+	/*
+	 * Align the stack to 16-byte boundary
+	 * Depending on the size and layout of the ELF sections in the initial
+	 * boot binary, the stack pointer will be unalignet on PowerMac
+	 */
 	rldicr	r1,r1,0,59
 
 	/* Make sure we are running in 64 bits mode */
@@ -1847,21 +1851,6 @@ _STATIC(start_here_multiplatform)
 	bl	.__save_cpu_setup
 	sync
 
-	/* Setup a valid physical PACA pointer in SPRG3 for early_setup
-	 * note that boot_cpuid can always be 0 nowadays since there is
-	 * nowhere it can be initialized differently before we reach this
-	 * code
-	 */
-	LOAD_REG_IMMEDIATE(r27, boot_cpuid)
-	add	r27,r27,r26
-	lwz	r27,0(r27)
-
-	LOAD_REG_IMMEDIATE(r24, paca) 	/* Get base vaddr of paca array	 */
-	mulli	r13,r27,PACA_SIZE	/* Calculate vaddr of right paca */
-	add	r13,r13,r24		/* for this processor.		 */
-	add	r13,r13,r26		/* convert to physical addr	 */
-	mtspr	SPRN_SPRG3,r13
-	
 	/* Do very early kernel initializations, including initial hash table,
 	 * stab and slb setup before we turn on relocation.	*/
 
@@ -1930,6 +1919,17 @@ _STATIC(start_here_common)
 	/* Not reached */
 	BUG_OPCODE
 
+/* Put the paca pointer into r13 and SPRG3 */
+_GLOBAL(setup_boot_paca)
+	LOAD_REG_IMMEDIATE(r3, boot_cpuid)
+	lwz	r3,0(r3)
+	LOAD_REG_IMMEDIATE(r4, paca) 	/* Get base vaddr of paca array	 */
+	mulli	r3,r3,PACA_SIZE		/* Calculate vaddr of right paca */
+	add	r13,r3,r4		/* for this processor.		 */
+	mtspr	SPRN_SPRG3,r13
+
+	blr
+
 /*
  * We put a few things here that have to be page-aligned.
  * This stuff goes at the beginning of the bss, which is page-aligned.
diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle.c
index b879d3057ef..e9f321d74d8 100644
--- a/arch/powerpc/kernel/idle_64.c
+++ b/arch/powerpc/kernel/idle.c
@@ -2,13 +2,17 @@
  * Idle daemon for PowerPC.  Idle daemon will handle any action
  * that needs to be taken when the system becomes idle.
  *
- * Originally Written by Cort Dougan (cort@cs.nmt.edu)
+ * Originally written by Cort Dougan (cort@cs.nmt.edu).
+ * Subsequent 32-bit hacking by Tom Rini, Armin Kuster,
+ * Paul Mackerras and others.
  *
  * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
  *
  * Additional shared processor, SMT, and firmware support
  *    Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
  *
+ * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -29,18 +33,43 @@
 #include <asm/machdep.h>
 #include <asm/smp.h>
 
-extern void power4_idle(void);
+#ifdef CONFIG_HOTPLUG_CPU
+#define cpu_should_die()	(cpu_is_offline(smp_processor_id()) && \
+				 system_state == SYSTEM_RUNNING)
+#else
+#define cpu_should_die()	0
+#endif
 
-void default_idle(void)
+/*
+ * The body of the idle task.
+ */
+void cpu_idle(void)
 {
-	unsigned int cpu = smp_processor_id();
-	set_thread_flag(TIF_POLLING_NRFLAG);
+	if (ppc_md.idle_loop)
+		ppc_md.idle_loop();	/* doesn't return */
 
+	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		if (!need_resched()) {
-			while (!need_resched() && !cpu_is_offline(cpu)) {
-				ppc64_runlatch_off();
+		ppc64_runlatch_off();
 
+		while (!need_resched() && !cpu_should_die()) {
+			if (ppc_md.power_save) {
+				clear_thread_flag(TIF_POLLING_NRFLAG);
+				/*
+				 * smp_mb is so clearing of TIF_POLLING_NRFLAG
+				 * is ordered w.r.t. need_resched() test.
+				 */
+				smp_mb();
+				local_irq_disable();
+
+				/* check again after disabling irqs */
+				if (!need_resched() && !cpu_should_die())
+					ppc_md.power_save();
+
+				local_irq_enable();
+				set_thread_flag(TIF_POLLING_NRFLAG);
+
+			} else {
 				/*
 				 * Go into low thread priority and possibly
 				 * low power mode.
@@ -48,46 +77,18 @@ void default_idle(void)
 				HMT_low();
 				HMT_very_low();
 			}
-
-			HMT_medium();
 		}
 
+		HMT_medium();
 		ppc64_runlatch_on();
+		if (cpu_should_die())
+			cpu_die();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
-		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-			cpu_die();
 	}
 }
 
-void native_idle(void)
-{
-	while (1) {
-		ppc64_runlatch_off();
-
-		if (!need_resched())
-			power4_idle();
-
-		if (need_resched()) {
-			ppc64_runlatch_on();
-			preempt_enable_no_resched();
-			schedule();
-			preempt_disable();
-		}
-
-		if (cpu_is_offline(smp_processor_id()) &&
-		    system_state == SYSTEM_RUNNING)
-			cpu_die();
-	}
-}
-
-void cpu_idle(void)
-{
-	BUG_ON(NULL == ppc_md.idle_loop);
-	ppc_md.idle_loop();
-}
-
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 444fdcc769f..12a4efbaa08 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -87,19 +87,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 	cmpwi	0,r3,0
 	beqlr
 
-	/* Clear MSR:EE */
-	mfmsr	r7
-	rlwinm	r0,r7,0,17,15
-	mtmsr	r0
-
-	/* Check current_thread_info()->flags */
-	rlwinm	r4,r1,0,0,18
-	lwz	r4,TI_FLAGS(r4)
-	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq	1f
-	mtmsr	r7	/* out of line this ? */
-	blr
-1:	
 	/* Some pre-nap cleanups needed on some CPUs */
 	andis.	r0,r3,HID0_NAP@h
 	beq	2f
@@ -157,7 +144,8 @@ BEGIN_FTR_SECTION
 	DSSALL
 	sync
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-	ori	r7,r7,MSR_EE /* Could be ommited (already set) */
+	mfmsr	r7
+	ori	r7,r7,MSR_EE
 	oris	r7,r7,MSR_POW@h
 	sync
 	isync
@@ -220,8 +208,6 @@ _GLOBAL(nap_save_msscr0)
 _GLOBAL(nap_save_hid1)
 	.space	4*NR_CPUS
 
-_GLOBAL(powersave_nap)
-	.long	0
 _GLOBAL(powersave_lowspeed)
 	.long	0
 
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index c16b4afab58..6dad1c02496 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -1,11 +1,5 @@
 /*
- *  This file contains the power_save function for 6xx & 7xxx CPUs
- *  rewritten in assembler
- *
- *  Warning ! This code assumes that if your machine has a 750fx
- *  it will have PLL 1 set to low speed mode (used during NAP/DOZE).
- *  if this is not the case some additional changes will have to
- *  be done to check a runtime var (a bit like powersave-nap)
+ *  This file contains the power_save function for 970-family CPUs.
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -26,49 +20,23 @@
 
 	.text
 
-/*
- * Here is the power_save_6xx function. This could eventually be
- * split into several functions & changing the function pointer
- * depending on the various features.
- */
 _GLOBAL(power4_idle)
 BEGIN_FTR_SECTION
 	blr
 END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
-	/* We must dynamically check for the NAP feature as it
-	 * can be cleared by CPU init after the fixups are done
-	 */
-	LOAD_REG_ADDRBASE(r3,cur_cpu_spec)
-	ld	r4,ADDROFF(cur_cpu_spec)(r3)
-	ld	r4,CPU_SPEC_FEATURES(r4)
-	andi.	r0,r4,CPU_FTR_CAN_NAP
-	beqlr
 	/* Now check if user or arch enabled NAP mode */
 	LOAD_REG_ADDRBASE(r3,powersave_nap)
 	lwz	r4,ADDROFF(powersave_nap)(r3)
 	cmpwi	0,r4,0
 	beqlr
 
-	/* Clear MSR:EE */
-	mfmsr	r7
-	li	r4,0
-	ori	r4,r4,MSR_EE
-	andc	r0,r7,r4
-	mtmsrd	r0
-
-	/* Check current_thread_info()->flags */
-	clrrdi	r4,r1,THREAD_SHIFT
-	ld	r4,TI_FLAGS(r4)
-	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq	1f
-	mtmsrd	r7	/* out of line this ? */
-	blr
-1:
 	/* Go to NAP now */
 BEGIN_FTR_SECTION
 	DSSALL
 	sync
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	mfmsr	r7
+	ori	r7,r7,MSR_EE
 	oris	r7,r7,MSR_POW@h
 	sync
 	isync
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 771a59cbd21..bb5c9501234 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -379,7 +379,7 @@ void irq_ctx_init(void)
 	struct thread_info *tp;
 	int i;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
 		tp = softirq_ctx[i];
 		tp->cpu = i;
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
new file mode 100644
index 00000000000..d7f4e982b53
--- /dev/null
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -0,0 +1,471 @@
+/*
+	L2CR functions
+	Copyright � 1997-1998 by PowerLogix R & D, Inc.
+
+	This program is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to the Free Software
+	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+/*
+	Thur, Dec. 12, 1998.
+	- First public release, contributed by PowerLogix.
+	***********
+	Sat, Aug. 7, 1999.
+	- Terry: Made sure code disabled interrupts before running. (Previously
+			it was assumed interrupts were already disabled).
+	- Terry: Updated for tentative G4 support.  4MB of memory is now flushed
+			instead of 2MB.  (Prob. only 3 is necessary).
+	- Terry: Updated for workaround to HID0[DPM] processor bug
+			during global invalidates.
+	***********
+	Thu, July 13, 2000.
+	- Terry: Added isync to correct for an errata.
+
+	22 August 2001.
+	- DanM: Finally added the 7450 patch I've had for the past
+		several months.  The L2CR is similar, but I'm going
+		to assume the user of this functions knows what they
+		are doing.
+
+	Author:	Terry Greeniaus (tgree@phys.ualberta.ca)
+	Please e-mail updates to this file to me, thanks!
+*/
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+
+/* Usage:
+
+	When setting the L2CR register, you must do a few special
+	things.  If you are enabling the cache, you must perform a
+	global invalidate.  If you are disabling the cache, you must
+	flush the cache contents first.  This routine takes care of
+	doing these things.  When first enabling the cache, make sure
+	you pass in the L2CR you want, as well as passing in the
+	global invalidate bit set.  A global invalidate will only be
+	performed if the L2I bit is set in applyThis.  When enabling
+	the cache, you should also set the L2E bit in applyThis.  If
+	you want to modify the L2CR contents after the cache has been
+	enabled, the recommended procedure is to first call
+	__setL2CR(0) to disable the cache and then call it again with
+	the new values for L2CR.  Examples:
+
+	_setL2CR(0)		- disables the cache
+	_setL2CR(0xB3A04000)	- enables my G3 upgrade card:
+				- L2E set to turn on the cache
+				- L2SIZ set to 1MB
+				- L2CLK set to 1:1
+				- L2RAM set to pipelined synchronous late-write
+				- L2I set to perform a global invalidation
+				- L2OH set to 0.5 nS
+				- L2DF set because this upgrade card
+				  requires it
+
+	A similar call should work for your card.  You need to know
+	the correct setting for your card and then place them in the
+	fields I have outlined above.  Other fields support optional
+	features, such as L2DO which caches only data, or L2TS which
+	causes cache pushes from the L1 cache to go to the L2 cache
+	instead of to main memory.
+
+IMPORTANT:
+	Starting with the 7450, the bits in this register have moved
+	or behave differently.  The Enable, Parity Enable, Size,
+	and L2 Invalidate are the only bits that have not moved.
+	The size is read-only for these processors with internal L2
+	cache, and the invalidate is a control as well as status.
+		-- Dan
+
+*/
+/*
+ * Summary: this procedure ignores the L2I bit in the value passed in,
+ * flushes the cache if it was already enabled, always invalidates the
+ * cache, then enables the cache if the L2E bit is set in the value
+ * passed in.
+ *   -- paulus.
+ */
+_GLOBAL(_set_L2CR)
+	/* Make sure this is a 750 or 7400 chip */
+BEGIN_FTR_SECTION
+	li	r3,-1
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
+
+	mflr	r9
+
+	/* Stop DST streams */
+BEGIN_FTR_SECTION
+	DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+	/* Turn off interrupts and data relocation. */
+	mfmsr	r7		/* Save MSR in r7 */
+	rlwinm	r4,r7,0,17,15
+	rlwinm	r4,r4,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r4
+	isync
+
+	/* Before we perform the global invalidation, we must disable dynamic
+	 * power management via HID0[DPM] to work around a processor bug where
+	 * DPM can possibly interfere with the state machine in the processor
+	 * that invalidates the L2 cache tags.
+	 */
+	mfspr	r8,SPRN_HID0		/* Save HID0 in r8 */
+	rlwinm	r4,r8,0,12,10		/* Turn off HID0[DPM] */
+	sync
+	mtspr	SPRN_HID0,r4		/* Disable DPM */
+	sync
+
+	/* Get the current enable bit of the L2CR into r4 */
+	mfspr	r4,SPRN_L2CR
+
+	/* Tweak some bits */
+	rlwinm	r5,r3,0,0,0		/* r5 contains the new enable bit */
+	rlwinm	r3,r3,0,11,9		/* Turn off the invalidate bit */
+	rlwinm	r3,r3,0,1,31		/* Turn off the enable bit */
+
+	/* Check to see if we need to flush */
+	rlwinm.	r4,r4,0,0,0
+	beq	2f
+
+	/* Flush the cache. First, read the first 4MB of memory (physical) to
+	 * put new data in the cache.  (Actually we only need
+	 * the size of the L2 cache plus the size of the L1 cache, but 4MB will
+	 * cover everything just to be safe).
+	 */
+
+	 /**** Might be a good idea to set L2DO here - to prevent instructions
+	       from getting into the cache.  But since we invalidate
+	       the next time we enable the cache it doesn't really matter.
+	       Don't do this unless you accomodate all processor variations.
+	       The bit moved on the 7450.....
+	  ****/
+
+BEGIN_FTR_SECTION
+	/* Disable L2 prefetch on some 745x and try to ensure
+	 * L2 prefetch engines are idle. As explained by errata
+	 * text, we can't be sure they are, we just hope very hard
+	 * that well be enough (sic !). At least I noticed Apple
+	 * doesn't even bother doing the dcbf's here...
+	 */
+	mfspr	r4,SPRN_MSSCR0
+	rlwinm	r4,r4,0,0,29
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lis	r4,KERNELBASE@h
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+	/* TODO: use HW flush assist when available */
+
+	lis	r4,0x0002
+	mtctr	r4
+	li	r4,0
+1:
+	lwzx	r0,r0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+	isync
+
+	/* Now, flush the first 4MB of memory */
+	lis	r4,0x0002
+	mtctr	r4
+	li	r4,0
+	sync
+1:
+	dcbf	0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+
+2:
+	/* Set up the L2CR configuration bits (and switch L2 off) */
+	/* CPU errata: Make sure the mtspr below is already in the
+	 * L1 icache
+	 */
+	b	20f
+	.balign	L1_CACHE_BYTES
+22:
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+	b	23f
+20:
+	b	21f
+21:	sync
+	isync
+	b	22b
+
+23:
+	/* Perform a global invalidation */
+	oris	r3,r3,0x0020
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+	isync				/* For errata */
+
+BEGIN_FTR_SECTION
+	/* On the 7450, we wait for the L2I bit to clear......
+	*/
+10:	mfspr	r3,SPRN_L2CR
+	andis.	r4,r3,0x0020
+	bne	10b
+	b	11f
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+	/* Wait for the invalidation to complete */
+3:	mfspr	r3,SPRN_L2CR
+	rlwinm.	r4,r3,0,31,31
+	bne	3b
+
+11:	rlwinm	r3,r3,0,11,9		/* Turn off the L2I bit */
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+
+	/* See if we need to enable the cache */
+	cmplwi	r5,0
+	beq	4f
+
+	/* Enable the cache */
+	oris	r3,r3,0x8000
+	mtspr	SPRN_L2CR,r3
+	sync
+	
+	/* Enable L2 HW prefetch on 744x/745x */
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_MSSCR0
+	ori	r3,r3,3
+	sync
+	mtspr	SPRN_MSSCR0,r3
+	sync
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+4:
+
+	/* Restore HID0[DPM] to whatever it was before */
+	sync
+	mtspr	1008,r8
+	sync
+
+	/* Restore MSR (restores EE and DR bits to original state) */
+	SYNC
+	mtmsr	r7
+	isync
+
+	mtlr	r9
+	blr
+
+_GLOBAL(_get_L2CR)
+	/* Return the L2CR contents */
+	li	r3,0
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_L2CR
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+	blr
+
+
+/*
+ * Here is a similar routine for dealing with the L3 cache
+ * on the 745x family of chips
+ */
+
+_GLOBAL(_set_L3CR)
+	/* Make sure this is a 745x chip */
+BEGIN_FTR_SECTION
+	li	r3,-1
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
+
+	/* Turn off interrupts and data relocation. */
+	mfmsr	r7		/* Save MSR in r7 */
+	rlwinm	r4,r7,0,17,15
+	rlwinm	r4,r4,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r4
+	isync
+
+	/* Stop DST streams */
+	DSSALL
+	sync
+
+	/* Get the current enable bit of the L3CR into r4 */
+	mfspr	r4,SPRN_L3CR
+
+	/* Tweak some bits */
+	rlwinm	r5,r3,0,0,0		/* r5 contains the new enable bit */
+	rlwinm	r3,r3,0,22,20		/* Turn off the invalidate bit */
+	rlwinm	r3,r3,0,2,31		/* Turn off the enable & PE bits */
+	rlwinm	r3,r3,0,5,3		/* Turn off the clken bit */
+	/* Check to see if we need to flush */
+	rlwinm.	r4,r4,0,0,0
+	beq	2f
+
+	/* Flush the cache.
+	 */
+
+	/* TODO: use HW flush assist */
+
+	lis	r4,0x0008
+	mtctr	r4
+	li	r4,0
+1:
+	lwzx	r0,r0,r4
+	dcbf	0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+
+2:
+	/* Set up the L3CR configuration bits (and switch L3 off) */
+	sync
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	oris	r3,r3,L3CR_L3RES@h		/* Set reserved bit 5 */
+	mtspr	SPRN_L3CR,r3
+	sync
+	oris	r3,r3,L3CR_L3CLKEN@h		/* Set clken */
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* Perform a global invalidation */
+	ori	r3,r3,0x0400
+	sync
+	mtspr	SPRN_L3CR,r3
+	sync
+	isync
+
+	/* We wait for the L3I bit to clear...... */
+10:	mfspr	r3,SPRN_L3CR
+	andi.	r4,r3,0x0400
+	bne	10b
+
+	/* Clear CLKEN */
+	rlwinm	r3,r3,0,5,3		/* Turn off the clken bit */
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* See if we need to enable the cache */
+	cmplwi	r5,0
+	beq	4f
+
+	/* Enable the cache */
+	oris	r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* Restore MSR (restores EE and DR bits to original state) */
+4:	SYNC
+	mtmsr	r7
+	isync
+	blr
+
+_GLOBAL(_get_L3CR)
+	/* Return the L3CR contents */
+	li	r3,0
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_L3CR
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+	blr
+
+/* --- End of PowerLogix code ---
+ */
+
+
+/* flush_disable_L1()	- Flush and disable L1 cache
+ *
+ * clobbers r0, r3, ctr, cr0
+ * Must be called with interrupts disabled and MMU enabled.
+ */
+_GLOBAL(__flush_disable_L1)
+	/* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+	DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ 	sync
+
+	/* Load counter to 0x4000 cache lines (512k) and
+	 * load cache with datas
+	 */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	lwz	r0,0(r3)
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	isync
+	sync
+
+	/* Now flush those cache lines */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	sync
+
+	/* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,18,15
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+ 	blr
+
+/* inval_enable_L1	- Invalidate and enable L1 cache
+ *
+ * Assumes L1 is already disabled and MSR:EE is off
+ *
+ * clobbers r3
+ */
+_GLOBAL(__inval_enable_L1)
+	/* Enable and then Flash inval the instruction & data cache */
+	mfspr	r3,SPRN_HID0
+	ori	r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	xori	r3,r3, HID0_ICFI|HID0_DCI
+	mtspr	SPRN_HID0,r3
+	sync
+
+ 	blr
+
+
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index c7a799a0951..6e67b5b49ba 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -37,7 +37,7 @@ static int legacy_serial_console = -1;
 static int __init add_legacy_port(struct device_node *np, int want_index,
 				  int iotype, phys_addr_t base,
 				  phys_addr_t taddr, unsigned long irq,
-				  unsigned int flags)
+				  upf_t flags)
 {
 	u32 *clk, *spd, clock = BASE_BAUD * 16;
 	int index;
@@ -113,7 +113,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
 {
 	phys_addr_t addr;
 	u32 *addrp;
-	unsigned int flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
+	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
 
 	/* We only support ports that have a clock frequency properly
 	 * encoded in the device-tree.
@@ -236,6 +236,23 @@ static int __init add_legacy_pci_port(struct device_node *np,
 }
 #endif
 
+static void __init setup_legacy_serial_console(int console)
+{
+	struct legacy_serial_info *info =
+		&legacy_serial_infos[console];
+	void __iomem *addr;
+
+	if (info->taddr == 0)
+		return;
+	addr = ioremap(info->taddr, 0x1000);
+	if (addr == NULL)
+		return;
+	if (info->speed == 0)
+		info->speed = udbg_probe_uart_speed(addr, info->clock);
+	DBG("default console speed = %d\n", info->speed);
+	udbg_init_uart(addr, info->speed, info->clock);
+}
+
 /*
  * This is called very early, as part of setup_system() or eventually
  * setup_arch(), basically before anything else in this file. This function
@@ -318,25 +335,8 @@ void __init find_legacy_serial_ports(void)
 #endif
 
 	DBG("legacy_serial_console = %d\n", legacy_serial_console);
-
-	/* udbg is 64 bits only for now, that will change soon though ... */
-	while (legacy_serial_console >= 0) {
-		struct legacy_serial_info *info =
-			&legacy_serial_infos[legacy_serial_console];
-		void __iomem *addr;
-
-		if (info->taddr == 0)
-			break;
-		addr = ioremap(info->taddr, 0x1000);
-		if (addr == NULL)
-			break;
-		if (info->speed == 0)
-			info->speed = udbg_probe_uart_speed(addr, info->clock);
-		DBG("default console speed = %d\n", info->speed);
-		udbg_init_uart(addr, info->speed, info->clock);
-		break;
-	}
-
+	if (legacy_serial_console >= 0)
+		setup_legacy_serial_console(legacy_serial_console);
 	DBG(" <- find_legacy_serial_port()\n");
 }
 
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index e789fef4eb8..1b73508ecb2 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -56,7 +56,7 @@ static unsigned long get_purr(void)
 	unsigned long sum_purr = 0;
 	int cpu;
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		sum_purr += lppaca[cpu].emulated_time_base;
 
 #ifdef PURR_DEBUG
@@ -222,7 +222,7 @@ static unsigned long get_purr(void)
 	int cpu;
 	struct cpu_usage *cu;
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		cu = &per_cpu(cpu_usage_array, cpu);
 		sum_purr += cu->current_tb;
 	}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
new file mode 100644
index 00000000000..92f4e5f64f0
--- /dev/null
+++ b/arch/powerpc/kernel/module_32.c
@@ -0,0 +1,320 @@
+/*  Kernel module help for PPC.
+    Copyright (C) 2001 Rusty Russell.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+LIST_HEAD(module_bug_list);
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
+}
+
+/* Count how many different relocations (different symbol, different
+   addend) */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+	unsigned int i, j, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		for (j = 0; j < i; j++) {
+			/* If this addend appeared before, it's
+                           already been counted */
+			if (ELF32_R_SYM(rela[i].r_info)
+			    == ELF32_R_SYM(rela[j].r_info)
+			    && rela[i].r_addend == rela[j].r_addend)
+				break;
+		}
+		if (j == i) ret++;
+	}
+	return ret;
+}
+
+/* Get the potential trampolines size required of the init and
+   non-init sections */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+				  const Elf32_Shdr *sechdrs,
+				  const char *secstrings,
+				  int is_init)
+{
+	unsigned long ret = 0;
+	unsigned i;
+
+	/* Everything marked ALLOC (this includes the exported
+           symbols) */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		/* If it's called *.init*, and we're not init, we're
+                   not interested */
+		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+		    != is_init)
+			continue;
+
+		/* We don't want to look at debug sections. */
+		if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0)
+			continue;
+
+		if (sechdrs[i].sh_type == SHT_RELA) {
+			DEBUGP("Found relocations in section %u\n", i);
+			DEBUGP("Ptr: %p.  Number: %u\n",
+			       (void *)hdr + sechdrs[i].sh_offset,
+			       sechdrs[i].sh_size / sizeof(Elf32_Rela));
+			ret += count_relocs((void *)hdr
+					     + sechdrs[i].sh_offset,
+					     sechdrs[i].sh_size
+					     / sizeof(Elf32_Rela))
+				* sizeof(struct ppc_plt_entry);
+		}
+	}
+
+	return ret;
+}
+
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+			      Elf32_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .plt and .init.plt sections */
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
+			me->arch.init_plt_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+			me->arch.core_plt_section = i;
+	}
+	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+		printk("Module doesn't contain .plt or .init.plt sections.\n");
+		return -ENOEXEC;
+	}
+
+	/* Override their sizes */
+	sechdrs[me->arch.core_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 0);
+	sechdrs[me->arch.init_plt_section].sh_size
+		= get_plt_size(hdr, sechdrs, secstrings, 1);
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *module)
+{
+	printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n",
+	       module->name);
+	return -ENOEXEC;
+}
+
+static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
+{
+	if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16)
+	    && entry->jump[1] == 0x396b0000 + (val & 0xffff))
+		return 1;
+	return 0;
+}
+
+/* Set up a trampoline in the PLT to bounce us to the distant function */
+static uint32_t do_plt_call(void *location,
+			    Elf32_Addr val,
+			    Elf32_Shdr *sechdrs,
+			    struct module *mod)
+{
+	struct ppc_plt_entry *entry;
+
+	DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+	/* Init, or core PLT? */
+	if (location >= mod->module_core
+	    && location < mod->module_core + mod->core_size)
+		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+	else
+		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+	/* Find this entry, or if that fails, the next avail. entry */
+	while (entry->jump[0]) {
+		if (entry_matches(entry, val)) return (uint32_t)entry;
+		entry++;
+	}
+
+	/* Stolen from Paul Mackerras as well... */
+	entry->jump[0] = 0x3d600000+((val+0x8000)>>16);	/* lis r11,sym@ha */
+	entry->jump[1] = 0x396b0000 + (val&0xffff);	/* addi r11,r11,sym@l*/
+	entry->jump[2] = 0x7d6903a6;			/* mtctr r11 */
+	entry->jump[3] = 0x4e800420;			/* bctr */
+
+	DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
+	return (uint32_t)entry;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *module)
+{
+	unsigned int i;
+	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+	uint32_t value;
+
+	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rela[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rela[i].r_info);
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		switch (ELF32_R_TYPE(rela[i].r_info)) {
+		case R_PPC_ADDR32:
+			/* Simply set it */
+			*(uint32_t *)location = value;
+			break;
+
+		case R_PPC_ADDR16_LO:
+			/* Low half of the symbol */
+			*(uint16_t *)location = value;
+			break;
+		
+		case R_PPC_ADDR16_HA:
+			/* Sign-adjusted lower 16 bits: PPC ELF ABI says:
+			   (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF.
+			   This is the same, only sane.
+			 */
+			*(uint16_t *)location = (value + 0x8000) >> 16;
+			break;
+
+		case R_PPC_REL24:
+			if ((int)(value - (uint32_t)location) < -0x02000000
+			    || (int)(value - (uint32_t)location) >= 0x02000000)
+				value = do_plt_call(location, value,
+						    sechdrs, module);
+
+			/* Only replace bits 2 through 26 */
+			DEBUGP("REL24 value = %08X. location = %08X\n",
+			       value, (uint32_t)location);
+			DEBUGP("Location before: %08X.\n",
+			       *(uint32_t *)location);
+			*(uint32_t *)location
+				= (*(uint32_t *)location & ~0x03fffffc)
+				| ((value - (uint32_t)location)
+				   & 0x03fffffc);
+			DEBUGP("Location after: %08X.\n",
+			       *(uint32_t *)location);
+			DEBUGP("ie. jump to %08X+%08X = %08X\n",
+			       *(uint32_t *)location & 0x03fffffc,
+			       (uint32_t)location,
+			       (*(uint32_t *)location & 0x03fffffc)
+			       + (uint32_t)location);
+			break;
+
+		case R_PPC_REL32:
+			/* 32-bit relative jump. */
+			*(uint32_t *)location = value - (uint32_t)location;
+			break;
+
+		default:
+			printk("%s: unknown ADD relocation: %u\n",
+			       module->name,
+			       ELF32_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	char *secstrings;
+	unsigned int i;
+
+	me->arch.bug_table = NULL;
+	me->arch.num_bugs = 0;
+
+	/* Find the __bug_table section, if present */
+	secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+			continue;
+		me->arch.bug_table = (void *) sechdrs[i].sh_addr;
+		me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+		break;
+	}
+
+	/*
+	 * Strictly speaking this should have a spinlock to protect against
+	 * traversals, but since we only traverse on BUG()s, a spinlock
+	 * could potentially lead to deadlock and thus be counter-productive.
+	 */
+	list_add(&me->arch.bug_list, &module_bug_list);
+
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+	list_del(&mod->arch.bug_list);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+	struct mod_arch_specific *mod;
+	unsigned int i;
+	struct bug_entry *bug;
+
+	list_for_each_entry(mod, &module_bug_list, bug_list) {
+		bug = mod->bug_table;
+		for (i = 0; i < mod->num_bugs; ++i, ++bug)
+			if (bugaddr == bug->bug_addr)
+				return bug;
+	}
+	return NULL;
+}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index fd7db8d542d..ada50aa5b60 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -160,7 +160,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file,
 	case IOC_NVRAM_GET_OFFSET: {
 		int part, offset;
 
-		if (_machine != PLATFORM_POWERMAC)
+		if (!machine_is(powermac))
 			return -EINVAL;
 		if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
 			return -EFAULT;
@@ -174,8 +174,9 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file,
 		return 0;
 	}
 #endif /* CONFIG_PPC_PMAC */
+	default:
+		return -EINVAL;
 	}
-	return -EINVAL;
 }
 
 struct file_operations nvram_fops = {
@@ -443,7 +444,7 @@ static int nvram_setup_partition(void)
 	 * in our nvram, as Apple defined partitions use pretty much
 	 * all of the space
 	 */
-	if (_machine == PLATFORM_POWERMAC)
+	if (machine_is(powermac))
 		return -ENOSPC;
 
 	/* see if we have an OS partition that meets our needs.
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 5d1b708086b..f505a8827e3 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -56,14 +56,11 @@ struct lppaca lppaca[] = {
  * processors.  The processor VPD array needs one entry per physical
  * processor (not thread).
  */
-#define PACA_INIT_COMMON(number, start, asrr, asrv)			    \
+#define PACA_INIT_COMMON(number)					    \
 	.lppaca_ptr = &lppaca[number],					    \
 	.lock_token = 0x8000,						    \
 	.paca_index = (number),		/* Paca Index */		    \
 	.kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL,		    \
-	.stab_real = (asrr), 		/* Real pointer to segment table */ \
-	.stab_addr = (asrv),		/* Virt pointer to segment table */ \
-	.cpu_start = (start),		/* Processor start */		    \
 	.hw_cpu_id = 0xffff,
 
 #ifdef CONFIG_PPC_ISERIES
@@ -72,30 +69,20 @@ struct lppaca lppaca[] = {
 
 #define PACA_INIT(number)						    \
 {									    \
-	PACA_INIT_COMMON(number, 0, 0, 0)				    \
-	PACA_INIT_ISERIES(number)					    \
-}
-
-#define BOOTCPU_PACA_INIT(number)					    \
-{									    \
-	PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab)		    \
+	PACA_INIT_COMMON(number)					    \
 	PACA_INIT_ISERIES(number)					    \
 }
 
 #else
 #define PACA_INIT(number)						    \
 {									    \
-	PACA_INIT_COMMON(number, 0, 0, 0)				    \
+	PACA_INIT_COMMON(number)					    \
 }
 
-#define BOOTCPU_PACA_INIT(number)					    \
-{									    \
-	PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab)    \
-}
 #endif
 
 struct paca_struct paca[] = {
-	BOOTCPU_PACA_INIT(0),
+	PACA_INIT(0),
 #if NR_CPUS > 1
 	PACA_INIT(  1), PACA_INIT(  2), PACA_INIT(  3),
 #if NR_CPUS > 4
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 704c846b2b0..b129d2e4b75 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -787,7 +787,7 @@ pci_busdev_to_OF_node(struct pci_bus *bus, int devfn)
 	 * fix has to be done by making the remapping per-host and always
 	 * filling the pci_to_OF map. --BenH
 	 */
-	if (_machine == _MACH_Pmac && busnr >= 0xf0)
+	if (machine_is(powermac) && busnr >= 0xf0)
 		busnr -= 0xf0;
 	else
 #endif
@@ -1728,7 +1728,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
 	 * (bus 0 is HT root), we return the AGP one instead.
 	 */
 #ifdef CONFIG_PPC_PMAC
-	if (_machine == _MACH_Pmac && machine_is_compatible("MacRISC4"))
+	if (machine_is(powermac) && machine_is_compatible("MacRISC4"))
 		if (bus == 0)
 			bus = 0xf0;
 #endif /* CONFIG_PPC_PMAC */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ba92bab7cc2..4c4449be81c 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -78,6 +78,7 @@ int global_phb_number;		/* Global phb counter */
 
 /* Cached ISA bridge dev. */
 struct pci_dev *ppc64_isabridge_dev = NULL;
+EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
 
 static void fixup_broken_pcnet32(struct pci_dev* dev)
 {
diff --git a/arch/powerpc/kernel/perfmon_fsl_booke.c b/arch/powerpc/kernel/perfmon_fsl_booke.c
new file mode 100644
index 00000000000..32455dfcc36
--- /dev/null
+++ b/arch/powerpc/kernel/perfmon_fsl_booke.c
@@ -0,0 +1,222 @@
+/* kernel/perfmon_fsl_booke.c
+ * Freescale Book-E Performance Monitor code
+ *
+ * Author: Andy Fleming
+ * Copyright (c) 2004 Freescale Semiconductor, Inc
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/prctl.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/reg.h>
+#include <asm/xmon.h>
+#include <asm/pmc.h>
+
+static inline u32 get_pmlca(int ctr);
+static inline void set_pmlca(int ctr, u32 pmlca);
+
+static inline u32 get_pmlca(int ctr)
+{
+	u32 pmlca;
+
+	switch (ctr) {
+		case 0:
+			pmlca = mfpmr(PMRN_PMLCA0);
+			break;
+		case 1:
+			pmlca = mfpmr(PMRN_PMLCA1);
+			break;
+		case 2:
+			pmlca = mfpmr(PMRN_PMLCA2);
+			break;
+		case 3:
+			pmlca = mfpmr(PMRN_PMLCA3);
+			break;
+		default:
+			panic("Bad ctr number\n");
+	}
+
+	return pmlca;
+}
+
+static inline void set_pmlca(int ctr, u32 pmlca)
+{
+	switch (ctr) {
+		case 0:
+			mtpmr(PMRN_PMLCA0, pmlca);
+			break;
+		case 1:
+			mtpmr(PMRN_PMLCA1, pmlca);
+			break;
+		case 2:
+			mtpmr(PMRN_PMLCA2, pmlca);
+			break;
+		case 3:
+			mtpmr(PMRN_PMLCA3, pmlca);
+			break;
+		default:
+			panic("Bad ctr number\n");
+	}
+}
+
+void init_pmc_stop(int ctr)
+{
+	u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU |
+			PMLCA_FCM1 | PMLCA_FCM0);
+	u32 pmlcb = 0;
+
+	switch (ctr) {
+		case 0:
+			mtpmr(PMRN_PMLCA0, pmlca);
+			mtpmr(PMRN_PMLCB0, pmlcb);
+			break;
+		case 1:
+			mtpmr(PMRN_PMLCA1, pmlca);
+			mtpmr(PMRN_PMLCB1, pmlcb);
+			break;
+		case 2:
+			mtpmr(PMRN_PMLCA2, pmlca);
+			mtpmr(PMRN_PMLCB2, pmlcb);
+			break;
+		case 3:
+			mtpmr(PMRN_PMLCA3, pmlca);
+			mtpmr(PMRN_PMLCB3, pmlcb);
+			break;
+		default:
+			panic("Bad ctr number!\n");
+	}
+}
+
+void set_pmc_event(int ctr, int event)
+{
+	u32 pmlca;
+
+	pmlca = get_pmlca(ctr);
+
+	pmlca = (pmlca & ~PMLCA_EVENT_MASK) |
+		((event << PMLCA_EVENT_SHIFT) &
+		 PMLCA_EVENT_MASK);
+
+	set_pmlca(ctr, pmlca);
+}
+
+void set_pmc_user_kernel(int ctr, int user, int kernel)
+{
+	u32 pmlca;
+
+	pmlca = get_pmlca(ctr);
+
+	if(user)
+		pmlca &= ~PMLCA_FCU;
+	else
+		pmlca |= PMLCA_FCU;
+
+	if(kernel)
+		pmlca &= ~PMLCA_FCS;
+	else
+		pmlca |= PMLCA_FCS;
+
+	set_pmlca(ctr, pmlca);
+}
+
+void set_pmc_marked(int ctr, int mark0, int mark1)
+{
+	u32 pmlca = get_pmlca(ctr);
+
+	if(mark0)
+		pmlca &= ~PMLCA_FCM0;
+	else
+		pmlca |= PMLCA_FCM0;
+
+	if(mark1)
+		pmlca &= ~PMLCA_FCM1;
+	else
+		pmlca |= PMLCA_FCM1;
+
+	set_pmlca(ctr, pmlca);
+}
+
+void pmc_start_ctr(int ctr, int enable)
+{
+	u32 pmlca = get_pmlca(ctr);
+
+	pmlca &= ~PMLCA_FC;
+
+	if (enable)
+		pmlca |= PMLCA_CE;
+	else
+		pmlca &= ~PMLCA_CE;
+
+	set_pmlca(ctr, pmlca);
+}
+
+void pmc_start_ctrs(int enable)
+{
+	u32 pmgc0 = mfpmr(PMRN_PMGC0);
+
+	pmgc0 &= ~PMGC0_FAC;
+	pmgc0 |= PMGC0_FCECE;
+
+	if (enable)
+		pmgc0 |= PMGC0_PMIE;
+	else
+		pmgc0 &= ~PMGC0_PMIE;
+
+	mtpmr(PMRN_PMGC0, pmgc0);
+}
+
+void pmc_stop_ctrs(void)
+{
+	u32 pmgc0 = mfpmr(PMRN_PMGC0);
+
+	pmgc0 |= PMGC0_FAC;
+
+	pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE);
+
+	mtpmr(PMRN_PMGC0, pmgc0);
+}
+
+void dump_pmcs(void)
+{
+	printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0));
+	printk("pmc\t\tpmlca\t\tpmlcb\n");
+	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0),
+			mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0));
+	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1),
+			mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1));
+	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2),
+			mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2));
+	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3),
+			mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
+}
+
+EXPORT_SYMBOL(init_pmc_stop);
+EXPORT_SYMBOL(set_pmc_event);
+EXPORT_SYMBOL(set_pmc_user_kernel);
+EXPORT_SYMBOL(set_pmc_marked);
+EXPORT_SYMBOL(pmc_start_ctr);
+EXPORT_SYMBOL(pmc_start_ctrs);
+EXPORT_SYMBOL(pmc_stop_ctrs);
+EXPORT_SYMBOL(dump_pmcs);
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
index 7ba42a405f4..3c2cf661f6d 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 
+#include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/rtas.h>
 #include <asm/uaccess.h>
@@ -51,7 +52,7 @@ static int __init proc_ppc64_create(void)
 	if (!root)
 		return 1;
 
-	if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+	if (!machine_is(pseries) && !machine_is(cell))
 		return 0;
 
 	if (!proc_mkdir("rtas", root))
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f698aa77127..706090c99f4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -45,6 +45,7 @@
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/time.h>
+#include <asm/syscalls.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #endif
@@ -362,7 +363,11 @@ static void show_instructions(struct pt_regs *regs)
 		if (!(i % 8))
 			printk("\n");
 
-		if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) {
+		/* We use __get_user here *only* to avoid an OOPS on a
+		 * bad address because the pc *should* only be a
+		 * kernel address.
+		 */
+		if (BAD_PC(pc) || __get_user(instr, (unsigned int __user *)pc)) {
 			printk("XXXXXXXX ");
 		} else {
 			if (regs->nip == pc)
@@ -765,7 +770,7 @@ out:
 	return error;
 }
 
-static int validate_sp(unsigned long sp, struct task_struct *p,
+int validate_sp(unsigned long sp, struct task_struct *p,
 		       unsigned long nbytes)
 {
 	unsigned long stack_page = (unsigned long)task_stack_page(p);
@@ -803,6 +808,8 @@ static int validate_sp(unsigned long sp, struct task_struct *p,
 #define FRAME_MARKER	2
 #endif
 
+EXPORT_SYMBOL(validate_sp);
+
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long ip, sp;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d63cd562d9d..4336390bcf3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -383,14 +383,14 @@ static int __devinit finish_node_interrupts(struct device_node *np,
 			/* Apple uses bits in there in a different way, let's
 			 * only keep the real sense bit on macs
 			 */
-			if (_machine == PLATFORM_POWERMAC)
+			if (machine_is(powermac))
 				sense &= 0x1;
 			np->intrs[intrcount].sense = map_mpic_senses[sense];
 		}
 
 #ifdef CONFIG_PPC64
 		/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
-		if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
+		if (machine_is(powermac) && ic && ic->parent) {
 			char *name = get_property(ic->parent, "name", NULL);
 			if (name && !strcmp(name, "u3"))
 				np->intrs[intrcount].line += 128;
@@ -570,6 +570,18 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 	return rc;
 }
 
+unsigned long __init of_get_flat_dt_root(void)
+{
+	unsigned long p = ((unsigned long)initial_boot_params) +
+		initial_boot_params->off_dt_struct;
+
+	while(*((u32 *)p) == OF_DT_NOP)
+		p += 4;
+	BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE);
+	p += 4;
+	return _ALIGN(p + strlen((char *)p) + 1, 4);
+}
+
 /**
  * This  function can be used within scan_flattened_dt callback to get
  * access to properties
@@ -612,6 +624,25 @@ void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
 	} while(1);
 }
 
+int __init of_flat_dt_is_compatible(unsigned long node, const char *compat)
+{
+	const char* cp;
+	unsigned long cplen, l;
+
+	cp = of_get_flat_dt_prop(node, "compatible", &cplen);
+	if (cp == NULL)
+		return 0;
+	while (cplen > 0) {
+		if (strncasecmp(cp, compat, strlen(compat)) == 0)
+			return 1;
+		l = strlen(cp) + 1;
+		cp += l;
+		cplen -= l;
+	}
+
+	return 0;
+}
+
 static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
 				       unsigned long align)
 {
@@ -686,7 +717,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 #ifdef DEBUG
 				if ((strlen(p) + l + 1) != allocl) {
 					DBG("%s: p: %d, l: %d, a: %d\n",
-					    pathp, strlen(p), l, allocl);
+					    pathp, (int)strlen(p), l, allocl);
 				}
 #endif
 				p += strlen(p);
@@ -854,35 +885,73 @@ void __init unflatten_device_tree(void)
 	DBG(" <- unflatten_device_tree()\n");
 }
 
-
 static int __init early_init_dt_scan_cpus(unsigned long node,
-					  const char *uname, int depth, void *data)
+					  const char *uname, int depth,
+					  void *data)
 {
+	static int logical_cpuid = 0;
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+#ifdef CONFIG_ALTIVEC
 	u32 *prop;
-	unsigned long size;
-	char *type = of_get_flat_dt_prop(node, "device_type", &size);
+#endif
+	u32 *intserv;
+	int i, nthreads;
+	unsigned long len;
+	int found = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	boot_cpuid = 0;
-	boot_cpuid_phys = 0;
-	if (initial_boot_params && initial_boot_params->version >= 2) {
-		/* version 2 of the kexec param format adds the phys cpuid
-		 * of booted proc.
-		 */
-		boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
+	/* Get physical cpuid */
+	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
+	if (intserv) {
+		nthreads = len / sizeof(int);
 	} else {
-		/* Check if it's the boot-cpu, set it's hw index now */
-		if (of_get_flat_dt_prop(node,
+		intserv = of_get_flat_dt_prop(node, "reg", NULL);
+		nthreads = 1;
+	}
+
+	/*
+	 * Now see if any of these threads match our boot cpu.
+	 * NOTE: This must match the parsing done in smp_setup_cpu_maps.
+	 */
+	for (i = 0; i < nthreads; i++) {
+		/*
+		 * version 2 of the kexec param format adds the phys cpuid of
+		 * booted proc.
+		 */
+		if (initial_boot_params && initial_boot_params->version >= 2) {
+			if (intserv[i] ==
+					initial_boot_params->boot_cpuid_phys) {
+				found = 1;
+				break;
+			}
+		} else {
+			/*
+			 * Check if it's the boot-cpu, set it's hw index now,
+			 * unfortunately this format did not support booting
+			 * off secondary threads.
+			 */
+			if (of_get_flat_dt_prop(node,
 					"linux,boot-cpu", NULL) != NULL) {
-			prop = of_get_flat_dt_prop(node, "reg", NULL);
-			if (prop != NULL)
-				boot_cpuid_phys = *prop;
+				found = 1;
+				break;
+			}
 		}
+
+#ifdef CONFIG_SMP
+		/* logical cpu id is always 0 on UP kernels */
+		logical_cpuid++;
+#endif
+	}
+
+	if (found) {
+		DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
+			intserv[i]);
+		boot_cpuid = logical_cpuid;
+		set_hard_smp_processor_id(boot_cpuid, intserv[i]);
 	}
-	set_hard_smp_processor_id(0, boot_cpuid_phys);
 
 #ifdef CONFIG_ALTIVEC
 	/* Check if we have a VMX and eventually update CPU features */
@@ -901,16 +970,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_PPC_PSERIES
-	/*
-	 * Check for an SMT capable CPU and set the CPU feature. We do
-	 * this by looking at the size of the ibm,ppc-interrupt-server#s
-	 * property
-	 */
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
-				       &size);
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
-	if (prop && ((size / sizeof(u32)) > 1))
+	if (nthreads > 1)
 		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	else
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
 #endif
 
 	return 0;
@@ -919,7 +982,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 static int __init early_init_dt_scan_chosen(unsigned long node,
 					    const char *uname, int depth, void *data)
 {
-	u32 *prop;
 	unsigned long *lprop;
 	unsigned long l;
 	char *p;
@@ -930,14 +992,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
 		return 0;
 
-	/* get platform type */
-	prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
-	if (prop == NULL)
-		return 0;
-#ifdef CONFIG_PPC_MULTIPLATFORM
-	_machine = *prop;
-#endif
-
 #ifdef CONFIG_PPC64
 	/* check if iommu is forced on or off */
 	if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
@@ -964,15 +1018,15 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	 * set of RTAS infos now if available
 	 */
 	{
-		u64 *basep, *entryp;
+		u64 *basep, *entryp, *sizep;
 
 		basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
 		entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
-		prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
-		if (basep && entryp && prop) {
+		sizep = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
+		if (basep && entryp && sizep) {
 			rtas.base = *basep;
 			rtas.entry = *entryp;
-			rtas.size = *prop;
+			rtas.size = *sizep;
 		}
 	}
 #endif /* CONFIG_PPC_RTAS */
@@ -1001,25 +1055,13 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 
 	if (strstr(cmd_line, "mem=")) {
 		char *p, *q;
-		unsigned long maxmem = 0;
 
 		for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) {
 			q = p + 4;
 			if (p > cmd_line && p[-1] != ' ')
 				continue;
-			maxmem = simple_strtoul(q, &q, 0);
-			if (*q == 'k' || *q == 'K') {
-				maxmem <<= 10;
-				++q;
-			} else if (*q == 'm' || *q == 'M') {
-				maxmem <<= 20;
-				++q;
-			} else if (*q == 'g' || *q == 'G') {
-				maxmem <<= 30;
-				++q;
-			}
+			memory_limit = memparse(q, &q);
 		}
-		memory_limit = maxmem;
 	}
 
 	/* break now */
@@ -1755,7 +1797,7 @@ static int of_finish_dynamic_node(struct device_node *node)
 	/* We don't support that function on PowerMac, at least
 	 * not yet
 	 */
-	if (_machine == PLATFORM_POWERMAC)
+	if (machine_is(powermac))
 		return -ENODEV;
 
 	/* fix up new node's linux_phandle field */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 813c2cd194c..d66c5e77fcf 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -180,6 +180,16 @@ static unsigned long __initdata prom_tce_alloc_start;
 static unsigned long __initdata prom_tce_alloc_end;
 #endif
 
+/* Platforms codes are now obsolete in the kernel. Now only used within this
+ * file and ultimately gone too. Feel free to change them if you need, they
+ * are not shared with anything outside of this file anymore
+ */
+#define PLATFORM_PSERIES	0x0100
+#define PLATFORM_PSERIES_LPAR	0x0101
+#define PLATFORM_LPAR		0x0001
+#define PLATFORM_POWERMAC	0x0400
+#define PLATFORM_GENERIC	0x0500
+
 static int __initdata of_platform;
 
 static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
@@ -397,6 +407,11 @@ static void __init __attribute__((noreturn)) prom_panic(const char *reason)
 	reason = PTRRELOC(reason);
 #endif
 	prom_print(reason);
+	/* Do not call exit because it clears the screen on pmac
+	 * it also causes some sort of double-fault on early pmacs */
+	if (RELOC(of_platform) == PLATFORM_POWERMAC)
+		asm("trap\n");
+
 	/* ToDo: should put up an SRC here on p/iSeries */
 	call_prom("exit", 0, 0);
 
@@ -1487,7 +1502,10 @@ static int __init prom_find_machine_type(void)
 	int len, i = 0;
 #ifdef CONFIG_PPC64
 	phandle rtas;
+	int x;
 #endif
+
+	/* Look for a PowerMac */
 	len = prom_getprop(_prom->root, "compatible",
 			   compat, sizeof(compat)-1);
 	if (len > 0) {
@@ -1500,28 +1518,36 @@ static int __init prom_find_machine_type(void)
 			if (strstr(p, RELOC("Power Macintosh")) ||
 			    strstr(p, RELOC("MacRISC")))
 				return PLATFORM_POWERMAC;
-#ifdef CONFIG_PPC64
-			if (strstr(p, RELOC("Momentum,Maple")))
-				return PLATFORM_MAPLE;
-			if (strstr(p, RELOC("IBM,CPB")))
-				return PLATFORM_CELL;
-#endif
 			i += sl + 1;
 		}
 	}
 #ifdef CONFIG_PPC64
+	/* If not a mac, try to figure out if it's an IBM pSeries or any other
+	 * PAPR compliant platform. We assume it is if :
+	 *  - /device_type is "chrp" (please, do NOT use that for future
+	 *    non-IBM designs !
+	 *  - it has /rtas
+	 */
+	len = prom_getprop(_prom->root, "model",
+			   compat, sizeof(compat)-1);
+	if (len <= 0)
+		return PLATFORM_GENERIC;
+	compat[len] = 0;
+	if (strcmp(compat, "chrp"))
+		return PLATFORM_GENERIC;
+
 	/* Default to pSeries. We need to know if we are running LPAR */
 	rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
-	if (PHANDLE_VALID(rtas)) {
-		int x = prom_getproplen(rtas, "ibm,hypertas-functions");
-		if (x != PROM_ERROR) {
-			prom_printf("Hypertas detected, assuming LPAR !\n");
-			return PLATFORM_PSERIES_LPAR;
-		}
+	if (!PHANDLE_VALID(rtas))
+		return PLATFORM_GENERIC;
+	x = prom_getproplen(rtas, "ibm,hypertas-functions");
+	if (x != PROM_ERROR) {
+		prom_printf("Hypertas detected, assuming LPAR !\n");
+		return PLATFORM_PSERIES_LPAR;
 	}
 	return PLATFORM_PSERIES;
 #else
-	return PLATFORM_CHRP;
+	return PLATFORM_GENERIC;
 #endif
 }
 
@@ -2029,7 +2055,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 {	
        	struct prom_t *_prom;
 	unsigned long hdr;
-	u32 getprop_rval;
 	unsigned long offset = reloc_offset();
 
 #ifdef CONFIG_PPC32
@@ -2060,6 +2085,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	 */
 	prom_init_stdout();
 
+	/*
+	 * Get default machine type. At this point, we do not differentiate
+	 * between pSeries SMP and pSeries LPAR
+	 */
+	RELOC(of_platform) = prom_find_machine_type();
+
 	/* Bail if this is a kdump kernel. */
 	if (PHYSICAL_START > 0)
 		prom_panic("Error: You can't boot a kdump kernel from OF!\n");
@@ -2069,15 +2100,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	 */
 	prom_check_initrd(r3, r4);
 
-	/*
-	 * Get default machine type. At this point, we do not differentiate
-	 * between pSeries SMP and pSeries LPAR
-	 */
-	RELOC(of_platform) = prom_find_machine_type();
-	getprop_rval = RELOC(of_platform);
-	prom_setprop(_prom->chosen, "/chosen", "linux,platform",
-		     &getprop_rval, sizeof(getprop_rval));
-
 #ifdef CONFIG_PPC_PSERIES
 	/*
 	 * On pSeries, inform the firmware about our capabilities
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 1f03fb28cc0..456286cf1d1 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -257,7 +257,7 @@ static int __init proc_rtas_init(void)
 {
 	struct proc_dir_entry *entry;
 
-	if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR)
+	if (!machine_is(pseries))
 		return 1;
 
 	rtas_node = of_find_node_by_name(NULL, "rtas");
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b5b2add7ad1..06636c927a7 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -25,6 +25,7 @@
 #include <asm/hvcall.h>
 #include <asm/semaphore.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 #include <asm/page.h>
 #include <asm/param.h>
 #include <asm/system.h>
@@ -32,6 +33,7 @@
 #include <asm/uaccess.h>
 #include <asm/lmb.h>
 #include <asm/udbg.h>
+#include <asm/syscalls.h>
 
 struct rtas_t rtas = {
 	.lock = SPIN_LOCK_UNLOCKED
@@ -591,7 +593,7 @@ static void rtas_percpu_suspend_me(void *info)
 		data->waiting = 0;
 		data->args->args[data->args->nargs] =
 			rtas_call(ibm_suspend_me_token, 0, 1, NULL);
-		for_each_cpu(i)
+		for_each_possible_cpu(i)
 			plpar_hcall_norets(H_PROD,i);
 	} else {
 		data->waiting = -EBUSY;
@@ -624,7 +626,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
 	/* Prod each CPU.  This won't hurt, and will wake
 	 * anyone we successfully put to sleep with H_Join
 	 */
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		plpar_hcall_norets(H_PROD, i);
 
 	return data.waiting;
@@ -767,7 +769,7 @@ void __init rtas_initialize(void)
 	 * the stop-self token if any
 	 */
 #ifdef CONFIG_PPC64
-	if (_machine == PLATFORM_PSERIES_LPAR) {
+	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
 		rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
 		ibm_suspend_me_token = rtas_token("ibm,suspend-me");
 	}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index c1d62bf11f2..c607f3b9ca1 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -9,6 +9,9 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  */
+
+#undef DEBUG
+
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/string.h>
@@ -41,6 +44,7 @@
 #include <asm/time.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
+#include <asm/firmware.h>
 #include <asm/btext.h>
 #include <asm/nvram.h>
 #include <asm/setup.h>
@@ -56,8 +60,6 @@
 
 #include "setup.h"
 
-#undef DEBUG
-
 #ifdef DEBUG
 #include <asm/udbg.h>
 #define DBG(fmt...) udbg_printf(fmt)
@@ -65,10 +67,12 @@
 #define DBG(fmt...)
 #endif
 
-#ifdef CONFIG_PPC_MULTIPLATFORM
-int _machine = 0;
-EXPORT_SYMBOL(_machine);
-#endif
+/* The main machine-dep calls structure
+ */
+struct machdep_calls ppc_md;
+EXPORT_SYMBOL(ppc_md);
+struct machdep_calls *machine_id;
+EXPORT_SYMBOL(machine_id);
 
 unsigned long klimit = (unsigned long) _end;
 
@@ -168,7 +172,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 			   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
 #endif /* CONFIG_SMP && CONFIG_PPC32 */
 		seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
-
+		if (ppc_md.name)
+			seq_printf(m, "platform\t: %s\n", ppc_md.name);
 		if (ppc_md.show_cpuinfo != NULL)
 			ppc_md.show_cpuinfo(m);
 
@@ -352,12 +357,13 @@ void __init check_for_initrd(void)
  * must be called before using this.
  *
  * While we're here, we may as well set the "physical" cpu ids in the paca.
+ *
+ * NOTE: This must match the parsing done in early_init_dt_scan_cpus.
  */
 void __init smp_setup_cpu_maps(void)
 {
 	struct device_node *dn = NULL;
 	int cpu = 0;
-	int swap_cpuid = 0;
 
 	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
 		int *intserv;
@@ -376,30 +382,17 @@ void __init smp_setup_cpu_maps(void)
 		for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
 			cpu_set(cpu, cpu_present_map);
 			set_hard_smp_processor_id(cpu, intserv[j]);
-
-			if (intserv[j] == boot_cpuid_phys)
-				swap_cpuid = cpu;
 			cpu_set(cpu, cpu_possible_map);
 			cpu++;
 		}
 	}
 
-	/* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that
-	 * boot cpu is logical 0.
-	 */
-	if (boot_cpuid_phys != get_hard_smp_processor_id(0)) {
-		u32 tmp;
-		tmp = get_hard_smp_processor_id(0);
-		set_hard_smp_processor_id(0, boot_cpuid_phys);
-		set_hard_smp_processor_id(swap_cpuid, tmp);
-	}
-
 #ifdef CONFIG_PPC64
 	/*
 	 * On pSeries LPAR, we need to know how many cpus
 	 * could possibly be added to this partition.
 	 */
-	if (_machine == PLATFORM_PSERIES_LPAR &&
+	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
 	    (dn = of_find_node_by_path("/rtas"))) {
 		int num_addr_cell, num_size_cell, maxcpus;
 		unsigned int *ireg;
@@ -438,7 +431,7 @@ void __init smp_setup_cpu_maps(void)
 	/*
 	 * Do the sibling map; assume only two threads per processor.
 	 */
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 		if (cpu_has_feature(CPU_FTR_SMT))
 			cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
@@ -468,3 +461,34 @@ static int __init early_xmon(char *p)
 }
 early_param("xmon", early_xmon);
 #endif
+
+void probe_machine(void)
+{
+	extern struct machdep_calls __machine_desc_start;
+	extern struct machdep_calls __machine_desc_end;
+
+	/*
+	 * Iterate all ppc_md structures until we find the proper
+	 * one for the current machine type
+	 */
+	DBG("Probing machine type ...\n");
+
+	for (machine_id = &__machine_desc_start;
+	     machine_id < &__machine_desc_end;
+	     machine_id++) {
+		DBG("  %s ...", machine_id->name);
+		memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
+		if (ppc_md.probe()) {
+			DBG(" match !\n");
+			break;
+		}
+		DBG("\n");
+	}
+	/* What can we do if we didn't find ? */
+	if (machine_id >= &__machine_desc_end) {
+		DBG("No suitable machine found !\n");
+		for (;;);
+	}
+
+	printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
+}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index dc2770df25b..a72bf5dceee 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -53,9 +53,6 @@
 extern void platform_init(void);
 extern void bootx_init(unsigned long r4, unsigned long phys);
 
-extern void ppc6xx_idle(void);
-extern void power4_idle(void);
-
 boot_infos_t *boot_infos;
 struct ide_machdep_calls ppc_ide_md;
 
@@ -70,10 +67,6 @@ unsigned int DMA_MODE_WRITE;
 int have_of = 1;
 
 #ifdef CONFIG_PPC_MULTIPLATFORM
-extern void prep_init(void);
-extern void pmac_init(void);
-extern void chrp_init(void);
-
 dev_t boot_dev;
 #endif /* CONFIG_PPC_MULTIPLATFORM */
 
@@ -85,9 +78,6 @@ unsigned long SYSRQ_KEY = 0x54;
 unsigned long vgacon_remap_base;
 #endif
 
-struct machdep_calls ppc_md;
-EXPORT_SYMBOL(ppc_md);
-
 /*
  * These are used in binfmt_elf.c to put aux entries on the stack
  * for each elf executable being started.
@@ -111,7 +101,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
 
 	/* First zero the BSS -- use memset_io, some platforms don't have
 	 * caches on yet */
-	memset_io(PTRRELOC(&__bss_start), 0, _end - __bss_start);
+	memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, _end - __bss_start);
 
 	/*
 	 * Identify the CPU type and fix up code sections
@@ -123,48 +113,6 @@ unsigned long __init early_init(unsigned long dt_ptr)
 	return KERNELBASE + offset;
 }
 
-#ifdef CONFIG_PPC_MULTIPLATFORM
-/*
- * The PPC_MULTIPLATFORM version of platform_init...
- */
-void __init platform_init(void)
-{
-	/* if we didn't get any bootinfo telling us what we are... */
-	if (_machine == 0) {
-		/* prep boot loader tells us if we're prep or not */
-		if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) )
-			_machine = _MACH_prep;
-	}
-
-#ifdef CONFIG_PPC_PREP
-	/* not much more to do here, if prep */
-	if (_machine == _MACH_prep) {
-		prep_init();
-		return;
-	}
-#endif
-
-#ifdef CONFIG_ADB
-	if (strstr(cmd_line, "adb_sync")) {
-		extern int __adb_probe_sync;
-		__adb_probe_sync = 1;
-	}
-#endif /* CONFIG_ADB */
-
-	switch (_machine) {
-#ifdef CONFIG_PPC_PMAC
-	case _MACH_Pmac:
-		pmac_init();
-		break;
-#endif
-#ifdef CONFIG_PPC_CHRP
-	case _MACH_chrp:
-		chrp_init();
-		break;
-#endif
-	}
-}
-#endif
 
 /*
  * Find out what kind of machine we're on and save any data we need
@@ -190,11 +138,17 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
 		strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line));
 #endif /* CONFIG_CMDLINE */
 
-	/* Base init based on machine type */
+#ifdef CONFIG_PPC_MULTIPLATFORM
+	probe_machine();
+#else
+	/* Base init based on machine type. Obsoloete, please kill ! */
 	platform_init();
+#endif
 
 #ifdef CONFIG_6xx
-	ppc_md.power_save = ppc6xx_idle;
+	if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+	    cpu_has_feature(CPU_FTR_CAN_NAP))
+		ppc_md.power_save = ppc6xx_idle;
 #endif
 
 	if (ppc_md.progress)
@@ -272,7 +226,7 @@ int __init ppc_init(void)
 	if ( ppc_md.progress ) ppc_md.progress("             ", 0xffff);
 
 	/* register CPU devices */
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		register_cpu(&cpu_devices[i], i, NULL);
 
 	/* call platform init */
@@ -352,12 +306,6 @@ void __init setup_arch(char **cmdline_p)
 	do_init_bootmem();
 	if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
 
-#ifdef CONFIG_PPC_OCP
-	/* Initialize OCP device list */
-	ocp_early_init();
-	if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab);
-#endif
-
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
 #endif
@@ -366,7 +314,4 @@ void __init setup_arch(char **cmdline_p)
 	if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
 
 	paging_init();
-
-	/* this is for modules since _machine can be a define -- Cort */
-	ppc_md.ppc_machine = _machine;
 }
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e20c1fae342..59aa92cd6fa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -73,7 +73,6 @@
 
 int have_of = 1;
 int boot_cpuid = 0;
-int boot_cpuid_phys = 0;
 dev_t boot_dev;
 u64 ppc64_pft_size;
 
@@ -96,11 +95,6 @@ int dcache_bsize;
 int icache_bsize;
 int ucache_bsize;
 
-/* The main machine-dep calls structure
- */
-struct machdep_calls ppc_md;
-EXPORT_SYMBOL(ppc_md);
-
 #ifdef CONFIG_MAGIC_SYSRQ
 unsigned long SYSRQ_KEY;
 #endif /* CONFIG_MAGIC_SYSRQ */
@@ -161,32 +155,6 @@ early_param("smt-enabled", early_smt_enabled);
 #define check_smt_enabled()
 #endif /* CONFIG_SMP */
 
-extern struct machdep_calls pSeries_md;
-extern struct machdep_calls pmac_md;
-extern struct machdep_calls maple_md;
-extern struct machdep_calls cell_md;
-extern struct machdep_calls iseries_md;
-
-/* Ultimately, stuff them in an elf section like initcalls... */
-static struct machdep_calls __initdata *machines[] = {
-#ifdef CONFIG_PPC_PSERIES
-	&pSeries_md,
-#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_PMAC
-	&pmac_md,
-#endif /* CONFIG_PPC_PMAC */
-#ifdef CONFIG_PPC_MAPLE
-	&maple_md,
-#endif /* CONFIG_PPC_MAPLE */
-#ifdef CONFIG_PPC_CELL
-	&cell_md,
-#endif
-#ifdef CONFIG_PPC_ISERIES
-	&iseries_md,
-#endif
-	NULL
-};
-
 /*
  * Early initialization entry point. This is called by head.S
  * with MMU translation disabled. We rely on the "feature" of
@@ -208,13 +176,10 @@ static struct machdep_calls __initdata *machines[] = {
 
 void __init early_setup(unsigned long dt_ptr)
 {
-	struct paca_struct *lpaca = get_paca();
-	static struct machdep_calls **mach;
-
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
-	DBG(" -> early_setup()\n");
+ 	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
 
 	/*
 	 * Do early initializations using the flattened device
@@ -223,22 +188,16 @@ void __init early_setup(unsigned long dt_ptr)
 	 */
 	early_init_devtree(__va(dt_ptr));
 
-	/*
-	 * Iterate all ppc_md structures until we find the proper
-	 * one for the current machine type
-	 */
-	DBG("Probing machine type for platform %x...\n", _machine);
+	/* Now we know the logical id of our boot cpu, setup the paca. */
+	setup_boot_paca();
 
-	for (mach = machines; *mach; mach++) {
-		if ((*mach)->probe(_machine))
-			break;
-	}
-	/* What can we do if we didn't find ? */
-	if (*mach == NULL) {
-		DBG("No suitable machine found !\n");
-		for (;;);
-	}
-	ppc_md = **mach;
+	/* Fix up paca fields required for the boot cpu */
+	get_paca()->cpu_start = 1;
+	get_paca()->stab_real = __pa((u64)&initial_stab);
+	get_paca()->stab_addr = (u64)&initial_stab;
+
+	/* Probe the machine type */
+	probe_machine();
 
 #ifdef CONFIG_CRASH_DUMP
 	kdump_setup();
@@ -260,7 +219,7 @@ void __init early_setup(unsigned long dt_ptr)
 		if (cpu_has_feature(CPU_FTR_SLB))
 			slb_initialize();
 		else
-			stab_initialize(lpaca->stab_real);
+			stab_initialize(get_paca()->stab_real);
 	}
 
 	DBG(" <- early_setup()\n");
@@ -340,7 +299,7 @@ static void __init initialize_cache_info(void)
 			const char *dc, *ic;
 
 			/* Then read cache informations */
-			if (_machine == PLATFORM_POWERMAC) {
+			if (machine_is(powermac)) {
 				dc = "d-cache-block-size";
 				ic = "i-cache-block-size";
 			} else {
@@ -484,7 +443,6 @@ void __init setup_system(void)
 	printk("ppc64_pft_size                = 0x%lx\n", ppc64_pft_size);
 	printk("ppc64_interrupt_controller    = 0x%ld\n",
 	       ppc64_interrupt_controller);
-	printk("platform                      = 0x%x\n", _machine);
 	printk("physicalMemorySize            = 0x%lx\n", lmb_phys_mem_size());
 	printk("ppc64_caches.dcache_line_size = 0x%x\n",
 	       ppc64_caches.dline_size);
@@ -516,7 +474,7 @@ static void __init irqstack_early_init(void)
 	 * interrupt stacks must be under 256MB, we cannot afford to take
 	 * SLB misses on them.
 	 */
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
 			__va(lmb_alloc_base(THREAD_SIZE,
 					    THREAD_SIZE, 0x10000000));
@@ -549,7 +507,7 @@ static void __init emergency_stack_init(void)
 	 */
 	limit = min(0x10000000UL, lmb.rmo_size);
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		paca[i].emergency_sp =
 		__va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
 }
@@ -602,12 +560,6 @@ void __init setup_arch(char **cmdline_p)
 
 	ppc_md.setup_arch();
 
-	/* Use the default idle loop if the platform hasn't provided one. */
-	if (NULL == ppc_md.idle_loop) {
-		ppc_md.idle_loop = default_idle;
-		printk(KERN_INFO "Using default idle loop\n");
-	}
-
 	paging_init();
 	ppc64_boot_msg(0x15, "Setup Done");
 }
@@ -672,7 +624,7 @@ void __init setup_per_cpu_areas(void)
 		size = PERCPU_ENOUGH_ROOM;
 #endif
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index d7a4e814974..01e3c08cb55 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -42,6 +42,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/sigcontext.h>
 #include <asm/vdso.h>
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 47f910380a6..27f65b95184 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 #include <asm/cacheflush.h>
+#include <asm/syscalls.h>
 #include <asm/vdso.h>
 
 #define DEBUG_SIG 0
@@ -211,7 +212,7 @@ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs
         /* Default to using normal stack */
         newsp = regs->gpr[1];
 
-	if (ka->sa.sa_flags & SA_ONSTACK) {
+	if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) {
 		if (! on_sig_stack(regs->gpr[1]))
 			newsp = (current->sas_ss_sp + current->sas_ss_size);
 	}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 805eaedbc30..530f7dba0bd 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -362,7 +362,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
  
 	smp_space_timers(max_cpus);
 
-	for_each_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		if (cpu != boot_cpuid)
 			smp_create_idle(cpu);
 }
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
new file mode 100644
index 00000000000..69773cc1a85
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -0,0 +1,349 @@
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_TB		0x60
+#define SL_R2		0x68
+#define SL_CR		0x6c
+#define SL_LR		0x70
+#define SL_R12		0x74	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)
+
+	.section .data
+	.align	5
+
+_GLOBAL(swsusp_save_area)
+	.space	SL_SIZE
+
+
+	.section .text
+	.align	5
+
+_GLOBAL(swsusp_arch_suspend)
+
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+
+	mflr	r0
+	stw	r0,SL_LR(r11)
+	mfcr	r0
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r11)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r11)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r11)
+	mftb	r5
+	stw	r5,SL_TB+4(r11)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b
+
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r11)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r11)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r11)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r11)
+
+	/* Save BATs */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r11)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r11)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r11)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r11)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r11)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r11)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r11)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r11)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r11)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r11)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r11)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r11)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r11)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r11)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r11)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r11)
+
+#if  0
+	/* Backup various CPU config stuffs */
+	bl	__save_cpu_setup
+#endif
+	/* Call the low level suspend stuff (we should probably have made
+	 * a stackframe...
+	 */
+	bl	swsusp_save
+
+	/* Restore LR from the save area */
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+	lwz	r0,SL_LR(r11)
+	mtlr	r0
+
+	blr
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+	/* Stop pending alitvec streams and memory accesses */
+BEGIN_FTR_SECTION
+	DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ 	sync
+
+	/* Disable MSR:DR to make sure we don't take a TLB or
+	 * hash miss during the copy, as our hash table will
+	 * for a while be unuseable. For .text, we assume we are
+	 * covered by a BAT. This works only for non-G5 at this
+	 * point. G5 will need a better approach, possibly using
+	 * a small temporary hash table filled with large mappings,
+	 * disabling the MMU completely isn't a good option for
+	 * performance reasons.
+	 * (Note that 750's may have the same performance issue as
+	 * the G5 in this case, we should investigate using moving
+	 * BATs for these CPUs)
+	 */
+	mfmsr	r0
+	sync
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	sync
+	isync
+
+	/* Load ptr the list of pages to copy in r3 */
+	lis	r11,(pagedir_nosave - KERNELBASE)@h
+	ori	r11,r11,pagedir_nosave@l
+	lwz	r10,0(r11)
+
+	/* Copy the pages. This is a very basic implementation, to
+	 * be replaced by something more cache efficient */
+1:
+	tophys(r3,r10)
+	li	r0,256
+	mtctr	r0
+	lwz	r11,pbe_address(r3)	/* source */
+	tophys(r5,r11)
+	lwz	r10,pbe_orig_address(r3)	/* destination */
+	tophys(r6,r10)
+2:
+	lwz	r8,0(r5)
+	lwz	r9,4(r5)
+	lwz	r10,8(r5)
+	lwz	r11,12(r5)
+	addi	r5,r5,16
+	stw	r8,0(r6)
+	stw	r9,4(r6)
+	stw	r10,8(r6)
+	stw	r11,12(r6)
+	addi	r6,r6,16
+	bdnz	2b
+	lwz		r10,pbe_next(r3)
+	cmpwi	0,r10,0
+	bne	1b
+
+	/* Do a very simple cache flush/inval of the L1 to ensure
+	 * coherency of the icache
+	 */
+	lis	r3,0x0002
+	mtctr	r3
+	li	r3, 0
+1:
+	lwz	r0,0(r3)
+	addi	r3,r3,0x0020
+	bdnz	1b
+	isync
+	sync
+
+	/* Now flush those cache lines */
+	lis	r3,0x0002
+	mtctr	r3
+	li	r3, 0
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x0020
+	bdnz	1b
+	sync
+
+	/* Ok, we are now running with the kernel data of the old
+	 * kernel fully restored. We can get to the save area
+	 * easily now. As for the rest of the code, it assumes the
+	 * loader kernel and the booted one are exactly identical
+	 */
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+	tophys(r11,r11)
+
+#if 0
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+#endif
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU.
+	 * This is a bit hairy as we are running out of those BATs,
+	 * but first, our code is probably in the icache, and we are
+	 * writing the same value to the BAT, so that should be fine,
+	 * though a better solution will have to be found long-term
+	 */
+	lwz	r4,SL_SDR1(r11)
+	mtsdr1	r4
+	lwz	r4,SL_SPRG0(r11)
+	mtsprg	0,r4
+	lwz	r4,SL_SPRG0+4(r11)
+	mtsprg	1,r4
+	lwz	r4,SL_SPRG0+8(r11)
+	mtsprg	2,r4
+	lwz	r4,SL_SPRG0+12(r11)
+	mtsprg	3,r4
+
+#if 0
+	lwz	r4,SL_DBAT0(r11)
+	mtdbatu	0,r4
+	lwz	r4,SL_DBAT0+4(r11)
+	mtdbatl	0,r4
+	lwz	r4,SL_DBAT1(r11)
+	mtdbatu	1,r4
+	lwz	r4,SL_DBAT1+4(r11)
+	mtdbatl	1,r4
+	lwz	r4,SL_DBAT2(r11)
+	mtdbatu	2,r4
+	lwz	r4,SL_DBAT2+4(r11)
+	mtdbatl	2,r4
+	lwz	r4,SL_DBAT3(r11)
+	mtdbatu	3,r4
+	lwz	r4,SL_DBAT3+4(r11)
+	mtdbatl	3,r4
+	lwz	r4,SL_IBAT0(r11)
+	mtibatu	0,r4
+	lwz	r4,SL_IBAT0+4(r11)
+	mtibatl	0,r4
+	lwz	r4,SL_IBAT1(r11)
+	mtibatu	1,r4
+	lwz	r4,SL_IBAT1+4(r11)
+	mtibatl	1,r4
+	lwz	r4,SL_IBAT2(r11)
+	mtibatu	2,r4
+	lwz	r4,SL_IBAT2+4(r11)
+	mtibatl	2,r4
+	lwz	r4,SL_IBAT3(r11)
+	mtibatu	3,r4
+	lwz	r4,SL_IBAT3+4(r11)
+	mtibatl	3,r4
+#endif
+
+BEGIN_FTR_SECTION
+	li	r4,0
+	mtspr	SPRN_DBAT4U,r4
+	mtspr	SPRN_DBAT4L,r4
+	mtspr	SPRN_DBAT5U,r4
+	mtspr	SPRN_DBAT5L,r4
+	mtspr	SPRN_DBAT6U,r4
+	mtspr	SPRN_DBAT6L,r4
+	mtspr	SPRN_DBAT7U,r4
+	mtspr	SPRN_DBAT7L,r4
+	mtspr	SPRN_IBAT4U,r4
+	mtspr	SPRN_IBAT4L,r4
+	mtspr	SPRN_IBAT5U,r4
+	mtspr	SPRN_IBAT5L,r4
+	mtspr	SPRN_IBAT6U,r4
+	mtspr	SPRN_IBAT6L,r4
+	mtspr	SPRN_IBAT7U,r4
+	mtspr	SPRN_IBAT7L,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+
+	/* Flush all TLBs */
+	lis	r4,0x1000
+1:	addic.	r4,r4,-0x1000
+	tlbie	r4
+	blt	1b
+	sync
+
+	/* restore the MSR and turn on the MMU */
+	lwz	r3,SL_MSR(r11)
+	bl	turn_on_mmu
+	tovirt(r11,r11)
+
+	/* Restore TB */
+	li	r3,0
+	mttbl	r3
+	lwz	r3,SL_TB(r11)
+	lwz	r4,SL_TB+4(r11)
+	mttbu	r3
+	mttbl	r4
+
+	/* Kick decrementer */
+	li	r0,1
+	mtdec	r0
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r11)
+	mtcr	r0
+	lwz	r2,SL_R2(r11)
+	lmw	r12,SL_R12(r11)
+	lwz	r1,SL_SP(r11)
+	lwz	r0,SL_LR(r11)
+	mtlr	r0
+
+	// XXX Note: we don't really need to call swsusp_resume
+
+	li	r3,0
+	blr
+
+/* FIXME:This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+turn_on_mmu:
+	mflr	r4
+	mtsrr0	r4
+	mtsrr1	r3
+	sync
+	isync
+	rfi
+
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index ad895c99813..9b69d99a910 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -40,6 +40,7 @@
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
 #include <asm/semaphore.h>
+#include <asm/syscalls.h>
 #include <asm/time.h>
 #include <asm/unistd.h>
 
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0f0c3a9ae2e..73560ef6f80 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -65,20 +65,20 @@ static int __init smt_setup(void)
 	unsigned int cpu;
 
 	if (!cpu_has_feature(CPU_FTR_SMT))
-		return 1;
+		return -ENODEV;
 
 	options = find_path_device("/options");
 	if (!options)
-		return 1;
+		return -ENODEV;
 
 	val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
 					   NULL);
 	if (!smt_snooze_cmdline && val) {
-		for_each_cpu(cpu)
+		for_each_possible_cpu(cpu)
 			per_cpu(smt_snooze_delay, cpu) = *val;
 	}
 
-	return 1;
+	return 0;
 }
 __initcall(smt_setup);
 
@@ -93,7 +93,7 @@ static int __init setup_smt_snooze_delay(char *str)
 	smt_snooze_cmdline = 1;
 
 	if (get_option(&str, &snooze)) {
-		for_each_cpu(cpu)
+		for_each_possible_cpu(cpu)
 			per_cpu(smt_snooze_delay, cpu) = snooze;
 	}
 
@@ -347,7 +347,7 @@ static int __init topology_init(void)
 
 	register_cpu_notifier(&sysfs_cpu_nb);
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		struct cpu *c = &per_cpu(cpu_devices, cpu);
 
 #ifdef CONFIG_NUMA
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
new file mode 100644
index 00000000000..26bd8ea35a4
--- /dev/null
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -0,0 +1,271 @@
+/*
+ * temp.c	Thermal management for cpu's with Thermal Assist Units
+ *
+ * Written by Troy Benjegerdes <hozer@drgw.net>
+ *
+ * TODO:
+ * dynamic power management to limit peak CPU temp (using ICTC)
+ * calibration???
+ *
+ * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery
+ * life in portables, and add a 'performance/watt' metric somewhere in /proc
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/reg.h>
+#include <asm/nvram.h>
+#include <asm/cache.h>
+#include <asm/8xx_immap.h>
+#include <asm/machdep.h>
+
+static struct tau_temp
+{
+	int interrupts;
+	unsigned char low;
+	unsigned char high;
+	unsigned char grew;
+} tau[NR_CPUS];
+
+struct timer_list tau_timer;
+
+#undef DEBUG
+
+/* TODO: put these in a /proc interface, with some sanity checks, and maybe
+ * dynamic adjustment to minimize # of interrupts */
+/* configurable values for step size and how much to expand the window when
+ * we get an interrupt. These are based on the limit that was out of range */
+#define step_size		2	/* step size when temp goes out of range */
+#define window_expand		1	/* expand the window by this much */
+/* configurable values for shrinking the window */
+#define shrink_timer	2*HZ	/* period between shrinking the window */
+#define min_window	2	/* minimum window size, degrees C */
+
+void set_thresholds(unsigned long cpu)
+{
+#ifdef CONFIG_TAU_INT
+	/*
+	 * setup THRM1,
+	 * threshold, valid bit, enable interrupts, interrupt when below threshold
+	 */
+	mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+
+	/* setup THRM2,
+	 * threshold, valid bit, enable interrupts, interrupt when above threshhold
+	 */
+	mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
+#else
+	/* same thing but don't enable interrupts */
+	mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
+	mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
+#endif
+}
+
+void TAUupdate(int cpu)
+{
+	unsigned thrm;
+
+#ifdef DEBUG
+	printk("TAUupdate ");
+#endif
+
+	/* if both thresholds are crossed, the step_sizes cancel out
+	 * and the window winds up getting expanded twice. */
+	if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
+		if(thrm & THRM1_TIN){ /* crossed low threshold */
+			if (tau[cpu].low >= step_size){
+				tau[cpu].low -= step_size;
+				tau[cpu].high -= (step_size - window_expand);
+			}
+			tau[cpu].grew = 1;
+#ifdef DEBUG
+			printk("low threshold crossed ");
+#endif
+		}
+	}
+	if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
+		if(thrm & THRM1_TIN){ /* crossed high threshold */
+			if (tau[cpu].high <= 127-step_size){
+				tau[cpu].low += (step_size - window_expand);
+				tau[cpu].high += step_size;
+			}
+			tau[cpu].grew = 1;
+#ifdef DEBUG
+			printk("high threshold crossed ");
+#endif
+		}
+	}
+
+#ifdef DEBUG
+	printk("grew = %d\n", tau[cpu].grew);
+#endif
+
+#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
+	set_thresholds(cpu);
+#endif
+
+}
+
+#ifdef CONFIG_TAU_INT
+/*
+ * TAU interrupts - called when we have a thermal assist unit interrupt
+ * with interrupts disabled
+ */
+
+void TAUException(struct pt_regs * regs)
+{
+	int cpu = smp_processor_id();
+
+	irq_enter();
+	tau[cpu].interrupts++;
+
+	TAUupdate(cpu);
+
+	irq_exit();
+}
+#endif /* CONFIG_TAU_INT */
+
+static void tau_timeout(void * info)
+{
+	int cpu;
+	unsigned long flags;
+	int size;
+	int shrink;
+
+	/* disabling interrupts *should* be okay */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+#ifndef CONFIG_TAU_INT
+	TAUupdate(cpu);
+#endif
+
+	size = tau[cpu].high - tau[cpu].low;
+	if (size > min_window && ! tau[cpu].grew) {
+		/* do an exponential shrink of half the amount currently over size */
+		shrink = (2 + size - min_window) / 4;
+		if (shrink) {
+			tau[cpu].low += shrink;
+			tau[cpu].high -= shrink;
+		} else { /* size must have been min_window + 1 */
+			tau[cpu].low += 1;
+#if 1 /* debug */
+			if ((tau[cpu].high - tau[cpu].low) != min_window){
+				printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
+			}
+#endif
+		}
+	}
+
+	tau[cpu].grew = 0;
+
+	set_thresholds(cpu);
+
+	/*
+	 * Do the enable every time, since otherwise a bunch of (relatively)
+	 * complex sleep code needs to be added. One mtspr every time
+	 * tau_timeout is called is probably not a big deal.
+	 *
+	 * Enable thermal sensor and set up sample interval timer
+	 * need 20 us to do the compare.. until a nice 'cpu_speed' function
+	 * call is implemented, just assume a 500 mhz clock. It doesn't really
+	 * matter if we take too long for a compare since it's all interrupt
+	 * driven anyway.
+	 *
+	 * use a extra long time.. (60 us @ 500 mhz)
+	 */
+	mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
+
+	local_irq_restore(flags);
+}
+
+static void tau_timeout_smp(unsigned long unused)
+{
+
+	/* schedule ourselves to be run again */
+	mod_timer(&tau_timer, jiffies + shrink_timer) ;
+	on_each_cpu(tau_timeout, NULL, 1, 0);
+}
+
+/*
+ * setup the TAU
+ *
+ * Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound.
+ * Start off at zero
+ */
+
+int tau_initialized = 0;
+
+void __init TAU_init_smp(void * info)
+{
+	unsigned long cpu = smp_processor_id();
+
+	/* set these to a reasonable value and let the timer shrink the
+	 * window */
+	tau[cpu].low = 5;
+	tau[cpu].high = 120;
+
+	set_thresholds(cpu);
+}
+
+int __init TAU_init(void)
+{
+	/* We assume in SMP that if one CPU has TAU support, they
+	 * all have it --BenH
+	 */
+	if (!cpu_has_feature(CPU_FTR_TAU)) {
+		printk("Thermal assist unit not available\n");
+		tau_initialized = 0;
+		return 1;
+	}
+
+
+	/* first, set up the window shrinking timer */
+	init_timer(&tau_timer);
+	tau_timer.function = tau_timeout_smp;
+	tau_timer.expires = jiffies + shrink_timer;
+	add_timer(&tau_timer);
+
+	on_each_cpu(TAU_init_smp, NULL, 1, 0);
+
+	printk("Thermal assist unit ");
+#ifdef CONFIG_TAU_INT
+	printk("using interrupts, ");
+#else
+	printk("using timers, ");
+#endif
+	printk("shrink_timer: %d jiffies\n", shrink_timer);
+	tau_initialized = 1;
+
+	return 0;
+}
+
+__initcall(TAU_init);
+
+/*
+ * return current temp
+ */
+
+u32 cpu_temp_both(unsigned long cpu)
+{
+	return ((tau[cpu].high << 16) | tau[cpu].low);
+}
+
+int cpu_temp(unsigned long cpu)
+{
+	return ((tau[cpu].high + tau[cpu].low) / 2);
+}
+
+int tau_interrupts(unsigned long cpu)
+{
+	return (tau[cpu].interrupts);
+}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 4a27218a086..24e3ad756de 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -261,7 +261,7 @@ void snapshot_timebases(void)
 
 	if (!cpu_has_feature(CPU_FTR_PURR))
 		return;
-	for_each_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock);
 	on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
 }
@@ -751,7 +751,7 @@ void __init smp_space_timers(unsigned int max_cpus)
 	 * systems works better if the two threads' timebase interrupts
 	 * are staggered by half a jiffy with respect to each other.
 	 */
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		if (i == boot_cpuid)
 			continue;
 		if (i == (boot_cpuid ^ 1))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9763faab673..4cbde211eb6 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -97,7 +97,6 @@ static DEFINE_SPINLOCK(die_lock);
 int die(const char *str, struct pt_regs *regs, long err)
 {
 	static int die_counter, crash_dump_start = 0;
-	int nl = 0;
 
 	if (debugger(regs))
 		return 1;
@@ -106,7 +105,7 @@ int die(const char *str, struct pt_regs *regs, long err)
 	spin_lock_irq(&die_lock);
 	bust_spinlocks(1);
 #ifdef CONFIG_PMAC_BACKLIGHT
-	if (_machine == _MACH_Pmac) {
+	if (machine_is(powermac)) {
 		set_backlight_enable(1);
 		set_backlight_level(BACKLIGHT_MAX);
 	}
@@ -114,46 +113,18 @@ int die(const char *str, struct pt_regs *regs, long err)
 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	printk("PREEMPT ");
-	nl = 1;
 #endif
 #ifdef CONFIG_SMP
 	printk("SMP NR_CPUS=%d ", NR_CPUS);
-	nl = 1;
 #endif
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	printk("DEBUG_PAGEALLOC ");
-	nl = 1;
 #endif
 #ifdef CONFIG_NUMA
 	printk("NUMA ");
-	nl = 1;
 #endif
-#ifdef CONFIG_PPC64
-	switch (_machine) {
-	case PLATFORM_PSERIES:
-		printk("PSERIES ");
-		nl = 1;
-		break;
-	case PLATFORM_PSERIES_LPAR:
-		printk("PSERIES LPAR ");
-		nl = 1;
-		break;
-	case PLATFORM_ISERIES_LPAR:
-		printk("ISERIES LPAR ");
-		nl = 1;
-		break;
-	case PLATFORM_POWERMAC:
-		printk("POWERMAC ");
-		nl = 1;
-		break;
-	case PLATFORM_CELL:
-		printk("CELL ");
-		nl = 1;
-		break;
-	}
-#endif
-	if (nl)
-		printk("\n");
+	printk("%s\n", ppc_md.name ? "" : ppc_md.name);
+
 	print_modules();
 	show_regs(regs);
 	bust_spinlocks(0);
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ec837036842..573afb68d69 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -33,6 +33,7 @@
 #include <asm/machdep.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
+#include <asm/firmware.h>
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
 
@@ -667,7 +668,13 @@ void __init vdso_init(void)
 	vdso_data->version.major = SYSTEMCFG_MAJOR;
 	vdso_data->version.minor = SYSTEMCFG_MINOR;
 	vdso_data->processor = mfspr(SPRN_PVR);
-	vdso_data->platform = _machine;
+	/*
+	 * Fake the old platform number for pSeries and iSeries and add
+	 * in LPAR bit if necessary
+	 */
+	vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100;
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		vdso_data->platform |= 1;
 	vdso_data->physicalMemorySize = lmb_phys_mem_size();
 	vdso_data->dcache_size = ppc64_caches.dsize;
 	vdso_data->dcache_line_size = ppc64_caches.dline_size;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7fa7b15fd8e..fe79c2584cb 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -1,9 +1,11 @@
 #include <linux/config.h>
 #ifdef CONFIG_PPC64
 #include <asm/page.h>
+#define PROVIDE32(x)	PROVIDE(__unused__##x)
 #else
 #define PAGE_SIZE	4096
 #define KERNELBASE	CONFIG_KERNEL_START
+#define PROVIDE32(x)	PROVIDE(x)
 #endif
 #include <asm-generic/vmlinux.lds.h>
 
@@ -18,43 +20,42 @@ jiffies = jiffies_64 + 4;
 #endif
 SECTIONS
 {
-  /* Sections to be discarded. */
-  /DISCARD/ : {
-    *(.exitcall.exit)
-    *(.exit.data)
-  }
-
-  . = KERNELBASE;
-
-  /* Read-only sections, merged into text segment: */
-  .text : {
-    *(.text .text.*)
-    SCHED_TEXT
-    LOCK_TEXT
-    KPROBES_TEXT
-    *(.fixup)
-#ifdef CONFIG_PPC32
-    *(.got1)
-    __got2_start = .;
-    *(.got2)
-    __got2_end = .;
-#else
-    . = ALIGN(PAGE_SIZE);
-    _etext = .;
-#endif
-  }
-#ifdef CONFIG_PPC32
-  _etext = .;
-  PROVIDE (etext = .);
+	/* Sections to be discarded. */
+	/DISCARD/ : {
+	*(.exitcall.exit)
+	*(.exit.data)
+	}
 
-  RODATA
-  .fini      : { *(.fini)    } =0
-  .ctors     : { *(.ctors)   }
-  .dtors     : { *(.dtors)   }
+	. = KERNELBASE;
 
-  .fixup   : { *(.fixup) }
-#endif
+/*
+ * Text, read only data and other permanent read-only sections
+ */
+
+	/* Text and gots */
+	.text : {
+		*(.text .text.*)
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.fixup)
 
+#ifdef CONFIG_PPC32
+		*(.got1)
+		__got2_start = .;
+		*(.got2)
+		__got2_end = .;
+#endif /* CONFIG_PPC32 */
+
+		. = ALIGN(PAGE_SIZE);
+		_etext = .;
+		PROVIDE32 (etext = .);
+	}
+
+	/* Read-only data */
+	RODATA
+
+	/* Exception & bug tables */
 	__ex_table : {
 		__start___ex_table = .;
 		*(__ex_table)
@@ -67,192 +68,172 @@ SECTIONS
 		__stop___bug_table = .;
 	}
 
-#ifdef CONFIG_PPC64
+/*
+ * Init sections discarded at runtime
+ */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+
+	.init.text : {
+		_sinittext = .;
+		*(.init.text)
+		_einittext = .;
+	}
+
+	/* .exit.text is discarded at runtime, not link time,
+	 * to deal with references from __bug_table
+	 */
+	.exit.text : { *(.exit.text) }
+
+	.init.data : {
+		*(.init.data);
+		__vtop_table_begin = .;
+		*(.vtop_fixup);
+		__vtop_table_end = .;
+		__ptov_table_begin = .;
+		*(.ptov_fixup);
+		__ptov_table_end = .;
+	}
+
+	. = ALIGN(16);
+	.init.setup : {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+
+	.initcall.init : {
+		__initcall_start = .;
+		*(.initcall1.init)
+		*(.initcall2.init)
+		*(.initcall3.init)
+		*(.initcall4.init)
+		*(.initcall5.init)
+		*(.initcall6.init)
+		*(.initcall7.init)
+		__initcall_end = .;
+		}
+
+	.con_initcall.init : {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	SECURITY_INIT
+
+	. = ALIGN(8);
 	__ftr_fixup : {
 		__start___ftr_fixup = .;
 		*(__ftr_fixup)
 		__stop___ftr_fixup = .;
 	}
 
-  RODATA
-#endif
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 
 #ifdef CONFIG_PPC32
-  /* Read-write section, merged into data segment: */
-  . = ALIGN(PAGE_SIZE);
-  _sdata = .;
-  .data    :
-  {
-    *(.data)
-    *(.data1)
-    *(.sdata)
-    *(.sdata2)
-    *(.got.plt) *(.got)
-    *(.dynamic)
-    CONSTRUCTORS
-  }
-
-  . = ALIGN(PAGE_SIZE);
-  __nosave_begin = .;
-  .data_nosave : { *(.data.nosave) }
-  . = ALIGN(PAGE_SIZE);
-  __nosave_end = .;
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
-  _edata  =  .;
-  PROVIDE (edata = .);
-
-  . = ALIGN(8192);
-  .data.init_task : { *(.data.init_task) }
+	. = ALIGN(32);
+#else
+	. = ALIGN(128);
 #endif
+	.data.percpu : {
+		__per_cpu_start = .;
+		*(.data.percpu)
+		__per_cpu_end = .;
+	}
 
-  /* will be freed after init */
-  . = ALIGN(PAGE_SIZE);
-  __init_begin = .;
-  .init.text : {
-	_sinittext = .;
-	*(.init.text)
-	_einittext = .;
-  }
-#ifdef CONFIG_PPC32
-  /* .exit.text is discarded at runtime, not link time,
-     to deal with references from __bug_table */
-  .exit.text : { *(.exit.text) }
-#endif
-  .init.data : {
-    *(.init.data);
-    __vtop_table_begin = .;
-    *(.vtop_fixup);
-    __vtop_table_end = .;
-    __ptov_table_begin = .;
-    *(.ptov_fixup);
-    __ptov_table_end = .;
-  }
-
-  . = ALIGN(16);
-  .init.setup : {
-    __setup_start = .;
-    *(.init.setup)
-    __setup_end = .;
-  }
-
-  .initcall.init : {
-	__initcall_start = .;
-	*(.initcall1.init)
-	*(.initcall2.init)
-	*(.initcall3.init)
-	*(.initcall4.init)
-	*(.initcall5.init)
-	*(.initcall6.init)
-	*(.initcall7.init)
-	__initcall_end = .;
-  }
-
-  .con_initcall.init : {
-    __con_initcall_start = .;
-    *(.con_initcall.init)
-    __con_initcall_end = .;
-  }
-
-  SECURITY_INIT
+	. = ALIGN(8);
+	.machine.desc : {
+		__machine_desc_start = . ;
+		*(.machine.desc)
+		__machine_desc_end = . ;
+	}
+
+	/* freed after init ends here */
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+/*
+ * And now the various read/write data
+ */
+
+	. = ALIGN(PAGE_SIZE);
+	_sdata = .;
 
 #ifdef CONFIG_PPC32
-  __start___ftr_fixup = .;
-  __ftr_fixup : { *(__ftr_fixup) }
-  __stop___ftr_fixup = .;
+	.data    :
+	{
+		*(.data)
+		*(.sdata)
+		*(.got.plt) *(.got)
+	}
 #else
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : {
-    __initramfs_start = .;
-    *(.init.ramfs)
-    __initramfs_end = .;
-  }
-#endif
+	.data : {
+		*(.data .data.rel* .toc1)
+		*(.branch_lt)
+	}
 
-#ifdef CONFIG_PPC32
-  . = ALIGN(32);
+	.opd : {
+		*(.opd)
+	}
+
+	.got : {
+		__toc_start = .;
+		*(.got)
+		*(.toc)
+	}
 #endif
-  .data.percpu : {
-    __per_cpu_start = .;
-    *(.data.percpu)
-    __per_cpu_end = .;
-  }
 
- . = ALIGN(PAGE_SIZE);
-#ifdef CONFIG_PPC64
- . = ALIGN(16384);
- __init_end = .;
- /* freed after init ends here */
-
- /* Read/write sections */
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(16384);
- _sdata = .;
- /* The initial task and kernel stack */
- .data.init_task : {
-      *(.data.init_task)
-      }
-
- . = ALIGN(PAGE_SIZE);
- .data.page_aligned : {
-      *(.data.page_aligned)
-      }
-
- .data.cacheline_aligned : {
-      *(.data.cacheline_aligned)
-      }
-
- .data : {
-      *(.data .data.rel* .toc1)
-      *(.branch_lt)
-      }
-
- .opd : {
-      *(.opd)
-      }
-
- .got : {
-      __toc_start = .;
-      *(.got)
-      *(.toc)
-      . = ALIGN(PAGE_SIZE);
-      _edata = .;
-      }
-
-  . = ALIGN(PAGE_SIZE);
+	. = ALIGN(PAGE_SIZE);
+	_edata  =  .;
+	PROVIDE32 (edata = .);
+
+	/* The initial task and kernel stack */
+#ifdef CONFIG_PPC32
+	. = ALIGN(8192);
 #else
-  __initramfs_start = .;
-  .init.ramfs : {
-    *(.init.ramfs)
-  }
-  __initramfs_end = .;
+	. = ALIGN(16384);
+#endif
+	.data.init_task : {
+		*(.data.init_task)
+	}
 
-  . = ALIGN(4096);
-  __init_end = .;
+	. = ALIGN(PAGE_SIZE);
+	.data.page_aligned : {
+		*(.data.page_aligned)
+	}
 
-  . = ALIGN(4096);
-  _sextratext = .;
-  _eextratext = .;
+	.data.cacheline_aligned : {
+		*(.data.cacheline_aligned)
+	}
 
-  __bss_start = .;
-#endif
+	. = ALIGN(PAGE_SIZE);
+	__data_nosave : {
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	}
 
-  .bss : {
-    __bss_start = .;
-   *(.sbss) *(.scommon)
-   *(.dynbss)
-   *(.bss)
-   *(COMMON)
-  __bss_stop = .;
-  }
+/*
+ * And finally the bss
+ */
+
+	.bss : {
+		__bss_start = .;
+		*(.sbss) *(.scommon)
+		*(.dynbss)
+		*(.bss)
+		*(COMMON)
+		__bss_stop = .;
+	}
 
-#ifdef CONFIG_PPC64
-  . = ALIGN(PAGE_SIZE);
-#endif
-  _end = . ;
-#ifdef CONFIG_PPC32
-  PROVIDE (end = .);
-#endif
+	. = ALIGN(PAGE_SIZE);
+	_end = . ;
+	PROVIDE32 (end = .);
 }
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 666c2aa5501..c251d993661 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -18,7 +18,7 @@ extern char system_call_common[];
 
 #ifdef CONFIG_PPC64
 /* Bits in SRR1 that are copied from MSR */
-#define MSR_MASK	0xffffffff87c0ffff
+#define MSR_MASK	0xffffffff87c0ffffUL
 #else
 #define MSR_MASK	0x87c0ffff
 #endif
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
new file mode 100644
index 00000000000..754143e8936
--- /dev/null
+++ b/arch/powerpc/math-emu/Makefile
@@ -0,0 +1,13 @@
+
+obj-y				:= math.o fmr.o lfd.o stfd.o
+
+obj-$(CONFIG_MATH_EMULATION)	+= fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \
+					fctiw.o fctiwz.o fdiv.o fdivs.o \
+					fmadd.o fmadds.o fmsub.o fmsubs.o \
+					fmul.o fmuls.o fnabs.o fneg.o types.o \
+					fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
+					fres.o frsp.o frsqrte.o fsel.o lfs.o \
+					fsqrt.o	fsqrts.o fsub.o fsubs.o \
+					mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
+					mtfsf.o mtfsfi.o stfiwx.o stfs.o \
+					udivmodti4.o
diff --git a/arch/powerpc/math-emu/double.h b/arch/powerpc/math-emu/double.h
new file mode 100644
index 00000000000..ffba8b67f05
--- /dev/null
+++ b/arch/powerpc/math-emu/double.h
@@ -0,0 +1,129 @@
+/*
+ * Definitions for IEEE Double Precision
+ */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_D		(2 * _FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_D		_FP_W_TYPE_SIZE
+#endif
+
+#define _FP_FRACBITS_D		53
+#define _FP_FRACXBITS_D		(_FP_FRACTBITS_D - _FP_FRACBITS_D)
+#define _FP_WFRACBITS_D		(_FP_WORKBITS + _FP_FRACBITS_D)
+#define _FP_WFRACXBITS_D	(_FP_FRACTBITS_D - _FP_WFRACBITS_D)
+#define _FP_EXPBITS_D		11
+#define _FP_EXPBIAS_D		1023
+#define _FP_EXPMAX_D		2047
+
+#define _FP_QNANBIT_D		\
+	((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE))
+#define _FP_IMPLBIT_D		\
+	((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE))
+#define _FP_OVERFLOW_D		\
+	((_FP_W_TYPE)1 << (_FP_WFRACBITS_D % _FP_W_TYPE_SIZE))
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_D
+{
+  double flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign  : 1;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+#else
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned sign  : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X)		_FP_DECL(2,X)
+#define FP_UNPACK_RAW_D(X,val)	_FP_UNPACK_RAW_2(D,X,val)
+#define FP_PACK_RAW_D(val,X)	_FP_PACK_RAW_2(D,val,X)
+
+#define FP_UNPACK_D(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_2(D,X,val);		\
+    _FP_UNPACK_CANONICAL(D,2,X);	\
+  } while (0)
+
+#define FP_PACK_D(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,2,X);		\
+    _FP_PACK_RAW_2(D,val,X);		\
+  } while (0)
+
+#define FP_NEG_D(R,X)		_FP_NEG(D,2,R,X)
+#define FP_ADD_D(R,X,Y)		_FP_ADD(D,2,R,X,Y)
+#define FP_SUB_D(R,X,Y)		_FP_SUB(D,2,R,X,Y)
+#define FP_MUL_D(R,X,Y)		_FP_MUL(D,2,R,X,Y)
+#define FP_DIV_D(R,X,Y)		_FP_DIV(D,2,R,X,Y)
+#define FP_SQRT_D(R,X)		_FP_SQRT(D,2,R,X)
+
+#define FP_CMP_D(r,X,Y,un)	_FP_CMP(D,2,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)	_FP_CMP_EQ(D,2,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg)  _FP_TO_INT(D,2,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)  _FP_FROM_INT(D,2,X,r,rs,rt)
+
+#else
+
+union _FP_UNION_D
+{
+  double flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_D;
+    unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+#else
+    unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+    unsigned exp  : _FP_EXPBITS_D;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X)		_FP_DECL(1,X)
+#define FP_UNPACK_RAW_D(X,val)	_FP_UNPACK_RAW_1(D,X,val)
+#define FP_PACK_RAW_D(val,X)	_FP_PACK_RAW_1(D,val,X)
+
+#define FP_UNPACK_D(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1(D,X,val);		\
+    _FP_UNPACK_CANONICAL(D,1,X);	\
+  } while (0)
+
+#define FP_PACK_D(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,1,X);		\
+    _FP_PACK_RAW_1(D,val,X);		\
+  } while (0)
+
+#define FP_NEG_D(R,X)		_FP_NEG(D,1,R,X)
+#define FP_ADD_D(R,X,Y)		_FP_ADD(D,1,R,X,Y)
+#define FP_SUB_D(R,X,Y)		_FP_SUB(D,1,R,X,Y)
+#define FP_MUL_D(R,X,Y)		_FP_MUL(D,1,R,X,Y)
+#define FP_DIV_D(R,X,Y)		_FP_DIV(D,1,R,X,Y)
+#define FP_SQRT_D(R,X)		_FP_SQRT(D,1,R,X)
+
+/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
+   the target machine.  */
+
+#define FP_CMP_D(r,X,Y,un)	_FP_CMP(D,1,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)	_FP_CMP_EQ(D,1,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg)  _FP_TO_INT(D,1,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)  _FP_FROM_INT(D,1,X,r,rs,rt)
+
+#endif /* W_TYPE_SIZE < 64 */
diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c
new file mode 100644
index 00000000000..41f0617f3d3
--- /dev/null
+++ b/arch/powerpc/math-emu/fabs.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+fabs(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] & 0x7fffffff;
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c
new file mode 100644
index 00000000000..fc8836488b6
--- /dev/null
+++ b/arch/powerpc/math-emu/fadd.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fadd(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c
new file mode 100644
index 00000000000..93025b6c8f3
--- /dev/null
+++ b/arch/powerpc/math-emu/fadds.c
@@ -0,0 +1,39 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fadds(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c
new file mode 100644
index 00000000000..4efac394b4c
--- /dev/null
+++ b/arch/powerpc/math-emu/fcmpo.c
@@ -0,0 +1,46 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fcmpo(u32 *ccr, int crfD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+	long cmp;
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (A_c == FP_CLS_NAN || B_c == FP_CLS_NAN)
+		ret |= EFLAG_VXVC;
+
+	FP_CMP_D(cmp, A, B, 2);
+	cmp = code[(cmp + 1) & 3];
+
+	__FPU_FPSCR &= ~(0x1f000);
+	__FPU_FPSCR |= (cmp << 12);
+
+	*ccr &= ~(15 << ((7 - crfD) << 2));
+	*ccr |= (cmp << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	printk("CR: %08x\n", *ccr);
+#endif
+
+	return ret;
+}
diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c
new file mode 100644
index 00000000000..b7e33176e61
--- /dev/null
+++ b/arch/powerpc/math-emu/fcmpu.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fcmpu(u32 *ccr, int crfD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+	long cmp;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	FP_CMP_D(cmp, A, B, 2);
+	cmp = code[(cmp + 1) & 3];
+
+	__FPU_FPSCR &= ~(0x1f000);
+	__FPU_FPSCR |= (cmp << 12);
+
+	*ccr &= ~(15 << ((7 - crfD) << 2));
+	*ccr |= (cmp << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	printk("CR: %08x\n", *ccr);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c
new file mode 100644
index 00000000000..3b3c98b840c
--- /dev/null
+++ b/arch/powerpc/math-emu/fctiw.c
@@ -0,0 +1,25 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fctiw(u32 *frD, void *frB)
+{
+	FP_DECL_D(B);
+	unsigned int r;
+
+	__FP_UNPACK_D(B, frB);
+	FP_TO_INT_D(r, B, 32, 1);
+	frD[1] = r;
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c
new file mode 100644
index 00000000000..7717eb6fcfb
--- /dev/null
+++ b/arch/powerpc/math-emu/fctiwz.c
@@ -0,0 +1,32 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fctiwz(u32 *frD, void *frB)
+{
+	FP_DECL_D(B);
+	u32 fpscr;
+	unsigned int r;
+
+	fpscr = __FPU_FPSCR;
+	__FPU_FPSCR &= ~(3);
+	__FPU_FPSCR |= FP_RND_ZERO;
+
+	__FP_UNPACK_D(B, frB);
+	FP_TO_INT_D(r, B, 32, 1);
+	frD[1] = r;
+
+	__FPU_FPSCR = fpscr;
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c
new file mode 100644
index 00000000000..f2fba825b2d
--- /dev/null
+++ b/arch/powerpc/math-emu/fdiv.c
@@ -0,0 +1,53 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fdiv(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) {
+		ret |= EFLAG_VXZDZ;
+#ifdef DEBUG
+		printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__);
+#endif
+	}
+	if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+		ret |= EFLAG_VXIDI;
+#ifdef DEBUG
+		printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__);
+#endif
+	}
+
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) {
+		ret |= EFLAG_DIVZERO;
+		if (__FPU_TRAP_P(EFLAG_DIVZERO))
+			return ret;
+	}
+	FP_DIV_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c
new file mode 100644
index 00000000000..b971196e317
--- /dev/null
+++ b/arch/powerpc/math-emu/fdivs.c
@@ -0,0 +1,55 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fdivs(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) {
+		ret |= EFLAG_VXZDZ;
+#ifdef DEBUG
+		printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__);
+#endif
+	}
+	if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+		ret |= EFLAG_VXIDI;
+#ifdef DEBUG
+		printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__);
+#endif
+	}
+
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) {
+		ret |= EFLAG_DIVZERO;
+		if (__FPU_TRAP_P(EFLAG_DIVZERO))
+			return ret;
+	}
+
+	FP_DIV_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c
new file mode 100644
index 00000000000..0a1dbce793e
--- /dev/null
+++ b/arch/powerpc/math-emu/fmadd.c
@@ -0,0 +1,48 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fmadd(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c
new file mode 100644
index 00000000000..0f70bba9445
--- /dev/null
+++ b/arch/powerpc/math-emu/fmadds.c
@@ -0,0 +1,49 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fmadds(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c
new file mode 100644
index 00000000000..28df700c0c7
--- /dev/null
+++ b/arch/powerpc/math-emu/fmr.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+fmr(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0];
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c
new file mode 100644
index 00000000000..203fd48a6fe
--- /dev/null
+++ b/arch/powerpc/math-emu/fmsub.c
@@ -0,0 +1,51 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fmsub(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c
new file mode 100644
index 00000000000..8ce68624c18
--- /dev/null
+++ b/arch/powerpc/math-emu/fmsubs.c
@@ -0,0 +1,52 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c
new file mode 100644
index 00000000000..66c7e79aae2
--- /dev/null
+++ b/arch/powerpc/math-emu/fmul.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fmul(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023);
+	printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023);
+#endif
+
+	if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c
new file mode 100644
index 00000000000..26bc4278271
--- /dev/null
+++ b/arch/powerpc/math-emu/fmuls.c
@@ -0,0 +1,43 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fmuls(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023);
+	printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023);
+#endif
+
+	if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c
new file mode 100644
index 00000000000..c6b913d179e
--- /dev/null
+++ b/arch/powerpc/math-emu/fnabs.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+fnabs(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] | 0x80000000;
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c
new file mode 100644
index 00000000000..fe9a98deff6
--- /dev/null
+++ b/arch/powerpc/math-emu/fneg.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+fneg(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] ^ 0x80000000;
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c
new file mode 100644
index 00000000000..7f312276d92
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmadd.c
@@ -0,0 +1,51 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fnmadd(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c
new file mode 100644
index 00000000000..65454c9c70b
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmadds.c
@@ -0,0 +1,52 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fnmadds(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c
new file mode 100644
index 00000000000..f1ca7482b5f
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmsub.c
@@ -0,0 +1,54 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fnmsub(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c
new file mode 100644
index 00000000000..5c9a09a87dc
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmsubs.c
@@ -0,0 +1,55 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fnmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+	__FP_UNPACK_D(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		ret |= EFLAG_VXIMZ;
+
+	FP_MUL_D(T, A, C);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c
new file mode 100644
index 00000000000..ec11e46d20a
--- /dev/null
+++ b/arch/powerpc/math-emu/fres.c
@@ -0,0 +1,12 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+fres(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+	return -ENOSYS;
+}
diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c
new file mode 100644
index 00000000000..d879b2a3d0c
--- /dev/null
+++ b/arch/powerpc/math-emu/frsp.c
@@ -0,0 +1,25 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+frsp(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p\n", __FUNCTION__, frD, frB);
+#endif
+
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	return __FP_PACK_DS(frD, B);
+}
diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c
new file mode 100644
index 00000000000..a11ae182985
--- /dev/null
+++ b/arch/powerpc/math-emu/frsqrte.c
@@ -0,0 +1,12 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+frsqrte(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c
new file mode 100644
index 00000000000..e36e6e72819
--- /dev/null
+++ b/arch/powerpc/math-emu/fsel.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fsel(u32 *frD, void *frA, u32 *frB, u32 *frC)
+{
+	FP_DECL_D(A);
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %08x %08x\n", frB[0], frB[1]);
+	printk("C: %08x %08x\n", frC[0], frC[1]);
+#endif
+
+	if (A_c == FP_CLS_NAN || (A_c != FP_CLS_ZERO && A_s)) {
+		frD[0] = frB[0];
+		frD[1] = frB[1];
+	} else {
+		frD[0] = frC[0];
+		frD[1] = frC[1];
+	}
+
+#ifdef DEBUG
+	printk("D: %08x.%08x\n", frD[0], frD[1]);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c
new file mode 100644
index 00000000000..6f8319f64a8
--- /dev/null
+++ b/arch/powerpc/math-emu/fsqrt.c
@@ -0,0 +1,37 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fsqrt(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frB);
+#endif
+
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (B_s && B_c != FP_CLS_ZERO)
+		ret |= EFLAG_VXSQRT;
+	if (B_c == FP_CLS_NAN)
+		ret |= EFLAG_VXSNAN;
+
+	FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c
new file mode 100644
index 00000000000..3b2b1cf55c1
--- /dev/null
+++ b/arch/powerpc/math-emu/fsqrts.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fsqrts(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frB);
+#endif
+
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (B_s && B_c != FP_CLS_ZERO)
+		ret |= EFLAG_VXSQRT;
+	if (B_c == FP_CLS_NAN)
+		ret |= EFLAG_VXSNAN;
+
+	FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c
new file mode 100644
index 00000000000..956679042bb
--- /dev/null
+++ b/arch/powerpc/math-emu/fsub.c
@@ -0,0 +1,41 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+
+int
+fsub(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c
new file mode 100644
index 00000000000..3428117dfe8
--- /dev/null
+++ b/arch/powerpc/math-emu/fsubs.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+fsubs(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	int ret = 0;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+	__FP_UNPACK_D(A, frA);
+	__FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		ret |= EFLAG_VXISI;
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c
new file mode 100644
index 00000000000..7d38101c329
--- /dev/null
+++ b/arch/powerpc/math-emu/lfd.c
@@ -0,0 +1,19 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "sfp-machine.h"
+#include "double.h"
+
+int
+lfd(void *frD, void *ea)
+{
+	if (copy_from_user(frD, ea, sizeof(double)))
+		return -EFAULT;
+#ifdef DEBUG
+	printk("%s: D %p, ea %p: ", __FUNCTION__, frD, ea);
+	dump_double(frD);
+	printk("\n");
+#endif
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c
new file mode 100644
index 00000000000..c86dee3d765
--- /dev/null
+++ b/arch/powerpc/math-emu/lfs.c
@@ -0,0 +1,37 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+lfs(void *frD, void *ea)
+{
+	FP_DECL_D(R);
+	FP_DECL_S(A);
+	float f;
+
+#ifdef DEBUG
+	printk("%s: D %p, ea %p\n", __FUNCTION__, frD, ea);
+#endif
+
+	if (copy_from_user(&f, ea, sizeof(float)))
+		return -EFAULT;
+
+	__FP_UNPACK_S(A, &f);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %ld (%ld) [%08lx]\n", A_s, A_f, A_e, A_c,
+	       *(unsigned long *)&f);
+#endif
+
+	FP_CONV(D, S, 2, 1, R, A);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	return __FP_PACK_D(frD, R);
+}
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
new file mode 100644
index 00000000000..58915347276
--- /dev/null
+++ b/arch/powerpc/math-emu/math.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#include <asm/uaccess.h>
+#include <asm/reg.h>
+
+#include "sfp-machine.h"
+#include "double.h"
+
+#define FLOATFUNC(x)	extern int x(void *, void *, void *, void *)
+
+FLOATFUNC(fadd);
+FLOATFUNC(fadds);
+FLOATFUNC(fdiv);
+FLOATFUNC(fdivs);
+FLOATFUNC(fmul);
+FLOATFUNC(fmuls);
+FLOATFUNC(fsub);
+FLOATFUNC(fsubs);
+
+FLOATFUNC(fmadd);
+FLOATFUNC(fmadds);
+FLOATFUNC(fmsub);
+FLOATFUNC(fmsubs);
+FLOATFUNC(fnmadd);
+FLOATFUNC(fnmadds);
+FLOATFUNC(fnmsub);
+FLOATFUNC(fnmsubs);
+
+FLOATFUNC(fctiw);
+FLOATFUNC(fctiwz);
+FLOATFUNC(frsp);
+
+FLOATFUNC(fcmpo);
+FLOATFUNC(fcmpu);
+
+FLOATFUNC(mcrfs);
+FLOATFUNC(mffs);
+FLOATFUNC(mtfsb0);
+FLOATFUNC(mtfsb1);
+FLOATFUNC(mtfsf);
+FLOATFUNC(mtfsfi);
+
+FLOATFUNC(lfd);
+FLOATFUNC(lfs);
+
+FLOATFUNC(stfd);
+FLOATFUNC(stfs);
+FLOATFUNC(stfiwx);
+
+FLOATFUNC(fabs);
+FLOATFUNC(fmr);
+FLOATFUNC(fnabs);
+FLOATFUNC(fneg);
+
+/* Optional */
+FLOATFUNC(fres);
+FLOATFUNC(frsqrte);
+FLOATFUNC(fsel);
+FLOATFUNC(fsqrt);
+FLOATFUNC(fsqrts);
+
+
+#define OP31		0x1f		/*   31 */
+#define LFS		0x30		/*   48 */
+#define LFSU		0x31		/*   49 */
+#define LFD		0x32		/*   50 */
+#define LFDU		0x33		/*   51 */
+#define STFS		0x34		/*   52 */
+#define STFSU		0x35		/*   53 */
+#define STFD		0x36		/*   54 */
+#define STFDU		0x37		/*   55 */
+#define OP59		0x3b		/*   59 */
+#define OP63		0x3f		/*   63 */
+
+/* Opcode 31: */
+/* X-Form: */
+#define LFSX		0x217		/*  535 */
+#define LFSUX		0x237		/*  567 */
+#define LFDX		0x257		/*  599 */
+#define LFDUX		0x277		/*  631 */
+#define STFSX		0x297		/*  663 */
+#define STFSUX		0x2b7		/*  695 */
+#define STFDX		0x2d7		/*  727 */
+#define STFDUX		0x2f7		/*  759 */
+#define STFIWX		0x3d7		/*  983 */
+
+/* Opcode 59: */
+/* A-Form: */
+#define FDIVS		0x012		/*   18 */
+#define FSUBS		0x014		/*   20 */
+#define FADDS		0x015		/*   21 */
+#define FSQRTS		0x016		/*   22 */
+#define FRES		0x018		/*   24 */
+#define FMULS		0x019		/*   25 */
+#define FMSUBS		0x01c		/*   28 */
+#define FMADDS		0x01d		/*   29 */
+#define FNMSUBS		0x01e		/*   30 */
+#define FNMADDS		0x01f		/*   31 */
+
+/* Opcode 63: */
+/* A-Form: */
+#define FDIV		0x012		/*   18 */
+#define FSUB		0x014		/*   20 */
+#define FADD		0x015		/*   21 */
+#define FSQRT		0x016		/*   22 */
+#define FSEL		0x017		/*   23 */
+#define FMUL		0x019		/*   25 */
+#define FRSQRTE		0x01a		/*   26 */
+#define FMSUB		0x01c		/*   28 */
+#define FMADD		0x01d		/*   29 */
+#define FNMSUB		0x01e		/*   30 */
+#define FNMADD		0x01f		/*   31 */
+
+/* X-Form: */
+#define FCMPU		0x000		/*    0	*/
+#define FRSP		0x00c		/*   12 */
+#define FCTIW		0x00e		/*   14 */
+#define FCTIWZ		0x00f		/*   15 */
+#define FCMPO		0x020		/*   32 */
+#define MTFSB1		0x026		/*   38 */
+#define FNEG		0x028		/*   40 */
+#define MCRFS		0x040		/*   64 */
+#define MTFSB0		0x046		/*   70 */
+#define FMR		0x048		/*   72 */
+#define MTFSFI		0x086		/*  134 */
+#define FNABS		0x088		/*  136 */
+#define FABS		0x108		/*  264 */
+#define MFFS		0x247		/*  583 */
+#define MTFSF		0x2c7		/*  711 */
+
+
+#define AB	2
+#define AC	3
+#define ABC	4
+#define D	5
+#define DU	6
+#define X	7
+#define XA	8
+#define XB	9
+#define XCR	11
+#define XCRB	12
+#define XCRI	13
+#define XCRL	16
+#define XE	14
+#define XEU	15
+#define XFLB	10
+
+#ifdef CONFIG_MATH_EMULATION
+static int
+record_exception(struct pt_regs *regs, int eflag)
+{
+	u32 fpscr;
+
+	fpscr = __FPU_FPSCR;
+
+	if (eflag) {
+		fpscr |= FPSCR_FX;
+		if (eflag & EFLAG_OVERFLOW)
+			fpscr |= FPSCR_OX;
+		if (eflag & EFLAG_UNDERFLOW)
+			fpscr |= FPSCR_UX;
+		if (eflag & EFLAG_DIVZERO)
+			fpscr |= FPSCR_ZX;
+		if (eflag & EFLAG_INEXACT)
+			fpscr |= FPSCR_XX;
+		if (eflag & EFLAG_VXSNAN)
+			fpscr |= FPSCR_VXSNAN;
+		if (eflag & EFLAG_VXISI)
+			fpscr |= FPSCR_VXISI;
+		if (eflag & EFLAG_VXIDI)
+			fpscr |= FPSCR_VXIDI;
+		if (eflag & EFLAG_VXZDZ)
+			fpscr |= FPSCR_VXZDZ;
+		if (eflag & EFLAG_VXIMZ)
+			fpscr |= FPSCR_VXIMZ;
+		if (eflag & EFLAG_VXVC)
+			fpscr |= FPSCR_VXVC;
+		if (eflag & EFLAG_VXSOFT)
+			fpscr |= FPSCR_VXSOFT;
+		if (eflag & EFLAG_VXSQRT)
+			fpscr |= FPSCR_VXSQRT;
+		if (eflag & EFLAG_VXCVI)
+			fpscr |= FPSCR_VXCVI;
+	}
+
+	fpscr &= ~(FPSCR_VX);
+	if (fpscr & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI |
+		     FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |
+		     FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI))
+		fpscr |= FPSCR_VX;
+
+	fpscr &= ~(FPSCR_FEX);
+	if (((fpscr & FPSCR_VX) && (fpscr & FPSCR_VE)) ||
+	    ((fpscr & FPSCR_OX) && (fpscr & FPSCR_OE)) ||
+	    ((fpscr & FPSCR_UX) && (fpscr & FPSCR_UE)) ||
+	    ((fpscr & FPSCR_ZX) && (fpscr & FPSCR_ZE)) ||
+	    ((fpscr & FPSCR_XX) && (fpscr & FPSCR_XE)))
+		fpscr |= FPSCR_FEX;
+
+	__FPU_FPSCR = fpscr;
+
+	return (fpscr & FPSCR_FEX) ? 1 : 0;
+}
+#endif /* CONFIG_MATH_EMULATION */
+
+int
+do_mathemu(struct pt_regs *regs)
+{
+	void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+	unsigned long pc = regs->nip;
+	signed short sdisp;
+	u32 insn = 0;
+	int idx = 0;
+#ifdef CONFIG_MATH_EMULATION
+	int (*func)(void *, void *, void *, void *);
+	int type = 0;
+	int eflag, trap;
+#endif
+
+	if (get_user(insn, (u32 *)pc))
+		return -EFAULT;
+
+#ifndef CONFIG_MATH_EMULATION
+	switch (insn >> 26) {
+	case LFD:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		lfd(op0, op1, op2, op3);
+		break;
+	case LFDU:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		lfd(op0, op1, op2, op3);
+		regs->gpr[idx] = (unsigned long)op1;
+		break;
+	case STFD:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		stfd(op0, op1, op2, op3);
+		break;
+	case STFDU:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		stfd(op0, op1, op2, op3);
+		regs->gpr[idx] = (unsigned long)op1;
+		break;
+	case OP63:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		fmr(op0, op1, op2, op3);
+		break;
+	default:
+		goto illegal;
+	}
+#else /* CONFIG_MATH_EMULATION */
+	switch (insn >> 26) {
+	case LFS:	func = lfs;	type = D;	break;
+	case LFSU:	func = lfs;	type = DU;	break;
+	case LFD:	func = lfd;	type = D;	break;
+	case LFDU:	func = lfd;	type = DU;	break;
+	case STFS:	func = stfs;	type = D;	break;
+	case STFSU:	func = stfs;	type = DU;	break;
+	case STFD:	func = stfd;	type = D;	break;
+	case STFDU:	func = stfd;	type = DU;	break;
+
+	case OP31:
+		switch ((insn >> 1) & 0x3ff) {
+		case LFSX:	func = lfs;	type = XE;	break;
+		case LFSUX:	func = lfs;	type = XEU;	break;
+		case LFDX:	func = lfd;	type = XE;	break;
+		case LFDUX:	func = lfd;	type = XEU;	break;
+		case STFSX:	func = stfs;	type = XE;	break;
+		case STFSUX:	func = stfs;	type = XEU;	break;
+		case STFDX:	func = stfd;	type = XE;	break;
+		case STFDUX:	func = stfd;	type = XEU;	break;
+		case STFIWX:	func = stfiwx;	type = XE;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case OP59:
+		switch ((insn >> 1) & 0x1f) {
+		case FDIVS:	func = fdivs;	type = AB;	break;
+		case FSUBS:	func = fsubs;	type = AB;	break;
+		case FADDS:	func = fadds;	type = AB;	break;
+		case FSQRTS:	func = fsqrts;	type = AB;	break;
+		case FRES:	func = fres;	type = AB;	break;
+		case FMULS:	func = fmuls;	type = AC;	break;
+		case FMSUBS:	func = fmsubs;	type = ABC;	break;
+		case FMADDS:	func = fmadds;	type = ABC;	break;
+		case FNMSUBS:	func = fnmsubs;	type = ABC;	break;
+		case FNMADDS:	func = fnmadds;	type = ABC;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case OP63:
+		if (insn & 0x20) {
+			switch ((insn >> 1) & 0x1f) {
+			case FDIV:	func = fdiv;	type = AB;	break;
+			case FSUB:	func = fsub;	type = AB;	break;
+			case FADD:	func = fadd;	type = AB;	break;
+			case FSQRT:	func = fsqrt;	type = AB;	break;
+			case FSEL:	func = fsel;	type = ABC;	break;
+			case FMUL:	func = fmul;	type = AC;	break;
+			case FRSQRTE:	func = frsqrte;	type = AB;	break;
+			case FMSUB:	func = fmsub;	type = ABC;	break;
+			case FMADD:	func = fmadd;	type = ABC;	break;
+			case FNMSUB:	func = fnmsub;	type = ABC;	break;
+			case FNMADD:	func = fnmadd;	type = ABC;	break;
+			default:
+				goto illegal;
+			}
+			break;
+		}
+
+		switch ((insn >> 1) & 0x3ff) {
+		case FCMPU:	func = fcmpu;	type = XCR;	break;
+		case FRSP:	func = frsp;	type = XB;	break;
+		case FCTIW:	func = fctiw;	type = XB;	break;
+		case FCTIWZ:	func = fctiwz;	type = XB;	break;
+		case FCMPO:	func = fcmpo;	type = XCR;	break;
+		case MTFSB1:	func = mtfsb1;	type = XCRB;	break;
+		case FNEG:	func = fneg;	type = XB;	break;
+		case MCRFS:	func = mcrfs;	type = XCRL;	break;
+		case MTFSB0:	func = mtfsb0;	type = XCRB;	break;
+		case FMR:	func = fmr;	type = XB;	break;
+		case MTFSFI:	func = mtfsfi;	type = XCRI;	break;
+		case FNABS:	func = fnabs;	type = XB;	break;
+		case FABS:	func = fabs;	type = XB;	break;
+		case MFFS:	func = mffs;	type = X;	break;
+		case MTFSF:	func = mtfsf;	type = XFLB;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	default:
+		goto illegal;
+	}
+
+	switch (type) {
+	case AB:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+		op2 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		break;
+
+	case AC:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+		op2 = (void *)&current->thread.fpr[(insn >>  6) & 0x1f];
+		break;
+
+	case ABC:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+		op2 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		op3 = (void *)&current->thread.fpr[(insn >>  6) & 0x1f];
+		break;
+
+	case D:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		break;
+
+	case DU:
+		idx = (insn >> 16) & 0x1f;
+		if (!idx)
+			goto illegal;
+
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)(regs->gpr[idx] + sdisp);
+		break;
+
+	case X:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		break;
+
+	case XA:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+		break;
+
+	case XB:
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		break;
+
+	case XE:
+		idx = (insn >> 16) & 0x1f;
+		if (!idx)
+			goto illegal;
+
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)(regs->gpr[idx] + regs->gpr[(insn >> 11) & 0x1f]);
+		break;
+
+	case XEU:
+		idx = (insn >> 16) & 0x1f;
+		op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+		op1 = (void *)((idx ? regs->gpr[idx] : 0)
+				+ regs->gpr[(insn >> 11) & 0x1f]);
+		break;
+
+	case XCR:
+		op0 = (void *)&regs->ccr;
+		op1 = (void *)((insn >> 23) & 0x7);
+		op2 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+		op3 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		break;
+
+	case XCRL:
+		op0 = (void *)&regs->ccr;
+		op1 = (void *)((insn >> 23) & 0x7);
+		op2 = (void *)((insn >> 18) & 0x7);
+		break;
+
+	case XCRB:
+		op0 = (void *)((insn >> 21) & 0x1f);
+		break;
+
+	case XCRI:
+		op0 = (void *)((insn >> 23) & 0x7);
+		op1 = (void *)((insn >> 12) & 0xf);
+		break;
+
+	case XFLB:
+		op0 = (void *)((insn >> 17) & 0xff);
+		op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+		break;
+
+	default:
+		goto illegal;
+	}
+
+	eflag = func(op0, op1, op2, op3);
+
+	if (insn & 1) {
+		regs->ccr &= ~(0x0f000000);
+		regs->ccr |= (__FPU_FPSCR >> 4) & 0x0f000000;
+	}
+
+	trap = record_exception(regs, eflag);
+	if (trap)
+		return 1;
+
+	switch (type) {
+	case DU:
+	case XEU:
+		regs->gpr[idx] = (unsigned long)op1;
+		break;
+
+	default:
+		break;
+	}
+#endif /* CONFIG_MATH_EMULATION */
+
+	regs->nip += 4;
+	return 0;
+
+illegal:
+	return -ENOSYS;
+}
diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c
new file mode 100644
index 00000000000..106dd912914
--- /dev/null
+++ b/arch/powerpc/math-emu/mcrfs.c
@@ -0,0 +1,31 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mcrfs(u32 *ccr, u32 crfD, u32 crfS)
+{
+	u32 value, clear;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %d\n", __FUNCTION__, ccr, *ccr, crfD, crfS);
+#endif
+
+	clear = 15 << ((7 - crfS) << 2);
+	if (!crfS)
+		clear = 0x90000000;
+
+	value = (__FPU_FPSCR >> ((7 - crfS) << 2)) & 15;
+	__FPU_FPSCR &= ~(clear);
+
+	*ccr &= ~(15 << ((7 - crfD) << 2));
+	*ccr |= (value << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	printk("CR: %08x\n", __FUNCTION__, *ccr);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c
new file mode 100644
index 00000000000..f477c9170e7
--- /dev/null
+++ b/arch/powerpc/math-emu/mffs.c
@@ -0,0 +1,17 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mffs(u32 *frD)
+{
+	frD[1] = __FPU_FPSCR;
+
+#ifdef DEBUG
+	printk("%s: frD %p: %08x.%08x\n", __FUNCTION__, frD, frD[0], frD[1]);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c
new file mode 100644
index 00000000000..99bfd80f4af
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsb0.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mtfsb0(int crbD)
+{
+	if ((crbD != 1) && (crbD != 2))
+		__FPU_FPSCR &= ~(1 << (31 - crbD));
+
+#ifdef DEBUG
+	printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c
new file mode 100644
index 00000000000..3d9e7ed92d2
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsb1.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mtfsb1(int crbD)
+{
+	if ((crbD != 1) && (crbD != 2))
+		__FPU_FPSCR |= (1 << (31 - crbD));
+
+#ifdef DEBUG
+	printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c
new file mode 100644
index 00000000000..d70cf714994
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsf.c
@@ -0,0 +1,45 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mtfsf(unsigned int FM, u32 *frB)
+{
+	u32 mask;
+
+	if (FM == 0)
+		return 0;
+
+	if (FM == 0xff)
+		mask = 0x9fffffff;
+	else {
+		mask = 0;
+		if (FM & (1 << 0))
+			mask |= 0x90000000;
+		if (FM & (1 << 1))
+			mask |= 0x0f000000;
+		if (FM & (1 << 2))
+			mask |= 0x00f00000;
+		if (FM & (1 << 3))
+			mask |= 0x000f0000;
+		if (FM & (1 << 4))
+			mask |= 0x0000f000;
+		if (FM & (1 << 5))
+			mask |= 0x00000f00;
+		if (FM & (1 << 6))
+			mask |= 0x000000f0;
+		if (FM & (1 << 7))
+			mask |= 0x0000000f;
+	}
+
+	__FPU_FPSCR &= ~(mask);
+	__FPU_FPSCR |= (frB[1] & mask);
+
+#ifdef DEBUG
+	printk("%s: %02x %p: %08lx\n", __FUNCTION__, FM, frB, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c
new file mode 100644
index 00000000000..71df854baa7
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsfi.c
@@ -0,0 +1,23 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+
+int
+mtfsfi(unsigned int crfD, unsigned int IMM)
+{
+	u32 mask = 0xf;
+
+	if (!crfD)
+		mask = 9;
+
+	__FPU_FPSCR &= ~(mask << ((7 - crfD) << 2));
+	__FPU_FPSCR |= (IMM & 0xf) << ((7 - crfD) << 2);
+
+#ifdef DEBUG
+	printk("%s: %d %x: %08lx\n", __FUNCTION__, crfD, IMM, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/op-1.h b/arch/powerpc/math-emu/op-1.h
new file mode 100644
index 00000000000..c92fa95f562
--- /dev/null
+++ b/arch/powerpc/math-emu/op-1.h
@@ -0,0 +1,245 @@
+/*
+ * Basic one-word fraction declaration and manipulation.
+ */
+
+#define _FP_FRAC_DECL_1(X)	_FP_W_TYPE X##_f
+#define _FP_FRAC_COPY_1(D,S)	(D##_f = S##_f)
+#define _FP_FRAC_SET_1(X,I)	(X##_f = I)
+#define _FP_FRAC_HIGH_1(X)	(X##_f)
+#define _FP_FRAC_LOW_1(X)	(X##_f)
+#define _FP_FRAC_WORD_1(X,w)	(X##_f)
+
+#define _FP_FRAC_ADDI_1(X,I)	(X##_f += I)
+#define _FP_FRAC_SLL_1(X,N)			\
+  do {						\
+    if (__builtin_constant_p(N) && (N) == 1)	\
+      X##_f += X##_f;				\
+    else					\
+      X##_f <<= (N);				\
+  } while (0)
+#define _FP_FRAC_SRL_1(X,N)	(X##_f >>= N)
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRS_1(X,N,sz)	__FP_FRAC_SRS_1(X##_f, N, sz)
+
+#define __FP_FRAC_SRS_1(X,N,sz)						\
+   (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1		\
+		     ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
+
+#define _FP_FRAC_ADD_1(R,X,Y)	(R##_f = X##_f + Y##_f)
+#define _FP_FRAC_SUB_1(R,X,Y)	(R##_f = X##_f - Y##_f)
+#define _FP_FRAC_CLZ_1(z, X)	__FP_CLZ(z, X##_f)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_1(X)	((_FP_WS_TYPE)X##_f < 0)
+#define _FP_FRAC_ZEROP_1(X)	(X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs,X)	(X##_f & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_1(X, Y)	(X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y)	(X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y)	(X##_f > Y##_f)
+
+#define _FP_ZEROFRAC_1		0
+#define _FP_MINFRAC_1		1
+
+/*
+ * Unpack the raw bits of a native fp value.  Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_1(fs, X, val)				\
+  do {								\
+    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
+								\
+    X##_f = _flo.bits.frac;					\
+    X##_e = _flo.bits.exp;					\
+    X##_s = _flo.bits.sign;					\
+  } while (0)
+
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_1(fs, val, X)				\
+  do {								\
+    union _FP_UNION_##fs _flo;					\
+								\
+    _flo.bits.frac = X##_f;					\
+    _flo.bits.exp  = X##_e;					\
+    _flo.bits.sign = X##_s;					\
+								\
+    (val) = _flo.flt;						\
+  } while (0)
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   multiplication immediately.  */
+
+#define _FP_MUL_MEAT_1_imm(fs, R, X, Y)					\
+  do {									\
+    R##_f = X##_f * Y##_f;						\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);	\
+  } while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit)				\
+  do {									\
+    _FP_W_TYPE _Z_f0, _Z_f1;						\
+    doit(_Z_f1, _Z_f0, X##_f, Y##_f);					\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);	\
+    R##_f = _Z_f0;							\
+  } while (0)
+
+/* Finally, a simple widening multiply algorithm.  What fun!  */
+
+#define _FP_MUL_MEAT_1_hard(fs, R, X, Y)				\
+  do {									\
+    _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1;		\
+									\
+    /* split the words in half */					\
+    _xh = X##_f >> (_FP_W_TYPE_SIZE/2);					\
+    _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);		\
+    _yh = Y##_f >> (_FP_W_TYPE_SIZE/2);					\
+    _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);		\
+									\
+    /* multiply the pieces */						\
+    _z_f0 = _xl * _yl;							\
+    _a_f0 = _xh * _yl;							\
+    _a_f1 = _xl * _yh;							\
+    _z_f1 = _xh * _yh;							\
+									\
+    /* reassemble into two full words */				\
+    if ((_a_f0 += _a_f1) < _a_f1)					\
+      _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);			\
+    _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2);				\
+    _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2);				\
+    _FP_FRAC_ADD_2(_z, _z, _a);						\
+									\
+    /* normalize */							\
+    _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs);	\
+    R##_f = _z_f0;							\
+  } while (0)
+
+
+/*
+ * Division algorithms:
+ */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   division immediately.  Give this macro either _FP_DIV_HELP_imm for
+   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
+   choose will depend on what the compiler does with divrem4.  */
+
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)		\
+  do {							\
+    _FP_W_TYPE _q, _r;					\
+    X##_f <<= (X##_f < Y##_f				\
+	       ? R##_e--, _FP_WFRACBITS_##fs		\
+	       : _FP_WFRACBITS_##fs - 1);		\
+    doit(_q, _r, X##_f, Y##_f);				\
+    R##_f = _q | (_r != 0);				\
+  } while (0)
+
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  This first is for a primitive
+   that requires normalization, the second for one that does not.  Look
+   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
+
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)				\
+  do {									\
+    _FP_W_TYPE _nh, _nl, _q, _r;					\
+									\
+    /* Normalize Y -- i.e. make the most significant bit set.  */	\
+    Y##_f <<= _FP_WFRACXBITS_##fs - 1;					\
+									\
+    /* Shift X op correspondingly high, that is, up one full word.  */	\
+    if (X##_f <= Y##_f)							\
+      {									\
+	_nl = 0;							\
+	_nh = X##_f;							\
+      }									\
+    else								\
+      {									\
+	R##_e++;							\
+	_nl = X##_f << (_FP_W_TYPE_SIZE-1);				\
+	_nh = X##_f >> 1;						\
+      }									\
+    									\
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);				\
+    R##_f = _q | (_r != 0);						\
+  } while (0)
+
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)		\
+  do {							\
+    _FP_W_TYPE _nh, _nl, _q, _r;			\
+    if (X##_f < Y##_f)					\
+      {							\
+	R##_e--;					\
+	_nl = X##_f << _FP_WFRACBITS_##fs;		\
+	_nh = X##_f >> _FP_WFRACXBITS_##fs;		\
+      }							\
+    else						\
+      {							\
+	_nl = X##_f << (_FP_WFRACBITS_##fs - 1);	\
+	_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);	\
+      }							\
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);		\
+    R##_f = _q | (_r != 0);				\
+  } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+
+#define _FP_SQRT_MEAT_1(R, S, T, X, q)			\
+  do {							\
+    while (q)						\
+      {							\
+        T##_f = S##_f + q;				\
+        if (T##_f <= X##_f)				\
+          {						\
+            S##_f = T##_f + q;				\
+            X##_f -= T##_f;				\
+            R##_f += q;					\
+          }						\
+        _FP_FRAC_SLL_1(X, 1);				\
+        q >>= 1;					\
+      }							\
+  } while (0)
+
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)	(r = X##_f)
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)	(X##_f = r)
+
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S)				\
+  do {									\
+    D##_f = S##_f;							\
+    if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs)			\
+      _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs),	\
+		     _FP_WFRACBITS_##sfs);				\
+    else								\
+      D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs;		\
+  } while (0)
diff --git a/arch/powerpc/math-emu/op-2.h b/arch/powerpc/math-emu/op-2.h
new file mode 100644
index 00000000000..b9b06b4c6ea
--- /dev/null
+++ b/arch/powerpc/math-emu/op-2.h
@@ -0,0 +1,433 @@
+/*
+ * Basic two-word fraction declaration and manipulation.
+ */
+
+#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0, X##_f1
+#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
+#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
+#define _FP_FRAC_HIGH_2(X)	(X##_f1)
+#define _FP_FRAC_LOW_2(X)	(X##_f0)
+#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
+
+#define _FP_FRAC_SLL_2(X,N)						\
+  do {									\
+    if ((N) < _FP_W_TYPE_SIZE)						\
+      {									\
+        if (__builtin_constant_p(N) && (N) == 1) 			\
+          {								\
+            X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);	\
+            X##_f0 += X##_f0;						\
+          }								\
+        else								\
+          {								\
+	    X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N));	\
+	    X##_f0 <<= (N);						\
+	  }								\
+      }									\
+    else								\
+      {									\
+	X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);			\
+	X##_f0 = 0;							\
+      }									\
+  } while (0)
+
+#define _FP_FRAC_SRL_2(X,N)						\
+  do {									\
+    if ((N) < _FP_W_TYPE_SIZE)						\
+      {									\
+	X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));	\
+	X##_f1 >>= (N);							\
+      }									\
+    else								\
+      {									\
+	X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);			\
+	X##_f1 = 0;							\
+      }									\
+  } while (0)
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRS_2(X,N,sz)						\
+  do {									\
+    if ((N) < _FP_W_TYPE_SIZE)						\
+      {									\
+	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |	\
+		  (__builtin_constant_p(N) && (N) == 1			\
+		   ? X##_f0 & 1						\
+		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));	\
+	X##_f1 >>= (N);							\
+      }									\
+    else								\
+      {									\
+	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |			\
+	          (((X##_f1 << (sz - (N))) | X##_f0) != 0));		\
+	X##_f1 = 0;							\
+      }									\
+  } while (0)
+
+#define _FP_FRAC_ADDI_2(X,I) \
+  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
+
+#define _FP_FRAC_ADD_2(R,X,Y) \
+  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_SUB_2(R,X,Y) \
+  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_CLZ_2(R,X)	\
+  do {				\
+    if (X##_f1)			\
+      __FP_CLZ(R,X##_f1);	\
+    else 			\
+    {				\
+      __FP_CLZ(R,X##_f0);	\
+      R += _FP_W_TYPE_SIZE;	\
+    }				\
+  } while(0)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
+#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
+#define _FP_FRAC_OVERP_2(fs,X)	(X##_f1 & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
+#define _FP_FRAC_GT_2(X, Y)	\
+  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
+#define _FP_FRAC_GE_2(X, Y)	\
+  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
+
+#define _FP_ZEROFRAC_2		0, 0
+#define _FP_MINFRAC_2		0, 1
+
+/*
+ * Internals
+ */
+
+#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
+
+#define __FP_CLZ_2(R, xh, xl)	\
+  do {				\
+    if (xh)			\
+      __FP_CLZ(R,xl);		\
+    else 			\
+    {				\
+      __FP_CLZ(R,xl);		\
+      R += _FP_W_TYPE_SIZE;	\
+    }				\
+  } while(0)
+
+#if 0
+
+#ifndef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i) \
+  (xh += ((xl += i) < i))
+#endif
+#ifndef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
+  (rh = xh + yh + ((rl = xl + yl) < xl))
+#endif
+#ifndef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
+  (rh = xh - yh - ((rl = xl - yl) > xl))
+#endif
+
+#else
+
+#undef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
+#undef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2			add_ssaaaa
+#undef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2			sub_ddmmss
+
+#endif
+
+/*
+ * Unpack the raw bits of a native fp value.  Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_2(fs, X, val)			\
+  do {							\
+    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
+							\
+    X##_f0 = _flo.bits.frac0;				\
+    X##_f1 = _flo.bits.frac1;				\
+    X##_e  = _flo.bits.exp;				\
+    X##_s  = _flo.bits.sign;				\
+  } while (0)
+
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_2(fs, val, X)			\
+  do {							\
+    union _FP_UNION_##fs _flo;				\
+							\
+    _flo.bits.frac0 = X##_f0;				\
+    _flo.bits.frac1 = X##_f1;				\
+    _flo.bits.exp   = X##_e;				\
+    _flo.bits.sign  = X##_s;				\
+							\
+    (val) = _flo.flt;					\
+  } while (0)
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit)				\
+  do {									\
+    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
+									\
+    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
+    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
+    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
+    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
+									\
+    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),	\
+		    0, _b_f1, _b_f0, 0,					\
+		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));	\
+    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),	\
+		    0, _c_f1, _c_f0, 0,					\
+		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));	\
+									\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);	\
+    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
+    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
+  } while (0)
+
+/* This next macro appears to be totally broken. Fortunately nowhere
+ * seems to use it :-> The problem is that we define _z[4] but
+ * then use it in _FP_FRAC_SRS_4, which will attempt to access
+ * _z_f[n] which will cause an error. The fix probably involves
+ * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998
+ */
+#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y)					\
+  do {									\
+    _FP_W_TYPE _x[2], _y[2], _z[4];					\
+    _x[0] = X##_f0; _x[1] = X##_f1;					\
+    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
+									\
+    mpn_mul_n(_z, _x, _y, 2);						\
+									\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs);	\
+    R##_f0 = _z[0];							\
+    R##_f1 = _z[1];							\
+  } while (0)
+
+
+/*
+ * Division algorithms:
+ * This seems to be giving me difficulties -- PMM
+ * Look, NetBSD seems to be able to comment algorithms. Can't you?
+ * I've thrown printks at the problem.
+ * This now appears to work, but I still don't really know why.
+ * Also, I don't think the result is properly normalised...
+ */
+
+#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y)				\
+  do {									\
+    extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],	\
+			       _FP_W_TYPE n1, _FP_W_TYPE n0,		\
+			       _FP_W_TYPE d1, _FP_W_TYPE d0);		\
+    _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0;		\
+    _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0;				\
+    _FP_W_TYPE _rmem[2], _qmem[2];					\
+    /* I think this check is to ensure that the result is normalised.   \
+     * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in         \
+     * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y.      \
+     * In this case we tweak things. (this is based on comments in      \
+     * the NetBSD FPU emulation code. )                                 \
+     * We know X,Y are normalised because we ensure this as part of     \
+     * the unpacking process. -- PMM                                    \
+     */									\
+    if (_FP_FRAC_GT_2(X, Y))						\
+      {									\
+/*	R##_e++; */							\
+	_n_f3 = X##_f1 >> 1;						\
+	_n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
+	_n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
+	_n_f0 = 0;							\
+      }									\
+    else								\
+      {									\
+	R##_e--;							\
+	_n_f3 = X##_f1;							\
+	_n_f2 = X##_f0;							\
+	_n_f1 = _n_f0 = 0;						\
+      }									\
+									\
+    /* Normalize, i.e. make the most significant bit of the 		\
+       denominator set.  CHANGED: - 1 to nothing -- PMM */		\
+    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */);			\
+									\
+    /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 	\
+       primitive snagged from libgcc2.c.  */				\
+									\
+    _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1);		\
+    _q_f1 = _qmem[0];							\
+    umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0);				\
+    _r_f1 = _rmem[0];							\
+    _r_f0 = _n_f1;							\
+    if (_FP_FRAC_GT_2(_m, _r))						\
+      {									\
+	_q_f1--;							\
+	_FP_FRAC_ADD_2(_r, _r, Y);					\
+	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
+	  {								\
+	    _q_f1--;							\
+	    _FP_FRAC_ADD_2(_r, _r, Y);					\
+	  }								\
+      }									\
+    _FP_FRAC_SUB_2(_r, _r, _m);						\
+									\
+    _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1);		\
+    _q_f0 = _qmem[0];							\
+    umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0);				\
+    _r_f1 = _rmem[0];							\
+    _r_f0 = _n_f0;							\
+    if (_FP_FRAC_GT_2(_m, _r))						\
+      {									\
+	_q_f0--;							\
+	_FP_FRAC_ADD_2(_r, _r, Y);					\
+	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
+	  {								\
+	    _q_f0--;							\
+	    _FP_FRAC_ADD_2(_r, _r, Y);					\
+	  }								\
+      }									\
+    _FP_FRAC_SUB_2(_r, _r, _m);						\
+									\
+    R##_f1 = _q_f1;							\
+    R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0);				\
+    /* adjust so answer is normalized again. I'm not sure what the 	\
+     * final sz param should be. In practice it's never used since      \
+     * N is 1 which is always going to be < _FP_W_TYPE_SIZE...		\
+     */									\
+    /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs);	*/			\
+  } while (0)
+
+
+#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
+  do {									\
+    _FP_W_TYPE _x[4], _y[2], _z[4];					\
+    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
+    _x[0] = _x[3] = 0;							\
+    if (_FP_FRAC_GT_2(X, Y))						\
+      {									\
+	R##_e++;							\
+	_x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) |	\
+		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
+			    (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE)));	\
+	_x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE);		\
+      }									\
+    else								\
+      {									\
+	_x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) |		\
+		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
+			    (_FP_WFRACBITS - _FP_W_TYPE_SIZE)));	\
+	_x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE);		\
+      }									\
+									\
+    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
+    R##_f1 = _z[1];							\
+    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
+  } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+
+#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
+  do {							\
+    while (q)						\
+      {							\
+        T##_f1 = S##_f1 + q;				\
+        if (T##_f1 <= X##_f1)				\
+          {						\
+            S##_f1 = T##_f1 + q;			\
+            X##_f1 -= T##_f1;				\
+            R##_f1 += q;				\
+          }						\
+        _FP_FRAC_SLL_2(X, 1);				\
+        q >>= 1;					\
+      }							\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
+    while (q)						\
+      {							\
+        T##_f0 = S##_f0 + q;				\
+        T##_f1 = S##_f1;				\
+        if (T##_f1 < X##_f1 || 				\
+            (T##_f1 == X##_f1 && T##_f0 < X##_f0))	\
+          {						\
+            S##_f0 = T##_f0 + q;			\
+            if (((_FP_WS_TYPE)T##_f0) < 0 &&		\
+                ((_FP_WS_TYPE)S##_f0) >= 0)		\
+              S##_f1++;					\
+            _FP_FRAC_SUB_2(X, X, T);			\
+            R##_f0 += q;				\
+          }						\
+        _FP_FRAC_SLL_2(X, 1);				\
+        q >>= 1;					\
+      }							\
+  } while (0)
+
+
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
+  do {						\
+    if (rsize <= _FP_W_TYPE_SIZE)		\
+      r = X##_f0;				\
+    else					\
+      {						\
+	r = X##_f1;				\
+	r <<= _FP_W_TYPE_SIZE;			\
+	r += X##_f0;				\
+      }						\
+  } while (0)
+
+#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
+  do {									\
+    X##_f0 = r;								\
+    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
+  } while (0)
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
+  do {									\
+    _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
+		   _FP_WFRACBITS_##sfs);				\
+    D##_f = S##_f0;							\
+  } while (0)
+
+#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
+  do {									\
+    D##_f0 = S##_f;							\
+    D##_f1 = 0;								\
+    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
+  } while (0)
+
diff --git a/arch/powerpc/math-emu/op-4.h b/arch/powerpc/math-emu/op-4.h
new file mode 100644
index 00000000000..fcdd6d064c5
--- /dev/null
+++ b/arch/powerpc/math-emu/op-4.h
@@ -0,0 +1,297 @@
+/*
+ * Basic four-word fraction declaration and manipulation.
+ *
+ * When adding quadword support for 32 bit machines, we need
+ * to be a little careful as double multiply uses some of these
+ * macros: (in op-2.h)
+ * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
+ * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
+ * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
+ * _FP_FRAC_DECL_4: it appears to be broken and is not used
+ * anywhere anyway. )
+ *
+ * I've now fixed all the macros that were here from the sparc64 code.
+ * [*none* of the shift macros were correct!] -- PMM 02/1998
+ *
+ * The only quadword stuff that remains to be coded is:
+ * 1) the conversion to/from ints, which requires
+ * that we check (in op-common.h) that the following do the right thing
+ * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
+ * 2) multiply, divide and sqrt, which require:
+ * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q),
+ * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
+ * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
+ * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for
+ * these; they are used nowhere else. ]
+ */
+
+#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
+#define _FP_FRAC_COPY_4(D,S)			\
+  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
+   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
+ * macro such as _FP_ZEROFRAC_n which returns n comma separated values.
+ * The result is that we get an expansion of __FP_FRAC_SET_n(X,I0,I1,I2,I3)
+ * which just assigns the In values to the array X##_f[].
+ * This is why the number of parameters doesn't appear to match
+ * at first glance...      -- PMM
+ */
+#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
+#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
+#define _FP_FRAC_LOW_4(X)	(X##_f[0])
+#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
+
+#define _FP_FRAC_SLL_4(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _up = (N) % _FP_W_TYPE_SIZE;					\
+    _down = _FP_W_TYPE_SIZE - _up;					\
+    for (_i = 3; _i > _skip; --_i)					\
+      X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down;	\
+/* bugfixed: was X##_f[_i] <<= _up;  -- PMM 02/1998 */                  \
+    X##_f[_i] = X##_f[0] << _up; 	                                \
+    for (--_i; _i >= 0; --_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+/* This one was broken too */
+#define _FP_FRAC_SRL_4(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _down = (N) % _FP_W_TYPE_SIZE;					\
+    _up = _FP_W_TYPE_SIZE - _down;					\
+    for (_i = 0; _i < 3-_skip; ++_i)					\
+      X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up;	\
+    X##_f[_i] = X##_f[3] >> _down;			         	\
+    for (++_i; _i < 4; ++_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+
+/* Right shift with sticky-lsb.
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+#define _FP_FRAC_SRS_4(X,N,size)					\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _FP_W_TYPE _s;							\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _down = (N) % _FP_W_TYPE_SIZE;					\
+    _up = _FP_W_TYPE_SIZE - _down;					\
+    for (_s = _i = 0; _i < _skip; ++_i)					\
+      _s |= X##_f[_i];							\
+    _s |= X##_f[_i] << _up;						\
+/* s is now != 0 if we want to set the LSbit */                         \
+    for (_i = 0; _i < 3-_skip; ++_i)					\
+      X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up;	\
+    X##_f[_i] = X##_f[3] >> _down;					\
+    for (++_i; _i < 4; ++_i)						\
+      X##_f[_i] = 0;							\
+    /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
+    X##_f[0] |= (_s != 0);                                              \
+  } while (0)
+
+#define _FP_FRAC_ADD_4(R,X,Y)						\
+  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
+		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_SUB_4(R,X,Y)                                           \
+  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
+		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_ADDI_4(X,I)                                            \
+  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+
+#define _FP_ZEROFRAC_4  0,0,0,0
+#define _FP_MINFRAC_4   0,0,0,1
+
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
+#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[0] & _FP_OVERFLOW_##fs)
+
+#define _FP_FRAC_EQ_4(X,Y)                              \
+ (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
+  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
+
+#define _FP_FRAC_GT_4(X,Y)                              \
+ (X##_f[3] > Y##_f[3] ||                                \
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
+    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
+   ))                                                   \
+  ))                                                    \
+ )
+
+#define _FP_FRAC_GE_4(X,Y)                              \
+ (X##_f[3] > Y##_f[3] ||                                \
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
+    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
+   ))                                                   \
+  ))                                                    \
+ )
+
+
+#define _FP_FRAC_CLZ_4(R,X)             \
+  do {                                  \
+    if (X##_f[3])                       \
+    {                                   \
+        __FP_CLZ(R,X##_f[3]);           \
+    }                                   \
+    else if (X##_f[2])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[2]);           \
+        R += _FP_W_TYPE_SIZE;           \
+    }                                   \
+    else if (X##_f[1])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[2]);           \
+        R += _FP_W_TYPE_SIZE*2;         \
+    }                                   \
+    else                                \
+    {                                   \
+        __FP_CLZ(R,X##_f[0]);           \
+        R += _FP_W_TYPE_SIZE*3;         \
+    }                                   \
+  } while(0)
+
+
+#define _FP_UNPACK_RAW_4(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _flo; _flo.flt = (val);        	\
+    X##_f[0] = _flo.bits.frac0;                                 \
+    X##_f[1] = _flo.bits.frac1;                                 \
+    X##_f[2] = _flo.bits.frac2;                                 \
+    X##_f[3] = _flo.bits.frac3;                                 \
+    X##_e  = _flo.bits.exp;                                     \
+    X##_s  = _flo.bits.sign;                                    \
+  } while (0)
+
+#define _FP_PACK_RAW_4(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _flo;					\
+    _flo.bits.frac0 = X##_f[0];                                 \
+    _flo.bits.frac1 = X##_f[1];                                 \
+    _flo.bits.frac2 = X##_f[2];                                 \
+    _flo.bits.frac3 = X##_f[3];                                 \
+    _flo.bits.exp   = X##_e;                                    \
+    _flo.bits.sign  = X##_s;                                    \
+    (val) = _flo.flt;                                   	\
+  } while (0)
+
+
+/*
+ * Internals
+ */
+
+#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)					\
+  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+
+#ifndef __FP_FRAC_ADD_4
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  (r0 = x0 + y0,							\
+   r1 = x1 + y1 + (r0 < x0),						\
+   r2 = x2 + y2 + (r1 < x1),						\
+   r3 = x3 + y3 + (r2 < x2))
+#endif
+
+#ifndef __FP_FRAC_SUB_4
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  (r0 = x0 - y0,                                                        \
+   r1 = x1 - y1 - (r0 > x0),                                            \
+   r2 = x2 - y2 - (r1 > x1),                                            \
+   r3 = x3 - y3 - (r2 > x2))
+#endif
+
+#ifndef __FP_FRAC_ADDI_4
+/* I always wanted to be a lisp programmer :-> */
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
+  (x3 += ((x2 += ((x1 += ((x0 += i) < x0)) < x1) < x2)))
+#endif
+
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like
+ * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
+ * the ones in op-2.h and op-1.h.
+ */
+#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f = S##_f[0];                                                   \
+  } while (0)
+
+#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f0 = S##_f[0];                                                  \
+     D##_f1 = S##_f[1];                                                  \
+  } while (0)
+
+/* Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+/* Put the FP value X into r, which is an integer of size rsize. */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
+  do {                                                                  \
+    if (rsize <= _FP_W_TYPE_SIZE)                                       \
+      r = X##_f[0];                                                     \
+    else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
+    {                                                                   \
+      r = X##_f[1];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+      /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
+      /* and int == 4words as a single case.                         */ \
+      r = X##_f[3];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[2];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[1];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+  } while (0)
+
+/* "No disassemble Number Five!" */
+/* move an integer of size rsize into X's fractional part. We rely on
+ * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+ * having to mask the values we store into it.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
+  do {                                                                  \
+    X##_f[0] = r;                                                       \
+    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
+    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
+    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
+  } while (0)
+
+#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f;                                                  \
+     D##_f[1] = D##_f[2] = D##_f[3] = 0;                                \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+
+#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f0;                                                 \
+     D##_f[1] = S##_f1;                                                 \
+     D##_f[2] = D##_f[3] = 0;                                           \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+
+/* FIXME! This has to be written */
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)
diff --git a/arch/powerpc/math-emu/op-common.h b/arch/powerpc/math-emu/op-common.h
new file mode 100644
index 00000000000..afb82b6498c
--- /dev/null
+++ b/arch/powerpc/math-emu/op-common.h
@@ -0,0 +1,688 @@
+#define _FP_DECL(wc, X)			\
+  _FP_I_TYPE X##_c, X##_s, X##_e;	\
+  _FP_FRAC_DECL_##wc(X)
+
+/*
+ * Finish truely unpacking a native fp value by classifying the kind
+ * of fp value and normalizing both the exponent and the fraction.
+ */
+
+#define _FP_UNPACK_CANONICAL(fs, wc, X)					\
+do {									\
+  switch (X##_e)							\
+  {									\
+  default:								\
+    _FP_FRAC_HIGH_##wc(X) |= _FP_IMPLBIT_##fs;				\
+    _FP_FRAC_SLL_##wc(X, _FP_WORKBITS);					\
+    X##_e -= _FP_EXPBIAS_##fs;						\
+    X##_c = FP_CLS_NORMAL;						\
+    break;								\
+									\
+  case 0:								\
+    if (_FP_FRAC_ZEROP_##wc(X))						\
+      X##_c = FP_CLS_ZERO;						\
+    else								\
+      {									\
+	/* a denormalized number */					\
+	_FP_I_TYPE _shift;						\
+	_FP_FRAC_CLZ_##wc(_shift, X);					\
+	_shift -= _FP_FRACXBITS_##fs;					\
+	_FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS));			\
+	X##_e -= _FP_EXPBIAS_##fs - 1 + _shift;				\
+	X##_c = FP_CLS_NORMAL;						\
+      }									\
+    break;								\
+									\
+  case _FP_EXPMAX_##fs:							\
+    if (_FP_FRAC_ZEROP_##wc(X))						\
+      X##_c = FP_CLS_INF;						\
+    else								\
+      /* we don't differentiate between signaling and quiet nans */	\
+      X##_c = FP_CLS_NAN;						\
+    break;								\
+  }									\
+} while (0)
+
+
+/*
+ * Before packing the bits back into the native fp result, take care
+ * of such mundane things as rounding and overflow.  Also, for some
+ * kinds of fp values, the original parts may not have been fully
+ * extracted -- but that is ok, we can regenerate them now.
+ */
+
+#define _FP_PACK_CANONICAL(fs, wc, X)				\
+({int __ret = 0;						\
+  switch (X##_c)						\
+  {								\
+  case FP_CLS_NORMAL:						\
+    X##_e += _FP_EXPBIAS_##fs;					\
+    if (X##_e > 0)						\
+      {								\
+	__ret |= _FP_ROUND(wc, X);				\
+	if (_FP_FRAC_OVERP_##wc(fs, X))				\
+	  {							\
+	    _FP_FRAC_SRL_##wc(X, (_FP_WORKBITS+1));		\
+	    X##_e++;						\
+	  }							\
+	else							\
+	  _FP_FRAC_SRL_##wc(X, _FP_WORKBITS);			\
+	if (X##_e >= _FP_EXPMAX_##fs)				\
+	  {							\
+	    /* overflow to infinity */				\
+	    X##_e = _FP_EXPMAX_##fs;				\
+	    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);		\
+            __ret |= EFLAG_OVERFLOW;				\
+	  }							\
+      }								\
+    else							\
+      {								\
+	/* we've got a denormalized number */			\
+	X##_e = -X##_e + 1;					\
+	if (X##_e <= _FP_WFRACBITS_##fs)			\
+	  {							\
+	    _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs);	\
+	    _FP_FRAC_SLL_##wc(X, 1);				\
+	    if (_FP_FRAC_OVERP_##wc(fs, X))			\
+	      {							\
+	        X##_e = 1;					\
+	        _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);	\
+	      }							\
+	    else						\
+	      {							\
+		X##_e = 0;					\
+		_FP_FRAC_SRL_##wc(X, _FP_WORKBITS+1);		\
+                __ret |= EFLAG_UNDERFLOW;			\
+	      }							\
+	  }							\
+	else							\
+	  {							\
+	    /* underflow to zero */				\
+	    X##_e = 0;						\
+	    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);		\
+            __ret |= EFLAG_UNDERFLOW;				\
+	  }							\
+      }								\
+    break;							\
+								\
+  case FP_CLS_ZERO:						\
+    X##_e = 0;							\
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			\
+    break;							\
+								\
+  case FP_CLS_INF:						\
+    X##_e = _FP_EXPMAX_##fs;					\
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			\
+    break;							\
+								\
+  case FP_CLS_NAN:						\
+    X##_e = _FP_EXPMAX_##fs;					\
+    if (!_FP_KEEPNANFRACP)					\
+      {								\
+	_FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs);			\
+	X##_s = 0;						\
+      }								\
+    else							\
+      _FP_FRAC_HIGH_##wc(X) |= _FP_QNANBIT_##fs;		\
+    break;							\
+  }								\
+  __ret;							\
+})
+
+
+/*
+ * Main addition routine.  The input values should be cooked.
+ */
+
+#define _FP_ADD(fs, wc, R, X, Y)					     \
+do {									     \
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))				     \
+  {									     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):			     \
+    {									     \
+      /* shift the smaller number so that its exponent matches the larger */ \
+      _FP_I_TYPE diff = X##_e - Y##_e;					     \
+									     \
+      if (diff < 0)							     \
+	{								     \
+	  diff = -diff;							     \
+	  if (diff <= _FP_WFRACBITS_##fs)				     \
+	    _FP_FRAC_SRS_##wc(X, diff, _FP_WFRACBITS_##fs);		     \
+	  else if (!_FP_FRAC_ZEROP_##wc(X))				     \
+	    _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc);			     \
+	  else								     \
+	    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			     \
+	  R##_e = Y##_e;						     \
+	}								     \
+      else								     \
+	{								     \
+	  if (diff > 0)							     \
+	    {								     \
+	      if (diff <= _FP_WFRACBITS_##fs)				     \
+	        _FP_FRAC_SRS_##wc(Y, diff, _FP_WFRACBITS_##fs);		     \
+	      else if (!_FP_FRAC_ZEROP_##wc(Y))				     \
+	        _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc);			     \
+	      else							     \
+	        _FP_FRAC_SET_##wc(Y, _FP_ZEROFRAC_##wc);		     \
+	    }								     \
+	  R##_e = X##_e;						     \
+	}								     \
+									     \
+      R##_c = FP_CLS_NORMAL;						     \
+									     \
+      if (X##_s == Y##_s)						     \
+	{								     \
+	  R##_s = X##_s;						     \
+	  _FP_FRAC_ADD_##wc(R, X, Y);					     \
+	  if (_FP_FRAC_OVERP_##wc(fs, R))				     \
+	    {								     \
+	      _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);		     \
+	      R##_e++;							     \
+	    }								     \
+	}								     \
+      else								     \
+	{								     \
+	  R##_s = X##_s;						     \
+	  _FP_FRAC_SUB_##wc(R, X, Y);					     \
+	  if (_FP_FRAC_ZEROP_##wc(R))					     \
+	    {								     \
+	      /* return an exact zero */				     \
+	      if (FP_ROUNDMODE == FP_RND_MINF)				     \
+		R##_s |= Y##_s;						     \
+	      else							     \
+		R##_s &= Y##_s;						     \
+	      R##_c = FP_CLS_ZERO;					     \
+	    }								     \
+	  else								     \
+	    {								     \
+	      if (_FP_FRAC_NEGP_##wc(R))				     \
+		{							     \
+		  _FP_FRAC_SUB_##wc(R, Y, X);				     \
+		  R##_s = Y##_s;					     \
+		}							     \
+									     \
+	      /* renormalize after subtraction */			     \
+	      _FP_FRAC_CLZ_##wc(diff, R);				     \
+	      diff -= _FP_WFRACXBITS_##fs;				     \
+	      if (diff)							     \
+		{							     \
+		  R##_e -= diff;					     \
+		  _FP_FRAC_SLL_##wc(R, diff);				     \
+		}							     \
+	    }								     \
+	}								     \
+      break;								     \
+    }									     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):				     \
+    _FP_CHOOSENAN(fs, wc, R, X, Y);					     \
+    break;								     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):			     \
+    R##_e = X##_e;							     \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):			     \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):				     \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):				     \
+    _FP_FRAC_COPY_##wc(R, X);						     \
+    R##_s = X##_s;							     \
+    R##_c = X##_c;							     \
+    break;								     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):			     \
+    R##_e = Y##_e;							     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):			     \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):				     \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):				     \
+    _FP_FRAC_COPY_##wc(R, Y);						     \
+    R##_s = Y##_s;							     \
+    R##_c = Y##_c;							     \
+    break;								     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):				     \
+    if (X##_s != Y##_s)							     \
+      {									     \
+	/* +INF + -INF => NAN */					     \
+	_FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);				     \
+	R##_s = X##_s ^ Y##_s;						     \
+	R##_c = FP_CLS_NAN;						     \
+	break;								     \
+      }									     \
+    /* FALLTHRU */							     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):			     \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):				     \
+    R##_s = X##_s;							     \
+    R##_c = FP_CLS_INF;							     \
+    break;								     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):			     \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):				     \
+    R##_s = Y##_s;							     \
+    R##_c = FP_CLS_INF;							     \
+    break;								     \
+									     \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):			     \
+    /* make sure the sign is correct */					     \
+    if (FP_ROUNDMODE == FP_RND_MINF)					     \
+      R##_s = X##_s | Y##_s;						     \
+    else								     \
+      R##_s = X##_s & Y##_s;						     \
+    R##_c = FP_CLS_ZERO;						     \
+    break;								     \
+									     \
+  default:								     \
+    abort();								     \
+  }									     \
+} while (0)
+
+
+/*
+ * Main negation routine.  FIXME -- when we care about setting exception
+ * bits reliably, this will not do.  We should examine all of the fp classes.
+ */
+
+#define _FP_NEG(fs, wc, R, X)		\
+  do {					\
+    _FP_FRAC_COPY_##wc(R, X);		\
+    R##_c = X##_c;			\
+    R##_e = X##_e;			\
+    R##_s = 1 ^ X##_s;			\
+  } while (0)
+
+
+/*
+ * Main multiplication routine.  The input values should be cooked.
+ */
+
+#define _FP_MUL(fs, wc, R, X, Y)			\
+do {							\
+  R##_s = X##_s ^ Y##_s;				\
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))		\
+  {							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_NORMAL;				\
+    R##_e = X##_e + Y##_e + 1;				\
+							\
+    _FP_MUL_MEAT_##fs(R,X,Y);				\
+							\
+    if (_FP_FRAC_OVERP_##wc(fs, R))			\
+      _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);	\
+    else						\
+      R##_e--;						\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):		\
+    _FP_CHOOSENAN(fs, wc, R, X, Y);			\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
+    R##_s = X##_s;					\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):	\
+    _FP_FRAC_COPY_##wc(R, X);				\
+    R##_c = X##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):	\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
+    R##_s = Y##_s;					\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
+    _FP_FRAC_COPY_##wc(R, Y);				\
+    R##_c = Y##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):		\
+    R##_c = FP_CLS_NAN;					\
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);		\
+    break;						\
+							\
+  default:						\
+    abort();						\
+  }							\
+} while (0)
+
+
+/*
+ * Main division routine.  The input values should be cooked.
+ */
+
+#define _FP_DIV(fs, wc, R, X, Y)			\
+do {							\
+  R##_s = X##_s ^ Y##_s;				\
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))		\
+  {							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_NORMAL;				\
+    R##_e = X##_e - Y##_e;				\
+							\
+    _FP_DIV_MEAT_##fs(R,X,Y);				\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):		\
+    _FP_CHOOSENAN(fs, wc, R, X, Y);			\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
+    R##_s = X##_s;					\
+    _FP_FRAC_COPY_##wc(R, X);				\
+    R##_c = X##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):	\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
+    R##_s = Y##_s;					\
+    _FP_FRAC_COPY_##wc(R, Y);				\
+    R##_c = Y##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_ZERO;				\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_INF;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):	\
+    R##_c = FP_CLS_NAN;					\
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);		\
+    break;						\
+							\
+  default:						\
+    abort();						\
+  }							\
+} while (0)
+
+
+/*
+ * Main differential comparison routine.  The inputs should be raw not
+ * cooked.  The return is -1,0,1 for normal values, 2 otherwise.
+ */
+
+#define _FP_CMP(fs, wc, ret, X, Y, un)					\
+  do {									\
+    /* NANs are unordered */						\
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))		\
+	|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))	\
+      {									\
+	ret = un;							\
+      }									\
+    else								\
+      {									\
+        int __x_zero = (!X##_e && _FP_FRAC_ZEROP_##wc(X)) ? 1 : 0;	\
+        int __y_zero = (!Y##_e && _FP_FRAC_ZEROP_##wc(Y)) ? 1 : 0;	\
+									\
+	if (__x_zero && __y_zero)					\
+	  ret = 0;							\
+	else if (__x_zero)						\
+	  ret = Y##_s ? 1 : -1;						\
+	else if (__y_zero)						\
+	  ret = X##_s ? -1 : 1;						\
+	else if (X##_s != Y##_s)					\
+	  ret = X##_s ? -1 : 1;						\
+	else if (X##_e > Y##_e)						\
+	  ret = X##_s ? -1 : 1;						\
+	else if (X##_e < Y##_e)						\
+	  ret = X##_s ? 1 : -1;						\
+	else if (_FP_FRAC_GT_##wc(X, Y))				\
+	  ret = X##_s ? -1 : 1;						\
+	else if (_FP_FRAC_GT_##wc(Y, X))				\
+	  ret = X##_s ? 1 : -1;						\
+	else								\
+	  ret = 0;							\
+      }									\
+  } while (0)
+
+
+/* Simplification for strict equality.  */
+
+#define _FP_CMP_EQ(fs, wc, ret, X, Y)					  \
+  do {									  \
+    /* NANs are unordered */						  \
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))		  \
+	|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))	  \
+      {									  \
+	ret = 1;							  \
+      }									  \
+    else								  \
+      {									  \
+	ret = !(X##_e == Y##_e						  \
+		&& _FP_FRAC_EQ_##wc(X, Y)				  \
+		&& (X##_s == Y##_s || !X##_e && _FP_FRAC_ZEROP_##wc(X))); \
+      }									  \
+  } while (0)
+
+/*
+ * Main square root routine.  The input value should be cooked.
+ */
+
+#define _FP_SQRT(fs, wc, R, X)						\
+do {									\
+    _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S);			\
+    _FP_W_TYPE q;							\
+    switch (X##_c)							\
+    {									\
+    case FP_CLS_NAN:							\
+    	R##_s = 0;							\
+    	R##_c = FP_CLS_NAN;						\
+    	_FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			\
+    	break;								\
+    case FP_CLS_INF:							\
+    	if (X##_s)							\
+    	  {								\
+    	    R##_s = 0;							\
+	    R##_c = FP_CLS_NAN; /* sNAN */				\
+    	  }								\
+    	else								\
+    	  {								\
+    	    R##_s = 0;							\
+    	    R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */			\
+    	  }								\
+    	break;								\
+    case FP_CLS_ZERO:							\
+	R##_s = X##_s;							\
+    	R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */			\
+	break;								\
+    case FP_CLS_NORMAL:							\
+    	R##_s = 0;							\
+        if (X##_s)							\
+          {								\
+	    R##_c = FP_CLS_NAN; /* sNAN */				\
+	    break;							\
+          }								\
+    	R##_c = FP_CLS_NORMAL;						\
+        if (X##_e & 1)							\
+          _FP_FRAC_SLL_##wc(X, 1);					\
+        R##_e = X##_e >> 1;						\
+        _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc);			\
+        _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc);			\
+        q = _FP_OVERFLOW_##fs;						\
+        _FP_FRAC_SLL_##wc(X, 1);					\
+        _FP_SQRT_MEAT_##wc(R, S, T, X, q);				\
+        _FP_FRAC_SRL_##wc(R, 1);					\
+    }									\
+  } while (0)
+
+/*
+ * Convert from FP to integer
+ */
+
+/* "When a NaN, infinity, large positive argument >= 2147483648.0, or
+ * large negative argument <= -2147483649.0 is converted to an integer,
+ * the invalid_current bit...should be set and fp_exception_IEEE_754 should
+ * be raised. If the floating point invalid trap is disabled, no trap occurs
+ * and a numerical result is generated: if the sign bit of the operand
+ * is 0, the result is 2147483647; if the sign bit of the operand is 1,
+ * the result is -2147483648."
+ * Similarly for conversion to extended ints, except that the boundaries
+ * are >= 2^63, <= -(2^63 + 1), and the results are 2^63 + 1 for s=0 and
+ * -2^63 for s=1.
+ * -- SPARC Architecture Manual V9, Appendix B, which specifies how
+ * SPARCs resolve implementation dependencies in the IEEE-754 spec.
+ * I don't believe that the code below follows this. I'm not even sure
+ * it's right!
+ * It doesn't cope with needing to convert to an n bit integer when there
+ * is no n bit integer type. Fortunately gcc provides long long so this
+ * isn't a problem for sparc32.
+ * I have, however, fixed its NaN handling to conform as above.
+ *         -- PMM 02/1998
+ * NB: rsigned is not 'is r declared signed?' but 'should the value stored
+ * in r be signed or unsigned?'. r is always(?) declared unsigned.
+ * Comments below are mine, BTW -- PMM
+ */
+#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned)			\
+  do {									\
+    switch (X##_c)							\
+      {									\
+      case FP_CLS_NORMAL:						\
+	if (X##_e < 0)							\
+	  {								\
+	  /* case FP_CLS_NAN: see above! */				\
+	  case FP_CLS_ZERO:						\
+	    r = 0;							\
+	  }								\
+	else if (X##_e >= rsize - (rsigned != 0))			\
+	  {	/* overflow */						\
+	  case FP_CLS_NAN:                                              \
+          case FP_CLS_INF:						\
+	    if (rsigned)						\
+	      {								\
+		r = 1;							\
+		r <<= rsize - 1;					\
+		r -= 1 - X##_s;						\
+	      }								\
+	    else							\
+	      {								\
+		r = 0;							\
+		if (!X##_s)						\
+		  r = ~r;						\
+	      }								\
+	  }								\
+	else								\
+	  {								\
+	    if (_FP_W_TYPE_SIZE*wc < rsize)				\
+	      {								\
+		_FP_FRAC_ASSEMBLE_##wc(r, X, rsize);			\
+		r <<= X##_e - _FP_WFRACBITS_##fs;			\
+	      }								\
+	    else							\
+	      {								\
+		if (X##_e >= _FP_WFRACBITS_##fs)			\
+		  _FP_FRAC_SLL_##wc(X, (X##_e - _FP_WFRACBITS_##fs + 1));\
+		else							\
+		  _FP_FRAC_SRL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1));\
+		_FP_FRAC_ASSEMBLE_##wc(r, X, rsize);			\
+	      }								\
+	    if (rsigned && X##_s)					\
+	      r = -r;							\
+	  }								\
+	break;								\
+      }									\
+  } while (0)
+
+#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype)			\
+  do {									\
+    if (r)								\
+      {									\
+	X##_c = FP_CLS_NORMAL;						\
+									\
+	if ((X##_s = (r < 0)))						\
+	  r = -r;							\
+	/* Note that `r' is now considered unsigned, so we don't have	\
+	   to worry about the single signed overflow case.  */		\
+									\
+	if (rsize <= _FP_W_TYPE_SIZE)					\
+	  __FP_CLZ(X##_e, r);						\
+	else								\
+	  __FP_CLZ_2(X##_e, (_FP_W_TYPE)(r >> _FP_W_TYPE_SIZE), 	\
+		     (_FP_W_TYPE)r);					\
+	if (rsize < _FP_W_TYPE_SIZE)					\
+		X##_e -= (_FP_W_TYPE_SIZE - rsize);			\
+	X##_e = rsize - X##_e - 1;					\
+									\
+	if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e)	\
+	  __FP_FRAC_SRS_1(r, (X##_e - _FP_WFRACBITS_##fs), rsize);	\
+	r &= ~((_FP_W_TYPE)1 << X##_e);					\
+	_FP_FRAC_DISASSEMBLE_##wc(X, ((unsigned rtype)r), rsize);	\
+	_FP_FRAC_SLL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1));		\
+      }									\
+    else								\
+      {									\
+	X##_c = FP_CLS_ZERO, X##_s = 0;					\
+      }									\
+  } while (0)
+
+
+#define FP_CONV(dfs,sfs,dwc,swc,D,S)			\
+  do {							\
+    _FP_FRAC_CONV_##dwc##_##swc(dfs, sfs, D, S);	\
+    D##_e = S##_e;					\
+    D##_c = S##_c;					\
+    D##_s = S##_s;					\
+  } while (0)
+
+/*
+ * Helper primitives.
+ */
+
+/* Count leading zeros in a word.  */
+
+#ifndef __FP_CLZ
+#if _FP_W_TYPE_SIZE < 64
+/* this is just to shut the compiler up about shifts > word length -- PMM 02/1998 */
+#define __FP_CLZ(r, x)				\
+  do {						\
+    _FP_W_TYPE _t = (x);			\
+    r = _FP_W_TYPE_SIZE - 1;			\
+    if (_t > 0xffff) r -= 16;			\
+    if (_t > 0xffff) _t >>= 16;			\
+    if (_t > 0xff) r -= 8;			\
+    if (_t > 0xff) _t >>= 8;			\
+    if (_t & 0xf0) r -= 4;			\
+    if (_t & 0xf0) _t >>= 4;			\
+    if (_t & 0xc) r -= 2;			\
+    if (_t & 0xc) _t >>= 2;			\
+    if (_t & 0x2) r -= 1;			\
+  } while (0)
+#else /* not _FP_W_TYPE_SIZE < 64 */
+#define __FP_CLZ(r, x)				\
+  do {						\
+    _FP_W_TYPE _t = (x);			\
+    r = _FP_W_TYPE_SIZE - 1;			\
+    if (_t > 0xffffffff) r -= 32;		\
+    if (_t > 0xffffffff) _t >>= 32;		\
+    if (_t > 0xffff) r -= 16;			\
+    if (_t > 0xffff) _t >>= 16;			\
+    if (_t > 0xff) r -= 8;			\
+    if (_t > 0xff) _t >>= 8;			\
+    if (_t & 0xf0) r -= 4;			\
+    if (_t & 0xf0) _t >>= 4;			\
+    if (_t & 0xc) r -= 2;			\
+    if (_t & 0xc) _t >>= 2;			\
+    if (_t & 0x2) r -= 1;			\
+  } while (0)
+#endif /* not _FP_W_TYPE_SIZE < 64 */
+#endif /* ndef __FP_CLZ */
+
+#define _FP_DIV_HELP_imm(q, r, n, d)		\
+  do {						\
+    q = n / d, r = n % d;			\
+  } while (0)
+
diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/math-emu/sfp-machine.h
new file mode 100644
index 00000000000..4b17d83cfcd
--- /dev/null
+++ b/arch/powerpc/math-emu/sfp-machine.h
@@ -0,0 +1,377 @@
+/* Machine-dependent software floating-point definitions.  PPC version.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Actually, this is a PPC (32bit) version, written based on the
+   i386, sparc, and sparc64 versions, by me,
+   Peter Maydell (pmaydell@chiark.greenend.org.uk).
+   Comments are by and large also mine, although they may be inaccurate.
+
+   In picking out asm fragments I've gone with the lowest common
+   denominator, which also happens to be the hardware I have :->
+   That is, a SPARC without hardware multiply and divide.
+ */
+
+/* basic word size definitions */
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+#define __ll_B			((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t)		((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t)	((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* You can optionally code some things like addition in asm. For
+ * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't
+ * then you get a fragment of C code [if you change an #ifdef 0
+ * in op-2.h] or a call to add_ssaaaa (see below).
+ * Good places to look for asm fragments to use are gcc and glibc.
+ * gcc's longlong.h is useful.
+ */
+
+/* We need to know how to multiply and divide. If the host word size
+ * is >= 2*fracbits you can use FP_MUL_MEAT_n_imm(t,R,X,Y) which
+ * codes the multiply with whatever gcc does to 'a * b'.
+ * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm
+ * function that can multiply two 1W values and get a 2W result.
+ * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which
+ * does bitshifting to avoid overflow.
+ * For division there is FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size
+ * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or
+ * _FP_DIV_HELP_ldiv (see op-1.h).
+ * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W).
+ * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd
+ * to do this.]
+ * In general, 'n' is the number of words required to hold the type,
+ * and 't' is either S, D or Q for single/double/quad.
+ *           -- PMM
+ */
+/* Example: SPARC64:
+ * #define _FP_MUL_MEAT_S(R,X,Y)	_FP_MUL_MEAT_1_imm(S,R,X,Y)
+ * #define _FP_MUL_MEAT_D(R,X,Y)	_FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm)
+ * #define _FP_MUL_MEAT_Q(R,X,Y)	_FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+ * #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv(D,R,X,Y)
+ * #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv_64(Q,R,X,Y)
+ *
+ * Example: i386:
+ * #define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64)
+ * #define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32)
+ * #define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
+ */
+
+#define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
+
+/* These macros define what NaN looks like. They're supposed to expand to
+ * a comma-separated set of 32bit unsigned ints that encode NaN.
+ */
+#define _FP_NANFRAC_S		_FP_QNANBIT_S
+#define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
+#define _FP_NANFRAC_Q           _FP_QNANBIT_Q, 0, 0, 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* This macro appears to be called when both X and Y are NaNs, and
+ * has to choose one and copy it to R. i386 goes for the larger of the
+ * two, sparc64 just picks Y. I don't understand this at all so I'll
+ * go with sparc64 because it's shorter :->   -- PMM
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y)			\
+  do {							\
+    R##_s = Y##_s;					\
+    _FP_FRAC_COPY_##wc(R,Y);				\
+    R##_c = FP_CLS_NAN;					\
+  } while (0)
+
+
+extern void fp_unpack_d(long *, unsigned long *, unsigned long *,
+			long *, long *, void *);
+extern int  fp_pack_d(void *, long, unsigned long, unsigned long, long, long);
+extern int  fp_pack_ds(void *, long, unsigned long, unsigned long, long, long);
+
+#define __FP_UNPACK_RAW_1(fs, X, val)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+    	(union _FP_UNION_##fs *)val;			\
+							\
+    X##_f = _flo->bits.frac;				\
+    X##_e = _flo->bits.exp;				\
+    X##_s = _flo->bits.sign;				\
+  } while (0)
+
+#define __FP_UNPACK_RAW_2(fs, X, val)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+    	(union _FP_UNION_##fs *)val;			\
+							\
+    X##_f0 = _flo->bits.frac0;				\
+    X##_f1 = _flo->bits.frac1;				\
+    X##_e  = _flo->bits.exp;				\
+    X##_s  = _flo->bits.sign;				\
+  } while (0)
+
+#define __FP_UNPACK_S(X,val)		\
+  do {					\
+    __FP_UNPACK_RAW_1(S,X,val);		\
+    _FP_UNPACK_CANONICAL(S,1,X);	\
+  } while (0)
+
+#define __FP_UNPACK_D(X,val)		\
+	fp_unpack_d(&X##_s, &X##_f1, &X##_f0, &X##_e, &X##_c, val)
+
+#define __FP_PACK_RAW_1(fs, val, X)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+    	(union _FP_UNION_##fs *)val;			\
+							\
+    _flo->bits.frac = X##_f;				\
+    _flo->bits.exp  = X##_e;				\
+    _flo->bits.sign = X##_s;				\
+  } while (0)
+
+#define __FP_PACK_RAW_2(fs, val, X)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+    	(union _FP_UNION_##fs *)val;			\
+							\
+    _flo->bits.frac0 = X##_f0;				\
+    _flo->bits.frac1 = X##_f1;				\
+    _flo->bits.exp   = X##_e;				\
+    _flo->bits.sign  = X##_s;				\
+  } while (0)
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#define __FPU_FPSCR	(current->thread.fpscr.val)
+
+/* We only actually write to the destination register
+ * if exceptions signalled (if any) will not trap.
+ */
+#define __FPU_ENABLED_EXC \
+({						\
+	(__FPU_FPSCR >> 3) & 0x1f;	\
+})
+
+#define __FPU_TRAP_P(bits) \
+	((__FPU_ENABLED_EXC & (bits)) != 0)
+
+#define __FP_PACK_S(val,X)			\
+({  int __exc = _FP_PACK_CANONICAL(S,1,X);	\
+    if(!__exc || !__FPU_TRAP_P(__exc))		\
+        __FP_PACK_RAW_1(S,val,X);		\
+    __exc;					\
+})
+
+#define __FP_PACK_D(val,X)			\
+	fp_pack_d(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
+
+#define __FP_PACK_DS(val,X)			\
+	fp_pack_ds(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE			\
+({					\
+	__FPU_FPSCR & 0x3;		\
+})
+
+/* the asm fragments go here: all these are taken from glibc-2.0.5's
+ * stdlib/longlong.h
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* add_ssaaaa is used in op-2.h and should be equivalent to
+ * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al))
+ * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
+	     : "=r" ((USItype)(sh)),					\
+	       "=&r" ((USItype)(sl))					\
+	     : "%r" ((USItype)(ah)),					\
+	       "%r" ((USItype)(al)),					\
+	       "rI" ((USItype)(bl)));					\
+    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)		\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
+	     : "=r" ((USItype)(sh)),					\
+	       "=&r" ((USItype)(sl))					\
+	     : "%r" ((USItype)(ah)),					\
+	       "%r" ((USItype)(al)),					\
+	       "rI" ((USItype)(bl)));					\
+    else								\
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
+	     : "=r" ((USItype)(sh)),					\
+	       "=&r" ((USItype)(sl))					\
+	     : "%r" ((USItype)(ah)),					\
+	       "r" ((USItype)(bh)),					\
+	       "%r" ((USItype)(al)),					\
+	       "rI" ((USItype)(bl)));					\
+  } while (0)
+
+/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
+ * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
+ * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
+	       : "=r" ((USItype)(sh)),					\
+		 "=&r" ((USItype)(sl))					\
+	       : "r" ((USItype)(bh)),					\
+		 "rI" ((USItype)(al)),					\
+		 "r" ((USItype)(bl)));					\
+    else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
+	       : "=r" ((USItype)(sh)),					\
+		 "=&r" ((USItype)(sl))					\
+	       : "r" ((USItype)(bh)),					\
+		 "rI" ((USItype)(al)),					\
+		 "r" ((USItype)(bl)));					\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
+	       : "=r" ((USItype)(sh)),					\
+		 "=&r" ((USItype)(sl))					\
+	       : "r" ((USItype)(ah)),					\
+		 "rI" ((USItype)(al)),					\
+		 "r" ((USItype)(bl)));					\
+    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
+	       : "=r" ((USItype)(sh)),					\
+		 "=&r" ((USItype)(sl))					\
+	       : "r" ((USItype)(ah)),					\
+		 "rI" ((USItype)(al)),					\
+		 "r" ((USItype)(bl)));					\
+    else								\
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
+	       : "=r" ((USItype)(sh)),					\
+		 "=&r" ((USItype)(sl))					\
+	       : "r" ((USItype)(ah)),					\
+		 "r" ((USItype)(bh)),					\
+		 "rI" ((USItype)(al)),					\
+		 "r" ((USItype)(bl)));					\
+  } while (0)
+
+/* asm fragments for mul and div */
+
+/* umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ */
+#define umul_ppmm(ph, pl, m0, m1)					\
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhwu %0,%1,%2"						\
+	     : "=r" ((USItype)(ph))					\
+	     : "%r" (__m0),						\
+               "r" (__m1));						\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+
+/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d)					\
+  do {									\
+    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */	\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+#define UDIV_NEEDS_NORMALIZATION 1
+
+#define abort()								\
+	return 0
+
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+/* Exception flags. */
+#define EFLAG_INVALID		(1 << (31 - 2))
+#define EFLAG_OVERFLOW		(1 << (31 - 3))
+#define EFLAG_UNDERFLOW		(1 << (31 - 4))
+#define EFLAG_DIVZERO		(1 << (31 - 5))
+#define EFLAG_INEXACT		(1 << (31 - 6))
+
+#define EFLAG_VXSNAN		(1 << (31 - 7))
+#define EFLAG_VXISI		(1 << (31 - 8))
+#define EFLAG_VXIDI		(1 << (31 - 9))
+#define EFLAG_VXZDZ		(1 << (31 - 10))
+#define EFLAG_VXIMZ		(1 << (31 - 11))
+#define EFLAG_VXVC		(1 << (31 - 12))
+#define EFLAG_VXSOFT		(1 << (31 - 21))
+#define EFLAG_VXSQRT		(1 << (31 - 22))
+#define EFLAG_VXCVI		(1 << (31 - 23))
diff --git a/arch/powerpc/math-emu/single.h b/arch/powerpc/math-emu/single.h
new file mode 100644
index 00000000000..f19d9945181
--- /dev/null
+++ b/arch/powerpc/math-emu/single.h
@@ -0,0 +1,66 @@
+/*
+ * Definitions for IEEE Single Precision
+ */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#define _FP_FRACBITS_S		24
+#define _FP_FRACXBITS_S		(_FP_W_TYPE_SIZE - _FP_FRACBITS_S)
+#define _FP_WFRACBITS_S		(_FP_WORKBITS + _FP_FRACBITS_S)
+#define _FP_WFRACXBITS_S	(_FP_W_TYPE_SIZE - _FP_WFRACBITS_S)
+#define _FP_EXPBITS_S		8
+#define _FP_EXPBIAS_S		127
+#define _FP_EXPMAX_S		255
+#define _FP_QNANBIT_S		((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2))
+#define _FP_IMPLBIT_S		((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1))
+#define _FP_OVERFLOW_S		((_FP_W_TYPE)1 << (_FP_WFRACBITS_S))
+
+/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
+   chosen by the target machine.  */
+
+union _FP_UNION_S
+{
+  float flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+#else
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_S(X)		_FP_DECL(1,X)
+#define FP_UNPACK_RAW_S(X,val)	_FP_UNPACK_RAW_1(S,X,val)
+#define FP_PACK_RAW_S(val,X)	_FP_PACK_RAW_1(S,val,X)
+
+#define FP_UNPACK_S(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1(S,X,val);		\
+    _FP_UNPACK_CANONICAL(S,1,X);	\
+  } while (0)
+
+#define FP_PACK_S(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(S,1,X);		\
+    _FP_PACK_RAW_1(S,val,X);		\
+  } while (0)
+
+#define FP_NEG_S(R,X)		_FP_NEG(S,1,R,X)
+#define FP_ADD_S(R,X,Y)		_FP_ADD(S,1,R,X,Y)
+#define FP_SUB_S(R,X,Y)		_FP_SUB(S,1,R,X,Y)
+#define FP_MUL_S(R,X,Y)		_FP_MUL(S,1,R,X,Y)
+#define FP_DIV_S(R,X,Y)		_FP_DIV(S,1,R,X,Y)
+#define FP_SQRT_S(R,X)		_FP_SQRT(S,1,R,X)
+
+#define FP_CMP_S(r,X,Y,un)	_FP_CMP(S,1,r,X,Y,un)
+#define FP_CMP_EQ_S(r,X,Y)	_FP_CMP_EQ(S,1,r,X,Y)
+
+#define FP_TO_INT_S(r,X,rsz,rsg)  _FP_TO_INT(S,1,r,X,rsz,rsg)
+#define FP_FROM_INT_S(X,r,rs,rt)  _FP_FROM_INT(S,1,X,r,rs,rt)
diff --git a/arch/powerpc/math-emu/soft-fp.h b/arch/powerpc/math-emu/soft-fp.h
new file mode 100644
index 00000000000..cca39598f87
--- /dev/null
+++ b/arch/powerpc/math-emu/soft-fp.h
@@ -0,0 +1,104 @@
+#ifndef SOFT_FP_H
+#define SOFT_FP_H
+
+#include "sfp-machine.h"
+
+#define _FP_WORKBITS		3
+#define _FP_WORK_LSB		((_FP_W_TYPE)1 << 3)
+#define _FP_WORK_ROUND		((_FP_W_TYPE)1 << 2)
+#define _FP_WORK_GUARD		((_FP_W_TYPE)1 << 1)
+#define _FP_WORK_STICKY		((_FP_W_TYPE)1 << 0)
+
+#ifndef FP_RND_NEAREST
+# define FP_RND_NEAREST		0
+# define FP_RND_ZERO		1
+# define FP_RND_PINF		2
+# define FP_RND_MINF		3
+#ifndef FP_ROUNDMODE
+# define FP_ROUNDMODE		FP_RND_NEAREST
+#endif
+#endif
+
+#define _FP_ROUND_NEAREST(wc, X)			\
+({  int __ret = 0;					\
+    int __frac = _FP_FRAC_LOW_##wc(X) & 15;		\
+    if (__frac & 7) {					\
+      __ret = EFLAG_INEXACT;				\
+      if ((__frac & 7) != _FP_WORK_ROUND)		\
+        _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND);		\
+      else if (__frac & _FP_WORK_LSB)			\
+        _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND);		\
+    }							\
+    __ret;						\
+})
+
+#define _FP_ROUND_ZERO(wc, X)				\
+({  int __ret = 0;					\
+    if (_FP_FRAC_LOW_##wc(X) & 7)			\
+      __ret = EFLAG_INEXACT;				\
+    __ret;						\
+})
+
+#define _FP_ROUND_PINF(wc, X)				\
+({  int __ret = EFLAG_INEXACT;				\
+    if (!X##_s && (_FP_FRAC_LOW_##wc(X) & 7))		\
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);		\
+    else __ret = 0;					\
+    __ret;						\
+})
+
+#define _FP_ROUND_MINF(wc, X)				\
+({  int __ret = EFLAG_INEXACT;				\
+    if (X##_s && (_FP_FRAC_LOW_##wc(X) & 7))		\
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);		\
+    else __ret = 0;					\
+    __ret;						\
+})
+
+#define _FP_ROUND(wc, X)			\
+({	int __ret = 0;				\
+	switch (FP_ROUNDMODE)			\
+	{					\
+	  case FP_RND_NEAREST:			\
+	    __ret |= _FP_ROUND_NEAREST(wc,X);	\
+	    break;				\
+	  case FP_RND_ZERO:			\
+	    __ret |= _FP_ROUND_ZERO(wc,X);	\
+	    break;				\
+	  case FP_RND_PINF:			\
+	    __ret |= _FP_ROUND_PINF(wc,X);	\
+	    break;				\
+	  case FP_RND_MINF:			\
+	    __ret |= _FP_ROUND_MINF(wc,X);	\
+	    break;				\
+	};					\
+	__ret;					\
+})
+
+#define FP_CLS_NORMAL		0
+#define FP_CLS_ZERO		1
+#define FP_CLS_INF		2
+#define FP_CLS_NAN		3
+
+#define _FP_CLS_COMBINE(x,y)	(((x) << 2) | (y))
+
+#include "op-1.h"
+#include "op-2.h"
+#include "op-4.h"
+#include "op-common.h"
+
+/* Sigh.  Silly things longlong.h needs.  */
+#define UWtype		_FP_W_TYPE
+#define W_TYPE_SIZE	_FP_W_TYPE_SIZE
+
+typedef int SItype __attribute__((mode(SI)));
+typedef int DItype __attribute__((mode(DI)));
+typedef unsigned int USItype __attribute__((mode(SI)));
+typedef unsigned int UDItype __attribute__((mode(DI)));
+#if _FP_W_TYPE_SIZE == 32
+typedef unsigned int UHWtype __attribute__((mode(HI)));
+#elif _FP_W_TYPE_SIZE == 64
+typedef USItype UHWtype;
+#endif
+
+#endif
diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c
new file mode 100644
index 00000000000..3f8c2558a9e
--- /dev/null
+++ b/arch/powerpc/math-emu/stfd.c
@@ -0,0 +1,20 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+stfd(void *frS, void *ea)
+{
+#if 0
+#ifdef DEBUG
+	printk("%s: S %p, ea %p: ", __FUNCTION__, frS, ea);
+	dump_double(frS);
+	printk("\n");
+#endif
+#endif
+
+	if (copy_to_user(ea, frS, sizeof(double)))
+		return -EFAULT;
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c
new file mode 100644
index 00000000000..95caaeec6a0
--- /dev/null
+++ b/arch/powerpc/math-emu/stfiwx.c
@@ -0,0 +1,16 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+int
+stfiwx(u32 *frS, void *ea)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __FUNCTION__, frS, ea);
+#endif
+
+	if (copy_to_user(ea, &frS[1], sizeof(frS[1])))
+		return -EFAULT;
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c
new file mode 100644
index 00000000000..e87ca23c6dc
--- /dev/null
+++ b/arch/powerpc/math-emu/stfs.c
@@ -0,0 +1,41 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+int
+stfs(void *frS, void *ea)
+{
+	FP_DECL_D(A);
+	FP_DECL_S(R);
+	float f;
+	int err;
+
+#ifdef DEBUG
+	printk("%s: S %p, ea %p\n", __FUNCTION__, frS, ea);
+#endif
+
+	__FP_UNPACK_D(A, frS);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+#endif
+
+	FP_CONV(S, D, 1, 2, R, A);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %ld (%ld)\n", R_s, R_f, R_e, R_c);
+#endif
+
+	err = _FP_PACK_CANONICAL(S, 1, R);
+	if (!err || !__FPU_TRAP_P(err)) {
+		__FP_PACK_RAW_1(S, &f, R);
+		if (copy_to_user(ea, &f, sizeof(float)))
+			return -EFAULT;
+	}
+
+	return err;
+}
diff --git a/arch/powerpc/math-emu/types.c b/arch/powerpc/math-emu/types.c
new file mode 100644
index 00000000000..e1ed15d829d
--- /dev/null
+++ b/arch/powerpc/math-emu/types.c
@@ -0,0 +1,51 @@
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+
+void
+fp_unpack_d(long *_s, unsigned long *_f1, unsigned long *_f0,
+	    long *_e, long *_c, void *val)
+{
+	FP_DECL_D(X);
+
+	__FP_UNPACK_RAW_2(D, X, val);
+
+	_FP_UNPACK_CANONICAL(D, 2, X);
+
+	*_s = X_s;
+	*_f1 = X_f1;
+	*_f0 = X_f0;
+	*_e = X_e;
+	*_c = X_c;
+}
+
+int
+fp_pack_d(void *val, long X_s, unsigned long X_f1,
+	  unsigned long X_f0, long X_e, long X_c)
+{
+	int exc;
+
+	exc = _FP_PACK_CANONICAL(D, 2, X);
+	if (!exc || !__FPU_TRAP_P(exc))
+		__FP_PACK_RAW_2(D, val, X);
+	return exc;
+}
+
+int
+fp_pack_ds(void *val, long X_s, unsigned long X_f1,
+	   unsigned long X_f0, long X_e, long X_c)
+{
+	FP_DECL_S(__X);
+	int exc;
+
+	FP_CONV(S, D, 1, 2, __X, X);
+	exc = _FP_PACK_CANONICAL(S, 1, __X);
+	if (!exc || !__FPU_TRAP_P(exc)) {
+		_FP_UNPACK_CANONICAL(S, 1, __X);
+		FP_CONV(D, S, 2, 1, X, __X);
+		exc |= _FP_PACK_CANONICAL(D, 2, X);
+		if (!exc || !__FPU_TRAP_P(exc))
+			__FP_PACK_RAW_2(D, val, X);
+	}
+	return exc;
+}
diff --git a/arch/powerpc/math-emu/udivmodti4.c b/arch/powerpc/math-emu/udivmodti4.c
new file mode 100644
index 00000000000..7e112dc1e2f
--- /dev/null
+++ b/arch/powerpc/math-emu/udivmodti4.c
@@ -0,0 +1,191 @@
+/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny.  */
+
+#include "soft-fp.h"
+
+#undef count_leading_zeros
+#define count_leading_zeros  __FP_CLZ
+
+void
+_fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],
+	       _FP_W_TYPE n1, _FP_W_TYPE n0,
+	       _FP_W_TYPE d1, _FP_W_TYPE d0)
+{
+  _FP_W_TYPE q0, q1, r0, r1;
+  _FP_I_TYPE b, bm;
+
+  if (d1 == 0)
+    {
+#if !UDIV_NEEDS_NORMALIZATION
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  udiv_qrnnd (q1, n1, 0, n1, d0);
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0.  */
+	}
+
+      r0 = n0;
+      r1 = 0;
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm != 0)
+	    {
+	      /* Normalize, i.e. make the most significant bit of the
+		 denominator set.  */
+
+	      d0 = d0 << bm;
+	      n1 = (n1 << bm) | (n0 >> (_FP_W_TYPE_SIZE - bm));
+	      n0 = n0 << bm;
+	    }
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0 >> bm.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 leading quotient digit q1 = 1).
+
+		 This special case is necessary, not an optimization.
+		 (Shifts counts of SI_TYPE_SIZE are undefined.)  */
+
+	      n1 -= d0;
+	      q1 = 1;
+	    }
+	  else
+	    {
+	      _FP_W_TYPE n2;
+
+	      /* Normalize.  */
+
+	      b = _FP_W_TYPE_SIZE - bm;
+
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q1, n1, n2, n1, d0);
+	    }
+
+	  /* n1 != d0...  */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0 >> bm.  */
+	}
+
+      r0 = n0 >> bm;
+      r1 = 0;
+#endif /* UDIV_NEEDS_NORMALIZATION */
+    }
+  else
+    {
+      if (d1 > n1)
+	{
+	  /* 00 = nn / DD */
+
+	  q0 = 0;
+	  q1 = 0;
+
+	  /* Remainder in n1n0.  */
+	  r0 = n0;
+	  r1 = n1;
+	}
+      else
+	{
+	  /* 0q = NN / dd */
+
+	  count_leading_zeros (bm, d1);
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 quotient digit q0 = 0 or 1).
+
+		 This special case is necessary, not an optimization.  */
+
+	      /* The condition on the next line takes advantage of that
+		 n1 >= d1 (true due to program flow).  */
+	      if (n1 > d1 || n0 >= d0)
+		{
+		  q0 = 1;
+		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+		}
+	      else
+		q0 = 0;
+
+	      q1 = 0;
+
+	      r0 = n0;
+	      r1 = n1;
+	    }
+	  else
+	    {
+	      _FP_W_TYPE m1, m0, n2;
+
+	      /* Normalize.  */
+
+	      b = _FP_W_TYPE_SIZE - bm;
+
+	      d1 = (d1 << bm) | (d0 >> b);
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q0, n1, n2, n1, d1);
+	      umul_ppmm (m1, m0, q0, d0);
+
+	      if (m1 > n1 || (m1 == n1 && m0 > n0))
+		{
+		  q0--;
+		  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+		}
+
+	      q1 = 0;
+
+	      /* Remainder in (n1n0 - m1m0) >> bm.  */
+	      sub_ddmmss (n1, n0, n1, n0, m1, m0);
+	      r0 = (n1 << b) | (n0 >> bm);
+	      r1 = n1 >> bm;
+	    }
+	}
+    }
+
+  q[0] = q0; q[1] = q1;
+  r[0] = r0, r[1] = r1;
+}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec4adcb4bc2..5aea0909a5e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -267,25 +267,29 @@ good_area:
 #endif
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 		pte_t *ptep;
+		pmd_t *pmdp;
 
 		/* Since 4xx/Book-E supports per-page execute permission,
 		 * we lazily flush dcache to icache. */
 		ptep = NULL;
-		if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
-			struct page *page = pte_page(*ptep);
-
-			if (! test_bit(PG_arch_1, &page->flags)) {
-				flush_dcache_icache_page(page);
-				set_bit(PG_arch_1, &page->flags);
+		if (get_pteptr(mm, address, &ptep, &pmdp)) {
+			spinlock_t *ptl = pte_lockptr(mm, pmdp);
+			spin_lock(ptl);
+			if (pte_present(*ptep)) {
+				struct page *page = pte_page(*ptep);
+
+				if (!test_bit(PG_arch_1, &page->flags)) {
+					flush_dcache_icache_page(page);
+					set_bit(PG_arch_1, &page->flags);
+				}
+				pte_update(ptep, 0, _PAGE_HWEXEC);
+				_tlbie(address);
+				pte_unmap_unlock(ptep, ptl);
+				up_read(&mm->mmap_sem);
+				return 0;
 			}
-			pte_update(ptep, 0, _PAGE_HWEXEC);
-			_tlbie(address);
-			pte_unmap(ptep);
-			up_read(&mm->mmap_sem);
-			return 0;
+			pte_unmap_unlock(ptep, ptl);
 		}
-		if (ptep != NULL)
-			pte_unmap(ptep);
 #endif
 	/* a write */
 	} else if (is_write) {
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 89b35c18131..c006d903963 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -167,7 +167,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		 * normal insert callback here.
 		 */
 #ifdef CONFIG_PPC_ISERIES
-		if (_machine == PLATFORM_ISERIES_LPAR)
+		if (machine_is(iseries))
 			ret = iSeries_hpte_insert(hpteg, va,
 						  paddr,
 						  tmp_mode,
@@ -176,7 +176,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 		else
 #endif
 #ifdef CONFIG_PPC_PSERIES
-		if (_machine & PLATFORM_LPAR)
+		if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR))
 			ret = pSeries_lpar_hpte_insert(hpteg, va,
 						       paddr,
 						       tmp_mode,
@@ -295,8 +295,7 @@ static void __init htab_init_page_sizes(void)
 	 * Not in the device-tree, let's fallback on known size
 	 * list for 16M capable GP & GR
 	 */
-	if ((_machine != PLATFORM_ISERIES_LPAR) &&
-	    cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (cpu_has_feature(CPU_FTR_16M_PAGE) && !machine_is(iseries))
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
  found:
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5e435a9c343..741dd8802d4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -342,7 +342,7 @@ void __init mem_init(void)
 #ifdef CONFIG_NEED_MULTIPLE_NODES
         for_each_online_node(nid) {
 		if (NODE_DATA(nid)->node_spanned_pages != 0) {
-			printk("freeing bootmem node %x\n", nid);
+			printk("freeing bootmem node %d\n", nid);
 			totalram_pages +=
 				free_all_bootmem_node(NODE_DATA(nid));
 		}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index e89b22aa539..0a335f34974 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -756,6 +756,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	struct device_node *memory = NULL;
 	nodemask_t nodes;
 	int default_nid = any_online_node(NODE_MASK_ALL);
+	int nid;
 
 	if (!numa_enabled || (min_common_depth < 0))
 		return default_nid;
@@ -790,6 +791,7 @@ ha_new_range:
 			goto ha_new_range;
 	}
 	BUG();	/* section address should be found above */
+	return 0;
 
 	/* Temporary code to ensure that returned node is not empty */
 got_nid:
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d296eb6b454..90628601fac 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -372,7 +372,7 @@ void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
  * the PTE pointer is unmodified if PTE is not found.
  */
 int
-get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
+get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
 {
         pgd_t	*pgd;
         pmd_t	*pmd;
@@ -387,6 +387,8 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
                         if (pte) {
 				retval = 1;
 				*ptep = pte;
+				if (pmdp)
+					*pmdp = pmd;
 				/* XXX caller needs to do pte_unmap, yuck */
                         }
                 }
@@ -424,7 +426,7 @@ unsigned long iopa(unsigned long addr)
 		mm = &init_mm;
 
 	pa = 0;
-	if (get_pteptr(mm, addr, &pte)) {
+	if (get_pteptr(mm, addr, &pte, NULL)) {
 		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
 		pte_unmap(pte);
 	}
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 91d25fb27f8..4a9291d9fef 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -239,7 +239,7 @@ void stabs_alloc(void)
 	if (cpu_has_feature(CPU_FTR_SLB))
 		return;
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		unsigned long newstab;
 
 		if (cpu == 0)
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 554cd7c7532..f5f9859a833 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -6,7 +6,7 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o \
 		timer_int.o )
 
-oprofile-y := $(DRIVER_OBJS) common.o
+oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_PPC32) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
new file mode 100644
index 00000000000..75f57bc96b4
--- /dev/null
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -0,0 +1,126 @@
+/**
+ * Copyright (C) 2005 Brian Rogan <bcr6@cornell.edu>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+**/
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+#define STACK_SP(STACK)		*(STACK)
+
+#define STACK_LR64(STACK)	*((unsigned long *)(STACK) + 2)
+#define STACK_LR32(STACK)	*((unsigned int *)(STACK) + 1)
+
+#ifdef CONFIG_PPC64
+#define STACK_LR(STACK)		STACK_LR64(STACK)
+#else
+#define STACK_LR(STACK)		STACK_LR32(STACK)
+#endif
+
+static unsigned int user_getsp32(unsigned int sp, int is_first)
+{
+	unsigned int stack_frame[2];
+
+	if (!access_ok(VERIFY_READ, sp, sizeof(stack_frame)))
+		return 0;
+
+	/*
+	 * The most likely reason for this is that we returned -EFAULT,
+	 * which means that we've done all that we can do from
+	 * interrupt context.
+	 */
+	if (__copy_from_user_inatomic(stack_frame, (void *)(long)sp,
+					sizeof(stack_frame)))
+		return 0;
+
+	if (!is_first)
+		oprofile_add_trace(STACK_LR32(stack_frame));
+
+	/*
+	 * We do not enforce increasing stack addresses here because
+	 * we may transition to a different stack, eg a signal handler.
+	 */
+	return STACK_SP(stack_frame);
+}
+
+#ifdef CONFIG_PPC64
+static unsigned long user_getsp64(unsigned long sp, int is_first)
+{
+	unsigned long stack_frame[3];
+
+	if (!access_ok(VERIFY_READ, sp, sizeof(stack_frame)))
+		return 0;
+
+	if (__copy_from_user_inatomic(stack_frame, (void *)sp,
+					sizeof(stack_frame)))
+		return 0;
+
+	if (!is_first)
+		oprofile_add_trace(STACK_LR64(stack_frame));
+
+	return STACK_SP(stack_frame);
+}
+#endif
+
+static unsigned long kernel_getsp(unsigned long sp, int is_first)
+{
+	unsigned long *stack_frame = (unsigned long *)sp;
+
+	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+		return 0;
+
+	if (!is_first)
+		oprofile_add_trace(STACK_LR(stack_frame));
+
+	/*
+	 * We do not enforce increasing stack addresses here because
+	 * we might be transitioning from an interrupt stack to a kernel
+	 * stack. validate_sp() is designed to understand this, so just
+	 * use it.
+	 */
+	return STACK_SP(stack_frame);
+}
+
+void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	unsigned long sp = regs->gpr[1];
+	int first_frame = 1;
+
+	/* We ditch the top stackframe so need to loop through an extra time */
+	depth += 1;
+
+	if (!user_mode(regs)) {
+		while (depth--) {
+			sp = kernel_getsp(sp, first_frame);
+			if (!sp)
+				break;
+			first_frame = 0;
+		}
+	} else {
+#ifdef CONFIG_PPC64
+		if (!test_thread_flag(TIF_32BIT)) {
+			while (depth--) {
+				sp = user_getsp64(sp, first_frame);
+				if (!sp)
+					break;
+				first_frame = 0;
+			}
+
+			return;
+		}
+#endif
+
+		while (depth--) {
+			sp = user_getsp32(sp, first_frame);
+			if (!sp)
+				break;
+			first_frame = 0;
+		}
+	}
+}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index cc2535be3a7..5b1de7e8041 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -117,18 +117,10 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 
 	oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel);
 	oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user);
-#ifdef CONFIG_PPC64
-	oprofilefs_create_ulong(sb, root, "backtrace_spinlocks",
-				&sys.backtrace_spinlocks);
-#endif
 
 	/* Default to tracing both kernel and user */
 	sys.enable_kernel = 1;
 	sys.enable_user = 1;
-#ifdef CONFIG_PPC64
-	/* Turn on backtracing through spinlocks by default */
-	sys.backtrace_spinlocks = 1;
-#endif
 
 	return 0;
 }
@@ -168,6 +160,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	ops->shutdown = op_powerpc_shutdown;
 	ops->start = op_powerpc_start;
 	ops->stop = op_powerpc_stop;
+	ops->backtrace = op_powerpc_backtrace;
 
 	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
 	       ops->cpu_type);
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index 32abfdbb0eb..e0491c3c71f 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -176,13 +176,13 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
 	mtmsr(mfmsr() | MSR_PMM);
 
 	pc = mfspr(SPRN_SIAR);
-	is_kernel = (pc >= KERNELBASE);
+	is_kernel = is_kernel_addr(pc);
 
 	for (i = 0; i < NUM_CTRS; ++i) {
 		val = ctr_read(i);
 		if (val < 0) {
 			if (oprofile_running && ctr[i].enabled) {
-				oprofile_add_pc(pc, is_kernel, i);
+				oprofile_add_ext_sample(pc, regs, i, is_kernel);
 				ctr_write(i, reset_value[i]);
 			} else {
 				ctr_write(i, 0);
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c
index 26539cda602..93d63e62662 100644
--- a/arch/powerpc/oprofile/op_model_fsl_booke.c
+++ b/arch/powerpc/oprofile/op_model_fsl_booke.c
@@ -154,13 +154,13 @@ static void fsl_booke_handle_interrupt(struct pt_regs *regs,
 	mtmsr(mfmsr() | MSR_PMM);
 
 	pc = regs->nip;
-	is_kernel = (pc >= KERNELBASE);
+	is_kernel = is_kernel_addr(pc);
 
 	for (i = 0; i < num_counters; ++i) {
 		val = ctr_read(i);
 		if (val < 0) {
 			if (oprofile_running && ctr[i].enabled) {
-				oprofile_add_pc(pc, is_kernel, i);
+				oprofile_add_ext_sample(pc, regs, i, is_kernel);
 				ctr_write(i, reset_value[i]);
 			} else {
 				ctr_write(i, 0);
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index 4b06e53eb9b..4c2beab1fdc 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -25,18 +25,14 @@ static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int oprofile_running;
 static int mmcra_has_sihv;
+/* Unfortunately these bits vary between CPUs */
+static unsigned long mmcra_sihv = MMCRA_SIHV;
+static unsigned long mmcra_sipr = MMCRA_SIPR;
 
 /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
 static u32 mmcr0_val;
 static u64 mmcr1_val;
-static u32 mmcra_val;
-
-/*
- * Since we do not have an NMI, backtracing through spinlocks is
- * only a best guess. In light of this, allow it to be disabled at
- * runtime.
- */
-static int backtrace_spinlocks;
+static u64 mmcra_val;
 
 static void power4_reg_setup(struct op_counter_config *ctr,
 			     struct op_system_config *sys,
@@ -63,8 +59,6 @@ static void power4_reg_setup(struct op_counter_config *ctr,
 	mmcr1_val = sys->mmcr1;
 	mmcra_val = sys->mmcra;
 
-	backtrace_spinlocks = sys->backtrace_spinlocks;
-
 	for (i = 0; i < cur_cpu_spec->num_pmcs; ++i)
 		reset_value[i] = 0x80000000UL - ctr[i].count;
 
@@ -197,25 +191,6 @@ static void __attribute_used__ kernel_unknown_bucket(void)
 {
 }
 
-static unsigned long check_spinlock_pc(struct pt_regs *regs,
-				       unsigned long profile_pc)
-{
-	unsigned long pc = instruction_pointer(regs);
-
-	/*
-	 * If both the SIAR (sampled instruction) and the perfmon exception
-	 * occurred in a spinlock region then we account the sample to the
-	 * calling function. This isnt 100% correct, we really need soft
-	 * IRQ disable so we always get the perfmon exception at the
-	 * point at which the SIAR is set.
-	 */
-	if (backtrace_spinlocks && in_lock_functions(pc) &&
-			in_lock_functions(profile_pc))
-		return regs->link;
-	else
-		return profile_pc;
-}
-
 /*
  * On GQ and newer the MMCRA stores the HV and PR bits at the time
  * the SIAR was sampled. We use that to work out if the SIAR was sampled in
@@ -228,17 +203,17 @@ static unsigned long get_pc(struct pt_regs *regs)
 
 	/* Cant do much about it */
 	if (!mmcra_has_sihv)
-		return check_spinlock_pc(regs, pc);
+		return pc;
 
 	mmcra = mfspr(SPRN_MMCRA);
 
 	/* Were we in the hypervisor? */
-	if (firmware_has_feature(FW_FEATURE_LPAR) && (mmcra & MMCRA_SIHV))
+	if (firmware_has_feature(FW_FEATURE_LPAR) && (mmcra & mmcra_sihv))
 		/* function descriptor madness */
 		return *((unsigned long *)hypervisor_bucket);
 
 	/* We were in userspace, nothing to do */
-	if (mmcra & MMCRA_SIPR)
+	if (mmcra & mmcra_sipr)
 		return pc;
 
 #ifdef CONFIG_PPC_RTAS
@@ -257,7 +232,7 @@ static unsigned long get_pc(struct pt_regs *regs)
 		/* function descriptor madness */
 		return *((unsigned long *)kernel_unknown_bucket);
 
-	return check_spinlock_pc(regs, pc);
+	return pc;
 }
 
 static int get_kernel(unsigned long pc)
@@ -268,7 +243,7 @@ static int get_kernel(unsigned long pc)
 		is_kernel = is_kernel_addr(pc);
 	} else {
 		unsigned long mmcra = mfspr(SPRN_MMCRA);
-		is_kernel = ((mmcra & MMCRA_SIPR) == 0);
+		is_kernel = ((mmcra & mmcra_sipr) == 0);
 	}
 
 	return is_kernel;
@@ -293,7 +268,7 @@ static void power4_handle_interrupt(struct pt_regs *regs,
 		val = ctr_read(i);
 		if (val < 0) {
 			if (oprofile_running && ctr[i].enabled) {
-				oprofile_add_pc(pc, is_kernel, i);
+				oprofile_add_ext_sample(pc, regs, i, is_kernel);
 				ctr_write(i, reset_value[i]);
 			} else {
 				ctr_write(i, 0);
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
index 5c909ee609f..042f8f4867a 100644
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ b/arch/powerpc/oprofile/op_model_rs64.c
@@ -175,10 +175,13 @@ static void rs64_handle_interrupt(struct pt_regs *regs,
 				  struct op_counter_config *ctr)
 {
 	unsigned int mmcr0;
+	int is_kernel;
 	int val;
 	int i;
 	unsigned long pc = mfspr(SPRN_SIAR);
 
+	is_kernel = is_kernel_addr(pc);
+
 	/* set the PMM bit (see comment below) */
 	mtmsrd(mfmsr() | MSR_PMM);
 
@@ -186,7 +189,7 @@ static void rs64_handle_interrupt(struct pt_regs *regs,
 		val = ctr_read(i);
 		if (val < 0) {
 			if (ctr[i].enabled) {
-				oprofile_add_pc(pc, is_kernel_addr(pc), i);
+				oprofile_add_ext_sample(pc, regs, i, is_kernel);
 				ctr_write(i, reset_value[i]);
 			} else {
 				ctr_write(i, 0);
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index d3d0ff745e8..06e371282f5 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -7,6 +7,7 @@ choice
 
 config MPC8540_ADS
 	bool "Freescale MPC8540 ADS"
+	select DEFAULT_UIMAGE
 	help
 	  This option enables support for the MPC 8540 ADS board
 
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 3157071e241..c2a3db8edb0 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -10,4 +10,9 @@ config SPU_FS
 	  Units on machines implementing the Broadband Processor
 	  Architecture.
 
+config SPUFS_MMAP
+	bool
+	depends on SPU_FS && SPARSEMEM && !PPC_64K_PAGES
+	default y
+
 endmenu
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 3b998a393e3..e570bad0639 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -6,5 +6,11 @@ obj-$(CONFIG_SPU_FS)	+= spu-base.o spufs/
 
 spu-base-y		+= spu_base.o spu_priv1.o
 
-builtin-spufs-$(CONFIG_SPU_FS)	+= spu_syscalls.o
-obj-y			+= $(builtin-spufs-m)
+# needed only when building loadable spufs.ko
+spufs-modular-$(CONFIG_SPU_FS) += spu_syscalls.o
+obj-y			+= $(spufs-modular-m)
+
+# always needed in kernel
+spufs-builtin-$(CONFIG_SPU_FS) += spu_callbacks.o
+obj-y			+= $(spufs-builtin-y) $(spufs-builtin-m)
+
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 63aa52acf44..978be1c30c1 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -63,7 +63,24 @@ static DEFINE_PER_CPU(struct iic, iic);
 
 void iic_local_enable(void)
 {
-	out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+	struct iic *iic = &__get_cpu_var(iic);
+	u64 tmp;
+
+	/*
+	 * There seems to be a bug that is present in DD2.x CPUs
+	 * and still only partially fixed in DD3.1.
+	 * This bug causes a value written to the priority register
+	 * not to make it there, resulting in a system hang unless we
+	 * write it again.
+	 * Masking with 0xf0 is done because the Cell BE does not
+	 * implement the lower four bits of the interrupt priority,
+	 * they always read back as zeroes, although future CPUs
+	 * might implement different bits.
+	 */
+	do {
+		out_be64(&iic->regs->prio, 0xff);
+		tmp = in_be64(&iic->regs->prio);
+	} while ((tmp & 0xf0) != 0xf0);
 }
 
 void iic_local_disable(void)
@@ -123,7 +140,7 @@ static int iic_external_get_irq(struct iic_pending_bits pending)
 		    pending.class != 2)
 			break;
 		irq = IIC_EXT_OFFSET
-			+ spider_get_irq(pending.prio + node * IIC_NODE_STRIDE)
+			+ spider_get_irq(node)
 			+ node * IIC_NODE_STRIDE;
 		break;
 	case 0x01 ... 0x04:
@@ -174,38 +191,98 @@ int iic_get_irq(struct pt_regs *regs)
 	return irq;
 }
 
-static int setup_iic(int cpu, struct iic *iic)
+/* hardcoded part to be compatible with older firmware */
+
+static int setup_iic_hardcoded(void)
 {
 	struct device_node *np;
-	int nodeid = cpu / 2;
+	int nodeid, cpu;
 	unsigned long regs;
+	struct iic *iic;
 
-	for (np = of_find_node_by_type(NULL, "cpu");
-	     np;
-	     np = of_find_node_by_type(np, "cpu")) {
-		if (nodeid == *(int *)get_property(np, "node-id", NULL))
-			break;
+	for_each_cpu(cpu) {
+		iic = &per_cpu(iic, cpu);
+		nodeid = cpu/2;
+
+		for (np = of_find_node_by_type(NULL, "cpu");
+		     np;
+		     np = of_find_node_by_type(np, "cpu")) {
+			if (nodeid == *(int *)get_property(np, "node-id", NULL))
+				break;
+			}
+
+		if (!np) {
+			printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
+			iic->regs = NULL;
+			iic->target_id = 0xff;
+			return -ENODEV;
+			}
+
+		regs = *(long *)get_property(np, "iic", NULL);
+
+		/* hack until we have decided on the devtree info */
+		regs += 0x400;
+		if (cpu & 1)
+			regs += 0x20;
+
+		printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs);
+		iic->regs = ioremap(regs, sizeof(struct iic_regs));
+		iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
 	}
 
-	if (!np) {
-		printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
-		iic->regs = NULL;
-		iic->target_id = 0xff;
-		return -ENODEV;
-	}
+	return 0;
+}
 
-	regs = *(long *)get_property(np, "iic", NULL);
+static int setup_iic(void)
+{
+	struct device_node *dn;
+	unsigned long *regs;
+	char *compatible;
+ 	unsigned *np, found = 0;
+	struct iic *iic = NULL;
+
+	for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		compatible = (char *)get_property(dn, "compatible", NULL);
+
+		if (!compatible) {
+			printk(KERN_WARNING "no compatible property found !\n");
+			continue;
+		}
 
-	/* hack until we have decided on the devtree info */
-	regs += 0x400;
-	if (cpu & 1)
-		regs += 0x20;
+ 		if (strstr(compatible, "IBM,CBEA-Internal-Interrupt-Controller"))
+ 			regs = (unsigned long *)get_property(dn,"reg", NULL);
+ 		else
+			continue;
 
-	printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
-	iic->regs = __ioremap(regs, sizeof(struct iic_regs),
-					 _PAGE_NO_CACHE);
-	iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
-	return 0;
+ 		if (!regs)
+ 			printk(KERN_WARNING "IIC: no reg property\n");
+
+ 		np = (unsigned int *)get_property(dn, "ibm,interrupt-server-ranges", NULL);
+
+ 		if (!np) {
+			printk(KERN_WARNING "IIC: CPU association not found\n");
+			iic->regs = NULL;
+			iic->target_id = 0xff;
+			return -ENODEV;
+		}
+
+ 		iic = &per_cpu(iic, np[0]);
+ 		iic->regs = ioremap(regs[0], sizeof(struct iic_regs));
+		iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe);
+ 		printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs);
+
+ 		iic = &per_cpu(iic, np[1]);
+ 		iic->regs = ioremap(regs[2], sizeof(struct iic_regs));
+		iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 0xf : 0xe);
+ 		printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs);
+
+		found++;
+  	}
+
+	if (found)
+		return 0;
+	else
+		return -ENODEV;
 }
 
 #ifdef CONFIG_SMP
@@ -283,10 +360,12 @@ void iic_init_IRQ(void)
 	int cpu, irq_offset;
 	struct iic *iic;
 
+	if (setup_iic() < 0)
+		setup_iic_hardcoded();
+
 	irq_offset = 0;
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		iic = &per_cpu(iic, cpu);
-		setup_iic(cpu, iic);
 		if (iic->regs)
 			out_be64(&iic->regs->prio, 0xff);
 	}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
index a14bd38791c..799f77d98f9 100644
--- a/arch/powerpc/platforms/cell/interrupt.h
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -57,7 +57,7 @@ extern void iic_local_disable(void);
 extern u8 iic_get_target_id(int cpu);
 
 extern void spider_init_IRQ(void);
-extern int spider_get_irq(unsigned long int_pending);
+extern int spider_get_irq(int node);
 
 #endif
 #endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 46e7cb9c3e6..a49ceb799a8 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -289,7 +289,7 @@ static void cell_do_map_iommu(struct cell_iommu *iommu,
 	ioc_base = iommu->mapped_base;
 	ioc_mmio_base = iommu->mapped_mmio_base;
 
-	for (real_address = 0, io_address = 0;
+	for (real_address = 0, io_address = map_start;
 	     io_address <= map_start + map_size;
 	     real_address += io_page_size, io_address += io_page_size) {
 		ioste = get_iost_entry(fake_iopt, io_address, io_page_size);
@@ -302,7 +302,7 @@ static void cell_do_map_iommu(struct cell_iommu *iommu,
 		set_iopt_cache(ioc_mmio_base,
 			get_ioc_hash_1way(ioste, io_address),
 			get_ioc_tag(ioste, io_address),
-			get_iopt_entry(real_address-map_start, ioid, IOPT_PROT_RW));
+			get_iopt_entry(real_address, ioid, IOPT_PROT_RW));
 	}
 }
 
@@ -344,8 +344,8 @@ static int cell_map_iommu_hardcoded(int num_nodes)
 
 	/* node 0 */
 	iommu = &cell_iommus[0];
-	iommu->mapped_base = __ioremap(0x20000511000, 0x1000, _PAGE_NO_CACHE);
-	iommu->mapped_mmio_base = __ioremap(0x20000510000, 0x1000, _PAGE_NO_CACHE);
+	iommu->mapped_base = ioremap(0x20000511000, 0x1000);
+	iommu->mapped_mmio_base = ioremap(0x20000510000, 0x1000);
 
 	enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base);
 
@@ -357,8 +357,8 @@ static int cell_map_iommu_hardcoded(int num_nodes)
 
 	/* node 1 */
 	iommu = &cell_iommus[1];
-	iommu->mapped_base = __ioremap(0x30000511000, 0x1000, _PAGE_NO_CACHE);
-	iommu->mapped_mmio_base = __ioremap(0x30000510000, 0x1000, _PAGE_NO_CACHE);
+	iommu->mapped_base = ioremap(0x30000511000, 0x1000);
+	iommu->mapped_mmio_base = ioremap(0x30000510000, 0x1000);
 
 	enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base);
 
@@ -407,8 +407,8 @@ static int cell_map_iommu(void)
 		iommu->base = *base;
 		iommu->mmio_base = *mmio_base;
 
-		iommu->mapped_base = __ioremap(*base, 0x1000, _PAGE_NO_CACHE);
-		iommu->mapped_mmio_base = __ioremap(*mmio_base, 0x1000, _PAGE_NO_CACHE);
+		iommu->mapped_base = ioremap(*base, 0x1000);
+		iommu->mapped_mmio_base = ioremap(*mmio_base, 0x1000);
 
 		enable_mapping(iommu->mapped_base,
 			       iommu->mapped_mmio_base);
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index e0e051c675d..7eed8c62451 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -203,7 +203,7 @@ found:
 
 	pr_debug("pervasive area for CPU %d at %lx, size %x\n",
 			cpu, real_address, size);
-	p->regs = __ioremap(real_address, size, _PAGE_NO_CACHE);
+	p->regs = ioremap(real_address, size);
 	p->thread = thread;
 	return 0;
 }
@@ -217,7 +217,7 @@ void __init cell_pervasive_init(void)
 	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
 		return;
 
-	for_each_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		p = &cbe_pervasive[cpu];
 		ret = cbe_find_pmd_mmio(cpu, p);
 		if (ret)
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fec8e65b36e..dac5d0365fd 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -195,9 +195,13 @@ static void __init cell_init_early(void)
 }
 
 
-static int __init cell_probe(int platform)
+static int __init cell_probe(void)
 {
-	if (platform != PLATFORM_CELL)
+	/* XXX This is temporary, the Cell maintainer will come up with
+	 * more appropriate detection logic
+	 */
+	unsigned long root = of_get_flat_dt_root();
+	if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
 		return 0;
 
 	return 1;
@@ -212,7 +216,8 @@ static int cell_check_legacy_ioport(unsigned int baseport)
 	return -ENODEV;
 }
 
-struct machdep_calls __initdata cell_md = {
+define_machine(cell) {
+	.name			= "Cell",
 	.probe			= cell_probe,
 	.setup_arch		= cell_setup_arch,
 	.init_early		= cell_init_early,
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index e74132188bd..55cbdd77a62 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -84,10 +84,11 @@ static void __iomem *spider_get_irq_config(int irq)
 
 static void spider_enable_irq(unsigned int irq)
 {
+	int nodeid = (irq / IIC_NODE_STRIDE) * 0x10;
 	void __iomem *cfg = spider_get_irq_config(irq);
 	irq = spider_get_nr(irq);
 
-	out_be32(cfg, in_be32(cfg) | 0x3107000eu);
+	out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid);
 	out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq);
 }
 
@@ -131,61 +132,108 @@ static struct hw_interrupt_type spider_pic = {
 	.end = spider_end_irq,
 };
 
-
-int spider_get_irq(unsigned long int_pending)
+int spider_get_irq(int node)
 {
-	void __iomem *regs = spider_get_pic(int_pending);
 	unsigned long cs;
-	int irq;
-
-	cs = in_be32(regs + TIR_CS);
+	void __iomem *regs = spider_pics[node];
 
-	irq = cs >> 24;
-	if (irq != 63)
-		return irq;
+	cs = in_be32(regs + TIR_CS) >> 24;
 
-	return -1;
+	if (cs == 63)
+		return -1;
+	else
+		return cs;
 }
- 
-void spider_init_IRQ(void)
+
+/* hardcoded part to be compatible with older firmware */
+
+void spider_init_IRQ_hardcoded(void)
 {
 	int node;
-	struct device_node *dn;
-	unsigned int *property;
 	long spiderpic;
+	long pics[] = { 0x24000008000, 0x34000008000 };
 	int n;
 
-/* FIXME: detect multiple PICs as soon as the device tree has them */
-	for (node = 0; node < 1; node++) {
-		dn = of_find_node_by_path("/");
-		n = prom_n_addr_cells(dn);
-		property = (unsigned int *) get_property(dn,
-				"platform-spider-pic", NULL);
+	pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__);
 
-		if (!property)
-			continue;
-		for (spiderpic = 0; n > 0; --n)
-			spiderpic = (spiderpic << 32) + *property++;
+	for (node = 0; node < num_present_cpus()/2; node++) {
+		spiderpic = pics[node];
 		printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic);
-		spider_pics[node] = __ioremap(spiderpic, 0x800, _PAGE_NO_CACHE);
+		spider_pics[node] = ioremap(spiderpic, 0x800);
 		for (n = 0; n < IIC_NUM_EXT; n++) {
 			int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
 			get_irq_desc(irq)->handler = &spider_pic;
+		}
 
  		/* do not mask any interrupts because of level */
  		out_be32(spider_pics[node] + TIR_MSK, 0x0);
- 		
+
  		/* disable edge detection clear */
  		/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
- 		
+
  		/* enable interrupt packets to be output */
  		out_be32(spider_pics[node] + TIR_PIEN,
 			in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
- 		
+
  		/* Enable the interrupt detection enable bit. Do this last! */
  		out_be32(spider_pics[node] + TIR_DEN,
-			in_be32(spider_pics[node] +TIR_DEN) | 0x1);
+			in_be32(spider_pics[node] + TIR_DEN) | 0x1);
+	}
+}
+
+void spider_init_IRQ(void)
+{
+	long spider_reg;
+	struct device_node *dn;
+	char *compatible;
+	int n, node = 0;
+
+	for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		compatible = (char *)get_property(dn, "compatible", NULL);
 
+		if (!compatible)
+			continue;
+
+		if (strstr(compatible, "CBEA,platform-spider-pic"))
+			spider_reg = *(long *)get_property(dn,"reg", NULL);
+		else if (strstr(compatible, "sti,platform-spider-pic")) {
+			spider_init_IRQ_hardcoded();
+			return;
+		} else
+			continue;
+
+		if (!spider_reg)
+			printk("interrupt controller does not have reg property !\n");
+
+		n = prom_n_addr_cells(dn);
+
+		if ( n != 2)
+			printk("reg property with invalid number of elements \n");
+
+		spider_pics[node] = ioremap(spider_reg, 0x800);
+
+		printk("SPIDER addr: %lx with %i addr_cells mapped to %p\n",
+		       spider_reg, n, spider_pics[node]);
+
+		for (n = 0; n < IIC_NUM_EXT; n++) {
+			int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
+			get_irq_desc(irq)->handler = &spider_pic;
 		}
+
+		/* do not mask any interrupts because of level */
+		out_be32(spider_pics[node] + TIR_MSK, 0x0);
+
+		/* disable edge detection clear */
+		/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
+
+		/* enable interrupt packets to be output */
+		out_be32(spider_pics[node] + TIR_PIEN,
+			in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
+
+		/* Enable the interrupt detection enable bit. Do this last! */
+		out_be32(spider_pics[node] + TIR_DEN,
+			in_be32(spider_pics[node] + TIR_DEN) | 0x1);
+
+		node++;
 	}
 }
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index a8fa1eeeb17..269dda4fd0b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -111,7 +111,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
 static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 {
-	pr_debug("%s\n", __FUNCTION__);
+	pr_debug("%s, %lx, %lx\n", __FUNCTION__, dsisr, ea);
 
 	/* Handle kernel space hash faults immediately.
 	   User hash faults need to be deferred to process context. */
@@ -168,7 +168,7 @@ static int __spu_trap_halt(struct spu *spu)
 static int __spu_trap_tag_group(struct spu *spu)
 {
 	pr_debug("%s\n", __FUNCTION__);
-	/* wake_up(&spu->dma_wq); */
+	spu->mfc_callback(spu);
 	return 0;
 }
 
@@ -242,6 +242,8 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs)
 		spu_mfc_dsisr_set(spu, 0ul);
 	spu_int_stat_clear(spu, 1, stat);
 	spin_unlock(&spu->register_lock);
+	pr_debug("%s: %lx %lx %lx %lx\n", __FUNCTION__, mask, stat,
+			dar, dsisr);
 
 	if (stat & 1) /* segment fault */
 		__spu_trap_data_seg(spu, dar);
@@ -484,14 +486,13 @@ int spu_irq_class_1_bottom(struct spu *spu)
 
 	ea = spu->dar;
 	dsisr = spu->dsisr;
-	if (dsisr & MFC_DSISR_PTE_NOT_FOUND) {
+	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
 		access = (_PAGE_PRESENT | _PAGE_USER);
 		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
 		if (hash_page(ea, access, 0x300) != 0)
 			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
 	}
-	if ((error & CLASS1_ENABLE_STORAGE_FAULT_INTR) ||
-	    (dsisr & MFC_DSISR_ACCESS_DENIED)) {
+	if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
 		if ((ret = spu_handle_mm_fault(spu)) != 0)
 			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
 		else
@@ -568,6 +569,11 @@ static int __init spu_map_device(struct spu *spu, struct device_node *spe)
 	if (!spu->local_store)
 		goto out;
 
+	prop = get_property(spe, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
 	spu->problem= map_spe_prop(spe, "problem");
 	if (!spu->problem)
 		goto out_unmap;
@@ -632,6 +638,7 @@ static int __init create_spu(struct device_node *spe)
 	spu->ibox_callback = NULL;
 	spu->wbox_callback = NULL;
 	spu->stop_callback = NULL;
+	spu->mfc_callback = NULL;
 
 	mutex_lock(&spu_mutex);
 	spu->number = number++;
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 00000000000..3a4245c926a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,345 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#define DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ *    and we can't let them execute ever:
+ *	restart_syscall, exit, for, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ *	uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ *    we don't want an SPU to:
+ *	reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ *    linked into the kernel. Unfortunately, the cond_syscall
+ *    helper does not work here as it does not add the necessary
+ *    opd symbols:
+ *	mbind, mq_open, ipc, ...
+ */
+
+void *spu_syscall_table[] = {
+	[__NR_restart_syscall]		sys_ni_syscall, /* sys_restart_syscall */
+	[__NR_exit]			sys_ni_syscall, /* sys_exit */
+	[__NR_fork]			sys_ni_syscall, /* ppc_fork */
+	[__NR_read]			sys_read,
+	[__NR_write]			sys_write,
+	[__NR_open]			sys_open,
+	[__NR_close]			sys_close,
+	[__NR_waitpid]			sys_waitpid,
+	[__NR_creat]			sys_creat,
+	[__NR_link]			sys_link,
+	[__NR_unlink]			sys_unlink,
+	[__NR_execve]			sys_ni_syscall, /* sys_execve */
+	[__NR_chdir]			sys_chdir,
+	[__NR_time]			sys_time,
+	[__NR_mknod]			sys_mknod,
+	[__NR_chmod]			sys_chmod,
+	[__NR_lchown]			sys_lchown,
+	[__NR_break]			sys_ni_syscall,
+	[__NR_oldstat]			sys_ni_syscall,
+	[__NR_lseek]			sys_lseek,
+	[__NR_getpid]			sys_getpid,
+	[__NR_mount]			sys_ni_syscall, /* sys_mount */
+	[__NR_umount]			sys_ni_syscall,
+	[__NR_setuid]			sys_setuid,
+	[__NR_getuid]			sys_getuid,
+	[__NR_stime]			sys_stime,
+	[__NR_ptrace]			sys_ni_syscall, /* sys_ptrace */
+	[__NR_alarm]			sys_alarm,
+	[__NR_oldfstat]			sys_ni_syscall,
+	[__NR_pause]			sys_ni_syscall, /* sys_pause */
+	[__NR_utime]			sys_ni_syscall, /* sys_utime */
+	[__NR_stty]			sys_ni_syscall,
+	[__NR_gtty]			sys_ni_syscall,
+	[__NR_access]			sys_access,
+	[__NR_nice]			sys_nice,
+	[__NR_ftime]			sys_ni_syscall,
+	[__NR_sync]			sys_sync,
+	[__NR_kill]			sys_kill,
+	[__NR_rename]			sys_rename,
+	[__NR_mkdir]			sys_mkdir,
+	[__NR_rmdir]			sys_rmdir,
+	[__NR_dup]			sys_dup,
+	[__NR_pipe]			sys_pipe,
+	[__NR_times]			sys_times,
+	[__NR_prof]			sys_ni_syscall,
+	[__NR_brk]			sys_brk,
+	[__NR_setgid]			sys_setgid,
+	[__NR_getgid]			sys_getgid,
+	[__NR_signal]			sys_ni_syscall, /* sys_signal */
+	[__NR_geteuid]			sys_geteuid,
+	[__NR_getegid]			sys_getegid,
+	[__NR_acct]			sys_ni_syscall, /* sys_acct */
+	[__NR_umount2]			sys_ni_syscall, /* sys_umount */
+	[__NR_lock]			sys_ni_syscall,
+	[__NR_ioctl]			sys_ioctl,
+	[__NR_fcntl]			sys_fcntl,
+	[__NR_mpx]			sys_ni_syscall,
+	[__NR_setpgid]			sys_setpgid,
+	[__NR_ulimit]			sys_ni_syscall,
+	[__NR_oldolduname]		sys_ni_syscall,
+	[__NR_umask]			sys_umask,
+	[__NR_chroot]			sys_chroot,
+	[__NR_ustat]			sys_ni_syscall, /* sys_ustat */
+	[__NR_dup2]			sys_dup2,
+	[__NR_getppid]			sys_getppid,
+	[__NR_getpgrp]			sys_getpgrp,
+	[__NR_setsid]			sys_setsid,
+	[__NR_sigaction]		sys_ni_syscall,
+	[__NR_sgetmask]			sys_sgetmask,
+	[__NR_ssetmask]			sys_ssetmask,
+	[__NR_setreuid]			sys_setreuid,
+	[__NR_setregid]			sys_setregid,
+	[__NR_sigsuspend]		sys_ni_syscall,
+	[__NR_sigpending]		sys_ni_syscall,
+	[__NR_sethostname]		sys_sethostname,
+	[__NR_setrlimit]		sys_setrlimit,
+	[__NR_getrlimit]		sys_ni_syscall,
+	[__NR_getrusage]		sys_getrusage,
+	[__NR_gettimeofday]		sys_gettimeofday,
+	[__NR_settimeofday]		sys_settimeofday,
+	[__NR_getgroups]		sys_getgroups,
+	[__NR_setgroups]		sys_setgroups,
+	[__NR_select]			sys_ni_syscall,
+	[__NR_symlink]			sys_symlink,
+	[__NR_oldlstat]			sys_ni_syscall,
+	[__NR_readlink]			sys_readlink,
+	[__NR_uselib]			sys_ni_syscall, /* sys_uselib */
+	[__NR_swapon]			sys_ni_syscall, /* sys_swapon */
+	[__NR_reboot]			sys_ni_syscall, /* sys_reboot */
+	[__NR_readdir]			sys_ni_syscall,
+	[__NR_mmap]			sys_mmap,
+	[__NR_munmap]			sys_munmap,
+	[__NR_truncate]			sys_truncate,
+	[__NR_ftruncate]		sys_ftruncate,
+	[__NR_fchmod]			sys_fchmod,
+	[__NR_fchown]			sys_fchown,
+	[__NR_getpriority]		sys_getpriority,
+	[__NR_setpriority]		sys_setpriority,
+	[__NR_profil]			sys_ni_syscall,
+	[__NR_statfs]			sys_ni_syscall, /* sys_statfs */
+	[__NR_fstatfs]			sys_ni_syscall, /* sys_fstatfs */
+	[__NR_ioperm]			sys_ni_syscall,
+	[__NR_socketcall]		sys_socketcall,
+	[__NR_syslog]			sys_syslog,
+	[__NR_setitimer]		sys_setitimer,
+	[__NR_getitimer]		sys_getitimer,
+	[__NR_stat]			sys_newstat,
+	[__NR_lstat]			sys_newlstat,
+	[__NR_fstat]			sys_newfstat,
+	[__NR_olduname]			sys_ni_syscall,
+	[__NR_iopl]			sys_ni_syscall,
+	[__NR_vhangup]			sys_vhangup,
+	[__NR_idle]			sys_ni_syscall,
+	[__NR_vm86]			sys_ni_syscall,
+	[__NR_wait4]			sys_wait4,
+	[__NR_swapoff]			sys_ni_syscall, /* sys_swapoff */
+	[__NR_sysinfo]			sys_sysinfo,
+	[__NR_ipc]			sys_ni_syscall, /* sys_ipc */
+	[__NR_fsync]			sys_fsync,
+	[__NR_sigreturn]		sys_ni_syscall,
+	[__NR_clone]			sys_ni_syscall, /* ppc_clone */
+	[__NR_setdomainname]		sys_setdomainname,
+	[__NR_uname]			ppc_newuname,
+	[__NR_modify_ldt]		sys_ni_syscall,
+	[__NR_adjtimex]			sys_adjtimex,
+	[__NR_mprotect]			sys_mprotect,
+	[__NR_sigprocmask]		sys_ni_syscall,
+	[__NR_create_module]		sys_ni_syscall,
+	[__NR_init_module]		sys_ni_syscall, /* sys_init_module */
+	[__NR_delete_module]		sys_ni_syscall, /* sys_delete_module */
+	[__NR_get_kernel_syms]		sys_ni_syscall,
+	[__NR_quotactl]			sys_ni_syscall, /* sys_quotactl */
+	[__NR_getpgid]			sys_getpgid,
+	[__NR_fchdir]			sys_fchdir,
+	[__NR_bdflush]			sys_bdflush,
+	[__NR_sysfs]			sys_ni_syscall, /* sys_sysfs */
+	[__NR_personality]		ppc64_personality,
+	[__NR_afs_syscall]		sys_ni_syscall,
+	[__NR_setfsuid]			sys_setfsuid,
+	[__NR_setfsgid]			sys_setfsgid,
+	[__NR__llseek]			sys_llseek,
+	[__NR_getdents]			sys_getdents,
+	[__NR__newselect]		sys_select,
+	[__NR_flock]			sys_flock,
+	[__NR_msync]			sys_msync,
+	[__NR_readv]			sys_readv,
+	[__NR_writev]			sys_writev,
+	[__NR_getsid]			sys_getsid,
+	[__NR_fdatasync]		sys_fdatasync,
+	[__NR__sysctl]			sys_ni_syscall, /* sys_sysctl */
+	[__NR_mlock]			sys_mlock,
+	[__NR_munlock]			sys_munlock,
+	[__NR_mlockall]			sys_mlockall,
+	[__NR_munlockall]		sys_munlockall,
+	[__NR_sched_setparam]		sys_sched_setparam,
+	[__NR_sched_getparam]		sys_sched_getparam,
+	[__NR_sched_setscheduler]	sys_sched_setscheduler,
+	[__NR_sched_getscheduler]	sys_sched_getscheduler,
+	[__NR_sched_yield]		sys_sched_yield,
+	[__NR_sched_get_priority_max]	sys_sched_get_priority_max,
+	[__NR_sched_get_priority_min]	sys_sched_get_priority_min,
+	[__NR_sched_rr_get_interval]	sys_sched_rr_get_interval,
+	[__NR_nanosleep]		sys_nanosleep,
+	[__NR_mremap]			sys_mremap,
+	[__NR_setresuid]		sys_setresuid,
+	[__NR_getresuid]		sys_getresuid,
+	[__NR_query_module]		sys_ni_syscall,
+	[__NR_poll]			sys_poll,
+	[__NR_nfsservctl]		sys_ni_syscall, /* sys_nfsservctl */
+	[__NR_setresgid]		sys_setresgid,
+	[__NR_getresgid]		sys_getresgid,
+	[__NR_prctl]			sys_prctl,
+	[__NR_rt_sigreturn]		sys_ni_syscall, /* ppc64_rt_sigreturn */
+	[__NR_rt_sigaction]		sys_ni_syscall, /* sys_rt_sigaction */
+	[__NR_rt_sigprocmask]		sys_ni_syscall, /* sys_rt_sigprocmask */
+	[__NR_rt_sigpending]		sys_ni_syscall, /* sys_rt_sigpending */
+	[__NR_rt_sigtimedwait]		sys_ni_syscall, /* sys_rt_sigtimedwait */
+	[__NR_rt_sigqueueinfo]		sys_ni_syscall, /* sys_rt_sigqueueinfo */
+	[__NR_rt_sigsuspend]		sys_ni_syscall, /* sys_rt_sigsuspend */
+	[__NR_pread64]			sys_pread64,
+	[__NR_pwrite64]			sys_pwrite64,
+	[__NR_chown]			sys_chown,
+	[__NR_getcwd]			sys_getcwd,
+	[__NR_capget]			sys_capget,
+	[__NR_capset]			sys_capset,
+	[__NR_sigaltstack]		sys_ni_syscall, /* sys_sigaltstack */
+	[__NR_sendfile]			sys_sendfile64,
+	[__NR_getpmsg]			sys_ni_syscall,
+	[__NR_putpmsg]			sys_ni_syscall,
+	[__NR_vfork]			sys_ni_syscall, /* ppc_vfork */
+	[__NR_ugetrlimit]		sys_getrlimit,
+	[__NR_readahead]		sys_readahead,
+	[192]				sys_ni_syscall,
+	[193]				sys_ni_syscall,
+	[194]				sys_ni_syscall,
+	[195]				sys_ni_syscall,
+	[196]				sys_ni_syscall,
+	[197]				sys_ni_syscall,
+	[__NR_pciconfig_read]		sys_ni_syscall, /* sys_pciconfig_read */
+	[__NR_pciconfig_write]		sys_ni_syscall, /* sys_pciconfig_write */
+	[__NR_pciconfig_iobase]		sys_ni_syscall, /* sys_pciconfig_iobase */
+	[__NR_multiplexer]		sys_ni_syscall,
+	[__NR_getdents64]		sys_getdents64,
+	[__NR_pivot_root]		sys_pivot_root,
+	[204]				sys_ni_syscall,
+	[__NR_madvise]			sys_madvise,
+	[__NR_mincore]			sys_mincore,
+	[__NR_gettid]			sys_gettid,
+	[__NR_tkill]			sys_tkill,
+	[__NR_setxattr]			sys_setxattr,
+	[__NR_lsetxattr]		sys_lsetxattr,
+	[__NR_fsetxattr]		sys_fsetxattr,
+	[__NR_getxattr]			sys_getxattr,
+	[__NR_lgetxattr]		sys_lgetxattr,
+	[__NR_fgetxattr]		sys_fgetxattr,
+	[__NR_listxattr]		sys_listxattr,
+	[__NR_llistxattr]		sys_llistxattr,
+	[__NR_flistxattr]		sys_flistxattr,
+	[__NR_removexattr]		sys_removexattr,
+	[__NR_lremovexattr]		sys_lremovexattr,
+	[__NR_fremovexattr]		sys_fremovexattr,
+	[__NR_futex]			sys_futex,
+	[__NR_sched_setaffinity]	sys_sched_setaffinity,
+	[__NR_sched_getaffinity]	sys_sched_getaffinity,
+	[__NR_tuxcall]			sys_ni_syscall,
+	[226]				sys_ni_syscall,
+	[__NR_io_setup]			sys_io_setup,
+	[__NR_io_destroy]		sys_io_destroy,
+	[__NR_io_getevents]		sys_io_getevents,
+	[__NR_io_submit]		sys_io_submit,
+	[__NR_io_cancel]		sys_io_cancel,
+	[__NR_set_tid_address]		sys_ni_syscall, /* sys_set_tid_address */
+	[__NR_fadvise64]		sys_fadvise64,
+	[__NR_exit_group]		sys_ni_syscall, /* sys_exit_group */
+	[__NR_lookup_dcookie]		sys_ni_syscall, /* sys_lookup_dcookie */
+	[__NR_epoll_create]		sys_epoll_create,
+	[__NR_epoll_ctl]		sys_epoll_ctl,
+	[__NR_epoll_wait]		sys_epoll_wait,
+	[__NR_remap_file_pages]		sys_remap_file_pages,
+	[__NR_timer_create]		sys_timer_create,
+	[__NR_timer_settime]		sys_timer_settime,
+	[__NR_timer_gettime]		sys_timer_gettime,
+	[__NR_timer_getoverrun]		sys_timer_getoverrun,
+	[__NR_timer_delete]		sys_timer_delete,
+	[__NR_clock_settime]		sys_clock_settime,
+	[__NR_clock_gettime]		sys_clock_gettime,
+	[__NR_clock_getres]		sys_clock_getres,
+	[__NR_clock_nanosleep]		sys_clock_nanosleep,
+	[__NR_swapcontext]		sys_ni_syscall, /* ppc64_swapcontext */
+	[__NR_tgkill]			sys_tgkill,
+	[__NR_utimes]			sys_utimes,
+	[__NR_statfs64]			sys_statfs64,
+	[__NR_fstatfs64]		sys_fstatfs64,
+	[254]				sys_ni_syscall,
+	[__NR_rtas]			ppc_rtas,
+	[256]				sys_ni_syscall,
+	[257]				sys_ni_syscall,
+	[258]				sys_ni_syscall,
+	[__NR_mbind]			sys_ni_syscall, /* sys_mbind */
+	[__NR_get_mempolicy]		sys_ni_syscall, /* sys_get_mempolicy */
+	[__NR_set_mempolicy]		sys_ni_syscall, /* sys_set_mempolicy */
+	[__NR_mq_open]			sys_ni_syscall, /* sys_mq_open */
+	[__NR_mq_unlink]		sys_ni_syscall, /* sys_mq_unlink */
+	[__NR_mq_timedsend]		sys_ni_syscall, /* sys_mq_timedsend */
+	[__NR_mq_timedreceive]		sys_ni_syscall, /* sys_mq_timedreceive */
+	[__NR_mq_notify]		sys_ni_syscall, /* sys_mq_notify */
+	[__NR_mq_getsetattr]		sys_ni_syscall, /* sys_mq_getsetattr */
+	[__NR_kexec_load]		sys_ni_syscall, /* sys_kexec_load */
+	[__NR_add_key]			sys_ni_syscall, /* sys_add_key */
+	[__NR_request_key]		sys_ni_syscall, /* sys_request_key */
+	[__NR_keyctl]			sys_ni_syscall, /* sys_keyctl */
+	[__NR_waitid]			sys_ni_syscall, /* sys_waitid */
+	[__NR_ioprio_set]		sys_ni_syscall, /* sys_ioprio_set */
+	[__NR_ioprio_get]		sys_ni_syscall, /* sys_ioprio_get */
+	[__NR_inotify_init]		sys_ni_syscall, /* sys_inotify_init */
+	[__NR_inotify_add_watch]	sys_ni_syscall, /* sys_inotify_add_watch */
+	[__NR_inotify_rm_watch]		sys_ni_syscall, /* sys_inotify_rm_watch */
+	[__NR_spu_run]			sys_ni_syscall, /* sys_spu_run */
+	[__NR_spu_create]		sys_ni_syscall, /* sys_spu_create */
+	[__NR_pselect6]			sys_ni_syscall, /* sys_pselect */
+	[__NR_ppoll]			sys_ni_syscall, /* sys_ppoll */
+	[__NR_unshare]			sys_unshare,
+};
+
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+	BUILD_BUG_ON(ARRAY_SIZE(spu_syscall_table) != __NR_syscalls);
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	if (s->nr_ret >= __NR_syscalls) {
+		pr_debug("%s: invalid syscall #%ld", __FUNCTION__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+#ifdef DEBUG
+	print_symbol(KERN_DEBUG "SPU-syscall %s:", (unsigned long)syscall);
+	printk("syscall%ld(%lx, %lx, %lx, %lx, %lx, %lx)\n",
+			s->nr_ret,
+			s->parm[0], s->parm[1], s->parm[2],
+			s->parm[3], s->parm[4], s->parm[5]);
+#endif
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index a5c489a53c6..f1d35ddc9df 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -285,6 +285,49 @@ static void spu_backing_runcntl_stop(struct spu_context *ctx)
 	spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
 }
 
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+					u32 mode)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	if (prob->dma_querytype_RW)
+		goto out;
+	ret = 0;
+	/* FIXME: what are the side-effects of this? */
+	prob->dma_querymask_RW = mask;
+	prob->dma_querytype_RW = mode;
+out:
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return ctx->csa.prob.dma_tagstatus_R;
+}
+
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return ctx->csa.prob.dma_qstatus_R;
+}
+
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
 struct spu_context_ops spu_backing_ops = {
 	.mbox_read = spu_backing_mbox_read,
 	.mbox_stat_read = spu_backing_mbox_stat_read,
@@ -305,4 +348,8 @@ struct spu_context_ops spu_backing_ops = {
 	.get_ls = spu_backing_get_ls,
 	.runcntl_write = spu_backing_runcntl_write,
 	.runcntl_stop = spu_backing_runcntl_stop,
+	.set_mfc_query = spu_backing_set_mfc_query,
+	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+	.send_mfc_command = spu_backing_send_mfc_command,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 336f238102f..8bb33abfad1 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -27,7 +27,7 @@
 #include <asm/spu_csa.h>
 #include "spufs.h"
 
-struct spu_context *alloc_spu_context(struct address_space *local_store)
+struct spu_context *alloc_spu_context(void)
 {
 	struct spu_context *ctx;
 	ctx = kmalloc(sizeof *ctx, GFP_KERNEL);
@@ -47,10 +47,17 @@ struct spu_context *alloc_spu_context(struct address_space *local_store)
 	init_waitqueue_head(&ctx->ibox_wq);
 	init_waitqueue_head(&ctx->wbox_wq);
 	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
 	ctx->ibox_fasync = NULL;
 	ctx->wbox_fasync = NULL;
+	ctx->mfc_fasync = NULL;
+	ctx->mfc = NULL;
+	ctx->tagwait = 0;
 	ctx->state = SPU_STATE_SAVED;
-	ctx->local_store = local_store;
+	ctx->local_store = NULL;
+	ctx->cntl = NULL;
+	ctx->signal1 = NULL;
+	ctx->signal2 = NULL;
 	ctx->spu = NULL;
 	ctx->ops = &spu_backing_ops;
 	ctx->owner = get_task_mm(current);
@@ -68,8 +75,6 @@ void destroy_spu_context(struct kref *kref)
 	ctx = container_of(kref, struct spu_context, kref);
 	down_write(&ctx->state_sema);
 	spu_deactivate(ctx);
-	ctx->ibox_fasync = NULL;
-	ctx->wbox_fasync = NULL;
 	up_write(&ctx->state_sema);
 	spu_fini_csa(&ctx->csa);
 	kfree(ctx);
@@ -109,7 +114,16 @@ void spu_release(struct spu_context *ctx)
 
 void spu_unmap_mappings(struct spu_context *ctx)
 {
-	unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+	if (ctx->local_store)
+		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+	if (ctx->mfc)
+		unmap_mapping_range(ctx->mfc, 0, 0x4000, 1);
+	if (ctx->cntl)
+		unmap_mapping_range(ctx->cntl, 0, 0x4000, 1);
+	if (ctx->signal1)
+		unmap_mapping_range(ctx->signal1, 0, 0x4000, 1);
+	if (ctx->signal2)
+		unmap_mapping_range(ctx->signal2, 0, 0x4000, 1);
 }
 
 int spu_acquire_runnable(struct spu_context *ctx)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index dfa649c9b95..366185e9266 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -20,6 +20,8 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#undef DEBUG
+
 #include <linux/fs.h>
 #include <linux/ioctl.h>
 #include <linux/module.h>
@@ -39,8 +41,10 @@ static int
 spufs_mem_open(struct inode *inode, struct file *file)
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
-	file->private_data = i->i_ctx;
-	file->f_mapping = i->i_ctx->local_store;
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	file->f_mapping = inode->i_mapping;
+	ctx->local_store = inode->i_mapping;
 	return 0;
 }
 
@@ -84,7 +88,7 @@ spufs_mem_write(struct file *file, const char __user *buffer,
 	return ret;
 }
 
-#ifdef CONFIG_SPARSEMEM
+#ifdef CONFIG_SPUFS_MMAP
 static struct page *
 spufs_mem_mmap_nopage(struct vm_area_struct *vma,
 		      unsigned long address, int *type)
@@ -136,11 +140,113 @@ static struct file_operations spufs_mem_fops = {
 	.read    = spufs_mem_read,
 	.write   = spufs_mem_write,
 	.llseek  = generic_file_llseek,
-#ifdef CONFIG_SPARSEMEM
+#ifdef CONFIG_SPUFS_MMAP
 	.mmap    = spufs_mem_mmap,
 #endif
 };
 
+#ifdef CONFIG_SPUFS_MMAP
+static struct page *spufs_ps_nopage(struct vm_area_struct *vma,
+				    unsigned long address,
+				    int *type, unsigned long ps_offs)
+{
+	struct page *page = NOPAGE_SIGBUS;
+	int fault_type = VM_FAULT_SIGBUS;
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	unsigned long area;
+	int ret;
+
+	offset += vma->vm_pgoff << PAGE_SHIFT;
+	if (offset >= 0x4000)
+		goto out;
+
+	ret = spu_acquire_runnable(ctx);
+	if (ret)
+		goto out;
+
+	area = ctx->spu->problem_phys + ps_offs;
+	page = pfn_to_page((area + offset) >> PAGE_SHIFT);
+	fault_type = VM_FAULT_MINOR;
+	page_cache_get(page);
+
+	spu_release(ctx);
+
+      out:
+	if (type)
+		*type = fault_type;
+
+	return page;
+}
+
+static struct page *spufs_cntl_mmap_nopage(struct vm_area_struct *vma,
+					   unsigned long address, int *type)
+{
+	return spufs_ps_nopage(vma, address, type, 0x4000);
+}
+
+static struct vm_operations_struct spufs_cntl_mmap_vmops = {
+	.nopage = spufs_cntl_mmap_nopage,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ * Mapping this area requires that the application have CAP_SYS_RAWIO,
+ * as these registers require special care when read/writing.
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE);
+
+	vma->vm_ops = &spufs_cntl_mmap_vmops;
+	return 0;
+}
+#endif
+
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = ctx;
+	file->f_mapping = inode->i_mapping;
+	ctx->cntl = inode->i_mapping;
+	return 0;
+}
+
+static ssize_t
+spufs_cntl_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	/* FIXME: read from spu status */
+	return -EINVAL;
+}
+
+static ssize_t
+spufs_cntl_write(struct file *file, const char __user *buffer,
+		 size_t size, loff_t *pos)
+{
+	/* FIXME: write to runctl bit */
+	return -EINVAL;
+}
+
+static struct file_operations spufs_cntl_fops = {
+	.open = spufs_cntl_open,
+	.read = spufs_cntl_read,
+	.write = spufs_cntl_write,
+#ifdef CONFIG_SPUFS_MMAP
+	.mmap = spufs_cntl_mmap,
+#endif
+};
+
 static int
 spufs_regs_open(struct inode *inode, struct file *file)
 {
@@ -501,6 +607,16 @@ static struct file_operations spufs_wbox_stat_fops = {
 	.read	= spufs_wbox_stat_read,
 };
 
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	file->f_mapping = inode->i_mapping;
+	ctx->signal1 = inode->i_mapping;
+	return nonseekable_open(inode, file);
+}
+
 static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -541,12 +657,50 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
 	return 4;
 }
 
+#ifdef CONFIG_SPUFS_MMAP
+static struct page *spufs_signal1_mmap_nopage(struct vm_area_struct *vma,
+					      unsigned long address, int *type)
+{
+	return spufs_ps_nopage(vma, address, type, 0x14000);
+}
+
+static struct vm_operations_struct spufs_signal1_mmap_vmops = {
+	.nopage = spufs_signal1_mmap_nopage,
+};
+
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE);
+
+	vma->vm_ops = &spufs_signal1_mmap_vmops;
+	return 0;
+}
+#endif
+
 static struct file_operations spufs_signal1_fops = {
-	.open = spufs_pipe_open,
+	.open = spufs_signal1_open,
 	.read = spufs_signal1_read,
 	.write = spufs_signal1_write,
+#ifdef CONFIG_SPUFS_MMAP
+	.mmap = spufs_signal1_mmap,
+#endif
 };
 
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	file->f_mapping = inode->i_mapping;
+	ctx->signal2 = inode->i_mapping;
+	return nonseekable_open(inode, file);
+}
+
 static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -589,10 +743,39 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
 	return 4;
 }
 
+#ifdef CONFIG_SPUFS_MMAP
+static struct page *spufs_signal2_mmap_nopage(struct vm_area_struct *vma,
+					      unsigned long address, int *type)
+{
+	return spufs_ps_nopage(vma, address, type, 0x1c000);
+}
+
+static struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.nopage = spufs_signal2_mmap_nopage,
+};
+
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	/* FIXME: */
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE);
+
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+	return 0;
+}
+#endif
+
 static struct file_operations spufs_signal2_fops = {
-	.open = spufs_pipe_open,
+	.open = spufs_signal2_open,
 	.read = spufs_signal2_read,
 	.write = spufs_signal2_write,
+#ifdef CONFIG_SPUFS_MMAP
+	.mmap = spufs_signal2_mmap,
+#endif
 };
 
 static void spufs_signal1_type_set(void *data, u64 val)
@@ -641,6 +824,332 @@ static u64 spufs_signal2_type_get(void *data)
 DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
 					spufs_signal2_type_set, "%llu");
 
+#ifdef CONFIG_SPUFS_MMAP
+static struct page *spufs_mfc_mmap_nopage(struct vm_area_struct *vma,
+					   unsigned long address, int *type)
+{
+	return spufs_ps_nopage(vma, address, type, 0x3000);
+}
+
+static struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.nopage = spufs_mfc_mmap_nopage,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ * Mapping this area requires that the application have CAP_SYS_RAWIO,
+ * as these registers require special care when read/writing.
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE);
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	return 0;
+}
+#endif
+
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (ctx->owner != current->mm)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	file->private_data = ctx;
+	return nonseekable_open(inode, file);
+}
+
+/* interrupt-level mfc callback function. */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	wake_up_all(&ctx->mfc_wq);
+
+	pr_debug("%s %s\n", __FUNCTION__, spu->name);
+	if (ctx->mfc_fasync) {
+		u32 free_elements, tagstatus;
+		unsigned int mask;
+
+		/* no need for spu_acquire in interrupt context */
+		free_elements = ctx->ops->get_mfc_free_elements(ctx);
+		tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+
+		mask = 0;
+		if (free_elements & 0xffff)
+			mask |= POLLOUT;
+		if (tagstatus & ctx->tagwait)
+			mask |= POLLIN;
+
+		kill_fasync(&ctx->mfc_fasync, SIGIO, mask);
+	}
+}
+
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* See if there is one tag group is complete */
+	/* FIXME we need locking around tagwait */
+	*status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+	ctx->tagwait &= ~*status;
+	if (*status)
+		return 1;
+
+	/* enable interrupt waiting for any tag group,
+	   may silently fail if interrupts are already enabled */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+	return 0;
+}
+
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = -EINVAL;
+	u32 status;
+
+	if (size != 4)
+		goto out;
+
+	spu_acquire(ctx);
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait))
+			ret = -EAGAIN;
+		else
+			ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+			   spufs_read_mfc_tagstatus(ctx, &status));
+	}
+	spu_release(ctx);
+
+	if (ret)
+		goto out;
+
+	ret = 4;
+	if (copy_to_user(buffer, &status, 4))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	pr_debug("queueing DMA %x %lx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+		pr_debug("invalid DMA alignment, ea %lx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	switch (cmd->size & 0xf) {
+	case 1:
+		break;
+	case 2:
+		if (cmd->lsa & 1)
+			goto error;
+		break;
+	case 4:
+		if (cmd->lsa & 3)
+			goto error;
+		break;
+	case 8:
+		if (cmd->lsa & 7)
+			goto error;
+		break;
+	case 0:
+		if (cmd->lsa & 15)
+			goto error;
+		break;
+	error:
+	default:
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error == -EAGAIN) {
+		/* wait for any tag group to complete
+		   so we have space for the new command */
+		ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+		/* try again, because the queue might be
+		   empty again */
+		*error = ctx->ops->send_mfc_command(ctx, &cmd);
+		if (*error == -EAGAIN)
+			return 0;
+	}
+	return 1;
+}
+
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret = -EINVAL;
+
+	if (size != sizeof cmd)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		goto out;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		goto out;
+
+	spu_acquire_runnable(ctx);
+	if (file->f_flags & O_NONBLOCK) {
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (status)
+			ret = status;
+	}
+	spu_release(ctx);
+
+	if (ret)
+		goto out;
+
+	ctx->tagwait |= 1 << cmd.tag;
+
+out:
+	return ret;
+}
+
+static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	unsigned int mask;
+
+	spu_acquire(ctx);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	mask = 0;
+	if (free_elements & 0xffff)
+		mask |= POLLOUT | POLLWRNORM;
+	if (tagstatus & ctx->tagwait)
+		mask |= POLLIN | POLLRDNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __FUNCTION__,
+		free_elements, tagstatus, ctx->tagwait);
+
+	return mask;
+}
+
+static int spufs_mfc_flush(struct file *file)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	spu_acquire(ctx);
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+out:
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+
+	return ret;
+}
+
+static int spufs_mfc_fsync(struct file *file, struct dentry *dentry,
+			   int datasync)
+{
+	return spufs_mfc_flush(file);
+}
+
+static int spufs_mfc_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+
+	return fasync_helper(fd, file, on, &ctx->mfc_fasync);
+}
+
+static struct file_operations spufs_mfc_fops = {
+	.open	 = spufs_mfc_open,
+	.read	 = spufs_mfc_read,
+	.write	 = spufs_mfc_write,
+	.poll	 = spufs_mfc_poll,
+	.flush	 = spufs_mfc_flush,
+	.fsync	 = spufs_mfc_fsync,
+	.fasync	 = spufs_mfc_fasync,
+#ifdef CONFIG_SPUFS_MMAP
+	.mmap	 = spufs_mfc_mmap,
+#endif
+};
+
 static void spufs_npc_set(void *data, u64 val)
 {
 	struct spu_context *ctx = data;
@@ -783,6 +1292,8 @@ struct tree_descr spufs_dir_contents[] = {
 	{ "signal2", &spufs_signal2_fops, 0666, },
 	{ "signal1_type", &spufs_signal1_type, 0666, },
 	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
 	{ "npc", &spufs_npc_ops, 0666, },
 	{ "fpcr", &spufs_fpcr_fops, 0666, },
 	{ "decr", &spufs_decr_ops, 0666, },
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index 5445719bff7..a13a8b5a014 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -232,6 +232,59 @@ static void spu_hw_runcntl_stop(struct spu_context *ctx)
 	spin_unlock_irq(&ctx->spu->register_lock);
 }
 
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+	struct spu_problem *prob = ctx->spu->problem;
+	int ret;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	ret = -EAGAIN;
+	if (in_be32(&prob->dma_querytype_RW))
+		goto out;
+	ret = 0;
+	out_be32(&prob->dma_querymask_RW, mask);
+	out_be32(&prob->dma_querytype_RW, mode);
+out:
+	spin_unlock_irq(&ctx->spu->register_lock);
+	return ret;
+}
+
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_tagstatus_R);
+}
+
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_qstatus_R);
+}
+
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	u32 status;
+	struct spu_problem *prob = ctx->spu->problem;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&prob->mfc_lsa_W, cmd->lsa);
+	out_be64(&prob->mfc_ea_W, cmd->ea);
+	out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+				cmd->size << 16 | cmd->tag);
+	out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+				cmd->class << 16 | cmd->cmd);
+	status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+	spin_unlock_irq(&ctx->spu->register_lock);
+
+	switch (status & 0xffff) {
+	case 0:
+		return 0;
+	case 2:
+		return -EAGAIN;
+	default:
+		return -EINVAL;
+	}
+}
+
 struct spu_context_ops spu_hw_ops = {
 	.mbox_read = spu_hw_mbox_read,
 	.mbox_stat_read = spu_hw_mbox_stat_read,
@@ -252,4 +305,8 @@ struct spu_context_ops spu_hw_ops = {
 	.get_ls = spu_hw_get_ls,
 	.runcntl_write = spu_hw_runcntl_write,
 	.runcntl_stop = spu_hw_runcntl_stop,
+	.set_mfc_query = spu_hw_set_mfc_query,
+	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+	.send_mfc_command = spu_hw_send_mfc_command,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5be40aa483f..d9554199afa 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -241,7 +241,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		inode->i_gid = dir->i_gid;
 		inode->i_mode &= S_ISGID;
 	}
-	ctx = alloc_spu_context(inode->i_mapping);
+	ctx = alloc_spu_context();
 	SPUFS_I(inode)->i_ctx = ctx;
 	if (!ctx)
 		goto out_iput;
@@ -442,7 +442,7 @@ static struct file_system_type spufs_type = {
 	.kill_sb = kill_litter_super,
 };
 
-static int spufs_init(void)
+static int __init spufs_init(void)
 {
 	int ret;
 	ret = -ENOMEM;
@@ -472,7 +472,7 @@ out:
 }
 module_init(spufs_init);
 
-static void spufs_exit(void)
+static void __exit spufs_exit(void)
 {
 	spu_sched_exit();
 	unregister_spu_syscalls(&spufs_calls);
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 18ea8866c61..c04e078c0fe 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -76,6 +76,90 @@ static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc,
 	return 0;
 }
 
+/*
+ * SPU syscall restarting is tricky because we violate the basic
+ * assumption that the signal handler is running on the interrupted
+ * thread. Here instead, the handler runs on PowerPC user space code,
+ * while the syscall was called from the SPU.
+ * This means we can only do a very rough approximation of POSIX
+ * signal semantics.
+ */
+int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+			  unsigned int *npc)
+{
+	int ret;
+
+	switch (*spu_ret) {
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/*
+		 * Enter the regular syscall restarting for
+		 * sys_spu_run, then restart the SPU syscall
+		 * callback.
+		 */
+		*npc -= 8;
+		ret = -ERESTARTSYS;
+		break;
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * Restart block is too hard for now, just return -EINTR
+		 * to the SPU.
+		 * ERESTARTNOHAND comes from sys_pause, we also return
+		 * -EINTR from there.
+		 * Assume that we need to be restarted ourselves though.
+		 */
+		*spu_ret = -EINTR;
+		ret = -ERESTARTSYS;
+		break;
+	default:
+		printk(KERN_WARNING "%s: unexpected return code %ld\n",
+			__FUNCTION__, *spu_ret);
+		ret = 0;
+	}
+	return ret;
+}
+
+int spu_process_callback(struct spu_context *ctx)
+{
+	struct spu_syscall_block s;
+	u32 ls_pointer, npc;
+	char *ls;
+	long spu_ret;
+	int ret;
+
+	/* get syscall block from local store */
+	npc = ctx->ops->npc_read(ctx);
+	ls = ctx->ops->get_ls(ctx);
+	ls_pointer = *(u32*)(ls + npc);
+	if (ls_pointer > (LS_SIZE - sizeof(s)))
+		return -EFAULT;
+	memcpy(&s, ls + ls_pointer, sizeof (s));
+
+	/* do actual syscall without pinning the spu */
+	ret = 0;
+	spu_ret = -ENOSYS;
+	npc += 4;
+
+	if (s.nr_ret < __NR_syscalls) {
+		spu_release(ctx);
+		/* do actual system call from here */
+		spu_ret = spu_sys_callback(&s);
+		if (spu_ret <= -ERESTARTSYS) {
+			ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+		}
+		spu_acquire(ctx);
+		if (ret == -ERESTARTSYS)
+			return ret;
+	}
+
+	/* write result, jump over indirect pointer */
+	memcpy(ls + ls_pointer, &spu_ret, sizeof (spu_ret));
+	ctx->ops->npc_write(ctx, npc);
+	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+	return ret;
+}
+
 static inline int spu_process_events(struct spu_context *ctx)
 {
 	struct spu *spu = ctx->spu;
@@ -107,6 +191,13 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status));
 		if (unlikely(ret))
 			break;
+		if ((*status & SPU_STATUS_STOPPED_BY_STOP) &&
+		    (*status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+			ret = spu_process_callback(ctx);
+			if (ret)
+				break;
+			*status &= ~SPU_STATUS_STOPPED_BY_STOP;
+		}
 		if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
 			ret = spu_reacquire_runnable(ctx, npc, status);
 			if (ret)
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 963182fbd1a..bf652cd7700 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -180,6 +180,7 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 	spu->ibox_callback = spufs_ibox_callback;
 	spu->wbox_callback = spufs_wbox_callback;
 	spu->stop_callback = spufs_stop_callback;
+	spu->mfc_callback = spufs_mfc_callback;
 	mb();
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
@@ -197,6 +198,7 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 	spu->ibox_callback = NULL;
 	spu->wbox_callback = NULL;
 	spu->stop_callback = NULL;
+	spu->mfc_callback = NULL;
 	spu->mm = NULL;
 	spu->pid = 0;
 	spu->prio = MAX_PRIO;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index db2601f0abd..4485738e210 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -43,7 +43,11 @@ struct spu_context {
 	struct spu *spu;		  /* pointer to a physical SPU */
 	struct spu_state csa;		  /* SPU context save area. */
 	spinlock_t mmio_lock;		  /* protects mmio access */
-	struct address_space *local_store;/* local store backing store */
+	struct address_space *local_store; /* local store mapping.  */
+	struct address_space *mfc;	   /* 'mfc' area mappings. */
+	struct address_space *cntl; 	   /* 'control' area mappings. */
+	struct address_space *signal1; 	   /* 'signal1' area mappings. */
+	struct address_space *signal2; 	   /* 'signal2' area mappings. */
 
 	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
 	struct rw_semaphore state_sema;
@@ -55,13 +59,27 @@ struct spu_context {
 	wait_queue_head_t ibox_wq;
 	wait_queue_head_t wbox_wq;
 	wait_queue_head_t stop_wq;
+	wait_queue_head_t mfc_wq;
 	struct fasync_struct *ibox_fasync;
 	struct fasync_struct *wbox_fasync;
+	struct fasync_struct *mfc_fasync;
+	u32 tagwait;
 	struct spu_context_ops *ops;
 	struct work_struct reap_work;
 	u64 flags;
 };
 
+struct mfc_dma_command {
+	int32_t pad;	/* reserved */
+	uint32_t lsa;	/* local storage address */
+	uint64_t ea;	/* effective address */
+	uint16_t size;	/* transfer size */
+	uint16_t tag;	/* command tag */
+	uint16_t class;	/* class ID */
+	uint16_t cmd;	/* command opcode */
+};
+
+
 /* SPU context query/set operations. */
 struct spu_context_ops {
 	int (*mbox_read) (struct spu_context * ctx, u32 * data);
@@ -84,6 +102,11 @@ struct spu_context_ops {
 	char*(*get_ls) (struct spu_context * ctx);
 	void (*runcntl_write) (struct spu_context * ctx, u32 data);
 	void (*runcntl_stop) (struct spu_context * ctx);
+	int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
+	u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
+	u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+	int (*send_mfc_command)(struct spu_context *ctx,
+					struct mfc_dma_command *cmd);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -106,7 +129,7 @@ long spufs_create_thread(struct nameidata *nd,
 extern struct file_operations spufs_context_fops;
 
 /* context management */
-struct spu_context * alloc_spu_context(struct address_space *local_store);
+struct spu_context * alloc_spu_context(void);
 void destroy_spu_context(struct kref *kref);
 struct spu_context * get_spu_context(struct spu_context *ctx);
 int put_spu_context(struct spu_context *ctx);
@@ -159,5 +182,6 @@ size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
 void spufs_ibox_callback(struct spu *spu);
 void spufs_wbox_callback(struct spu *spu);
 void spufs_stop_callback(struct spu *spu);
+void spufs_mfc_callback(struct spu *spu);
 
 #endif
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 212db28531f..97898d5d34e 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2145,7 +2145,8 @@ static void init_priv1(struct spu_state *csa)
 	csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
 	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
 	csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
-	    CLASS2_ENABLE_SPU_HALT_INTR;
+	    CLASS2_ENABLE_SPU_HALT_INTR |
+	    CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
 }
 
 static void init_priv2(struct spu_state *csa)
diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h
index 814f54742e0..63f0aee4c15 100644
--- a/arch/powerpc/platforms/chrp/chrp.h
+++ b/arch/powerpc/platforms/chrp/chrp.h
@@ -8,4 +8,4 @@ extern int chrp_set_rtc_time(struct rtc_time *);
 extern long chrp_time_init(void);
 
 extern void chrp_find_bridges(void);
-extern void chrp_event_scan(void);
+extern void chrp_event_scan(unsigned long);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 8bf4307e323..23a20171870 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -35,6 +35,7 @@
 #include <linux/root_dev.h>
 #include <linux/initrd.h>
 #include <linux/module.h>
+#include <linux/timer.h>
 
 #include <asm/io.h>
 #include <asm/pgtable.h>
@@ -61,6 +62,10 @@ EXPORT_SYMBOL(_chrp_type);
 
 struct mpic *chrp_mpic;
 
+/* Used for doing CHRP event-scans */
+DEFINE_PER_CPU(struct timer_list, heartbeat_timer);
+unsigned long event_scan_interval;
+
 /*
  * XXX this should be in xmon.h, but putting it there means xmon.h
  * has to include <linux/interrupt.h> (to get irqreturn_t), which
@@ -229,8 +234,6 @@ void __init chrp_setup_arch(void)
 {
 	struct device_node *root = find_path_device ("/");
 	char *machine = NULL;
-	struct device_node *device;
-	unsigned int *p = NULL;
 
 	/* init to some ~sane value until calibrate_delay() runs */
 	loops_per_jiffy = 50000000/HZ;
@@ -287,23 +290,12 @@ void __init chrp_setup_arch(void)
 	 */
 	sio_init();
 
-	/* Get the event scan rate for the rtas so we know how
-	 * often it expects a heartbeat. -- Cort
-	 */
-	device = find_devices("rtas");
-	if (device)
-		p = (unsigned int *) get_property
-			(device, "rtas-event-scan-rate", NULL);
-	if (p && *p) {
-		ppc_md.heartbeat = chrp_event_scan;
-		ppc_md.heartbeat_reset = HZ / (*p * 30) - 1;
-		ppc_md.heartbeat_count = 1;
-		printk("RTAS Event Scan Rate: %u (%lu jiffies)\n",
-		       *p, ppc_md.heartbeat_reset);
-	}
-
 	pci_create_OF_bus_map();
 
+#ifdef CONFIG_SMP
+	smp_ops = &chrp_smp_ops;
+#endif /* CONFIG_SMP */
+
 	/*
 	 * Print the banner, then scroll down so boot progress
 	 * can be printed.  -- Cort
@@ -312,7 +304,7 @@ void __init chrp_setup_arch(void)
 }
 
 void
-chrp_event_scan(void)
+chrp_event_scan(unsigned long unused)
 {
 	unsigned char log[1024];
 	int ret = 0;
@@ -320,7 +312,8 @@ chrp_event_scan(void)
 	/* XXX: we should loop until the hardware says no more error logs -- Cort */
 	rtas_call(rtas_token("event-scan"), 4, 1, &ret, 0xffffffff, 0,
 		  __pa(log), 1024);
-	ppc_md.heartbeat_count = ppc_md.heartbeat_reset;
+	mod_timer(&__get_cpu_var(heartbeat_timer),
+		  jiffies + event_scan_interval);
 }
 
 /*
@@ -465,6 +458,9 @@ void __init chrp_init_IRQ(void)
 void __init
 chrp_init2(void)
 {
+	struct device_node *device;
+	unsigned int *p = NULL;
+
 #ifdef CONFIG_NVRAM
 	chrp_nvram_init();
 #endif
@@ -476,12 +472,53 @@ chrp_init2(void)
 	request_region(0x80,0x10,"dma page reg");
 	request_region(0xc0,0x20,"dma2");
 
+	/* Get the event scan rate for the rtas so we know how
+	 * often it expects a heartbeat. -- Cort
+	 */
+	device = find_devices("rtas");
+	if (device)
+		p = (unsigned int *) get_property
+			(device, "rtas-event-scan-rate", NULL);
+	if (p && *p) {
+		/*
+		 * Arrange to call chrp_event_scan at least *p times
+		 * per minute.  We use 59 rather than 60 here so that
+		 * the rate will be slightly higher than the minimum.
+		 * This all assumes we don't do hotplug CPU on any
+		 * machine that needs the event scans done.
+		 */
+		unsigned long interval, offset;
+		int cpu, ncpus;
+		struct timer_list *timer;
+
+		interval = HZ * 59 / *p;
+		offset = HZ;
+		ncpus = num_online_cpus();
+		event_scan_interval = ncpus * interval;
+		for (cpu = 0; cpu < ncpus; ++cpu) {
+			timer = &per_cpu(heartbeat_timer, cpu);
+			setup_timer(timer, chrp_event_scan, 0);
+			timer->expires = jiffies + offset;
+			add_timer_on(timer, cpu);
+			offset += interval;
+		}
+		printk("RTAS Event Scan Rate: %u (%lu jiffies)\n",
+		       *p, interval);
+	}
+
 	if (ppc_md.progress)
 		ppc_md.progress("  Have fun!    ", 0x7777);
 }
 
-void __init chrp_init(void)
+static int __init chrp_probe(void)
 {
+ 	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
+ 					  "device_type", NULL);
+ 	if (dtype == NULL)
+ 		return 0;
+ 	if (strcmp(dtype, "chrp"))
+		return 0;
+
 	ISA_DMA_THRESHOLD = ~0L;
 	DMA_MODE_READ = 0x44;
 	DMA_MODE_WRITE = 0x48;
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fa4550611c1..6ce8a404ba6 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -675,18 +675,20 @@ static void iseries_dedicated_idle(void)
 void __init iSeries_init_IRQ(void) { }
 #endif
 
-static int __init iseries_probe(int platform)
+static int __init iseries_probe(void)
 {
-	if (PLATFORM_ISERIES_LPAR != platform)
+	unsigned long root = of_get_flat_dt_root();
+	if (!of_flat_dt_is_compatible(root, "IBM,iSeries"))
 		return 0;
 
-	ppc64_firmware_features |= FW_FEATURE_ISERIES;
-	ppc64_firmware_features |= FW_FEATURE_LPAR;
+	powerpc_firmware_features |= FW_FEATURE_ISERIES;
+	powerpc_firmware_features |= FW_FEATURE_LPAR;
 
 	return 1;
 }
 
-struct machdep_calls __initdata iseries_md = {
+define_machine(iseries) {
+	.name		= "iSeries",
 	.setup_arch	= iSeries_setup_arch,
 	.show_cpuinfo	= iSeries_show_cpuinfo,
 	.init_IRQ	= iSeries_init_IRQ,
@@ -930,7 +932,6 @@ void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size)
 
 	/* /chosen */
 	dt_start_node(dt, "chosen");
-	dt_prop_u32(dt, "linux,platform", PLATFORM_ISERIES_LPAR);
 	dt_prop_str(dt, "bootargs", cmd_line);
 	if (cmd_mem_limit)
 		dt_prop_u64(dt, "linux,memory-limit", cmd_mem_limit);
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index ec5c1e10c40..24c0aef4ea3 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -259,9 +259,10 @@ static void __init maple_progress(char *s, unsigned short hex)
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
-static int __init maple_probe(int platform)
+static int __init maple_probe(void)
 {
-	if (platform != PLATFORM_MAPLE)
+	unsigned long root = of_get_flat_dt_root();
+	if (!of_flat_dt_is_compatible(root, "Momentum,Maple"))
 		return 0;
 	/*
 	 * On U3, the DART (iommu) must be allocated now since it
@@ -274,7 +275,8 @@ static int __init maple_probe(int platform)
 	return 1;
 }
 
-struct machdep_calls __initdata maple_md = {
+define_machine(maple_md) {
+	.name			= "Maple",
 	.probe			= maple_probe,
 	.setup_arch		= maple_setup_arch,
 	.init_early		= maple_init_early,
@@ -290,7 +292,7 @@ struct machdep_calls __initdata maple_md = {
        	.get_rtc_time		= maple_get_rtc_time,
       	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= maple_progress,
-	.idle_loop		= native_idle,
+	.power_save		= power4_idle,
 #ifdef CONFIG_KEXEC
 	.machine_kexec		= default_machine_kexec,
 	.machine_kexec_prepare	= default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index fa8b4d7b5de..eacbfd9beab 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -161,9 +161,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size,
 static void __init bootx_add_chosen_props(unsigned long base,
 					  unsigned long *mem_end)
 {
-	u32 val = _MACH_Pmac;
-
-	bootx_dt_add_prop("linux,platform", &val, 4, mem_end);
+	u32 val;
 
 	if (bootx_info->kernelParamsOffset) {
 		char *args = (char *)((unsigned long)bootx_info) +
@@ -493,7 +491,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 		    && (strcmp(model, "iMac,1") == 0
 			|| strcmp(model, "PowerMac1,1") == 0)) {
 			bootx_printf("iMac,1 detected, shutting down USB \n");
-			out_le32((unsigned *)0x80880008, 1);	/* XXX */
+			out_le32((unsigned __iomem *)0x80880008, 1);	/* XXX */
 		}
 	}
 
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index e49eddd5042..a5063cd675c 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2951,7 +2951,7 @@ static void *pmac_early_vresume_data;
 
 void pmac_set_early_video_resume(void (*proc)(void *data), void *data)
 {
-	if (_machine != _MACH_Pmac)
+	if (!machine_is(powermac))
 		return;
 	preempt_disable();
 	pmac_early_vresume_proc = proc;
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 87eb6bb7f0e..e14f9ac55cf 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -1457,6 +1457,9 @@ int __init pmac_i2c_init(void)
 		return 0;
 	i2c_inited = 1;
 
+	if (!machine_is(powermac))
+		return 0;
+
 	/* Probe keywest-i2c busses */
 	kw_i2c_probe();
 
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 5fd28995c74..262f967b880 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -74,7 +74,7 @@ struct core99_header {
  * Read and write the non-volatile RAM on PowerMacs and CHRP machines.
  */
 static int nvram_naddrs;
-static volatile unsigned char *nvram_data;
+static volatile unsigned char __iomem *nvram_data;
 static int is_core_99;
 static int core99_bank = 0;
 static int nvram_partitions[3];
@@ -148,7 +148,7 @@ static ssize_t core99_nvram_size(void)
 }
 
 #ifdef CONFIG_PPC32
-static volatile unsigned char *nvram_addr;
+static volatile unsigned char __iomem *nvram_addr;
 static int nvram_mult;
 
 static unsigned char direct_nvram_read_byte(int addr)
@@ -285,7 +285,7 @@ static int sm_erase_bank(int bank)
 	int stat, i;
 	unsigned long timeout;
 
-	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
 
        	DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
 
@@ -317,7 +317,7 @@ static int sm_write_bank(int bank, u8* datas)
 	int i, stat = 0;
 	unsigned long timeout;
 
-	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
 
        	DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
 
@@ -352,7 +352,7 @@ static int amd_erase_bank(int bank)
 	int i, stat = 0;
 	unsigned long timeout;
 
-	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
 
        	DBG("nvram: AMD Erasing bank %d...\n", bank);
 
@@ -399,7 +399,7 @@ static int amd_write_bank(int bank, u8* datas)
 	int i, stat = 0;
 	unsigned long timeout;
 
-	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
 
        	DBG("nvram: AMD Writing bank %d...\n", bank);
 
@@ -597,7 +597,7 @@ int __init pmac_nvram_init(void)
 	}
 
 #ifdef CONFIG_PPC32
-	if (_machine == _MACH_chrp && nvram_naddrs == 1) {
+	if (machine_is(chrp) && nvram_naddrs == 1) {
 		nvram_data = ioremap(r1.start, s1);
 		nvram_mult = 1;
 		ppc_md.nvram_read_val	= direct_nvram_read_byte;
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index de3f30e6b33..f5d8d15d74f 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -1201,7 +1201,7 @@ void __init pmac_pcibios_after_init(void)
 #ifdef CONFIG_PPC32
 void pmac_pci_fixup_cardbus(struct pci_dev* dev)
 {
-	if (_machine != _MACH_Pmac)
+	if (!machine_is(powermac))
 		return;
 	/*
 	 * Fix the interrupt routing on the various cardbus bridges
@@ -1244,8 +1244,9 @@ void pmac_pci_fixup_pciata(struct pci_dev* dev)
         * On PowerMacs, we try to switch any PCI ATA controller to
 	* fully native mode
         */
-	if (_machine != _MACH_Pmac)
+	if (!machine_is(powermac))
 		return;
+
 	/* Some controllers don't have the class IDE */
 	if (dev->vendor == PCI_VENDOR_ID_PROMISE)
 		switch(dev->device) {
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 9b7150f1041..a3bd3e728fa 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -336,6 +336,8 @@ int __init pmac_pfunc_base_install(void)
 		return 0;
 	pfbase_inited = 1;
 
+	if (!machine_is(powermac))
+		return 0;
 
 	DBG("Installing base platform functions...\n");
 
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 385aab90c4d..4d15e396655 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -350,6 +350,13 @@ static void __init pmac_setup_arch(void)
 		smp_ops = &psurge_smp_ops;
 #endif
 #endif /* CONFIG_SMP */
+
+#ifdef CONFIG_ADB
+	if (strstr(cmd_line, "adb_sync")) {
+		extern int __adb_probe_sync;
+		__adb_probe_sync = 1;
+	}
+#endif /* CONFIG_ADB */
 }
 
 char *bootpath;
@@ -576,30 +583,6 @@ pmac_halt(void)
 	pmac_power_off();
 }
 
-#ifdef CONFIG_PPC32
-void __init pmac_init(void)
-{
-	/* isa_io_base gets set in pmac_pci_init */
-	isa_mem_base = PMAC_ISA_MEM_BASE;
-	pci_dram_offset = PMAC_PCI_DRAM_OFFSET;
-	ISA_DMA_THRESHOLD = ~0L;
-	DMA_MODE_READ = 1;
-	DMA_MODE_WRITE = 2;
-
-	ppc_md = pmac_md;
-
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-        ppc_ide_md.ide_init_hwif	= pmac_ide_init_hwif_ports;
-        ppc_ide_md.default_io_base	= pmac_ide_get_base;
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
-#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
-
-	if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0);
-
-}
-#endif
-
 /* 
  * Early initialization.
  */
@@ -646,6 +629,12 @@ static int __init pmac_declare_of_platform_devices(void)
 {
 	struct device_node *np;
 
+	if (machine_is(chrp))
+		return -1;
+
+	if (!machine_is(powermac))
+		return 0;
+
 	np = of_find_node_by_name(NULL, "valkyrie");
 	if (np)
 		of_platform_device_create(np, "valkyrie", NULL);
@@ -666,12 +655,15 @@ device_initcall(pmac_declare_of_platform_devices);
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
-static int __init pmac_probe(int platform)
+static int __init pmac_probe(void)
 {
-#ifdef CONFIG_PPC64
-	if (platform != PLATFORM_POWERMAC)
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "Power Macintosh") &&
+	    !of_flat_dt_is_compatible(root, "MacRISC"))
 		return 0;
 
+#ifdef CONFIG_PPC64
 	/*
 	 * On U3, the DART (iommu) must be allocated now since it
 	 * has an impact on htab_initialize (due to the large page it
@@ -681,6 +673,23 @@ static int __init pmac_probe(int platform)
 	alloc_dart_table();
 #endif
 
+#ifdef CONFIG_PPC32
+	/* isa_io_base gets set in pmac_pci_init */
+	isa_mem_base = PMAC_ISA_MEM_BASE;
+	pci_dram_offset = PMAC_PCI_DRAM_OFFSET;
+	ISA_DMA_THRESHOLD = ~0L;
+	DMA_MODE_READ = 1;
+	DMA_MODE_WRITE = 2;
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+        ppc_ide_md.ide_init_hwif	= pmac_ide_init_hwif_ports;
+        ppc_ide_md.default_io_base	= pmac_ide_get_base;
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
+
+#endif /* CONFIG_PPC32 */
+
 #ifdef CONFIG_PMAC_SMU
 	/*
 	 * SMU based G5s need some memory below 2Gb, at least the current
@@ -709,10 +718,8 @@ static int pmac_pci_probe_mode(struct pci_bus *bus)
 }
 #endif
 
-struct machdep_calls __initdata pmac_md = {
-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64)
-	.cpu_die		= generic_mach_cpu_die,
-#endif
+define_machine(powermac) {
+	.name			= "PowerMac",
 	.probe			= pmac_probe,
 	.setup_arch		= pmac_setup_arch,
 	.init_early		= pmac_init_early,
@@ -733,7 +740,7 @@ struct machdep_calls __initdata pmac_md = {
 	.progress		= udbg_progress,
 #ifdef CONFIG_PPC64
 	.pci_probe_mode		= pmac_pci_probe_mode,
-	.idle_loop		= native_idle,
+	.power_save		= power4_idle,
 	.enable_pmcs		= power4_enable_pmcs,
 #ifdef CONFIG_KEXEC
 	.machine_kexec		= default_machine_kexec,
@@ -746,4 +753,7 @@ struct machdep_calls __initdata pmac_md = {
 	.pcibios_after_init	= pmac_pcibios_after_init,
 	.phys_mem_access_prot	= pci_phys_mem_access_prot,
 #endif
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64)
+	.cpu_die		= generic_mach_cpu_die,
+#endif
 };
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 5d9afa1fa02..890758aa966 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -336,10 +336,10 @@ static struct pmu_sleep_notifier time_sleep_notifier = {
  */
 void __init pmac_calibrate_decr(void)
 {
-#ifdef CONFIG_PM
+#if defined(CONFIG_PM) && defined(CONFIG_ADB_PMU)
 	/* XXX why here? */
 	pmu_register_sleep_notifier(&time_sleep_notifier);
-#endif /* CONFIG_PM */
+#endif
 
 	generic_calibrate_decr();
 
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index c4352a8db64..b4fa9f03b46 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -116,7 +116,7 @@ void udbg_scc_init(int force_scc)
 	/* Setup for 57600 8N1 */
 	if (ch == ch_a)
 		addr += 0x20;
-	sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ;
+	sccc = ioremap(addr & PAGE_MASK, PAGE_SIZE) ;
 	sccc += addr & ~PAGE_MASK;
 	sccd = sccc + 0x10;
 
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 2ab9dcdfb41..9b2b1cb117b 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1018,7 +1018,7 @@ static int __init eeh_init_proc(void)
 {
 	struct proc_dir_entry *e;
 
-	if (platform_is_pseries()) {
+	if (machine_is(pseries)) {
 		e = create_proc_entry("ppc64/eeh", 0, NULL);
 		if (e)
 			e->proc_fops = &proc_eeh_operations;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b811d5ff92f..cc2495a0cdd 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -257,6 +257,7 @@ void handle_eeh_events (struct eeh_event *event)
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+	const char *pci_str, *drv_str;
 
 	frozen_dn = find_device_pe(event->dn);
 	frozen_bus = pcibios_find_pci_bus(frozen_dn);
@@ -291,6 +292,13 @@ void handle_eeh_events (struct eeh_event *event)
 
 	frozen_pdn = PCI_DN(frozen_dn);
 	frozen_pdn->eeh_freeze_count++;
+
+	pci_str = pci_name (frozen_pdn->pcidev);
+	drv_str = pcid_name (frozen_pdn->pcidev);
+	if (!pci_str) {
+		pci_str = pci_name (event->dev);
+		drv_str = pcid_name (event->dev);
+	}
 	
 	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto hard_fail;
@@ -306,9 +314,7 @@ void handle_eeh_events (struct eeh_event *event)
 	eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
 	printk(KERN_WARNING
 	   "EEH: This PCI device has failed %d times since last reboot: %s - %s\n",
-		frozen_pdn->eeh_freeze_count,
-		pci_name (frozen_pdn->pcidev), 
-		pcid_name(frozen_pdn->pcidev));
+		frozen_pdn->eeh_freeze_count, drv_str, pci_str);
 
 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
@@ -360,9 +366,7 @@ hard_fail:
 	   "EEH: PCI device %s - %s has failed %d times \n"
 	   "and has been permanently disabled.  Please try reseating\n"
 	   "this device or replacing it.\n",
-		pci_name (frozen_pdn->pcidev), 
-		pcid_name(frozen_pdn->pcidev), 
-		frozen_pdn->eeh_freeze_count);
+		drv_str, pci_str, frozen_pdn->eeh_freeze_count);
 
 	eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);
 
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 989f4bc136c..c01d8f0cbe6 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -91,7 +91,7 @@ void __init fw_feature_init(void)
 				continue;
 
 			/* we have a match */
-			ppc64_firmware_features |=
+			powerpc_firmware_features |=
 				firmware_features_table[i].val;
 			break;
 		}
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index 138e128a388..ba6befd9663 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -62,6 +62,11 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
 	unsigned long *lbuf = (unsigned long *) buf;
 	long ret;
 
+
+	/* hcall will ret H_PARAMETER if 'count' exceeds firmware max.*/
+	if (count > MAX_VIO_PUT_CHARS)
+		count = MAX_VIO_PUT_CHARS;
+
 	ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
 				 lbuf[1]);
 	if (ret == H_Success)
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 946ad59e335..e97e67f5e07 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -120,7 +120,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
 	int i;
 	unsigned int reg;
 
-	if (!platform_is_pseries())
+	if (!machine_is(pseries))
 		return;
 
 	printk("Using INTC for W82c105 IDE controller.\n");
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 44abdeb9ca0..6bfacc21708 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
+#include <asm/firmware.h>
 
 static struct pci_bus *
 find_bus_among_children(struct pci_bus *bus,
@@ -152,20 +153,24 @@ pcibios_pci_config_bridge(struct pci_dev *dev)
 void
 pcibios_add_pci_devices(struct pci_bus * bus)
 {
-	int slotno, num;
+	int slotno, num, mode;
 	struct pci_dev *dev;
 	struct device_node *dn = pci_bus_to_OF_node(bus);
 
 	eeh_add_device_tree_early(dn);
 
-	if (_machine == PLATFORM_PSERIES_LPAR) {
+	mode = PCI_PROBE_NORMAL;
+	if (ppc_md.pci_probe_mode)
+		mode = ppc_md.pci_probe_mode(bus);
+
+	if (mode == PCI_PROBE_DEVTREE) {
 		/* use ofdt-based probe */
 		of_scan_bus(dn, bus);
 		if (!list_empty(&bus->devices)) {
 			pcibios_fixup_new_pci_devices(bus, 0);
 			pci_bus_add_devices(bus);
 		}
-	} else {
+	} else if (mode == PCI_PROBE_NORMAL) {
 		/* use legacy probe */
 		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
 		num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index b046bcf7443..9639c66b453 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -132,7 +132,7 @@ static int __init init_ras_IRQ(void)
 		of_node_put(np);
 	}
 
-	return 1;
+	return 0;
 }
 __initcall(init_ras_IRQ);
 
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 5ad90676567..1773103354b 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -17,8 +17,9 @@
 #include <linux/proc_fs.h>
 
 #include <asm/prom.h>
-#include <asm/pSeries_reconfig.h>
+#include <asm/machdep.h>
 #include <asm/uaccess.h>
+#include <asm/pSeries_reconfig.h>
 
 
 
@@ -508,7 +509,7 @@ static int proc_ppc64_create_ofdt(void)
 {
 	struct proc_dir_entry *ent;
 
-	if (!platform_is_pseries())
+	if (!machine_is(pseries))
 		return 0;
 
 	ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index a6f628d4c9d..fcc4d561a23 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -27,6 +27,7 @@
 #include <asm/prom.h>
 #include <asm/nvram.h>
 #include <asm/atomic.h>
+#include <asm/machdep.h>
 
 #if 0
 #define DEBUG(A...)	printk(KERN_ERR A)
@@ -481,7 +482,7 @@ static int __init rtas_init(void)
 {
 	struct proc_dir_entry *entry;
 
-	if (!platform_is_pseries())
+	if (!machine_is(pseries))
 		return 0;
 
 	/* No RTAS */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 44d5c7fdcd9..b2fbf8ba8fb 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -81,8 +81,8 @@ extern void find_udbg_vterm(void);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle(void);
-static void pseries_dedicated_idle(void);
+static void pseries_shared_idle_sleep(void);
+static void pseries_dedicated_idle_sleep(void);
 
 struct mpic *pSeries_mpic;
 
@@ -236,14 +236,13 @@ static void __init pSeries_setup_arch(void)
 		vpa_init(boot_cpuid);
 		if (get_lppaca()->shared_proc) {
 			printk(KERN_INFO "Using shared processor idle loop\n");
-			ppc_md.idle_loop = pseries_shared_idle;
+			ppc_md.power_save = pseries_shared_idle_sleep;
 		} else {
 			printk(KERN_INFO "Using dedicated idle loop\n");
-			ppc_md.idle_loop = pseries_dedicated_idle;
+			ppc_md.power_save = pseries_dedicated_idle_sleep;
 		}
 	} else {
 		printk(KERN_INFO "Using default idle loop\n");
-		ppc_md.idle_loop = default_idle;
 	}
 
 	if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -373,156 +372,123 @@ static int pSeries_check_legacy_ioport(unsigned int baseport)
 /*
  * Called very early, MMU is off, device-tree isn't unflattened
  */
-extern struct machdep_calls pSeries_md;
 
-static int __init pSeries_probe(int platform)
+static int __init pSeries_probe_hypertas(unsigned long node,
+					 const char *uname, int depth,
+					 void *data)
 {
-	if (platform != PLATFORM_PSERIES &&
-	    platform != PLATFORM_PSERIES_LPAR)
-		return 0;
-
-	/* if we have some ppc_md fixups for LPAR to do, do
-	 * it here ...
-	 */
+	if (depth != 1 ||
+	    (strcmp(uname, "rtas") != 0 && strcmp(uname, "rtas@0") != 0))
+ 		return 0;
 
-	if (platform == PLATFORM_PSERIES_LPAR)
-		ppc64_firmware_features |= FW_FEATURE_LPAR;
+	if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL) != NULL)
+ 		powerpc_firmware_features |= FW_FEATURE_LPAR;
 
-	return 1;
+ 	return 1;
 }
 
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static inline void dedicated_idle_sleep(unsigned int cpu)
+static int __init pSeries_probe(void)
 {
-	struct lppaca *plppaca = &lppaca[cpu ^ 1];
+ 	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
+ 					  "device_type", NULL);
+ 	if (dtype == NULL)
+ 		return 0;
+ 	if (strcmp(dtype, "chrp"))
+		return 0;
 
-	/* Only sleep if the other thread is not idle */
-	if (!(plppaca->idle)) {
-		local_irq_disable();
+	DBG("pSeries detected, looking for LPAR capability...\n");
 
-		/*
-		 * We are about to sleep the thread and so wont be polling any
-		 * more.
-		 */
-		clear_thread_flag(TIF_POLLING_NRFLAG);
-		smp_mb__after_clear_bit();
-
-		/*
-		 * SMT dynamic mode. Cede will result in this thread going
-		 * dormant, if the partner thread is still doing work.  Thread
-		 * wakes up if partner goes idle, an interrupt is presented, or
-		 * a prod occurs.  Returning from the cede enables external
-		 * interrupts.
-		 */
-		if (!need_resched())
-			cede_processor();
-		else
-			local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
-	} else {
-		/*
-		 * Give the HV an opportunity at the processor, since we are
-		 * not doing any work.
-		 */
-		poll_pending();
-	}
+	/* Now try to figure out if we are running on LPAR */
+	of_scan_flat_dt(pSeries_probe_hypertas, NULL);
+
+	DBG("Machine is%s LPAR !\n",
+	    (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
+
+	return 1;
 }
 
-static void pseries_dedicated_idle(void)
+
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static void pseries_dedicated_idle_sleep(void)
 { 
 	unsigned int cpu = smp_processor_id();
 	unsigned long start_snooze;
 	unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
-	set_thread_flag(TIF_POLLING_NRFLAG);
-
-	while (1) {
-		/*
-		 * Indicate to the HV that we are idle. Now would be
-		 * a good time to find other work to dispatch.
-		 */
-		get_lppaca()->idle = 1;
-
-		if (!need_resched()) {
-			start_snooze = get_tb() +
-				*smt_snooze_delay * tb_ticks_per_usec;
-
-			while (!need_resched() && !cpu_is_offline(cpu)) {
-				ppc64_runlatch_off();
-
-				/*
-				 * Go into low thread priority and possibly
-				 * low power mode.
-				 */
-				HMT_low();
-				HMT_very_low();
-
-				if (*smt_snooze_delay != 0 &&
-				    get_tb() > start_snooze) {
-					HMT_medium();
-					dedicated_idle_sleep(cpu);
-				}
-
-			}
-
-			HMT_medium();
-		}
-
-		get_lppaca()->idle = 0;
-		ppc64_runlatch_on();
 
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
 
-		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-			cpu_die();
-	}
-}
+	/*
+	 * We come in with interrupts disabled, and need_resched()
+	 * has been checked recently.  If we should poll for a little
+	 * while, do so.
+	 */
+	if (*smt_snooze_delay) {
+		start_snooze = get_tb() +
+			*smt_snooze_delay * tb_ticks_per_usec;
+		local_irq_enable();
+		set_thread_flag(TIF_POLLING_NRFLAG);
 
-static void pseries_shared_idle(void)
-{
-	unsigned int cpu = smp_processor_id();
+		while (get_tb() < start_snooze) {
+			if (need_resched() || cpu_is_offline(cpu))
+				goto out;
+			ppc64_runlatch_off();
+			HMT_low();
+			HMT_very_low();
+		}
 
-	while (1) {
-		/*
-		 * Indicate to the HV that we are idle. Now would be
-		 * a good time to find other work to dispatch.
-		 */
-		get_lppaca()->idle = 1;
+		HMT_medium();
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb();
+		local_irq_disable();
+		if (need_resched() || cpu_is_offline(cpu))
+			goto out;
+	}
 
-		while (!need_resched() && !cpu_is_offline(cpu)) {
-			local_irq_disable();
-			ppc64_runlatch_off();
+	/*
+	 * Cede if the other thread is not idle, so that it can
+	 * go single-threaded.  If the other thread is idle,
+	 * we ask the hypervisor if it has pending work it
+	 * wants to do and cede if it does.  Otherwise we keep
+	 * polling in order to reduce interrupt latency.
+	 *
+	 * Doing the cede when the other thread is active will
+	 * result in this thread going dormant, meaning the other
+	 * thread gets to run in single-threaded (ST) mode, which
+	 * is slightly faster than SMT mode with this thread at
+	 * very low priority.  The cede enables interrupts, which
+	 * doesn't matter here.
+	 */
+	if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending)
+		cede_processor();
 
-			/*
-			 * Yield the processor to the hypervisor.  We return if
-			 * an external interrupt occurs (which are driven prior
-			 * to returning here) or if a prod occurs from another
-			 * processor. When returning here, external interrupts
-			 * are enabled.
-			 *
-			 * Check need_resched() again with interrupts disabled
-			 * to avoid a race.
-			 */
-			if (!need_resched())
-				cede_processor();
-			else
-				local_irq_enable();
-
-			HMT_medium();
-		}
+out:
+	HMT_medium();
+	get_lppaca()->idle = 0;
+}
 
-		get_lppaca()->idle = 0;
-		ppc64_runlatch_on();
+static void pseries_shared_idle_sleep(void)
+{
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
 
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
+	/*
+	 * Yield the processor to the hypervisor.  We return if
+	 * an external interrupt occurs (which are driven prior
+	 * to returning here) or if a prod occurs from another
+	 * processor. When returning here, external interrupts
+	 * are enabled.
+	 */
+	cede_processor();
 
-		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-			cpu_die();
-	}
+	get_lppaca()->idle = 0;
 }
 
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
@@ -553,7 +519,8 @@ static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 }
 #endif
 
-struct machdep_calls __initdata pSeries_md = {
+define_machine(pseries) {
+	.name			= "pSeries",
 	.probe			= pSeries_probe,
 	.setup_arch		= pSeries_setup_arch,
 	.init_early		= pSeries_init_early,
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index eb86cdb9b80..4864cb32be2 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -500,7 +500,7 @@ nextnode:
 	     np;
 	     np = of_find_node_by_type(np, "cpu")) {
 		ireg = (uint *)get_property(np, "reg", &ilen);
-		if (ireg && ireg[0] == boot_cpuid_phys) {
+		if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
 			ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s",
 						    &ilen);
 			i = ilen / sizeof(int);
@@ -541,7 +541,7 @@ nextnode:
 		ops = &pSeriesLP_ops;
 	else {
 #ifdef CONFIG_SMP
-		for_each_cpu(i) {
+		for_each_possible_cpu(i) {
 			int hard_id;
 
 			/* FIXME: Do this dynamically! --RR */