aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig52
-rw-r--r--arch/powerpc/Kconfig.debug10
-rw-r--r--arch/powerpc/Makefile2
-rw-r--r--arch/powerpc/configs/g5_defconfig261
-rw-r--r--arch/powerpc/configs/pseries_defconfig206
-rw-r--r--arch/powerpc/kernel/Makefile37
-rw-r--r--arch/powerpc/kernel/asm-offsets.c51
-rw-r--r--arch/powerpc/kernel/cpu_setup_power4.S233
-rw-r--r--arch/powerpc/kernel/cputable.c33
-rw-r--r--arch/powerpc/kernel/dma_64.c151
-rw-r--r--arch/powerpc/kernel/firmware.c45
-rw-r--r--arch/powerpc/kernel/fpu.S24
-rw-r--r--arch/powerpc/kernel/head_32.S1
-rw-r--r--arch/powerpc/kernel/head_64.S385
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/ioctl32.c45
-rw-r--r--arch/powerpc/kernel/iomap.c146
-rw-r--r--arch/powerpc/kernel/iommu.c572
-rw-r--r--arch/powerpc/kernel/irq.c479
-rw-r--r--arch/powerpc/kernel/kprobes.c459
-rw-r--r--arch/powerpc/kernel/lparcfg.c608
-rw-r--r--arch/powerpc/kernel/lparmap.c2
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c358
-rw-r--r--arch/powerpc/kernel/misc_32.S23
-rw-r--r--arch/powerpc/kernel/misc_64.S78
-rw-r--r--arch/powerpc/kernel/module_64.c455
-rw-r--r--arch/powerpc/kernel/paca.c135
-rw-r--r--arch/powerpc/kernel/pci_64.c1319
-rw-r--r--arch/powerpc/kernel/pci_direct_iommu.c94
-rw-r--r--arch/powerpc/kernel/pci_dn.c230
-rw-r--r--arch/powerpc/kernel/pci_iommu.c128
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c126
-rw-r--r--arch/powerpc/kernel/process.c10
-rw-r--r--arch/powerpc/kernel/prom.c130
-rw-r--r--arch/powerpc/kernel/prom_init.c196
-rw-r--r--arch/powerpc/kernel/ptrace.c43
-rw-r--r--arch/powerpc/kernel/rtas-proc.c3
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c105
-rw-r--r--arch/powerpc/kernel/rtas.c10
-rw-r--r--arch/powerpc/kernel/rtas_pci.c513
-rw-r--r--arch/powerpc/kernel/setup-common.c75
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c23
-rw-r--r--arch/powerpc/kernel/setup_64.c205
-rw-r--r--arch/powerpc/kernel/signal_32.c21
-rw-r--r--arch/powerpc/kernel/signal_64.c7
-rw-r--r--arch/powerpc/kernel/smp.c11
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/sysfs.c383
-rw-r--r--arch/powerpc/kernel/time.c47
-rw-r--r--arch/powerpc/kernel/traps.c18
-rw-r--r--arch/powerpc/kernel/udbg.c125
-rw-r--r--arch/powerpc/kernel/udbg_16550.c123
-rw-r--r--arch/powerpc/kernel/udbg_scc.c135
-rw-r--r--arch/powerpc/kernel/vdso.c746
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile40
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S67
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S85
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S319
-rw-r--r--arch/powerpc/kernel/vdso32/note.S25
-rw-r--r--arch/powerpc/kernel/vdso32/sigtramp.S300
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S117
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S13
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile35
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S66
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S85
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S249
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S295
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S116
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S13
-rw-r--r--arch/powerpc/kernel/vio.c27
-rw-r--r--arch/powerpc/lib/bitops.c2
-rw-r--r--arch/powerpc/lib/copypage_64.S2
-rw-r--r--arch/powerpc/lib/copyuser_64.S4
-rw-r--r--arch/powerpc/lib/locks.c1
-rw-r--r--arch/powerpc/mm/fault.c17
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c2
-rw-r--r--arch/powerpc/mm/hash_low_64.S613
-rw-r--r--arch/powerpc/mm/hash_native_64.c377
-rw-r--r--arch/powerpc/mm/hash_utils_64.c570
-rw-r--r--arch/powerpc/mm/hugetlbpage.c140
-rw-r--r--arch/powerpc/mm/init_32.c3
-rw-r--r--arch/powerpc/mm/init_64.c33
-rw-r--r--arch/powerpc/mm/mem.c66
-rw-r--r--arch/powerpc/mm/numa.c370
-rw-r--r--arch/powerpc/mm/pgtable_64.c24
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c15
-rw-r--r--arch/powerpc/mm/slb.c102
-rw-r--r--arch/powerpc/mm/slb_low.S229
-rw-r--r--arch/powerpc/mm/stab.c47
-rw-r--r--arch/powerpc/mm/tlb_64.c32
-rw-r--r--arch/powerpc/oprofile/Kconfig6
-rw-r--r--arch/powerpc/oprofile/op_model_fsl_booke.c2
-rw-r--r--arch/powerpc/oprofile/op_model_power4.c28
-rw-r--r--arch/powerpc/platforms/chrp/setup.c4
-rw-r--r--arch/powerpc/platforms/iseries/htab.c65
-rw-r--r--arch/powerpc/platforms/iseries/hvlog.c4
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c74
-rw-r--r--arch/powerpc/platforms/iseries/irq.c54
-rw-r--r--arch/powerpc/platforms/iseries/misc.S1
-rw-r--r--arch/powerpc/platforms/iseries/pci.c37
-rw-r--r--arch/powerpc/platforms/iseries/setup.c58
-rw-r--r--arch/powerpc/platforms/iseries/smp.c1
-rw-r--r--arch/powerpc/platforms/iseries/vio.c39
-rw-r--r--arch/powerpc/platforms/iseries/viopath.c16
-rw-r--r--arch/powerpc/platforms/maple/pci.c3
-rw-r--r--arch/powerpc/platforms/powermac/Makefile3
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_32.c (renamed from arch/powerpc/platforms/powermac/cpufreq.c)15
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_64.c323
-rw-r--r--arch/powerpc/platforms/powermac/pci.c3
-rw-r--r--arch/powerpc/platforms/powermac/pic.c3
-rw-r--r--arch/powerpc/platforms/powermac/setup.c13
-rw-r--r--arch/powerpc/platforms/powermac/smp.c51
-rw-r--r--arch/powerpc/platforms/powermac/time.c9
-rw-r--r--arch/powerpc/platforms/pseries/Makefile5
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c1212
-rw-r--r--arch/powerpc/platforms/pseries/eeh_event.c155
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c74
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c251
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c5
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c118
-rw-r--r--arch/powerpc/platforms/pseries/pci.c3
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h10
-rw-r--r--arch/powerpc/platforms/pseries/ras.c2
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c8
-rw-r--r--arch/powerpc/platforms/pseries/rtasd.c9
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c235
-rw-r--r--arch/powerpc/platforms/pseries/setup.c72
-rw-r--r--arch/powerpc/platforms/pseries/smp.c5
-rw-r--r--arch/powerpc/platforms/pseries/xics.c7
-rw-r--r--arch/powerpc/sysdev/i8259.c5
-rw-r--r--arch/powerpc/sysdev/u3_iommu.c3
-rw-r--r--arch/powerpc/xmon/Makefile2
-rw-r--r--arch/powerpc/xmon/nonstdio.c134
-rw-r--r--arch/powerpc/xmon/nonstdio.h28
-rw-r--r--arch/powerpc/xmon/setjmp.S176
-rw-r--r--arch/powerpc/xmon/start_32.c235
-rw-r--r--arch/powerpc/xmon/start_64.c167
-rw-r--r--arch/powerpc/xmon/start_8xx.c255
-rw-r--r--arch/powerpc/xmon/subr_prf.c54
-rw-r--r--arch/powerpc/xmon/xmon.c76
143 files changed, 15600 insertions, 2711 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fb..94df74bcc0e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -261,7 +261,7 @@ config PPC_ISERIES
config EMBEDDED6xx
bool "Embedded 6xx/7xx/7xxx-based board"
- depends on PPC32
+ depends on PPC32 && BROKEN
config APUS
bool "Amiga-APUS"
@@ -305,7 +305,7 @@ config PPC_PMAC64
config PPC_PREP
bool " PowerPC Reference Platform (PReP) based machines"
- depends on PPC_MULTIPLATFORM && PPC32
+ depends on PPC_MULTIPLATFORM && PPC32 && BROKEN
select PPC_I8259
select PPC_INDIRECT_PCI
default y
@@ -404,6 +404,14 @@ config CPU_FREQ_PMAC
this currently includes some models of iBook & Titanium
PowerBook.
+config CPU_FREQ_PMAC64
+ bool "Support for some Apple G5s"
+ depends on CPU_FREQ && PMAC_SMU && PPC64
+ select CPU_FREQ_TABLE
+ help
+ This adds support for frequency switching on Apple iMac G5,
+ and some of the more recent desktop G5 machines as well.
+
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
depends on 6xx && (PPC_PREP || PPC_PMAC)
@@ -484,6 +492,7 @@ source "fs/Kconfig.binfmt"
config FORCE_MAX_ZONEORDER
int
depends on PPC64
+ default "9" if PPC_64K_PAGES
default "13"
config MATH_EMULATION
@@ -572,17 +581,12 @@ config ARCH_FLATMEM_ENABLE
def_bool y
depends on PPC64 && !NUMA
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool y
- depends on SMP && PPC_PSERIES
-
-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on ARCH_DISCONTIGMEM_ENABLE
-config ARCH_SPARSEMEM_ENABLE
+config ARCH_SPARSEMEM_DEFAULT
def_bool y
- depends on ARCH_DISCONTIGMEM_ENABLE
+ depends on SMP && PPC_PSERIES
source "mm/Kconfig"
@@ -590,6 +594,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
depends on NEED_MULTIPLE_NODES
+config ARCH_MEMORY_PROBE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
# between a node's start and end pfns, it may not
@@ -603,6 +611,16 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_64K_PAGES
+ bool "64k page size"
+ depends on PPC64
+ help
+ This option changes the kernel logical page size to 64k. On machines
+ without processor support for 64k pages, the kernel will simulate
+ them by loading each individual 4k page on demand transparently,
+ while on hardware with such support, it will be used to map
+ normal application pages.
+
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on PPC64 && SMP
@@ -907,8 +925,22 @@ source "arch/powerpc/platforms/iseries/Kconfig"
source "lib/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/powerpc/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ depends on PPC64
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 0baf64ec80d..30a30bf559e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -9,16 +9,6 @@ config DEBUG_STACKOVERFLOW
This option will cause messages to be printed if free stack space
drops below a certain limit.
-config KPROBES
- bool "Kprobes"
- depends on DEBUG_KERNEL && PPC64
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL && PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 5bc11bd36c1..d41ad2e675d 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -187,7 +187,7 @@ archprepare: checkbin
# Temporary hack until we have migrated to asm-powerpc
include/asm: arch/$(ARCH)/include/asm
-arch/$(ARCH)/include/asm:
+arch/$(ARCH)/include/asm: FORCE
$(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi
$(Q)ln -fsn $(srctree)/include/asm-$(OLDARCH) arch/$(ARCH)/include/asm
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 6323065fbf2..e76854f8c12 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -1,18 +1,32 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-rc4
-# Thu Oct 20 08:30:23 2005
+# Linux kernel version: 2.6.14
+# Mon Nov 7 13:37:59 2005
#
+CONFIG_PPC64=y
CONFIG_64BIT=y
+CONFIG_PPC_MERGE=y
CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC=y
CONFIG_EARLY_PRINTK=y
CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
+
+#
+# Processor support
+#
+CONFIG_POWER4_ONLY=y
+CONFIG_POWER4=y
+CONFIG_PPC_FPU=y
+CONFIG_ALTIVEC=y
+CONFIG_PPC_STD_MMU=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
#
# Code maturity level options
@@ -67,30 +81,60 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
-CONFIG_SYSVIPC_COMPAT=y
#
# Platform support
#
-# CONFIG_PPC_ISERIES is not set
CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_ISERIES is not set
+# CONFIG_EMBEDDED6xx is not set
+# CONFIG_APUS is not set
# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_BPA is not set
CONFIG_PPC_PMAC=y
+CONFIG_PPC_PMAC64=y
# CONFIG_PPC_MAPLE is not set
-CONFIG_PPC=y
-CONFIG_PPC64=y
+# CONFIG_PPC_CELL is not set
CONFIG_PPC_OF=y
-CONFIG_MPIC=y
-CONFIG_ALTIVEC=y
-CONFIG_KEXEC=y
CONFIG_U3_DART=y
-CONFIG_PPC_PMAC64=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_POWER4_ONLY=y
+CONFIG_MPIC=y
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_PMAC64=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Kernel options
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_FORCE_MAX_ZONEORDER=13
CONFIG_IOMMU_VMERGE=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_NUMA is not set
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
@@ -100,28 +144,21 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_NUMA is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PPC_64K_PAGES is not set
# CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_PREEMPT_BKL is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_SECCOMP=y
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_HOTPLUG_CPU is not set
CONFIG_PROC_DEVICETREE=y
# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+CONFIG_SECCOMP=y
CONFIG_ISA_DMA_API=y
#
-# Bus Options
+# Bus options
#
+CONFIG_GENERIC_ISA_DMA=y
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_INDIRECT_PCI is not set
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_LEGACY_PROC=y
@@ -136,6 +173,7 @@ CONFIG_PCI_LEGACY_PROC=y
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
+CONFIG_KERNEL_START=0xc000000000000000
#
# Networking
@@ -276,6 +314,10 @@ CONFIG_LLC=y
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
CONFIG_NET_CLS_ROUTE=y
@@ -348,6 +390,11 @@ CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
# CONFIG_ATA_OVER_ETH is not set
#
@@ -449,6 +496,7 @@ CONFIG_SCSI_SPI_ATTRS=y
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
@@ -465,10 +513,12 @@ CONFIG_SCSI_SATA_SVW=y
# CONFIG_SCSI_ATA_PIIX is not set
# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_PDC_ADMA is not set
# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_SX4 is not set
# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
@@ -567,6 +617,9 @@ CONFIG_IEEE1394_RAWIO=y
CONFIG_ADB_PMU=y
CONFIG_PMAC_SMU=y
CONFIG_THERM_PM72=y
+CONFIG_WINDFARM=y
+CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM91=y
#
# Network device support
@@ -603,6 +656,7 @@ CONFIG_SUNGEM=y
# CONFIG_NET_TULIP is not set
# CONFIG_HP100 is not set
# CONFIG_NET_PCI is not set
+# CONFIG_FEC_8XX is not set
#
# Ethernet (1000 Mbit)
@@ -768,6 +822,7 @@ CONFIG_MAX_RAW_DEVS=256
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
@@ -820,6 +875,7 @@ CONFIG_I2C_PMAC_SMU=y
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_RTC_X1205_I2C is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -876,10 +932,9 @@ CONFIG_FB_OF=y
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_NVIDIA is not set
-CONFIG_FB_RIVA=y
-# CONFIG_FB_RIVA_I2C is not set
-# CONFIG_FB_RIVA_DEBUG is not set
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+# CONFIG_FB_RIVA is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=y
@@ -924,7 +979,96 @@ CONFIG_LCD_DEVICE=y
#
# Sound
#
-# CONFIG_SOUND is not set
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_GENERIC_DRIVER=y
+
+#
+# Generic devices
+#
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# PCI devices
+#
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CS4281 is not set
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_EMU10K1X is not set
+# CONFIG_SND_CA0106 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_HDSPM is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_AD1889 is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+# CONFIG_SND_FM801 is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VIA82XX_MODEM is not set
+# CONFIG_SND_VX222 is not set
+# CONFIG_SND_HDA_INTEL is not set
+
+#
+# ALSA PowerMac devices
+#
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_POWERMAC_AUTO_DRC=y
+
+#
+# USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
#
# USB support
@@ -958,12 +1102,16 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=y
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1074,6 +1222,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
+# CONFIG_USB_SERIAL_NOKIA_DKU2 is not set
CONFIG_USB_SERIAL_PL2303=m
# CONFIG_USB_SERIAL_HP4X is not set
CONFIG_USB_SERIAL_SAFE=m
@@ -1311,6 +1460,20 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+
+#
# Profiling support
#
CONFIG_PROFILING=y
@@ -1331,12 +1494,14 @@ CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUGGER is not set
-# CONFIG_PPCDBG is not set
CONFIG_IRQSTACKS=y
+CONFIG_BOOTX_TEXT=y
#
# Security options
@@ -1376,17 +1541,3 @@ CONFIG_CRYPTO_TEST=m
#
# Hardware crypto devices
#
-
-#
-# Library routines
-#
-CONFIG_CRC_CCITT=m
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 9f09dff9e11..913962c1dae 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -1,18 +1,33 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-rc4
-# Thu Oct 20 08:32:17 2005
+# Linux kernel version: 2.6.15-rc1
+# Mon Nov 14 15:27:00 2005
#
+CONFIG_PPC64=y
CONFIG_64BIT=y
+CONFIG_PPC_MERGE=y
CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC=y
CONFIG_EARLY_PRINTK=y
CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
+
+#
+# Processor support
+#
+# CONFIG_POWER4_ONLY is not set
+CONFIG_POWER3=y
+CONFIG_POWER4=y
+CONFIG_PPC_FPU=y
+CONFIG_ALTIVEC=y
+CONFIG_PPC_STD_MMU=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=128
#
# Code maturity level options
@@ -68,75 +83,103 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
-CONFIG_SYSVIPC_COMPAT=y
+
+#
+# Block layer
+#
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
#
# Platform support
#
-# CONFIG_PPC_ISERIES is not set
CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_ISERIES is not set
+# CONFIG_EMBEDDED6xx is not set
+# CONFIG_APUS is not set
CONFIG_PPC_PSERIES=y
-# CONFIG_PPC_BPA is not set
# CONFIG_PPC_PMAC is not set
# CONFIG_PPC_MAPLE is not set
-CONFIG_PPC=y
-CONFIG_PPC64=y
+# CONFIG_PPC_CELL is not set
CONFIG_PPC_OF=y
CONFIG_XICS=y
+# CONFIG_U3_DART is not set
CONFIG_MPIC=y
-CONFIG_ALTIVEC=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_KEXEC=y
+CONFIG_PPC_RTAS=y
+CONFIG_RTAS_ERROR_LOGGING=y
+CONFIG_RTAS_PROC=y
+CONFIG_RTAS_FLASH=m
+# CONFIG_MMIO_NVRAM is not set
CONFIG_IBMVIO=y
-# CONFIG_U3_DART is not set
-# CONFIG_BOOTX_TEXT is not set
-# CONFIG_POWER4_ONLY is not set
+# CONFIG_PPC_MPC106 is not set
+# CONFIG_GENERIC_TBSYNC is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Kernel options
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_FORCE_MAX_ZONEORDER=13
CONFIG_IOMMU_VMERGE=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=128
+CONFIG_HOTPLUG_CPU=y
+CONFIG_KEXEC=y
+# CONFIG_IRQ_ALL_CPUS is not set
+CONFIG_PPC_SPLPAR=y
+CONFIG_EEH=y
+CONFIG_SCANLOG=m
+CONFIG_LPARCFG=y
+CONFIG_NUMA=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_SELECT_MEMORY_MODEL=y
# CONFIG_FLATMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM_MANUAL=y
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_DISCONTIGMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
CONFIG_NEED_MULTIPLE_NODES=y
+CONFIG_HAVE_MEMORY_PRESENT=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPARSEMEM_EXTREME=y
+# CONFIG_MEMORY_HOTPLUG is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_NODES_SPAN_OTHER_NODES=y
-CONFIG_NUMA=y
+# CONFIG_PPC_64K_PAGES is not set
CONFIG_SCHED_SMT=y
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_PREEMPT_BKL is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_EEH=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_PPC_RTAS=y
-CONFIG_RTAS_PROC=y
-CONFIG_RTAS_FLASH=m
-CONFIG_SCANLOG=m
-CONFIG_LPARCFG=y
-CONFIG_SECCOMP=y
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-CONFIG_HOTPLUG_CPU=y
CONFIG_PROC_DEVICETREE=y
# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+CONFIG_SECCOMP=y
CONFIG_ISA_DMA_API=y
#
-# Bus Options
+# Bus options
#
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC_I8259=y
+# CONFIG_PPC_INDIRECT_PCI is not set
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_LEGACY_PROC=y
@@ -156,6 +199,7 @@ CONFIG_HOTPLUG_PCI=m
# CONFIG_HOTPLUG_PCI_SHPC is not set
CONFIG_HOTPLUG_PCI_RPA=m
CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_KERNEL_START=0xc000000000000000
#
# Networking
@@ -197,6 +241,10 @@ CONFIG_TCP_CONG_BIC=y
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# Core Netfilter Configuration
+#
CONFIG_NETFILTER_NETLINK=y
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_NETLINK_LOG=m
@@ -299,6 +347,10 @@ CONFIG_LLC=y
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
CONFIG_NET_CLS_ROUTE=y
@@ -368,14 +420,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set
#
@@ -473,6 +517,7 @@ CONFIG_SCSI_ISCSI_ATTRS=m
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
@@ -559,6 +604,7 @@ CONFIG_DM_MULTIPATH_EMC=m
#
# Macintosh device drivers
#
+# CONFIG_WINDFARM is not set
#
# Network device support
@@ -645,7 +691,6 @@ CONFIG_IXGB=m
# CONFIG_IXGB_NAPI is not set
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
-# CONFIG_2BUFF_MODE is not set
#
# Token Ring devices
@@ -674,6 +719,7 @@ CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPP_BSDCOMP=m
+# CONFIG_PPP_MPPE is not set
CONFIG_PPPOE=m
# CONFIG_SLIP is not set
# CONFIG_NET_FC is not set
@@ -784,6 +830,8 @@ CONFIG_HVCS=m
#
# CONFIG_WATCHDOG is not set
# CONFIG_RTC is not set
+CONFIG_GEN_RTC=y
+# CONFIG_GEN_RTC_X is not set
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
@@ -801,6 +849,7 @@ CONFIG_MAX_RAW_DEVS=1024
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
@@ -852,6 +901,7 @@ CONFIG_I2C_ALGOBIT=y
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_RTC_X1205_I2C is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -893,7 +943,6 @@ CONFIG_FB=y
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
CONFIG_FB_MACMODES=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
@@ -905,6 +954,7 @@ CONFIG_FB_OF=y
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
+# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
CONFIG_FB_MATROX=y
@@ -927,7 +977,6 @@ CONFIG_FB_RADEON_I2C=y
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_CYBLA is not set
# CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_VIRTUAL is not set
#
@@ -936,6 +985,7 @@ CONFIG_FB_RADEON_I2C=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
@@ -990,12 +1040,15 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1106,6 +1159,7 @@ CONFIG_INFINIBAND_MTHCA=m
# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
CONFIG_INFINIBAND_IPOIB=m
# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+# CONFIG_INFINIBAND_SRP is not set
#
# SN Devices
@@ -1288,10 +1342,25 @@ CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_UTF8 is not set
#
-# Profiling support
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+
+#
+# Instrumentation Support
#
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
+# CONFIG_KPROBES is not set
#
# Kernel hacking
@@ -1308,14 +1377,15 @@ CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_KPROBES is not set
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUGGER=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-# CONFIG_PPCDBG is not set
CONFIG_IRQSTACKS=y
+# CONFIG_BOOTX_TEXT is not set
#
# Security options
@@ -1355,17 +1425,3 @@ CONFIG_CRYPTO_TEST=m
#
# Hardware crypto devices
#
-
-#
-# Library routines
-#
-CONFIG_CRC_CCITT=m
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b3ae2993efb..4970e3721a8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,7 @@
ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
+CFLAGS_ioctl32.o += -Ifs/
endif
ifeq ($(CONFIG_PPC32),y)
CFLAGS_prom_init.o += -fPIC
@@ -11,17 +12,29 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
- signal_32.o pmc.o
+ irq.o signal_32.o pmc.o vdso.o
+obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
- signal_64.o ptrace32.o systbl.o
+ signal_64.o ptrace32.o systbl.o \
+ paca.o ioctl32.o cpu_setup_power4.o \
+ firmware.o sysfs.o udbg.o
+obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o
+procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
+obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
+obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
+udbgscc-$(CONFIG_PPC64) := udbg_scc.o
+obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
ifeq ($(CONFIG_PPC_MERGE),y)
@@ -36,12 +49,23 @@ extra-y += vmlinux.lds
obj-y += process.o init_task.o time.o \
prom.o traps.o setup-common.o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o
-obj-$(CONFIG_PPC64) += misc_64.o
+obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
obj-$(CONFIG_PPC_OF) += prom_init.o
obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_6xx) += idle_6xx.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+module-$(CONFIG_PPC64) += module_64.o
+obj-$(CONFIG_MODULES) += $(module-y)
+
+pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
+ pci_direct_iommu.o iomap.o
+obj-$(CONFIG_PCI) += $(pci64-y)
+
+kexec64-$(CONFIG_PPC64) += machine_kexec_64.o
+obj-$(CONFIG_KEXEC) += $(kexec64-y)
ifeq ($(CONFIG_PPC_ISERIES),y)
$(obj)/head_64.o: $(obj)/lparmap.s
@@ -49,11 +73,8 @@ AFLAGS_head_64.o += -I$(obj)
endif
else
-# stuff used from here for ARCH=ppc or ARCH=ppc64
+# stuff used from here for ARCH=ppc
smpobj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \
- setup-common.o $(smpobj-y)
-
endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc0..91538d2445b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -37,12 +37,12 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/cache.h>
-#include <asm/systemcfg.h>
#include <asm/compat.h>
#endif
@@ -106,7 +106,6 @@ int main(void)
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
- DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
/* paca */
@@ -125,6 +124,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_64K_PAGES
+ DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
+#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
@@ -249,25 +251,44 @@ int main(void)
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
-#else /* CONFIG_PPC64 */
- /* systemcfg offsets for use by vdso */
- DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
- DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
- DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
- DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
- DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
- DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
- DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
- DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
- DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
+#endif /* ! CONFIG_PPC64 */
- /* timeval/timezone offsets for use by vdso */
+ /* datapage offsets for use by vdso */
+ DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
+ DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
+ DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
+ DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
+ DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
+ DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
+ DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+#ifdef CONFIG_PPC64
+ DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+ DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
+#else
+ DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
+ DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+#endif
+ /* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-#endif /* CONFIG_PPC64 */
+
+ /* Other bits used by the vdso */
+ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
+
return 0;
}
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S
new file mode 100644
index 00000000000..cca942fe611
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power4.S
@@ -0,0 +1,233 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__970_cpu_preinit)
+ /*
+ * Do nothing if not running in HV mode
+ */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ /*
+ * Deal only with PPC970 and PPC970FX.
+ */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+1:
+
+ /* Make sure HID4:rm_ci is off before MMU is turned off, that large
+ * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+ * HID5:DCBZ32_ill
+ */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ mfspr r3,SPRN_HID5
+ rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
+ sync
+ mtspr SPRN_HID5,r3
+ isync
+ sync
+
+ /* Setup some basic HID1 features */
+ mfspr r0,SPRN_HID1
+ li r3,0x1200 /* enable i-fetch cacheability */
+ sldi r3,r3,44 /* and prefetch */
+ or r0,r0,r3
+ mtspr SPRN_HID1,r0
+ mtspr SPRN_HID1,r0
+ isync
+
+ /* Clear HIOR */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
+ isync
+ blr
+
+_GLOBAL(__setup_cpu_power4)
+ blr
+
+_GLOBAL(__setup_cpu_be)
+ /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
+ addi r3, 0, 0
+ ori r3, r3, HID6_LB
+ sldi r3, r3, 32
+ nor r3, r3, r3
+ mfspr r4, SPRN_HID6
+ and r4, r4, r3
+ addi r3, 0, 0x02000
+ sldi r3, r3, 32
+ or r4, r4, r3
+ mtspr SPRN_HID6, r4
+ blr
+
+_GLOBAL(__setup_cpu_ppc970)
+ mfspr r0,SPRN_HID0
+ li r11,5 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,8 /* set NAP and DPM */
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ sync
+ isync
+ blr
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 8
+#define CS_HID4 16
+#define CS_HID5 24
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES,0
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, HID4, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bne 2f
+
+1: /* Save HID0,1,4 and 5 */
+ mfspr r3,SPRN_HID0
+ std r3,CS_HID0(r5)
+ mfspr r3,SPRN_HID1
+ std r3,CS_HID1(r5)
+ mfspr r3,SPRN_HID4
+ std r3,CS_HID4(r5)
+ mfspr r3,SPRN_HID5
+ std r3,CS_HID5(r5)
+
+2:
+ mtcr r7
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+ /* Get storage ptr (FIXME when using anton reloc as we
+ * are running with translation disabled here
+ */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+
+1: /* Before accessing memory, we make sure rm_ci is clear */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+
+ /* Clear interrupt prefix */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0
+ isync
+
+ /* Restore HID0 */
+ ld r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ sync
+ isync
+
+ /* Restore HID1 */
+ ld r3,CS_HID1(r5)
+ sync
+ isync
+ mtspr SPRN_HID1,r3
+ mtspr SPRN_HID1,r3
+ sync
+ isync
+
+ /* Restore HID4 */
+ ld r3,CS_HID4(r5)
+ sync
+ isync
+ mtspr SPRN_HID4,r3
+ sync
+ isync
+
+ /* Restore HID5 */
+ ld r3,CS_HID5(r5)
+ sync
+ isync
+ mtspr SPRN_HID5,r3
+ sync
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa080..1d85cedbbb7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -52,6 +52,9 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
PPC_FEATURE_HAS_MMU)
#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS)
/* We only set the spe features if the kernel was compiled with
@@ -160,7 +163,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00350000,
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -175,7 +178,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00380000,
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -190,7 +193,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00390000,
.cpu_name = "PPC970",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -212,7 +215,7 @@ struct cpu_spec cpu_specs[] = {
#else
.cpu_features = CPU_FTRS_PPC970,
#endif
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -230,7 +233,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00440000,
.cpu_name = "PPC970MP",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -240,12 +243,12 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GR */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -255,12 +258,12 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -276,7 +279,7 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "Cell Broadband Engine",
.cpu_features = CPU_FTRS_CELL,
.cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
+ PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
.cpu_setup = __setup_cpu_be,
@@ -929,6 +932,16 @@ struct cpu_spec cpu_specs[] = {
.icache_bsize = 32,
.dcache_bsize = 32,
},
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xff000fff,
+ .pvr_value = 0x53000890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ },
#endif /* CONFIG_44x */
#ifdef CONFIG_FSL_BOOKE
{ /* e200z5 */
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 00000000000..7c3419656cc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Implements the generic device dma API for ppc64. Handles
+ * the pci and vio busses
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+/* Include the busses we support */
+#include <linux/pci.h>
+#include <asm/vio.h>
+#include <asm/scatterlist.h>
+#include <asm/bug.h>
+
+static struct dma_mapping_ops *get_dma_ops(struct device *dev)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return &pci_dma_ops;
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return &vio_dma_ops;
+#endif
+ return NULL;
+}
+
+int dma_supported(struct device *dev, u64 mask)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->dma_supported(dev, mask);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return -EIO;
+#endif /* CONFIG_IBMVIO */
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+ BUG();
+ return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev, cpu_addr, size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_addr, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev,
+ (page_address(page) + offset), size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_address, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_sg(dev, sg, nents, direction);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 00000000000..65eae752a52
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,45 @@
+/*
+ * Extracted from cputable.c
+ *
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
+};
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4d6001fa1cf..b780b42c95f 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -41,20 +41,20 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
LOADBASE(r3, last_task_used_math)
toreal(r3)
- LDL r4,OFF(last_task_used_math)(r3)
- CMPI 0,r4,0
+ PPC_LL r4,OFF(last_task_used_math)(r3)
+ PPC_LCMPI 0,r4,0
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
SAVE_32FPRS(0, r4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
- LDL r5,PT_REGS(r4)
+ PPC_LL r5,PT_REGS(r4)
toreal(r5)
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r10,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r10 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
/* enable use of FP after return */
@@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
- STL r4,OFF(last_task_used_math)(r3)
+ PPC_STL r4,OFF(last_task_used_math)(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
@@ -97,24 +97,24 @@ _GLOBAL(giveup_fpu)
MTMSRD(r5) /* enable use of fpu now */
SYNC_601
isync
- CMPI 0,r3,0
+ PPC_LCMPI 0,r3,0
beqlr- /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
- LDL r5,PT_REGS(r3)
- CMPI 0,r5,0
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
SAVE_32FPRS(0, r3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r3,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r3 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
li r5,0
LOADBASE(r4,last_task_used_math)
- STL r5,OFF(last_task_used_math)(r4)
+ PPC_STL r5,OFF(last_task_used_math)(r4)
#endif /* CONFIG_SMP */
blr
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index b102e3a2415..ccdf94731e3 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -1100,6 +1100,7 @@ start_here:
mr r3,r31
mr r4,r30
bl machine_init
+ bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_APUS
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987..8a8bf79ef04 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -28,7 +28,6 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/systemcfg.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
@@ -195,11 +194,11 @@ exception_marker:
#define EX_R12 24
#define EX_R13 32
#define EX_SRR0 40
-#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
#define EX_DAR 48
-#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
+#define EX_R3 64
+#define EX_LR 72
#define EXCEPTION_PROLOG_PSERIES(area, label) \
mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +418,22 @@ data_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_DAR
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -440,17 +444,22 @@ instruction_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -509,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries)
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
/*
+ * We have some room here we use that to put
+ * the peries slb miss user trampoline code so it's reasonably
+ * away from slb_miss_user_common to avoid problems with rfid
+ *
+ * This is used for when the SLB miss handler has to go virtual,
+ * which doesn't happen for now anymore but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+#ifdef __DISABLED__
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+/*
* Vectors for the FWNMI option. Share common code.
*/
.globl system_reset_fwnmi
@@ -559,22 +600,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
.globl data_access_slb_iSeries
data_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
mfspr r3,SPRN_DAR
- b .do_slb_miss
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
.globl instruction_access_slb_iSeries
instruction_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
- ld r3,PACALPPACA+LPPACASRR0(r13)
- b .do_slb_miss
+ ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge .slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13);
+ b .slb_miss_realmode
+
+#ifdef __DISABLED__
+slb_miss_user_iseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ EXCEPTION_PROLOG_ISERIES_2
+ b slb_miss_user_common
+#endif
MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +887,126 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+/*
+ * Here is the common SLB miss user that is used when going to virtual
+ * mode for SLB misses, that is currently not used
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b .handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+ ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_slb
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+#ifdef CONFIG_PPC_ISERIES
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
@@ -1139,62 +1337,6 @@ _GLOBAL(do_stab_bolted)
b . /* prevent speculative execution */
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(do_slb_miss)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate /* handle it */
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-/*
* Space for CPU0's segment table.
*
* On iSeries, the hypervisor must fill in at least one entry before
@@ -1554,22 +1696,14 @@ _GLOBAL(pmac_secondary_start)
* SPRG3 = paca virtual address
*/
_GLOBAL(__secondary_start)
+ /* Set thread priority to MEDIUM */
+ HMT_MEDIUM
- HMT_MEDIUM /* Set thread priority to MEDIUM */
-
+ /* Load TOC */
ld r2,PACATOC(r13)
- li r6,0
- stb r6,PACAPROCENABLED(r13)
-
-#ifndef CONFIG_PPC_ISERIES
- /* Initialize the page table pointer register. */
- LOADADDR(r6,_SDR1)
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-#endif
- /* Initialize the first segment table (or SLB) entry */
- ld r3,PACASTABVIRT(r13) /* get addr of segment table */
- bl .stab_initialize
+
+ /* Do early setup for that CPU (stab, slb, hash table pointer) */
+ bl .early_setup_secondary
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
@@ -1578,37 +1712,7 @@ _GLOBAL(__secondary_start)
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
std r1,PACAKSAVE(r13)
- ld r3,PACASTABREAL(r13) /* get raddr of segment table */
- ori r4,r3,1 /* turn on valid bit */
-
-#ifdef CONFIG_PPC_ISERIES
- li r0,-1 /* hypervisor call */
- li r3,1
- sldi r3,r3,63 /* 0x8000000000000000 */
- ori r3,r3,4 /* 0x8000000000000004 */
- sc /* HvCall_setASR */
-#else
- /* set the ASR */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
-#endif
+ /* Clear backchain so we get nice backtraces */
li r7,0
mtlr r7
@@ -1631,6 +1735,7 @@ _GLOBAL(start_secondary_prolog)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
+ b .
#endif
/*
@@ -1750,40 +1855,6 @@ _STATIC(start_here_multiplatform)
mr r3,r31
bl .early_setup
- /* set the ASR */
- ld r3,PACASTABREAL(r13)
- ori r4,r3,1 /* turn on valid bit */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
- /* Set SDR1 (hash table pointer) */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- /* Test if bit 0 is set (LPAR bit) */
- andi. r3,r3,PLATFORM_LPAR
- bne 98f /* branch if result is !0 */
- LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
- add r6,r6,r26
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-98:
LOADADDR(r3,.start_here_common)
SET_REG_TO_CONST(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 5063c603fad..8d60fa99fc4 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
* Copyright 2002-2004 MontaVista Software, Inc.
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
- * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
new file mode 100644
index 00000000000..0fa3d27fef0
--- /dev/null
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -0,0 +1,45 @@
+/*
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Based on sparc64 ioctl32.c by:
+ *
+ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * ppc64 changes:
+ *
+ * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
+ * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * ioctls.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define INCLUDES
+#include "compat_ioctl.c"
+#include <linux/syscalls.h>
+
+#define CODE
+#include "compat_ioctl.c"
+
+#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+
+#define IOCTL_TABLE_START \
+ struct ioctl_trans ioctl_start[] = {
+#define IOCTL_TABLE_END \
+ };
+
+IOCTL_TABLE_START
+#include <linux/compat_ioctl.h>
+#define DECLARES
+#include "compat_ioctl.c"
+
+IOCTL_TABLE_END
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 00000000000..6160c8dbb7c
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
+/*
+ * arch/ppc64/kernel/iomap.c
+ *
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+/*
+ * Here comes the ppc64 implementation of the IOMAP
+ * interfaces.
+ */
+unsigned int fastcall ioread8(void __iomem *addr)
+{
+ return readb(addr);
+}
+unsigned int fastcall ioread16(void __iomem *addr)
+{
+ return readw(addr);
+}
+unsigned int fastcall ioread16be(void __iomem *addr)
+{
+ return in_be16(addr);
+}
+unsigned int fastcall ioread32(void __iomem *addr)
+{
+ return readl(addr);
+}
+unsigned int fastcall ioread32be(void __iomem *addr)
+{
+ return in_be32(addr);
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+void fastcall iowrite8(u8 val, void __iomem *addr)
+{
+ writeb(val, addr);
+}
+void fastcall iowrite16(u16 val, void __iomem *addr)
+{
+ writew(val, addr);
+}
+void fastcall iowrite16be(u16 val, void __iomem *addr)
+{
+ out_be16(addr, val);
+}
+void fastcall iowrite32(u32 val, void __iomem *addr)
+{
+ writel(val, addr);
+}
+void fastcall iowrite32be(u32 val, void __iomem *addr)
+{
+ out_be32(addr, val);
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat read/write" functions. Note the
+ * non-CPU byte order. We do things in "IO byteorder"
+ * here.
+ *
+ * FIXME! We could make these do EEH handling if we really
+ * wanted. Not clear if we do.
+ */
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insb((u8 __iomem *) addr, dst, count);
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insw_ns((u16 __iomem *) addr, dst, count);
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insl_ns((u32 __iomem *) addr, dst, count);
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsb((u8 __iomem *) addr, src, count);
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsw_ns((u16 __iomem *) addr, src, count);
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsl_ns((u32 __iomem *) addr, src, count);
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+ if (!_IO_IS_VALID(port))
+ return NULL;
+ return (void __iomem *) (port+pci_io_base);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+ unsigned long start = pci_resource_start(dev, bar);
+ unsigned long len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len)
+ return NULL;
+ if (max && len > max)
+ len = max;
+ if (flags & IORESOURCE_IO)
+ return ioport_map(start, len);
+ if (flags & IORESOURCE_MEM)
+ return ioremap(start, len);
+ /* What? */
+ return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 00000000000..4d9b4388918
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
+/*
+ * arch/ppc64/kernel/iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes, virtual merging:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ * and Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#define DBG(...)
+
+#ifdef CONFIG_IOMMU_VMERGE
+static int novmerge = 0;
+#else
+static int novmerge = 1;
+#endif
+
+static int __init setup_iommu(char *str)
+{
+ if (!strcmp(str, "novmerge"))
+ novmerge = 1;
+ else if (!strcmp(str, "vmerge"))
+ novmerge = 0;
+ return 1;
+}
+
+__setup("iommu=", setup_iommu);
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+ unsigned long npages,
+ unsigned long *handle,
+ unsigned int align_order)
+{
+ unsigned long n, end, i, start;
+ unsigned long limit;
+ int largealloc = npages > 15;
+ int pass = 0;
+ unsigned long align_mask;
+
+ align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages) == 0) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ /* Use only half of the table for small allocs (15 pages or less) */
+ limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+
+ if (largealloc && start < tbl->it_halfpoint)
+ start = tbl->it_halfpoint;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the initial start.
+ */
+ if (start >= limit)
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ again:
+
+ n = find_next_zero_bit(tbl->it_map, limit, start);
+
+ /* Align allocation */
+ n = (n + align_mask) & ~align_mask;
+
+ end = n + npages;
+
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 2)) {
+ /* First failure, just rescan the half of the table.
+ * Second failure, rescan the other half of the table.
+ */
+ start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+ limit = pass ? tbl->it_size : limit;
+ pass++;
+ goto again;
+ } else {
+ /* Third failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ if (test_bit(i, tbl->it_map)) {
+ start = i+1;
+ goto again;
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, tbl->it_map);
+
+ /* Bump the hint to a new block for small allocs. */
+ if (largealloc) {
+ /* Don't bump to new block to avoid fragmentation */
+ tbl->it_largehint = end;
+ } else {
+ /* Overflow will be taken care of at the next allocation */
+ tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ ~(tbl->it_blocksize - 1);
+ }
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
+ unsigned int npages, enum dma_data_direction direction,
+ unsigned int align_order)
+{
+ unsigned long entry, flags;
+ dma_addr_t ret = DMA_ERROR_CODE;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ entry = iommu_range_alloc(tbl, npages, NULL, align_order);
+
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
+
+ entry += tbl->it_offset; /* Offset into real TCE table */
+ ret = entry << PAGE_SHIFT; /* Set the return dma address */
+
+ /* Put the TCEs in the HW table */
+ ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
+ direction);
+
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ return ret;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long i;
+
+ entry = dma_addr >> PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ if (((free_entry + npages) > tbl->it_size) ||
+ (entry < tbl->it_offset)) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_free: invalid entry\n");
+ printk(KERN_INFO "\tentry = 0x%lx\n", entry);
+ printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ ppc_md.tce_free(tbl, entry, npages);
+
+ for (i = 0; i < npages; i++)
+ __clear_bit(free_entry+i, tbl->it_map);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ __iommu_free(tbl, dma_addr, npages);
+
+ /* Make sure TLB cache is flushed if the HW needs it. We do
+ * not do an mb() here on purpose, it is not needed on any of
+ * the current platforms.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned long flags;
+ struct scatterlist *s, *outs, *segstart;
+ int outcount, incount;
+ unsigned long handle;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if ((nelems == 0) || !tbl)
+ return 0;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ DBG("mapping %d elements:\n", nelems);
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ for (s = outs; nelems; nelems--, s++) {
+ unsigned long vaddr, npages, entry, slen;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ vaddr = (unsigned long)page_address(s->page) + s->offset;
+ npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+ entry = iommu_range_alloc(tbl, npages, &handle, 0);
+
+ DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
+ " npages %lx\n", tbl, vaddr, npages);
+ goto failure;
+ }
+
+ /* Convert entry to a dma_addr_t */
+ entry += tbl->it_offset;
+ dma_addr = entry << PAGE_SHIFT;
+ dma_addr |= s->offset;
+
+ DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
+ npages, entry, dma_addr);
+
+ /* Insert into HW table */
+ ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ DBG(" - trying merge...\n");
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if (novmerge || (dma_addr != dma_next)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++; outs++;
+ DBG(" can't merge, new segment.\n");
+ } else {
+ outs->dma_length += s->length;
+ DBG(" merged, new len: %lx\n", outs->dma_length);
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ DBG(" - filling new segment.\n");
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+
+ DBG(" - dma next is: %lx\n", dma_next);
+ }
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ DBG("mapped %d elements:\n", outcount);
+
+ /* For the sake of iommu_unmap_sg, we clear out the length in the
+ * next entry of the sglist if we didn't fill the list completely
+ */
+ if (outcount < incount) {
+ outs++;
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+ return outcount;
+
+ failure:
+ for (s = &sglist[0]; s <= outs; s++) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & PAGE_MASK;
+ npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
+ >> PAGE_SHIFT;
+ __iommu_free(tbl, vaddr, npages);
+ }
+ }
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ unsigned long flags;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (!tbl)
+ return;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ while (nelems--) {
+ unsigned int npages;
+ dma_addr_t dma_handle = sglist->dma_address;
+
+ if (sglist->dma_length == 0)
+ break;
+ npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
+ - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
+ __iommu_free(tbl, dma_handle, npages);
+ sglist++;
+ }
+
+ /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+ * do not do an mb() here, the affected platforms do not need it
+ * when freeing.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl)
+{
+ unsigned long sz;
+ static int welcomed = 0;
+
+ /* Set aside 1/4 of the table for large allocations. */
+ tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+ /* number of bytes needed for the bitmap */
+ sz = (tbl->it_size + 7) >> 3;
+
+ tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
+ if (!tbl->it_map)
+ panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+
+ memset(tbl->it_map, 0, sz);
+
+ tbl->it_hint = 0;
+ tbl->it_largehint = tbl->it_halfpoint;
+ spin_lock_init(&tbl->it_lock);
+
+ /* Clear the hardware table in case firmware left allocations in it */
+ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+
+ if (!welcomed) {
+ printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+ novmerge ? "disabled" : "enabled");
+ welcomed = 1;
+ }
+
+ return tbl;
+}
+
+void iommu_free_table(struct device_node *dn)
+{
+ struct pci_dn *pdn = dn->data;
+ struct iommu_table *tbl = pdn->iommu_table;
+ unsigned long bitmap_sz, i;
+ unsigned int order;
+
+ if (!tbl || !tbl->it_map) {
+ printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
+ dn->full_name);
+ return;
+ }
+
+ /* verify that table contains no entries */
+ /* it_size is in entries, and we're examining 64 at a time */
+ for (i = 0; i < (tbl->it_size/64); i++) {
+ if (tbl->it_map[i] != 0) {
+ printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
+ __FUNCTION__, dn->full_name);
+ break;
+ }
+ }
+
+ /* calculate bitmap size in bytes */
+ bitmap_sz = (tbl->it_size + 7) / 8;
+
+ /* free bitmap */
+ order = get_order(bitmap_sz);
+ free_pages((unsigned long) tbl->it_map, order);
+
+ /* free table */
+ kfree(tbl);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_addr_t dma_handle = DMA_ERROR_CODE;
+ unsigned long uaddr;
+ unsigned int npages;
+
+ BUG_ON(direction == DMA_NONE);
+
+ uaddr = (unsigned long)vaddr;
+ npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+
+ if (tbl) {
+ dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
+ if (dma_handle == DMA_ERROR_CODE) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_alloc failed, "
+ "tbl %p vaddr %p npages %d\n",
+ tbl, vaddr, npages);
+ }
+ } else
+ dma_handle |= (uaddr & ~PAGE_MASK);
+ }
+
+ return dma_handle;
+}
+
+void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+
+ if (tbl)
+ iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
+ (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret = NULL;
+ dma_addr_t mapping;
+ unsigned int npages, order;
+
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ order = get_order(size);
+
+ /*
+ * Client asked for way too much space. This is checked later
+ * anyway. It is easier to debug here for the drivers than in
+ * the tce tables.
+ */
+ if (order >= IOMAP_MAX_ORDER) {
+ printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+ return NULL;
+ }
+
+ if (!tbl)
+ return NULL;
+
+ /* Alloc enough pages (and possibly more) */
+ ret = (void *)__get_free_pages(flag, order);
+ if (!ret)
+ return NULL;
+ memset(ret, 0, size);
+
+ /* Set up tces to cover the allocated range */
+ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
+ if (mapping == DMA_ERROR_CODE) {
+ free_pages((unsigned long)ret, order);
+ ret = NULL;
+ } else
+ *dma_handle = mapping;
+ return ret;
+}
+
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ unsigned int npages;
+
+ if (tbl) {
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ iommu_free(tbl, dma_handle, npages);
+ free_pages((unsigned long)vaddr, get_order(size));
+ }
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 00000000000..5a71ed9612f
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,479 @@
+/*
+ * arch/ppc/kernel/irq.c
+ *
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
+ * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask. I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#ifdef CONFIG_PPC64
+#include <linux/kallsyms.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#ifdef CONFIG_PPC64
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+#endif
+
+int __irq_offset_value;
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(__irq_offset_value);
+#endif
+
+static int ppc_spurious_interrupts;
+
+#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
+extern void iSeries_smp_message_recv(struct pt_regs *);
+#endif
+
+#ifdef CONFIG_PPC32
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+
+unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+extern int tau_interrupts(int);
+#endif
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+extern atomic_t ipi_recv;
+extern atomic_t ipi_sent;
+#endif
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(irq_desc);
+
+int distribute_irqs = 1;
+u64 ppc64_interrupt_controller;
+#endif /* CONFIG_PPC64 */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *)v, j;
+ struct irqaction *action;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ", j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ desc = get_irq_desc(i);
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
+ if (!action || !action->handler)
+ goto skip;
+ seq_printf(p, "%3d: ", i);
+#ifdef CONFIG_SMP
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#else
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+ if (desc->handler)
+ seq_printf(p, " %s ", desc->handler->typename);
+ else
+ seq_puts(p, " None ");
+ seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
+ seq_printf(p, " %s", action->name);
+ for (action = action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ } else if (i == NR_IRQS) {
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_TAU_INT
+ if (tau_initialized){
+ seq_puts(p, "TAU: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+ /* should this be per processor send/receive? */
+ seq_printf(p, "IPI (recv/sent): %10u/%u\n",
+ atomic_read(&ipi_recv), atomic_read(&ipi_sent));
+#endif
+#endif /* CONFIG_PPC32 */
+ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for_each_irq(irq) {
+ cpumask_t mask;
+
+ if (irq_desc[irq].status & IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+}
+#endif
+
+#ifdef CONFIG_PPC_ISERIES
+void do_IRQ(struct pt_regs *regs)
+{
+ struct paca_struct *lpaca;
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ lpaca = get_paca();
+#ifdef CONFIG_SMP
+ if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
+ lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
+ iSeries_smp_message_recv(regs);
+ }
+#endif /* CONFIG_SMP */
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
+
+ irq_exit();
+
+ if (lpaca->lppaca.int_dword.fields.decr_int) {
+ lpaca->lppaca.int_dword.fields.decr_int = 0;
+ /* Signal a fake decrementer interrupt */
+ timer_interrupt(regs);
+ }
+}
+
+#else /* CONFIG_PPC_ISERIES */
+
+void do_IRQ(struct pt_regs *regs)
+{
+ int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ /*
+ * Every platform is required to implement ppc_md.get_irq.
+ * This function will either return an irq number or -1 to
+ * indicate there are no more pending.
+ * The value -2 is for buggy hardware and means that this IRQ
+ * has already been handled. -- Tom
+ */
+ irq = ppc_md.get_irq(regs);
+
+ if (irq >= 0) {
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regs, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regs);
+ } else
+#ifdef CONFIG_PPC32
+ if (irq != -2)
+#endif
+ /* That's not SMP safe ... but who cares ? */
+ ppc_spurious_interrupts++;
+ irq_exit();
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+void __init init_IRQ(void)
+{
+#ifdef CONFIG_PPC64
+ static int once = 0;
+
+ if (once)
+ return;
+
+ once++;
+
+#endif
+ ppc_md.init_IRQ();
+#ifdef CONFIG_PPC64
+ irq_ctx_init();
+#endif
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
+ */
+
+#define UNDEFINED_IRQ 0xffffffff
+unsigned int virt_irq_to_real_map[NR_IRQS];
+
+/*
+ * Don't use virtual irqs 0, 1, 2 for devices.
+ * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
+ * and 2 is the XICS IPI interrupt.
+ * We limit virtual irqs to 17 less than NR_IRQS so that when we
+ * offset them by 16 (to reserve the first 16 for ISA interrupts)
+ * we don't end up with an interrupt number >= NR_IRQS.
+ */
+#define MIN_VIRT_IRQ 3
+#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
+#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
+
+void
+virt_irq_init(void)
+{
+ int i;
+ for (i = 0; i < NR_IRQS; i++)
+ virt_irq_to_real_map[i] = UNDEFINED_IRQ;
+}
+
+/* Create a mapping for a real_irq if it doesn't already exist.
+ * Return the virtual irq as a convenience.
+ */
+int virt_irq_create_mapping(unsigned int real_irq)
+{
+ unsigned int virq, first_virq;
+ static int warned;
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC)
+ return real_irq; /* no mapping for openpic (for now) */
+
+ if (ppc64_interrupt_controller == IC_CELL_PIC)
+ return real_irq; /* no mapping for iic either */
+
+ /* don't map interrupts < MIN_VIRT_IRQ */
+ if (real_irq < MIN_VIRT_IRQ) {
+ virt_irq_to_real_map[real_irq] = real_irq;
+ return real_irq;
+ }
+
+ /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
+ virq = real_irq;
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ /* search for this number or a free slot */
+ first_virq = virq;
+ while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+ if (++virq > MAX_VIRT_IRQ)
+ virq = MIN_VIRT_IRQ;
+ if (virq == first_virq)
+ goto nospace; /* oops, no free slots */
+ }
+
+ virt_irq_to_real_map[virq] = real_irq;
+ return virq;
+
+ nospace:
+ if (!warned) {
+ printk(KERN_CRIT "Interrupt table is full\n");
+ printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
+ "in your kernel sources and rebuild.\n", NR_IRQS);
+ warned = 1;
+ }
+ return NO_IRQ;
+}
+
+/*
+ * In most cases will get a hit on the very first slot checked in the
+ * virt_irq_to_real_map. Only when there are a large number of
+ * IRQs will this be expensive.
+ */
+unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
+{
+ unsigned int virq;
+ unsigned int first_virq;
+
+ virq = real_irq;
+
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ first_virq = virq;
+
+ do {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+
+ virq++;
+
+ if (virq >= MAX_VIRT_IRQ)
+ virq = 0;
+
+ } while (first_virq != virq);
+
+ return NO_IRQ;
+
+}
+
+#ifdef CONFIG_IRQSTACKS
+struct thread_info *softirq_ctx[NR_CPUS];
+struct thread_info *hardirq_ctx[NR_CPUS];
+
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = SOFTIRQ_OFFSET;
+
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+ }
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curtp, *irqtp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+ }
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
+
+#endif /* CONFIG_IRQSTACKS */
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 00000000000..511af54e623
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
+/*
+ * Kernel Probes (KProbes)
+ * arch/ppc64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes contributions from
+ * Rusty Russell).
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ * for PPC64
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sstep.h>
+
+static DECLARE_MUTEX(kprobe_mutex);
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ int ret = 0;
+ kprobe_opcode_t insn = *p->addr;
+
+ if ((unsigned long)p->addr & 0x03) {
+ printk("Attempt to register kprobe at an unaligned address\n");
+ ret = -EINVAL;
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+ printk("Cannot register a kprobe on rfid or mtmsrd\n");
+ ret = -EINVAL;
+ }
+
+ /* insn must be on a special executable page on ppc64 */
+ if (!ret) {
+ down(&kprobe_mutex);
+ p->ainsn.insn = get_insn_slot();
+ up(&kprobe_mutex);
+ if (!p->ainsn.insn)
+ ret = -ENOMEM;
+ }
+ return ret;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ *p->addr = p->opcode;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ down(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn);
+ up(&kprobe_mutex);
+}
+
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ kprobe_opcode_t insn = *p->ainsn.insn;
+
+ regs->msr |= MSR_SE;
+
+ /* single step inline if it is a trap variant */
+ if (is_trap(insn))
+ regs->nip = (unsigned long)p->addr;
+ else
+ regs->nip = (unsigned long)p->ainsn.insn;
+}
+
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_msr = regs->msr;
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
+}
+
+static inline int kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ unsigned int *addr = (unsigned int *)regs->nip;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ is_trap(insn)) {
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_saved_msr = regs->msr;
+ p->nmissed++;
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ goto ss_probe;
+ }
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * PowerPC has multiple variants of the "trap"
+ * instruction. If the current instruction is a
+ * trap variant, it could belong to someone else
+ */
+ kprobe_opcode_t cur_insn = *addr;
+ if (is_trap(cur_insn))
+ goto no_kprobe;
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ set_current_kprobe(p, regs, kcb);
+ if (p->pre_handler && p->pre_handler(p, regs))
+ /* handler has already set things up, so skip ss setup */
+ return 1;
+
+ss_probe:
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->nip = orig_ret_address;
+
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ int ret;
+ unsigned int insn = *p->ainsn.insn;
+
+ regs->nip = (unsigned long)p->addr;
+ ret = emulate_step(regs, insn);
+ if (ret == 0)
+ regs->nip = (unsigned long)p->addr + 4;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs);
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, msr
+ * will have SE set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (regs->msr & MSR_SE)
+ return 0;
+
+ return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(cur, regs);
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id() */
+ preempt_disable();
+ if (kprobe_running() &&
+ kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+ /* setup return addr to the jprobe handler routine */
+ regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+ regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ asm volatile("trap" ::: "memory");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+};
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+ * FIXME - we should ideally be validating that we got here 'cos
+ * of the "trap" in jprobe_return() above, before restoring the
+ * saved regs...
+ */
+ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ preempt_enable_no_resched();
+ return 1;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 00000000000..9dda16ccde7
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,608 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/vdso_datapage.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct paca_struct *lpaca;
+
+ for_each_cpu(cpu) {
+ lpaca = paca + cpu;
+ sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+ printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+ cpu, lpaca->lppaca.emulated_time_base);
+#endif
+ }
+ return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ unsigned long pool_id, lp_index;
+ int shared, entitled_capacity, max_entitled_capacity;
+ int processors, max_processors;
+ struct paca_struct *lpaca = get_paca();
+ unsigned long purr = get_purr();
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ shared = (int)(lpaca->lppaca_ptr->shared_proc);
+ seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+ e2a(xItExtVpdPanel.mfgID[2]),
+ e2a(xItExtVpdPanel.mfgID[3]),
+ e2a(xItExtVpdPanel.systemSerial[1]),
+ e2a(xItExtVpdPanel.systemSerial[2]),
+ e2a(xItExtVpdPanel.systemSerial[3]),
+ e2a(xItExtVpdPanel.systemSerial[4]),
+ e2a(xItExtVpdPanel.systemSerial[5]));
+
+ seq_printf(m, "system_type=%c%c%c%c\n",
+ e2a(xItExtVpdPanel.machineType[0]),
+ e2a(xItExtVpdPanel.machineType[1]),
+ e2a(xItExtVpdPanel.machineType[2]),
+ e2a(xItExtVpdPanel.machineType[3]));
+
+ lp_index = HvLpConfig_getLpIndex();
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ seq_printf(m, "system_active_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ processors = (int)HvLpConfig_getPhysicalProcessors();
+ seq_printf(m, "partition_active_processors=%d\n", processors);
+
+ max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+ seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+ if (shared) {
+ entitled_capacity = HvLpConfig_getSharedProcUnits();
+ max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+ } else {
+ entitled_capacity = processors * 100;
+ max_entitled_capacity = max_processors * 100;
+ }
+ seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ max_entitled_capacity);
+
+ if (shared) {
+ pool_id = HvLpConfig_getSharedPoolIndex();
+ seq_printf(m, "pool=%d\n", (int)pool_id);
+ seq_printf(m, "pool_capacity=%d\n",
+ (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+ 100));
+ seq_printf(m, "purr=%ld\n", purr);
+ }
+
+ seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+/* find a better place for this function... */
+static void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+ unsigned long *unallocated,
+ unsigned long *aggregation,
+ unsigned long *resource)
+{
+ unsigned long rc;
+ rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+ aggregation, resource);
+
+ log_plpar_hcall_return(rc, "H_GET_PPP");
+
+ return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long dummy;
+ rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+ if (rc != H_Authority)
+ log_plpar_hcall_return(rc, "H_PIC");
+}
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications */
+
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct cpu_usage *cu;
+
+ for_each_cpu(cpu) {
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf));
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __FUNCTION__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+ splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ unsigned int *lp_index_ptr, lp_index = 0;
+ struct device_node *rtas_node;
+ int *lrdrp;
+
+ rootdn = find_path_device("/");
+ if (rootdn) {
+ model = get_property(rootdn, "model", NULL);
+ system_id = get_property(rootdn, "system-id", NULL);
+ lp_index_ptr = (unsigned int *)
+ get_property(rootdn, "ibm,partition-no", NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ }
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ seq_printf(m, "serial_number=%s\n", system_id);
+
+ seq_printf(m, "system_type=%s\n", model);
+
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ rtas_node = find_path_device("/rtas");
+ lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = vdso_data->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ unsigned long h_entitled, h_unallocated;
+ unsigned long h_aggregation, h_resource;
+ unsigned long pool_idle_time, pool_procs;
+ unsigned long purr;
+
+ h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+ &h_resource);
+
+ seq_printf(m, "R4=0x%lx\n", h_entitled);
+ seq_printf(m, "R5=0x%lx\n", h_unallocated);
+ seq_printf(m, "R6=0x%lx\n", h_aggregation);
+ seq_printf(m, "R7=0x%lx\n", h_resource);
+
+ purr = get_purr();
+
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+ seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+ seq_printf(m, "system_active_processors=%ld\n",
+ (h_resource >> 0 * 8) & 0xffff);
+
+ /* pool related entries are apropriate for shared configs */
+ if (paca[0].lppaca.shared_proc) {
+
+ h_pic(&pool_idle_time, &pool_procs);
+
+ seq_printf(m, "pool=%ld\n",
+ (h_aggregation >> 0 * 8) & 0xffff);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%ld\n",
+ ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%ld\n",
+ (h_resource >> 4 * 8) & 0xFF);
+
+ seq_printf(m, "capacity_weight=%ld\n",
+ (h_resource >> 5 * 8) & 0xFF);
+
+ seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+ seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+ seq_printf(m, "purr=%ld\n", purr);
+
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+ return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ char *kbuf;
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+
+ unsigned long current_entitled; /* parameters for h_get_ppp */
+ unsigned long dummy;
+ unsigned long resource;
+ u8 current_weight;
+
+ ssize_t retval = -ENOMEM;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
+
+ retval = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+
+ retval = -EINVAL;
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ goto out;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_weight_ptr = &current_weight;
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_entitled_ptr = &current_entitled;
+ } else
+ goto out;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+ if (retval) {
+ retval = -EIO;
+ goto out;
+ }
+
+ current_weight = (resource >> 5 * 8) & 0xFF;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+ __FUNCTION__, current_entitled, current_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+ __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+ retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+ *new_weight_ptr);
+
+ if (retval == H_Success || retval == H_Constrained) {
+ retval = count;
+ } else if (retval == H_Busy) {
+ retval = -EBUSY;
+ } else if (retval == H_Hardware) {
+ retval = -EIO;
+ } else if (retval == H_Parameter) {
+ retval = -EINVAL;
+ } else {
+ printk(KERN_WARNING "%s: received unknown hv return code %ld",
+ __FUNCTION__, retval);
+ retval = -EIO;
+ }
+
+out:
+ kfree(kbuf);
+ return retval;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .open = lparcfg_open,
+ .release = single_release,
+};
+
+int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ lparcfg_fops.write = lparcfg_write;
+ mode |= S_IWUSR;
+ }
+
+ ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+ if (ent) {
+ ent->proc_fops = &lparcfg_fops;
+ ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR
+ "Failed to allocate buffer for lparcfg\n");
+ remove_proc_entry("lparcfg", ent->parent);
+ return -ENOMEM;
+ }
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg) {
+ kfree(proc_ppc64_lparcfg->data);
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+ }
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf..5a05a797485 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
.xRanges = {
{ .xPages = HvPagesToMap,
.xOffset = 0,
- .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+ .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
},
},
};
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 00000000000..97c51e452be
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h> /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+
+#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need to have a crash shutdown in here
+ * and if what it will achieve. Letting it be now to compile the code
+ * in generic kexec environment
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* do nothing right now */
+ /* smp_relase_cpus() if we want smp on panic kernel */
+ /* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ int i;
+ unsigned long begin, end; /* limits of segment */
+ unsigned long low, high; /* limits of blocked memory range */
+ struct device_node *node;
+ unsigned long *basep;
+ unsigned int *sizep;
+
+ if (!ppc_md.hpte_clear_all)
+ return -ENOENT;
+
+ /*
+ * Since we use the kernel fault handlers and paging code to
+ * handle the virtual mode, we must make sure no destination
+ * overlaps kernel static data or bss.
+ */
+ for (i = 0; i < image->nr_segments; i++)
+ if (image->segment[i].mem < __pa(_end))
+ return -ETXTBSY;
+
+ /*
+ * For non-LPAR, we absolutely can not overwrite the mmu hash
+ * table, since we are still using the bolted entries in it to
+ * do the copy. Check that here.
+ *
+ * It is safe if the end is below the start of the blocked
+ * region (end <= low), or if the beginning is after the
+ * end of the blocked region (begin >= high). Use the
+ * boolean identity !(a || b) === (!a && !b).
+ */
+ if (htab_address) {
+ low = __pa(htab_address);
+ high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ /* We also should not overwrite the tce tables */
+ for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+ node = of_find_node_by_type(node, "pci")) {
+ basep = (unsigned long *)get_property(node, "linux,tce-base",
+ NULL);
+ sizep = (unsigned int *)get_property(node, "linux,tce-size",
+ NULL);
+ if (basep == NULL || sizep == NULL)
+ continue;
+
+ low = *basep;
+ high = low + (*sizep);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+ /* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+ unsigned long entry;
+ unsigned long *ptr;
+ void *dest;
+ void *addr;
+
+ /*
+ * We rely on kexec_load to create a lists that properly
+ * initializes these pointers before they are used.
+ * We will still crash if the list is wrong, but at least
+ * the compiler will be quiet.
+ */
+ ptr = NULL;
+ dest = NULL;
+
+ for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+ addr = __va(entry & PAGE_MASK);
+
+ switch (entry & IND_FLAGS) {
+ case IND_DESTINATION:
+ dest = addr;
+ break;
+ case IND_INDIRECTION:
+ ptr = addr;
+ break;
+ case IND_SOURCE:
+ copy_page(dest, addr);
+ dest += PAGE_SIZE;
+ }
+ }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+ long i, nr_segments = image->nr_segments;
+ struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+ /* save the ranges on the stack to efficiently flush the icache */
+ memcpy(ranges, image->segment, sizeof(ranges));
+
+ /*
+ * After this call we may not use anything allocated in dynamic
+ * memory, including *image.
+ *
+ * Only globals and the stack are allowed.
+ */
+ copy_segments(image->head);
+
+ /*
+ * we need to clear the icache for all dest pages sometime,
+ * including ones that were in place on the original copy
+ */
+ for (i = 0; i < nr_segments; i++)
+ flush_icache_range(ranges[i].mem + KERNELBASE,
+ ranges[i].mem + KERNELBASE +
+ ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 1);
+
+ local_irq_disable();
+ kexec_smp_wait();
+ /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+ int my_cpu, i, notified=-1;
+
+ smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ my_cpu = get_cpu();
+
+ /* check the others cpus are now down (via paca hw cpu id == -1) */
+ for (i=0; i < NR_CPUS; i++) {
+ if (i == my_cpu)
+ continue;
+
+ while (paca[i].hw_cpu_id != -1) {
+ barrier();
+ if (!cpu_possible(i)) {
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " possible, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (!cpu_online(i)) {
+ /* Fixme: this can be spinning in
+ * pSeries_secondary_wait with a paca
+ * waiting for it to go online.
+ */
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " online, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (i != notified) {
+ printk( "kexec: waiting for cpu %d (physical"
+ " %d) to go down\n",
+ i, paca[i].hw_cpu_id);
+ notified = i;
+ }
+ }
+ }
+
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
+ put_cpu();
+
+ local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+ /*
+ * move the secondarys to us so that we can copy
+ * the new kernel 0-0x100 safely
+ *
+ * do this if kexec in setup.c ?
+ *
+ * We need to release the cpus if we are ever going from an
+ * UP to an SMP kernel.
+ */
+ smp_release_cpus();
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+ local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image. We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+ __attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+ /* prepare control code if any */
+
+ /* shutdown other cpus into our wait loop and quiesce interrupts */
+ kexec_prepare_cpus();
+
+ /* switch to a staticly allocated stack. Based on irq stack code.
+ * XXX: the task struct will likely be invalid once we do the copy!
+ */
+ kexec_stack.thread_info.task = current_thread_info()->task;
+ kexec_stack.thread_info.flags = 0;
+
+ /* Some things are best done in assembly. Finding globals with
+ * a toc is easier in C, so pass in what we can.
+ */
+ kexec_sequence(&kexec_stack, image->start, image,
+ page_address(image->control_code_page),
+ ppc_md.hpte_clear_all);
+ /* NOTREACHED */
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base, htab_size, kernel_end;
+
+static struct property htab_base_prop = {
+ .name = "linux,htab-base",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_base,
+};
+
+static struct property htab_size_prop = {
+ .name = "linux,htab-size",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_size,
+};
+
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&kernel_end,
+};
+
+static void __init export_htab_values(void)
+{
+ struct device_node *node;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return;
+
+ kernel_end = __pa(_end);
+ prom_add_property(node, &kernel_end_prop);
+
+ /* On machines with no htab htab_address is NULL */
+ if (NULL == htab_address)
+ goto out;
+
+ htab_base = __pa(htab_address);
+ prom_add_property(node, &htab_base_prop);
+
+ htab_size = 1UL << ppc64_pft_size;
+ prom_add_property(node, &htab_size_prop);
+
+ out:
+ of_node_put(node);
+}
+
+void __init kexec_setup(void)
+{
+ export_htab_values();
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 3bedb532aed..f6d84a75ed2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -519,7 +519,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(flush_icache_range)
+_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
@@ -607,27 +607,6 @@ _GLOBAL(invalidate_dcache_range)
sync /* wait for dcbi's to get to ram */
blr
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * 40x cores have 8K or 16K dcache and 32 byte line size.
- * 44x has a 32K dcache and 32 byte line size.
- * 8xx has 1, 2, 4, 8K variants.
- * For now, cover the worst case of the 44x.
- * Must be called with external interrupts disabled.
- */
-#define CACHE_NWAYS 64
-#define CACHE_NLINES 16
-
-_GLOBAL(flush_dcache_all)
- li r4, (2 * CACHE_NWAYS * CACHE_NLINES)
- mtctr r4
- lis r5, KERNELBASE@h
-1: lwz r3, 0(r5) /* Load one word from every line */
- addi r5, r5, L1_CACHE_BYTES
- bdnz 1b
- blr
-#endif /* CONFIG_NOT_COHERENT_CACHE */
-
/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b3e95ff0dba..ae48a002f81 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -89,12 +89,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_IRQ_event)
+_GLOBAL(call___do_IRQ)
mflr r0
std r0,16(r1)
- stdu r1,THREAD_SIZE-112(r6)
- mr r1,r6
- bl .handle_IRQ_event
+ stdu r1,THREAD_SIZE-112(r5)
+ mr r1,r5
+ bl .__do_IRQ
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
@@ -604,6 +604,76 @@ _GLOBAL(real_writeb)
#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
/*
+ * SCOM access functions for 970 (FX only for now)
+ *
+ * unsigned long scom970_read(unsigned int address);
+ * void scom970_write(unsigned int address, unsigned long value);
+ *
+ * The address passed in is the 24 bits register address. This code
+ * is 970 specific and will not check the status bits, so you should
+ * know what you are doing.
+ */
+_GLOBAL(scom970_read)
+ /* interrupts off */
+ mfmsr r4
+ ori r0,r4,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd,
+ * and finally or in RW bit
+ */
+ rlwinm r3,r3,8,0,15
+ ori r3,r3,0x8000
+
+ /* do the actual scom read */
+ sync
+ mtspr SPRN_SCOMC,r3
+ isync
+ mfspr r3,SPRN_SCOMD
+ isync
+ mfspr r0,SPRN_SCOMC
+ isync
+
+ /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah
+ * that's the best we can do). Not implemented yet as we don't use
+ * the scom on any of the bogus CPUs yet, but may have to be done
+ * ultimately
+ */
+
+ /* restore interrupts */
+ mtmsrd r4,1
+ blr
+
+
+_GLOBAL(scom970_write)
+ /* interrupts off */
+ mfmsr r5
+ ori r0,r5,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ * (including parity). On current CPUs they must be 0'd.
+ */
+
+ rlwinm r3,r3,8,0,15
+
+ sync
+ mtspr SPRN_SCOMD,r4 /* write data */
+ isync
+ mtspr SPRN_SCOMC,r3 /* write command */
+ isync
+ mfspr 3,SPRN_SCOMC
+ isync
+
+ /* restore interrupts */
+ mtmsrd r5,1
+ blr
+
+
+/*
* Create a kernel thread
* kernel_thread(fn, arg, flags)
*/
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 00000000000..928b8581fcb
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
+/* Kernel module help for PPC64.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+
+/* FIXME: We don't do .init separately. To do this, we'd need to have
+ a separate r2 value in the init and core section, and stub between
+ them, too.
+
+ Using a magic allocator which places modules within 32MB solves
+ this, and makes other things simpler. Anton?
+ --RR. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* There's actually a third entry here, but it's unused */
+struct ppc64_opd_entry
+{
+ unsigned long funcaddr;
+ unsigned long r2;
+};
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+ the kernel itself). But on PPC64, these need to be used for every
+ jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry
+{
+ /* 28 byte jump instruction sequence (7 instructions) */
+ unsigned char jump[28];
+ unsigned char unused[4];
+ /* Data for the above code */
+ struct ppc64_opd_entry opd;
+};
+
+/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
+ function which may be more than 24-bits away. We could simply
+ patch the new r2 value and function pointer into the stub, but it's
+ significantly shorter to put these values at the end of the stub
+ code, and patch the stub address (32-bits relative to the TOC ptr,
+ r2) into the stub. */
+static struct ppc64_stub_entry ppc64_stub =
+{ .jump = {
+ 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
+ 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
+ /* Save current r2 value in magic place on the stack. */
+ 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
+ 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
+ 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
+ 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
+ 0x4e, 0x80, 0x04, 0x20 /* bctr */
+} };
+
+/* Count how many different 24-bit relocations (different symbol,
+ different addend) */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, j, ret = 0;
+
+ /* FIXME: Only count external ones --RR */
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
+ continue;
+ for (j = 0; j < i; j++) {
+ /* If this addend appeared before, it's
+ already been counted */
+ if (rela[i].r_info == rela[j].r_info
+ && rela[i].r_addend == rela[j].r_addend)
+ break;
+ }
+ if (j == i) ret++;
+ }
+ return ret;
+}
+
+void *module_alloc(unsigned long size)
+{
+ if (size == 0)
+ return NULL;
+
+ return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+ /* FIXME: If module_region == mod->init_region, trim exception
+ table entries. */
+}
+
+/* Get size of potential trampolines required. */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs)
+{
+ /* One extra reloc so it's always 0-funcaddr terminated */
+ unsigned long relocs = 1;
+ unsigned i;
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ DEBUGP("Found relocations in section %u\n", i);
+ DEBUGP("Ptr: %p. Number: %lu\n",
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela));
+ }
+ }
+
+ DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+ return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+ unsigned long size)
+{
+ struct modversion_info *end;
+
+ for (end = (void *)vers + size; vers < end; vers++)
+ if (vers->name[0] == '.')
+ memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+ unsigned int i;
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF) {
+ char *name = strtab + syms[i].st_name;
+ if (name[0] == '.')
+ memmove(name, name+1, strlen(name));
+ }
+ }
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+ Elf64_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
+{
+ unsigned int i;
+
+ /* Find .toc and .stubs sections, symtab and strtab */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ char *p;
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+ me->arch.stubs_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ me->arch.toc_section = i;
+ else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+ dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size);
+
+ /* We don't handle .init for the moment: rename to _init */
+ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+ p[0] = '_';
+
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
+ dedotify((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf64_Sym),
+ (void *)hdr
+ + sechdrs[sechdrs[i].sh_link].sh_offset);
+ }
+ if (!me->arch.stubs_section || !me->arch.toc_section) {
+ printk("%s: doesn't contain .toc or .stubs.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the stubs size */
+ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+ return 0;
+}
+
+int apply_relocate(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
+ return -ENOEXEC;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+ gives the value maximum span in an instruction which uses a signed
+ offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+ return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if low
+ 16 bits has high bit set, high 16 bits must be adjusted. These
+ macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value. */
+static inline int create_stub(Elf64_Shdr *sechdrs,
+ struct ppc64_stub_entry *entry,
+ struct ppc64_opd_entry *opd,
+ struct module *me)
+{
+ Elf64_Half *loc1, *loc2;
+ long reladdr;
+
+ *entry = ppc64_stub;
+
+ loc1 = (Elf64_Half *)&entry->jump[2];
+ loc2 = (Elf64_Half *)&entry->jump[6];
+
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ printk("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ *loc1 = PPC_HA(reladdr);
+ *loc2 = PPC_LO(reladdr);
+ entry->opd.funcaddr = opd->funcaddr;
+ entry->opd.r2 = opd->r2;
+ return 1;
+}
+
+/* Create stub to jump to function described in this OPD: we need the
+ stub to set up the TOC ptr (r2) for the function. */
+static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+ unsigned long opdaddr,
+ struct module *me)
+{
+ struct ppc64_stub_entry *stubs;
+ struct ppc64_opd_entry *opd = (void *)opdaddr;
+ unsigned int i, num_stubs;
+
+ num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+ /* Find this stub, or if that fails, the next avail. entry */
+ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ for (i = 0; stubs[i].opd.funcaddr; i++) {
+ BUG_ON(i >= num_stubs);
+
+ if (stubs[i].opd.funcaddr == opd->funcaddr)
+ return (unsigned long)&stubs[i];
+ }
+
+ if (!create_stub(sechdrs, &stubs[i], opd, me))
+ return 0;
+
+ return (unsigned long)&stubs[i];
+}
+
+/* We expect a noop next: if it is, replace it with instruction to
+ restore r2. */
+static int restore_r2(u32 *instruction, struct module *me)
+{
+ if (*instruction != 0x60000000) {
+ printk("%s: Expect noop after relocate, got %08x\n",
+ me->name, *instruction);
+ return 0;
+ }
+ *instruction = 0xe8410028; /* ld r2,40(r1) */
+ return 1;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+ Elf64_Sym *sym;
+ unsigned long *location;
+ unsigned long value;
+
+ DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rela[i].r_offset;
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+ location, (long)ELF64_R_TYPE(rela[i].r_info),
+ strtab + sym->st_name, (unsigned long)sym->st_value,
+ (long)rela[i].r_addend);
+
+ /* `Everything is relative'. */
+ value = sym->st_value + rela[i].r_addend;
+
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_PPC64_ADDR32:
+ /* Simply set it */
+ *(u32 *)location = value;
+ break;
+
+ case R_PPC64_ADDR64:
+ /* Simply set it */
+ *(unsigned long *)location = value;
+ break;
+
+ case R_PPC64_TOC:
+ *(unsigned long *)location = my_r2(sechdrs, me);
+ break;
+
+ case R_PPC64_TOC16:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if (value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16 relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_DS:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16_DS relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC_REL24:
+ /* FIXME: Handle weak symbols here --RR */
+ if (sym->st_shndx == SHN_UNDEF) {
+ /* External: go via stub */
+ value = stub_for_addr(sechdrs, value, me);
+ if (!value)
+ return -ENOENT;
+ if (!restore_r2((u32 *)location + 1, me))
+ return -ENOEXEC;
+ }
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+ printk("%s: REL24 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 26 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
+ break;
+
+ default:
+ printk("%s: Unknown ADD relocation: %lu\n",
+ me->name,
+ (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+
+ return 0;
+}
+
+LIST_HEAD(module_bug_list);
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ char *secstrings;
+ unsigned int i;
+
+ me->arch.bug_table = NULL;
+ me->arch.num_bugs = 0;
+
+ /* Find the __bug_table section, if present */
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+ continue;
+ me->arch.bug_table = (void *) sechdrs[i].sh_addr;
+ me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+ break;
+ }
+
+ /*
+ * Strictly speaking this should have a spinlock to protect against
+ * traversals, but since we only traverse on BUG()s, a spinlock
+ * could potentially lead to deadlock and thus be counter-productive.
+ */
+ list_add(&me->arch.bug_list, &module_bug_list);
+
+ return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+ list_del(&mod->arch.bug_list);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ struct mod_arch_specific *mod;
+ unsigned int i;
+ struct bug_entry *bug;
+
+ list_for_each_entry(mod, &module_bug_list, bug_list) {
+ bug = mod->bug_table;
+ for (i = 0; i < mod->num_bugs; ++i, ++bug)
+ if (bugaddr == bug->bug_addr)
+ return bug;
+ }
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 00000000000..a7b68f911eb
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,135 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/lppaca.h>
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+
+
+/* This symbol is provided by the linker - let it fill in the paca
+ * field correctly */
+extern unsigned long __toc_start;
+
+/* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux. Each also contains an ItLpRegSave area which
+ * is used by the hypervisor to save registers.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor. The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors. The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+#define PACA_INIT_COMMON(number, start, asrr, asrv) \
+ .lock_token = 0x8000, \
+ .paca_index = (number), /* Paca Index */ \
+ .default_decr = 0x00ff0000, /* Initial Decr */ \
+ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
+ .stab_real = (asrr), /* Real pointer to segment table */ \
+ .stab_addr = (asrv), /* Virt pointer to segment table */ \
+ .cpu_start = (start), /* Processor start */ \
+ .hw_cpu_id = 0xffff, \
+ .lppaca = { \
+ .desc = 0xd397d781, /* "LpPa" */ \
+ .size = sizeof(struct lppaca), \
+ .dyn_proc_status = 2, \
+ .decr_val = 0x00ff0000, \
+ .fpregs_in_use = 1, \
+ .end_of_quantum = 0xfffffffffffffffful, \
+ .slb_count = 64, \
+ .vmxregs_in_use = 0, \
+ }, \
+
+#ifdef CONFIG_PPC_ISERIES
+#define PACA_INIT_ISERIES(number) \
+ .lppaca_ptr = &paca[number].lppaca, \
+ .reg_save_ptr = &paca[number].reg_save, \
+ .reg_save = { \
+ .xDesc = 0xd397d9e2, /* "LpRS" */ \
+ .xSize = sizeof(struct ItLpRegSave) \
+ }
+
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#else
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \
+}
+#endif
+
+struct paca_struct paca[] = {
+ BOOTCPU_PACA_INIT(0),
+#if NR_CPUS > 1
+ PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
+#if NR_CPUS > 4
+ PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
+#if NR_CPUS > 8
+ PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
+ PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
+ PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
+ PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
+ PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
+ PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
+#if NR_CPUS > 32
+ PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
+ PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
+ PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
+ PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
+ PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
+ PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
+ PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
+ PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
+#if NR_CPUS > 64
+ PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
+ PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
+ PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
+ PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
+ PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
+ PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
+ PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
+ PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
+ PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
+ PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
+ PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
+ PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
+ PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
+ PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
+ PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
+ PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
+#endif
+#endif
+#endif
+#endif
+#endif
+};
+EXPORT_SYMBOL(paca);
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 00000000000..3cef1b8f57f
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1319 @@
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/ppc-pci.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+unsigned long pci_probe_only = 1;
+unsigned long pci_assign_all_buses = 0;
+
+/*
+ * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch
+ * devices we don't have access to.
+ */
+unsigned long io_page_mask;
+
+EXPORT_SYMBOL(io_page_mask);
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static void fixup_resource(struct resource *res, struct pci_dev *dev);
+static void do_bus_setup(struct pci_bus *bus);
+#endif
+
+unsigned int pcibios_assign_all_busses(void)
+{
+ return pci_assign_all_buses;
+}
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
+ * page is mapped and isa_io_limit prevents access to it.
+ */
+unsigned long isa_io_base; /* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+unsigned long pci_io_base;
+EXPORT_SYMBOL(pci_io_base);
+
+void iSeries_pcibios_init(void);
+
+LIST_HEAD(hose_list);
+
+struct dma_mapping_ops pci_dma_ops;
+EXPORT_SYMBOL(pci_dma_ops);
+
+int global_phb_number; /* Global phb counter */
+
+/* Cached ISA bridge dev. */
+struct pci_dev *ppc64_isabridge_dev = NULL;
+
+static void fixup_broken_pcnet32(struct pci_dev* dev)
+{
+ if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
+ dev->vendor = PCI_VENDOR_ID_AMD;
+ pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+ struct resource *res)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ region->start = res->start - offset;
+ region->end = res->end - offset;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+ struct pci_bus_region *region)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ res->start = region->start + offset;
+ res->end = region->end + offset;
+}
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+#endif
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ struct pci_dev *dev = data;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start = res->start;
+ unsigned long alignto;
+
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long offset = (unsigned long)hose->io_base_virt -
+ pci_io_base;
+ /* Make sure we start at our min on all hoses */
+ if (start - offset < PCIBIOS_MIN_IO)
+ start = PCIBIOS_MIN_IO + offset;
+
+ /*
+ * Put everything into 0x00-0xff region modulo 0x400
+ */
+ if (start & 0x300)
+ start = (start + 0x3ff) & ~0x3ff;
+
+ } else if (res->flags & IORESOURCE_MEM) {
+ /* Make sure we start at our min on all hoses */
+ if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
+ start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
+
+ /* Align to multiple of size of minimum base. */
+ alignto = max(0x1000UL, align);
+ start = ALIGN(start, alignto);
+ }
+
+ res->start = start;
+}
+
+static DEFINE_SPINLOCK(hose_spinlock);
+
+/*
+ * pci_controller(phb) initialized common variables.
+ */
+void __devinit pci_setup_pci_controller(struct pci_controller *hose)
+{
+ memset(hose, 0, sizeof(struct pci_controller));
+
+ spin_lock(&hose_spinlock);
+ hose->global_number = global_phb_number++;
+ list_add_tail(&hose->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
+}
+
+static void __init pcibios_claim_one_bus(struct pci_bus *b)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+
+ list_for_each_entry(dev, &b->devices, bus_list) {
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->parent || !r->start || !r->flags)
+ continue;
+ pci_claim_resource(dev, i);
+ }
+ }
+
+ list_for_each_entry(child_bus, &b->children, node)
+ pcibios_claim_one_bus(child_bus);
+}
+
+#ifndef CONFIG_PPC_ISERIES
+static void __init pcibios_claim_of_setup(void)
+{
+ struct pci_bus *b;
+
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_claim_one_bus(b);
+}
+#endif
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+ u32 *prop;
+ int len;
+
+ prop = (u32 *) get_property(np, name, &len);
+ if (prop && len >= 4)
+ return *prop;
+ return def;
+}
+
+static unsigned int pci_parse_of_flags(u32 addr0)
+{
+ unsigned int flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ return flags;
+}
+
+#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
+{
+ u64 base, size;
+ unsigned int flags;
+ struct resource *res;
+ u32 *addrs, i;
+ int proplen;
+
+ addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ for (; proplen >= 20; proplen -= 20, addrs += 5) {
+ flags = pci_parse_of_flags(addrs[0]);
+ if (!flags)
+ continue;
+ base = GET_64BIT(addrs, 1);
+ size = GET_64BIT(addrs, 3);
+ if (!size)
+ continue;
+ i = addrs[0] & 0xff;
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->start = base;
+ res->end = base + size - 1;
+ res->flags = flags;
+ res->name = pci_name(dev);
+ fixup_resource(res, dev);
+ }
+}
+
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct pci_dev *dev;
+ const char *type;
+
+ dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ type = get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ memset(dev, 0, sizeof(struct pci_dev));
+ dev->bus = bus;
+ dev->sysdata = node;
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+
+ dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+ dev->device = get_int_prop(node, "device-id", 0xffff);
+ dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+ dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+
+ sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dev->class = get_int_prop(node, "class-code", 0);
+
+ dev->current_state = 4; /* unknown power state */
+
+ if (!strcmp(type, "pci")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+ dev->irq = NO_IRQ;
+ if (node->n_intrs > 0) {
+ dev->irq = node->intrs[0].line;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+ dev->irq);
+ }
+ }
+
+ pci_parse_of_addrs(node, dev);
+
+ pci_device_add(dev, bus);
+
+ /* XXX pci_scan_msi_device(dev); */
+
+ return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+void __devinit of_scan_bus(struct device_node *node,
+ struct pci_bus *bus)
+{
+ struct device_node *child = NULL;
+ u32 *reg;
+ int reglen, devfn;
+ struct pci_dev *dev;
+
+ while ((child = of_get_next_child(node, child)) != NULL) {
+ reg = (u32 *) get_property(child, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ continue;
+ devfn = (reg[0] >> 8) & 0xff;
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(child, bus, devfn);
+ if (!dev)
+ continue;
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ of_scan_pci_bridge(child, dev);
+ }
+
+ do_bus_setup(bus);
+}
+EXPORT_SYMBOL(of_scan_bus);
+
+void __devinit of_scan_pci_bridge(struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct pci_bus *bus;
+ u32 *busrange, *ranges;
+ int len, i, mode;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ /* parse bus-range property */
+ busrange = (u32 *) get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = (u32 *) get_property(node, "ranges", &len);
+ if (ranges == NULL) {
+ printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+ bus->sysdata = node;
+
+ /* parse ranges property */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ flags = pci_parse_of_flags(ranges[0]);
+ size = GET_64BIT(ranges, 6);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ }
+ res->start = GET_64BIT(ranges, 1);
+ res->end = res->start + size - 1;
+ res->flags = flags;
+ fixup_resource(res, dev);
+ }
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+ else if (mode == PCI_PROBE_NORMAL)
+ pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void __devinit scan_phb(struct pci_controller *hose)
+{
+ struct pci_bus *bus;
+ struct device_node *node = hose->arch_data;
+ int i, mode;
+ struct resource *res;
+
+ bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
+ if (bus == NULL) {
+ printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
+ hose->global_number);
+ return;
+ }
+ bus->secondary = hose->first_busno;
+ hose->bus = bus;
+
+ bus->resource[0] = res = &hose->io_resource;
+ if (res->flags && request_resource(&ioport_resource, res))
+ printk(KERN_ERR "Failed to request PCI IO region "
+ "on PCI domain %04x\n", hose->global_number);
+
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+ bus->resource[i+1] = res;
+ if (res->flags && request_resource(&iomem_resource, res))
+ printk(KERN_ERR "Failed to request PCI memory region "
+ "on PCI domain %04x\n", hose->global_number);
+ }
+
+ mode = PCI_PROBE_NORMAL;
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE) {
+ bus->subordinate = hose->last_busno;
+ of_scan_bus(node, bus);
+ }
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+ if (mode == PCI_PROBE_NORMAL)
+ hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+ pci_bus_add_devices(bus);
+}
+
+static int __init pcibios_init(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ /* For now, override phys_mem_access_prot. If we need it,
+ * later, we may move that initialization to each ppc_md
+ */
+ ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+#ifdef CONFIG_PPC_ISERIES
+ iSeries_pcibios_init();
+#endif
+
+ printk("PCI: Probing PCI hardware\n");
+
+ /* Scan all of the recorded PCI controllers. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ scan_phb(hose);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (pci_probe_only)
+ pcibios_claim_of_setup();
+ else
+ /* FIXME: `else' will be removed when
+ pci_assign_unassigned_resources() is able to work
+ correctly with [partially] allocated PCI tree. */
+ pci_assign_unassigned_resources();
+#endif /* !CONFIG_PPC_ISERIES */
+
+ /* Call machine dependent final fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
+ /* Cache the location of the ISA bridge (if we have one) */
+ ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+ if (ppc64_isabridge_dev != NULL)
+ printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /* map in PCI I/O space */
+ phbs_remap_io();
+#endif
+
+ printk("PCI: Probing PCI hardware done\n");
+
+ return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+char __init *pcibios_setup(char *str)
+{
+ return str;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, oldcmd;
+ int i;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ oldcmd = cmd;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<i)))
+ continue;
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ if (cmd != oldcmd) {
+ printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+ pci_name(dev), cmd);
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ return hose->global_number;
+#endif
+}
+
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ return hose->buid;
+#endif
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ * -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+ unsigned long *offset,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long io_offset = 0;
+ int i, res_bit;
+
+ if (hose == 0)
+ return NULL; /* should never happen */
+
+ /* If memory, add on the PCI bridge address offset */
+ if (mmap_state == pci_mmap_mem) {
+ *offset += hose->pci_mem_offset;
+ res_bit = IORESOURCE_MEM;
+ } else {
+ io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+
+ /*
+ * Check that the offset requested corresponds to one of the
+ * resources of the device.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &dev->resource[i];
+ int flags = rp->flags;
+
+ /* treat ROM as memory (should be already) */
+ if (i == PCI_ROM_RESOURCE)
+ flags |= IORESOURCE_MEM;
+
+ /* Active and same type? */
+ if ((flags & res_bit) == 0)
+ continue;
+
+ /* In the range of this resource? */
+ if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+ continue;
+
+ /* found it! construct the final physical address */
+ if (mmap_state == pci_mmap_io)
+ *offset += hose->io_base_phys - io_offset;
+ return rp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+ pgprot_t protection,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long prot = pgprot_val(protection);
+
+ /* Write combine is always 0 on non-memory space mappings. On
+ * memory space, if the user didn't pass 1, we check for a
+ * "prefetchable" resource. This is a bit hackish, but we use
+ * this to workaround the inability of /sysfs to provide a write
+ * combine bit
+ */
+ if (mmap_state != pci_mmap_mem)
+ write_combine = 0;
+ else if (write_combine == 0) {
+ if (rp->flags & IORESOURCE_PREFETCH)
+ write_combine = 1;
+ }
+
+ /* XXX would be nice to have a way to ask for write-through */
+ prot |= _PAGE_NO_CACHE;
+ if (write_combine)
+ prot &= ~_PAGE_GUARDED;
+ else
+ prot |= _PAGE_GUARDED;
+
+ printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
+ prot);
+
+ return __pgprot(prot);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t protection)
+{
+ struct pci_dev *pdev = NULL;
+ struct resource *found = NULL;
+ unsigned long prot = pgprot_val(protection);
+ unsigned long offset = pfn << PAGE_SHIFT;
+ int i;
+
+ if (page_is_ram(pfn))
+ return __pgprot(prot);
+
+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+
+ for_each_pci_dev(pdev) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ int flags = rp->flags;
+
+ /* Active and same type? */
+ if ((flags & IORESOURCE_MEM) == 0)
+ continue;
+ /* In the range of this resource? */
+ if (offset < (rp->start & PAGE_MASK) ||
+ offset > rp->end)
+ continue;
+ found = rp;
+ break;
+ }
+ if (found)
+ break;
+ }
+ if (found) {
+ if (found->flags & IORESOURCE_PREFETCH)
+ prot &= ~_PAGE_GUARDED;
+ pci_dev_put(pdev);
+ }
+
+ DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+
+ return __pgprot(prot);
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture. The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ struct resource *rp;
+ int ret;
+
+ rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+ if (rp == NULL)
+ return -EINVAL;
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
+ vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+ vma->vm_page_prot,
+ mmap_state, write_combine);
+
+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev;
+ struct device_node *np;
+
+ pdev = to_pci_dev (dev);
+ np = pci_device_to_OF_node(pdev);
+ if (np == NULL || np->full_name == NULL)
+ return 0;
+ return sprintf(buf, "%s", np->full_name);
+}
+static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void pcibios_add_platform_entries(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ device_create_file(&pdev->dev, &dev_attr_devspec);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys,
+ void __iomem * phb_io_base_virt)
+{
+ struct isa_range *range;
+ unsigned long pci_addr;
+ unsigned int isa_addr;
+ unsigned int size;
+ int rlen = 0;
+
+ range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+ if (range == NULL || (rlen < sizeof(struct isa_range))) {
+ printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+ "mapping 64k\n");
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long)phb_io_base_virt,
+ 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ return;
+ }
+
+ /* From "ISA Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 1: an ISA address
+ * cells 2 - 4: a PCI address
+ * (size depending on dev->n_addr_cells)
+ * cell 5: the size of the range
+ */
+ if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+ isa_addr = range->isa_addr.a_lo;
+ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+ range->pci_addr.a_lo;
+
+ /* Assume these are both zero */
+ if ((pci_addr != 0) || (isa_addr != 0)) {
+ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+ __FUNCTION__);
+ return;
+ }
+
+ size = PAGE_ALIGN(range->size);
+
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long) phb_io_base_virt,
+ size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ }
+}
+
+void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int prim)
+{
+ unsigned int *ranges, pci_space;
+ unsigned long size;
+ int rlen = 0;
+ int memno = 0;
+ struct resource *res;
+ int np, na = prom_n_addr_cells(dev);
+ unsigned long pci_addr, cpu_phys_addr;
+
+ np = na + 5;
+
+ /* From "PCI Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 2: a PCI address
+ * cells 3 or 3+4: a CPU physical address
+ * (size depending on dev->n_addr_cells)
+ * cells 4+5 or 5+6: the size of the range
+ */
+ rlen = 0;
+ hose->io_base_phys = 0;
+ ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+ while ((rlen -= np * sizeof(unsigned int)) >= 0) {
+ res = NULL;
+ pci_space = ranges[0];
+ pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+
+ cpu_phys_addr = ranges[3];
+ if (na >= 2)
+ cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
+
+ size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
+ ranges += np;
+ if (size == 0)
+ continue;
+
+ /* Now consume following elements while they are contiguous */
+ while (rlen >= np * sizeof(unsigned int)) {
+ unsigned long addr, phys;
+
+ if (ranges[0] != pci_space)
+ break;
+ addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+ phys = ranges[3];
+ if (na >= 2)
+ phys = (phys << 32) | ranges[4];
+ if (addr != pci_addr + size ||
+ phys != cpu_phys_addr + size)
+ break;
+
+ size += ((unsigned long)ranges[na+3] << 32)
+ | ranges[na+4];
+ ranges += np;
+ rlen -= np * sizeof(unsigned int);
+ }
+
+ switch ((pci_space >> 24) & 0x3) {
+ case 1: /* I/O space */
+ hose->io_base_phys = cpu_phys_addr;
+ hose->pci_io_size = size;
+
+ res = &hose->io_resource;
+ res->flags = IORESOURCE_IO;
+ res->start = pci_addr;
+ DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ break;
+ case 2: /* memory space */
+ memno = 0;
+ while (memno < 3 && hose->mem_resources[memno].flags)
+ ++memno;
+
+ if (memno == 0)
+ hose->pci_mem_offset = cpu_phys_addr - pci_addr;
+ if (memno < 3) {
+ res = &hose->mem_resources[memno];
+ res->flags = IORESOURCE_MEM;
+ res->start = cpu_phys_addr;
+ DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ }
+ break;
+ }
+ if (res != NULL) {
+ res->name = dev->full_name;
+ res->end = res->start + size - 1;
+ res->parent = NULL;
+ res->sibling = NULL;
+ res->child = NULL;
+ }
+ }
+}
+
+void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+ struct device_node *isa_dn;
+
+ hose->io_base_virt = reserve_phb_iospace(size);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary) {
+ pci_io_base = (unsigned long)hose->io_base_virt;
+ isa_dn = of_find_node_by_type(NULL, "isa");
+ if (isa_dn) {
+ isa_io_base = pci_io_base;
+ pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+ hose->io_base_virt);
+ of_node_put(isa_dn);
+ /* Allow all IO */
+ io_page_mask = -1;
+ }
+ }
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+ int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+
+ hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary)
+ pci_io_base = (unsigned long)hose->io_base_virt;
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+
+static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+ unsigned long *start_virt, unsigned long *size)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pci_bus_region region;
+ struct resource *res;
+
+ if (bus->self) {
+ res = bus->resource[0];
+ pcibios_resource_to_bus(bus->self, &region, res);
+ *start_phys = hose->io_base_phys + region.start;
+ *start_virt = (unsigned long) hose->io_base_virt +
+ region.start;
+ if (region.end > region.start)
+ *size = region.end - region.start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, region.start, region.end);
+ return 1;
+ }
+
+ } else {
+ /* Root Bus */
+ res = &hose->io_resource;
+ *start_phys = hose->io_base_phys;
+ *start_virt = (unsigned long) hose->io_base_virt;
+ if (res->end > res->start)
+ *size = res->end - res->start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, res->start, res->end);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int unmap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ if (iounmap_explicit((void __iomem *) start_virt, size))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(unmap_bus_range);
+
+int remap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+ if (__ioremap_explicit(start_phys, start_virt, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(remap_bus_range);
+
+void phbs_remap_io(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ remap_bus_range(hose->bus);
+}
+
+/*
+ * ppc64 can have multifunction devices that do not respond to function 0.
+ * In this case we must scan all functions.
+ * XXX this can go now, we use the OF device tree in all the
+ * cases that caused problems. -- paulus
+ */
+int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
+{
+ return 0;
+}
+
+static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start, end, mask, offset;
+
+ if (res->flags & IORESOURCE_IO) {
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ start = res->start += offset;
+ end = res->end += offset;
+
+ /* Need to allow IO access to pages that are in the
+ ISA range */
+ if (start < MAX_ISA_PORT) {
+ if (end > MAX_ISA_PORT)
+ end = MAX_ISA_PORT;
+
+ start >>= PAGE_SHIFT;
+ end >>= PAGE_SHIFT;
+
+ /* get the range of pages for the map */
+ mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
+ io_page_mask |= mask;
+ }
+ } else if (res->flags & IORESOURCE_MEM) {
+ res->start += hose->pci_mem_offset;
+ res->end += hose->pci_mem_offset;
+ }
+}
+
+void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
+ struct pci_bus *bus)
+{
+ /* Update device resources. */
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++)
+ if (dev->resource[i].flags)
+ fixup_resource(&dev->resource[i], dev);
+}
+EXPORT_SYMBOL(pcibios_fixup_device_resources);
+
+static void __devinit do_bus_setup(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ ppc_md.iommu_bus_setup(bus);
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ ppc_md.iommu_dev_setup(dev);
+
+ if (ppc_md.irq_bus_setup)
+ ppc_md.irq_bus_setup(bus);
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev = bus->self;
+
+ if (dev && pci_probe_only &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ /* This is a subordinate bridge */
+
+ pci_read_bridge_bases(bus);
+ pcibios_fixup_device_resources(dev, bus);
+ }
+
+ do_bus_setup(bus);
+
+ if (!pci_probe_only)
+ return;
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ pcibios_fixup_device_resources(dev, bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+ u8 intpin;
+ struct device_node *node;
+
+ pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
+ if (intpin == 0)
+ return 0;
+
+ node = pci_device_to_OF_node(pci_dev);
+ if (node == NULL)
+ return -1;
+
+ if (node->n_intrs == 0)
+ return -1;
+
+ pci_dev->irq = node->intrs[0].line;
+
+ pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_read_irq_line);
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ u64 *start, u64 *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = pci_io_base - (unsigned long)hose->io_base_virt +
+ hose->io_base_phys;
+
+ *start = rsrc->start + offset;
+ *end = rsrc->end + offset;
+}
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+
+#define IOBASE_BRIDGE_NUMBER 0
+#define IOBASE_MEMORY 1
+#define IOBASE_IO 2
+#define IOBASE_ISA_IO 3
+#define IOBASE_ISA_MEM 4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+ unsigned long in_devfn)
+{
+ struct pci_controller* hose;
+ struct list_head *ln;
+ struct pci_bus *bus = NULL;
+ struct device_node *hose_node;
+
+ /* Argh ! Please forgive me for that hack, but that's the
+ * simplest way to get existing XFree to not lockup on some
+ * G5 machines... So when something asks for bus 0 io base
+ * (bus 0 is HT root), we return the AGP one instead.
+ */
+ if (machine_is_compatible("MacRISC4"))
+ if (in_bus == 0)
+ in_bus = 0xf0;
+
+ /* That syscall isn't quite compatible with PCI domains, but it's
+ * used on pre-domains setup. We return the first match
+ */
+
+ for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
+ bus = pci_bus_b(ln);
+ if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
+ break;
+ bus = NULL;
+ }
+ if (bus == NULL || bus->sysdata == NULL)
+ return -ENODEV;
+
+ hose_node = (struct device_node *)bus->sysdata;
+ hose = PCI_DN(hose_node)->phb;
+
+ switch (which) {
+ case IOBASE_BRIDGE_NUMBER:
+ return (long)hose->first_busno;
+ case IOBASE_MEMORY:
+ return (long)hose->pci_mem_offset;
+ case IOBASE_IO:
+ return (long)hose->io_base_phys;
+ case IOBASE_ISA_IO:
+ return (long)isa_io_base;
+ case IOBASE_ISA_MEM:
+ return -EINVAL;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 00000000000..e1a32f802c0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
+/*
+ * Support for DMA from PCI devices to main memory on
+ * machines without an iommu or with directly addressable
+ * RAM (typically a pmac with 2Gb of RAM or less)
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/abs_addr.h>
+#include <asm/ppc-pci.h>
+
+static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret;
+
+ ret = (void *)__get_free_pages(flag, get_order(size));
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_abs(ret);
+ }
+ return ret;
+}
+
+static void pci_direct_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
+ size_t size, enum dma_data_direction direction)
+{
+ return virt_to_abs(ptr);
+}
+
+static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++, sg++) {
+ sg->dma_address = page_to_phys(sg->page) + sg->offset;
+ sg->dma_length = sg->length;
+ }
+
+ return nents;
+}
+
+static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_dma_supported(struct device *dev, u64 mask)
+{
+ return mask < 0x100000000ull;
+}
+
+void __init pci_direct_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_direct_free_coherent;
+ pci_dma_ops.map_single = pci_direct_map_single;
+ pci_dma_ops.unmap_single = pci_direct_unmap_single;
+ pci_dma_ops.map_sg = pci_direct_map_sg;
+ pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
+ pci_dma_ops.dma_supported = pci_direct_dma_supported;
+}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 00000000000..12c4c9e9bbc
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * Traverse_func that inits the PCI fields of the device node.
+ * NOTE: this *must* be done before read/write config to the device.
+ */
+static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
+ u32 *regs;
+ struct pci_dn *pdn;
+
+ if (mem_init_done)
+ pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
+ else
+ pdn = alloc_bootmem(sizeof(*pdn));
+ if (pdn == NULL)
+ return NULL;
+ memset(pdn, 0, sizeof(*pdn));
+ dn->data = pdn;
+ pdn->node = dn;
+ pdn->phb = phb;
+ regs = (u32 *)get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ pdn->busno = (regs[0] >> 16) & 0xff;
+ pdn->devfn = (regs[0] >> 8) & 0xff;
+ }
+
+ pdn->pci_ext_config_space = (type && *type == 1);
+ return NULL;
+}
+
+/*
+ * Traverse a device tree stopping each PCI device in the tree.
+ * This is done depth first. As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value. If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+ void *data)
+{
+ struct device_node *dn, *nextdn;
+ void *ret;
+
+ /* We started with a phb, iterate all childs */
+ for (dn = start->child; dn; dn = nextdn) {
+ u32 *classp, class;
+
+ nextdn = NULL;
+ classp = (u32 *)get_property(dn, "class-code", NULL);
+ class = classp ? *classp : 0;
+
+ if (pre && ((ret = pre(dn, data)) != NULL))
+ return ret;
+
+ /* If we are a PCI bridge, go down */
+ if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+ (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+ /* Depth first...do children */
+ nextdn = dn->child;
+ else if (dn->sibling)
+ /* ok, try next sibling instead. */
+ nextdn = dn->sibling;
+ if (!nextdn) {
+ /* Walk up to next valid sibling. */
+ do {
+ dn = dn->parent;
+ if (dn == start)
+ return NULL;
+ } while (dn->sibling == NULL);
+ nextdn = dn->sibling;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node * dn = (struct device_node *) phb->arch_data;
+ struct pci_dn *pdn;
+
+ /* PHB nodes themselves must not match */
+ update_dn_pci_info(dn, phb);
+ pdn = dn->data;
+ if (pdn) {
+ pdn->devfn = pdn->busno = -1;
+ pdn->phb = phb;
+ }
+
+ /* Update dn->phb ptrs for new phb and children devices */
+ traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/*
+ * Traversal func that looks for a <busno,devfcn> value.
+ * If found, the pci_dn is returned (thus terminating the traversal).
+ */
+static void *is_devfn_node(struct device_node *dn, void *data)
+{
+ int busno = ((unsigned long)data >> 8) & 0xff;
+ int devfn = ((unsigned long)data) & 0xff;
+ struct pci_dn *pci = dn->data;
+
+ if (pci && (devfn == pci->devfn) && (busno == pci->busno))
+ return dn;
+ return NULL;
+}
+
+/*
+ * This is the "slow" path for looking up a device_node from a
+ * pci_dev. It will hunt for the device under its parent's
+ * phb and then update sysdata for a future fastpath.
+ *
+ * It may also do fixups on the actual device since this happens
+ * on the first read/write.
+ *
+ * Note that it also must deal with devices that don't exist.
+ * In this case it may probe for real hardware ("just in case")
+ * and add a device_node to the device tree if necessary.
+ *
+ */
+struct device_node *fetch_dev_dn(struct pci_dev *dev)
+{
+ struct device_node *orig_dn = dev->sysdata;
+ struct device_node *dn;
+ unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
+
+ dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
+ if (dn)
+ dev->sysdata = dn;
+ return dn;
+}
+EXPORT_SYMBOL(fetch_dev_dn);
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
+{
+ struct device_node *np = node;
+ struct pci_dn *pci = NULL;
+ int err = NOTIFY_OK;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ pci = np->parent->data;
+ if (pci)
+ update_dn_pci_info(np, pci->phb);
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+ .notifier_call = pci_dn_reconfig_notifier,
+};
+
+/**
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ *
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures
+ * (including pci info in the device node structs) for each
+ * pci device found underneath. This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ /* This must be done first so the device nodes have valid pci info! */
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ pci_devs_phb_init_dynamic(phb);
+
+ pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 00000000000..bdf15dbbf4f
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
+/*
+ * arch/ppc64/kernel/pci_iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *
+ * Dynamic DMA mapping support, platform-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * We can use ->sysdata directly and avoid the extra work in
+ * pci_device_to_OF_node since ->sysdata will have been initialised
+ * in the iommu init code for all devices.
+ */
+#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
+
+static inline struct iommu_table *devnode_table(struct device *dev)
+{
+ struct pci_dev *pdev;
+
+ if (!dev) {
+ pdev = ppc64_isabridge_dev;
+ if (!pdev)
+ return NULL;
+ } else
+ pdev = to_pci_dev(dev);
+
+ return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
+}
+
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
+ flag);
+}
+
+static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
+}
+
+
+static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
+}
+
+
+static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ return iommu_map_sg(pdev, devnode_table(pdev), sglist,
+ nelems, direction);
+}
+
+static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int pci_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+void pci_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_iommu_free_coherent;
+ pci_dma_ops.map_single = pci_iommu_map_single;
+ pci_dma_ops.unmap_single = pci_iommu_unmap_single;
+ pci_dma_ops.map_sg = pci_iommu_map_sg;
+ pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
+ pci_dma_ops.dma_supported = pci_iommu_dma_supported;
+}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 47d6f7e2ea9..5dcf4ba05ee 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -44,6 +44,7 @@
#include <asm/cputable.h>
#include <asm/btext.h>
#include <asm/div64.h>
+#include <asm/signal.h>
#ifdef CONFIG_8xx
#include <asm/commproc.h>
@@ -56,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
extern void alignment_exception(struct pt_regs *regs);
extern void program_check_exception(struct pt_regs *regs);
extern void single_step_exception(struct pt_regs *regs);
-extern int do_signal(sigset_t *, struct pt_regs *);
extern int pmac_newworld;
extern int sys_sigreturn(struct pt_regs *regs);
@@ -188,9 +188,6 @@ EXPORT_SYMBOL(adb_try_handler_change);
EXPORT_SYMBOL(cuda_request);
EXPORT_SYMBOL(cuda_poll);
#endif /* CONFIG_ADB_CUDA */
-#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(_machine);
-#endif
#ifdef CONFIG_PPC_PMAC
EXPORT_SYMBOL(sys_ctrler);
#endif
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 00000000000..7ba42a405f4
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/vdso_datapage.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos);
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("ppc64", NULL);
+ if (!root)
+ return 1;
+
+ if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ if (!pde)
+ return 1;
+ pde->nlink = 1;
+ pde->data = vdso_data;
+ pde->size = PAGE_SIZE;
+ pde->proc_fops = &page_map_fops;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96843211cc5..de69fb37c73 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -46,10 +46,10 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/prom.h>
+#include <asm/machdep.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/time.h>
-#include <asm/machdep.h>
#endif
extern unsigned long _get_SP(void);
@@ -203,10 +203,8 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
int set_dabr(unsigned long dabr)
{
-#ifdef CONFIG_PPC64
if (ppc_md.set_dabr)
return ppc_md.set_dabr(dabr);
-#endif
mtspr(SPRN_DABR, dabr);
return 0;
@@ -554,12 +552,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
unsigned long sp_vsid = get_kernel_vsid(sp);
+ unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
sp_vsid <<= SLB_VSID_SHIFT;
- sp_vsid |= SLB_VSID_KERNEL;
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- sp_vsid |= SLB_VSID_L;
-
+ sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da69550..3bf968e7409 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -48,9 +48,6 @@
#include <asm/machdep.h>
#include <asm/pSeries_reconfig.h>
#include <asm/pci-bridge.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
#ifdef DEBUG
#define DBG(fmt...) printk(KERN_ERR fmt)
@@ -74,10 +71,6 @@ struct isa_reg_property {
typedef int interpret_func(struct device_node *, unsigned long *,
int, int, int);
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-
static int __initdata dt_root_addr_cells;
static int __initdata dt_root_size_cells;
@@ -391,7 +384,7 @@ static int __devinit finish_node_interrupts(struct device_node *np,
#ifdef CONFIG_PPC64
/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
- if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) {
+ if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
char *name = get_property(ic->parent, "name", NULL);
if (name && !strcmp(name, "u3"))
np->intrs[intrcount].line += 128;
@@ -724,10 +717,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
-static int __init scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data)
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
@@ -784,8 +777,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-static void* __init get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size)
{
unsigned long p = node;
@@ -1087,27 +1080,14 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
- unsigned long size = 0;
+ unsigned long size;
+ char *type = of_get_flat_dt_prop(node, "device_type", &size);
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
-#ifdef CONFIG_PPC_PSERIES
- /* On LPAR, look for the first ibm,pft-size property for the hash table size
- */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
- u32 *pft_size;
- pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (pft_size != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = pft_size[1];
- }
- }
-#endif
-
boot_cpuid = 0;
boot_cpuid_phys = 0;
if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1097,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
} else {
/* Check if it's the boot-cpu, set it's hw index now */
- if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
- prop = get_flat_dt_prop(node, "reg", NULL);
+ if (of_get_flat_dt_prop(node,
+ "linux,boot-cpu", NULL) != NULL) {
+ prop = of_get_flat_dt_prop(node, "reg", NULL);
if (prop != NULL)
boot_cpuid_phys = *prop;
}
@@ -1127,14 +1108,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
- prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1128,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
- prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,34 +1151,30 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
- prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
-#ifdef CONFIG_PPC64
- systemcfg->platform = *prop;
-#else
#ifdef CONFIG_PPC_MULTIPLATFORM
_machine = *prop;
#endif
-#endif
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
- if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
- if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
#endif
- lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (lprop)
memory_limit = *lprop;
#ifdef CONFIG_PPC64
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
if (lprop)
tce_alloc_start = *lprop;
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (lprop)
tce_alloc_end = *lprop;
#endif
@@ -1209,9 +1186,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
- basep = get_flat_dt_prop(node, "linux,rtas-base", NULL);
- entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL);
- prop = get_flat_dt_prop(node, "linux,rtas-size", NULL);
+ basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1232,11 +1209,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
- prop = get_flat_dt_prop(node, "#size-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
- prop = get_flat_dt_prop(node, "#address-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
@@ -1271,15 +1248,22 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
/* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
+ if (type == NULL) {
+ /*
+ * The longtrail doesn't have a device_type on the
+ * /memory node, so look for the node called /memory@0.
+ */
+ if (depth != 1 || strcmp(uname, "memory@0") != 0)
+ return 0;
+ } else if (strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
+ reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
@@ -1343,17 +1327,14 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- scan_flat_dt(early_init_dt_scan_chosen, NULL);
+ of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
- scan_flat_dt(early_init_dt_scan_root, NULL);
- scan_flat_dt(early_init_dt_scan_memory, NULL);
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
-#ifdef CONFIG_PPC64
- systemcfg->physicalMemorySize = lmb_phys_mem_size();
-#endif
lmb_reserve(0, __pa(klimit));
DBG("Phys. mem: %lx\n", lmb_phys_mem_size());
@@ -1363,10 +1344,10 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
- /* Retreive hash table size from flattened tree plus other
- * CPU related informations (altivec support, boot CPU ID, ...)
+ /* Retreive CPU related informations from the flat tree
+ * (altivec support, boot CPU ID, ...)
*/
- scan_flat_dt(early_init_dt_scan_cpus, NULL);
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG(" <- early_init_devtree()\n");
}
@@ -1387,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np)
/* No #address-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_addr_cells);
int
prom_n_size_cells(struct device_node* np)
@@ -1402,6 +1384,7 @@ prom_n_size_cells(struct device_node* np)
/* No #size-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_size_cells);
/**
* Work out the sense (active-low level / active-high edge)
@@ -1920,7 +1903,7 @@ static int of_finish_dynamic_node(struct device_node *node,
/* We don't support that function on PowerMac, at least
* not yet
*/
- if (systemcfg->platform == PLATFORM_POWERMAC)
+ if (_machine == PLATFORM_POWERMAC)
return -ENODEV;
/* fix up new node's linux_phandle field */
@@ -1986,14 +1969,31 @@ EXPORT_SYMBOL(get_property);
/*
* Add a property to a node
*/
-void prom_add_property(struct device_node* np, struct property* prop)
+int prom_add_property(struct device_node* np, struct property* prop)
{
- struct property **next = &np->properties;
+ struct property **next;
prop->next = NULL;
- while (*next)
+ write_lock(&devtree_lock);
+ next = &np->properties;
+ while (*next) {
+ if (strcmp(prop->name, (*next)->name) == 0) {
+ /* duplicate ! don't insert it */
+ write_unlock(&devtree_lock);
+ return -1;
+ }
next = &(*next)->next;
+ }
*next = prop;
+ write_unlock(&devtree_lock);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ /* try to add to proc as well if it was initialized */
+ if (np->pde)
+ proc_device_tree_add_prop(np->pde, prop);
+#endif /* CONFIG_PROC_DEVICETREE */
+
+ return 0;
}
/* I quickly hacked that one, check against spec ! */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index c758b6624d7..4ce0105c308 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -94,11 +94,17 @@ extern const struct linux_logo logo_linux_clut224;
#ifdef CONFIG_PPC64
#define RELOC(x) (*PTRRELOC(&(x)))
#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x))
+#define OF_WORKAROUNDS 0
#else
#define RELOC(x) (x)
#define ADDR(x) (u32) (x)
+#define OF_WORKAROUNDS of_workarounds
+int of_workarounds;
#endif
+#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
+
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
RELOC(__FILE__), __LINE__); \
@@ -111,11 +117,6 @@ extern const struct linux_logo logo_linux_clut224;
#define prom_debug(x...)
#endif
-#ifdef CONFIG_PPC32
-#define PLATFORM_POWERMAC _MACH_Pmac
-#define PLATFORM_CHRP _MACH_chrp
-#endif
-
typedef u32 prom_arg_t;
@@ -128,10 +129,11 @@ struct prom_args {
struct prom_t {
ihandle root;
- ihandle chosen;
+ phandle chosen;
int cpu;
ihandle stdout;
ihandle mmumap;
+ ihandle memory;
};
struct mem_map_entry {
@@ -360,16 +362,36 @@ static void __init prom_printf(const char *format, ...)
static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
unsigned long align)
{
- int ret;
struct prom_t *_prom = &RELOC(prom);
- ret = call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
- (prom_arg_t)align);
- if (ret != -1 && _prom->mmumap != 0)
- /* old pmacs need us to map as well */
+ if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) {
+ /*
+ * Old OF requires we claim physical and virtual separately
+ * and then map explicitly (assuming virtual mode)
+ */
+ int ret;
+ prom_arg_t result;
+
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->memory,
+ align, size, virt);
+ if (ret != 0 || result == -1)
+ return -1;
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->mmumap,
+ align, size, virt);
+ if (ret != 0) {
+ call_prom("call-method", 4, 1, ADDR("release"),
+ _prom->memory, size, virt);
+ return -1;
+ }
+ /* the 0x12 is M (coherence) + PP == read/write */
call_prom("call-method", 6, 1,
- ADDR("map"), _prom->mmumap, 0, size, virt, virt);
- return ret;
+ ADDR("map"), _prom->mmumap, 0x12, size, virt, virt);
+ return virt;
+ }
+ return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
+ (prom_arg_t)align);
}
static void __init __attribute__((noreturn)) prom_panic(const char *reason)
@@ -403,23 +425,64 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int __init prom_getprop(phandle node, const char *pname,
+static int inline prom_getprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int __init prom_getproplen(phandle node, const char *pname)
+static int inline prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static int __init prom_setprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static void add_string(char **str, const char *q)
{
- return call_prom("setprop", 4, 1, node, ADDR(pname),
- (u32)(unsigned long) value, (u32) valuelen);
+ char *p = *str;
+
+ while (*q)
+ *p++ = *q++;
+ *p++ = ' ';
+ *str = p;
+}
+
+static char *tohex(unsigned int x)
+{
+ static char digits[] = "0123456789abcdef";
+ static char result[9];
+ int i;
+
+ result[8] = 0;
+ i = 8;
+ do {
+ --i;
+ result[i] = digits[x & 0xf];
+ x >>= 4;
+ } while (x != 0 && i > 0);
+ return &result[i];
+}
+
+static int __init prom_setprop(phandle node, const char *nodename,
+ const char *pname, void *value, size_t valuelen)
+{
+ char cmd[256], *p;
+
+ if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+ return call_prom("setprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+
+ /* gah... setprop doesn't work on longtrail, have to use interpret */
+ p = cmd;
+ add_string(&p, "dev");
+ add_string(&p, nodename);
+ add_string(&p, tohex((u32)(unsigned long) value));
+ add_string(&p, tohex(valuelen));
+ add_string(&p, tohex(ADDR(pname)));
+ add_string(&p, tohex(strlen(RELOC(pname))));
+ add_string(&p, "property");
+ *p = 0;
+ return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
}
/* We can't use the standard versions because of RELOC headaches. */
@@ -980,7 +1043,7 @@ static void __init prom_instantiate_rtas(void)
rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
if (!IHANDLE_VALID(rtas_inst)) {
- prom_printf("opening rtas package failed");
+ prom_printf("opening rtas package failed (%x)\n", rtas_inst);
return;
}
@@ -988,7 +1051,7 @@ static void __init prom_instantiate_rtas(void)
if (call_prom_ret("call-method", 3, 2, &entry,
ADDR("instantiate-rtas"),
- rtas_inst, base) == PROM_ERROR
+ rtas_inst, base) != 0
|| entry == 0) {
prom_printf(" failed\n");
return;
@@ -997,8 +1060,10 @@ static void __init prom_instantiate_rtas(void)
reserve_mem(base, size);
- prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base));
- prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+ &base, sizeof(base));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+ &entry, sizeof(entry));
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
@@ -1089,10 +1154,6 @@ static void __init prom_initialize_tce_table(void)
if (base < local_alloc_bottom)
local_alloc_bottom = base;
- /* Save away the TCE table attributes for later use. */
- prom_setprop(node, "linux,tce-base", &base, sizeof(base));
- prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
-
/* It seems OF doesn't null-terminate the path :-( */
memset(path, 0, sizeof(path));
/* Call OF to setup the TCE hardware */
@@ -1101,6 +1162,10 @@ static void __init prom_initialize_tce_table(void)
prom_printf("package-to-path failed\n");
}
+ /* Save away the TCE table attributes for later use. */
+ prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+ prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
prom_debug("TCE table: %s\n", path);
prom_debug("\tnode = 0x%x\n", node);
prom_debug("\tbase = 0x%x\n", base);
@@ -1342,6 +1407,7 @@ static void __init prom_init_client_services(unsigned long pp)
/*
* For really old powermacs, we need to map things we claim.
* For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
*/
static void __init prom_find_mmu(void)
{
@@ -1355,12 +1421,19 @@ static void __init prom_find_mmu(void)
if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
return;
version[sizeof(version) - 1] = 0;
- prom_printf("OF version is '%s'\n", version);
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") != 0)
+ if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ of_workarounds = OF_WA_CLAIM;
+ else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+ call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
+ } else
return;
+ _prom->memory = call_prom("open", 1, 1, ADDR("/memory"));
prom_getprop(_prom->chosen, "mmu", &_prom->mmumap,
sizeof(_prom->mmumap));
+ if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap))
+ of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
}
#else
#define prom_find_mmu()
@@ -1382,16 +1455,17 @@ static void __init prom_init_stdout(void)
memset(path, 0, 256);
call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
val = call_prom("instance-to-package", 1, 1, _prom->stdout);
- prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package",
+ &val, sizeof(val));
prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
- prom_setprop(_prom->chosen, "linux,stdout-path",
- RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path",
+ path, strlen(path) + 1);
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(val, "device_type", type, sizeof(type));
if (strcmp(type, RELOC("display")) == 0)
- prom_setprop(val, "linux,boot-display", NULL, 0);
+ prom_setprop(val, path, "linux,boot-display", NULL, 0);
}
static void __init prom_close_stdin(void)
@@ -1408,8 +1482,9 @@ static int __init prom_find_machine_type(void)
struct prom_t *_prom = &RELOC(prom);
char compat[256];
int len, i = 0;
+#ifdef CONFIG_PPC64
phandle rtas;
-
+#endif
len = prom_getprop(_prom->root, "compatible",
compat, sizeof(compat)-1);
if (len > 0) {
@@ -1513,7 +1588,7 @@ static void __init prom_check_displays(void)
/* Success */
prom_printf("done\n");
- prom_setprop(node, "linux,opened", NULL, 0);
+ prom_setprop(node, path, "linux,opened", NULL, 0);
/* Setup a usable color table when the appropriate
* method is available. Should update this to set-colors */
@@ -1872,7 +1947,7 @@ static void __init fixup_device_tree(void)
if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
== PROM_ERROR)
return;
- if (u3_rev != 0x35 && u3_rev != 0x37)
+ if (u3_rev < 0x35 || u3_rev > 0x39)
return;
/* does it need fixup ? */
if (prom_getproplen(i2c, "interrupts") > 0)
@@ -1883,9 +1958,11 @@ static void __init fixup_device_tree(void)
/* interrupt on this revision of u3 is number 0 and level */
interrupts[0] = 0;
interrupts[1] = 1;
- prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+ &interrupts, sizeof(interrupts));
parent = (u32)mpic;
- prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+ &parent, sizeof(parent));
#endif
}
@@ -1921,11 +1998,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
val = RELOC(prom_initrd_start);
- prom_setprop(_prom->chosen, "linux,initrd-start", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start",
+ &val, sizeof(val));
val = RELOC(prom_initrd_end);
- prom_setprop(_prom->chosen, "linux,initrd-end", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end",
+ &val, sizeof(val));
reserve_mem(RELOC(prom_initrd_start),
RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
@@ -1968,14 +2045,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_init_client_services(pp);
/*
- * Init prom stdout device
+ * See if this OF is old enough that we need to do explicit maps
+ * and other workarounds
*/
- prom_init_stdout();
+ prom_find_mmu();
/*
- * See if this OF is old enough that we need to do explicit maps
+ * Init prom stdout device
*/
- prom_find_mmu();
+ prom_init_stdout();
/*
* Check for an initrd
@@ -1988,14 +2066,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
RELOC(of_platform) = prom_find_machine_type();
getprop_rval = RELOC(of_platform);
- prom_setprop(_prom->chosen, "linux,platform",
+ prom_setprop(_prom->chosen, "/chosen", "linux,platform",
&getprop_rval, sizeof(getprop_rval));
#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
- if (RELOC(of_platform) & PLATFORM_PSERIES)
+ if (RELOC(of_platform) == PLATFORM_PSERIES ||
+ RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
prom_send_capabilities();
#endif
@@ -2049,21 +2128,23 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* Fill in some infos for use by the kernel later on
*/
if (RELOC(prom_memory_limit))
- prom_setprop(_prom->chosen, "linux,memory-limit",
+ prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit",
&RELOC(prom_memory_limit),
sizeof(prom_memory_limit));
#ifdef CONFIG_PPC64
if (RELOC(ppc64_iommu_off))
- prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off",
+ NULL, 0);
if (RELOC(iommu_force_on))
- prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on",
+ NULL, 0);
if (RELOC(prom_tce_alloc_start)) {
- prom_setprop(_prom->chosen, "linux,tce-alloc-start",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start",
&RELOC(prom_tce_alloc_start),
sizeof(prom_tce_alloc_start));
- prom_setprop(_prom->chosen, "linux,tce-alloc-end",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end",
&RELOC(prom_tce_alloc_end),
sizeof(prom_tce_alloc_end));
}
@@ -2080,8 +2161,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_printf("copying OF device tree ...\n");
flatten_device_tree();
- /* in case stdin is USB and still active on IBM machines... */
- prom_close_stdin();
+ /*
+ * in case stdin is USB and still active on IBM machines...
+ * Unfortunately quiesce crashes on some powermacs if we have
+ * closed stdin already (in particular the powerbook 101).
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ prom_close_stdin();
/*
* Call OF "quiesce" method to shut down pending DMA's from
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 568ea335d61..3d2abd95c7a 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -248,46 +248,10 @@ void ptrace_disable(struct task_struct *child)
clear_single_step(child);
}
-long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret = -EPERM;
- lock_kernel();
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -540,10 +504,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
}
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 5bdd5b079d9..7a95b8a2835 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -32,7 +32,6 @@
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
-#include <asm/systemcfg.h>
/* Token for Sensors */
#define KEY_SWITCH 0x0001
@@ -259,7 +258,7 @@ static int __init proc_rtas_init(void)
{
struct proc_dir_entry *entry;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR)
return 1;
rtas_node = of_find_node_by_name(NULL, "rtas");
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 00000000000..7b948662704
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,105 @@
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+
+
+#define MAX_RTC_WAIT 5000 /* 5 sec */
+#define RTAS_CLOCK_BUSY (-2)
+unsigned long __init rtas_get_boot_time(void)
+{
+ int ret[8];
+ int error, wait_time;
+ unsigned long max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ wait_time = rtas_extended_busy_delay_time(error);
+ /* This is boot time so we spin. */
+ udelay(wait_time*1000);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return 0;
+ }
+
+ return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+}
+
+/* NOTE: get_rtc_time will get an error if executed in interrupt context
+ * and if a delay is needed to read the clock. In this case we just
+ * silently return without updating rtc_tm.
+ */
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
+{
+ int ret[8];
+ int error, wait_time;
+ unsigned long max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt() && printk_ratelimit()) {
+ memset(&rtc_tm, 0, sizeof(struct rtc_time));
+ printk(KERN_WARNING "error: reading clock"
+ " would delay interrupt\n");
+ return; /* delay not allowed */
+ }
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return;
+ }
+
+ rtc_tm->tm_sec = ret[5];
+ rtc_tm->tm_min = ret[4];
+ rtc_tm->tm_hour = ret[3];
+ rtc_tm->tm_mday = ret[2];
+ rtc_tm->tm_mon = ret[1] - 1;
+ rtc_tm->tm_year = ret[0] - 1900;
+}
+
+int rtas_set_rtc_time(struct rtc_time *tm)
+{
+ int error, wait_time;
+ unsigned long max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ tm->tm_year + 1900, tm->tm_mon + 1,
+ tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec, 0);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt())
+ return 1; /* probably decrementer */
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit())
+ printk(KERN_WARNING "error: setting the clock failed (%d)\n",
+ error);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b7fc2d88495..4283fa33f78 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -17,6 +17,7 @@
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/delay.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -28,9 +29,6 @@
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/lmb.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
struct rtas_t rtas = {
.lock = SPIN_LOCK_UNLOCKED
@@ -83,7 +81,7 @@ void call_rtas_display_status_delay(unsigned char c)
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
- udelay(500000);
+ mdelay(500);
pending_newline = 1;
} else {
if (pending_newline) {
@@ -608,7 +606,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
return 0;
}
-#ifdef CONFIG_SMP
/* This version can't take the spinlock, because it never returns */
struct rtas_args rtas_stop_self_args = {
@@ -633,7 +630,6 @@ void rtas_stop_self(void)
panic("Alas, I survived.\n");
}
-#endif
/*
* Call early during boot, before mem init or bootmem, to retreive the RTAS
@@ -672,7 +668,7 @@ void __init rtas_initialize(void)
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (_machine == PLATFORM_PSERIES_LPAR)
rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
#endif
rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 00000000000..0e5a8e11665
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,513 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+ char * status;
+
+ status = get_property(dn, "status", NULL);
+
+ if (!status)
+ return 1;
+
+ if (!strcmp(status, "okay"))
+ return 1;
+
+ return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+ rtas_pci_read_config,
+ rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+ char *model = (char *)get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+ unsigned int addr_size_words,
+ struct reg_property64 *reg)
+{
+ unsigned int *ui_ptr = NULL, len;
+
+ /* Found a PHB, now figure out where his registers are mapped. */
+ ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+ if (ui_ptr == NULL)
+ return 1;
+
+ if (addr_size_words == 1) {
+ reg->address = ((struct reg_property32 *)ui_ptr)->address;
+ reg->size = ((struct reg_property32 *)ui_ptr)->size;
+ } else {
+ *reg = *((struct reg_property64 *)ui_ptr);
+ }
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct reg_property64 reg_struct;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+ return;
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ int addr_cells;
+ unsigned int *buid_vals;
+ unsigned int len;
+ unsigned long buid;
+
+ if (ibm_read_pci_config == -1) return 0;
+
+ /* PHB's will always be children of the root node,
+ * or so it is promised by the current firmware. */
+ if (phb->parent == NULL)
+ return 0;
+ if (phb->parent->parent)
+ return 0;
+
+ buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+ if (buid_vals == NULL)
+ return 0;
+
+ addr_cells = prom_n_addr_cells(phb);
+ if (addr_cells == 1) {
+ buid = (unsigned long) buid_vals[0];
+ } else {
+ buid = (((unsigned long)buid_vals[0]) << 32UL) |
+ (((unsigned long)buid_vals[1]) & 0xffffffff);
+ }
+ return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ int *bus_range;
+ unsigned int len;
+
+ bus_range = (int *) get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+ struct pci_controller *phb,
+ unsigned int addr_size_words)
+{
+ pci_setup_pci_controller(phb);
+
+ if (is_python(dev))
+ python_countermeasures(dev, addr_size_words);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->arch_data = dev;
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb,
+ struct property *of_prop)
+{
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(phb->global_number);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+ prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+ struct property *of_prop;
+
+ phb = alloc_bootmem(sizeof(struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+
+ of_prop = alloc_bootmem(sizeof(struct property) +
+ sizeof(phb->global_number));
+ if (!of_prop)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ add_linux_pci_domain(dev, phb, of_prop);
+
+ return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+
+ phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+ GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ phb->is_dynamic = 1;
+
+ /* TODO: linux,pci-domain? */
+
+ return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ unsigned int root_size_cells = 0;
+ unsigned int index;
+ unsigned int *opprop = NULL;
+ struct device_node *root = of_find_node_by_path("/");
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+ opprop = (unsigned int *)get_property(root,
+ "platform-open-pic", NULL);
+ }
+
+ root_size_cells = prom_n_size_cells(root);
+
+ index = 0;
+
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+ if (node->type == NULL || strcmp(node->type, "pci") != 0)
+ continue;
+
+ phb = alloc_phb(node, root_size_cells);
+ if (!phb)
+ continue;
+
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+ int addr = root_size_cells * (index + 2) - 1;
+ mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+ }
+#endif
+ index++;
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /*
+ * pci_probe_only and pci_assign_all_buses can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ int *prop;
+
+ prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+ NULL);
+ if (prop)
+ pci_probe_only = *prop;
+
+ prop = (int *)get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop)
+ pci_assign_all_buses = *prop;
+ }
+
+ return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ unsigned int root_size_cells = 0;
+ struct pci_controller *phb;
+ int primary;
+
+ root_size_cells = prom_n_size_cells(root);
+
+ primary = list_empty(&hose_list);
+ phb = alloc_phb_dynamic(dn, root_size_cells);
+ if (!phb)
+ return NULL;
+
+ pci_process_bridge_OF_ranges(phb, dn, primary);
+
+ pci_setup_phb_io_dynamic(phb, primary);
+ of_node_put(root);
+
+ pci_devs_phb_init_dynamic(phb);
+ scan_phb(phb);
+
+ return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct resource *res;
+ int rc, i;
+
+ res = b->resource[0];
+ if (!res->flags) {
+ printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+ b->name);
+ return 1;
+ }
+
+ rc = unmap_bus_range(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ if (release_resource(res)) {
+ printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ for (i = 1; i < 3; ++i) {
+ res = b->resource[i];
+ if (!res->flags && i == 0) {
+ printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+ if (res->flags && release_resource(res)) {
+ printk(KERN_ERR
+ "%s: failed to release IO %d on bus %s\n",
+ __FUNCTION__, i, b->name);
+ return 1;
+ }
+ }
+
+ list_del(&phb->list_node);
+ if (phb->is_dynamic)
+ kfree(phb);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d43fa8c0e5a..33e7f2c7f19 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
+#include <asm/vdso_datapage.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -51,6 +52,9 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/lmb.h>
+#include <asm/xmon.h>
+
+#include "setup.h"
#undef DEBUG
@@ -60,6 +64,13 @@
#define DBG(fmt...)
#endif
+#ifdef CONFIG_PPC_MULTIPLATFORM
+int _machine = 0;
+EXPORT_SYMBOL(_machine);
+#endif
+
+unsigned long klimit = (unsigned long) _end;
+
/*
* This still seems to be needed... -- paulus
*/
@@ -405,6 +416,44 @@ static int __init set_preferred_console(void)
console_initcall(set_preferred_console);
#endif /* CONFIG_PPC_MULTIPLATFORM */
+void __init check_for_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long *prop;
+
+ DBG(" -> check_for_initrd()\n");
+
+ if (of_chosen) {
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-start", NULL);
+ if (prop != NULL) {
+ initrd_start = (unsigned long)__va(*prop);
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-end", NULL);
+ if (prop != NULL) {
+ initrd_end = (unsigned long)__va(*prop);
+ initrd_below_start_ok = 1;
+ } else
+ initrd_start = 0;
+ }
+ }
+
+ /* If we were passed an initrd, set the ROOT_DEV properly if the values
+ * look sensible. If not, clear initrd reference.
+ */
+ if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
+ initrd_end > initrd_start)
+ ROOT_DEV = Root_RAM0;
+ else
+ initrd_start = initrd_end = 0;
+
+ if (initrd_start)
+ printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+
+ DBG(" <- check_for_initrd()\n");
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
#ifdef CONFIG_SMP
/**
@@ -470,8 +519,8 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR &&
- (dn = of_find_node_by_path("/rtas"))) {
+ if (_machine == PLATFORM_PSERIES_LPAR &&
+ (dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
unsigned int *ireg;
@@ -515,7 +564,27 @@ void __init smp_setup_cpu_maps(void)
cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
}
- systemcfg->processorCount = num_present_cpus();
+ vdso_data->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_XMON
+static int __init early_xmon(char *p)
+{
+ /* ensure xmon is enabled */
+ if (p) {
+ if (strncmp(p, "on", 2) == 0)
+ xmon_init(1);
+ if (strncmp(p, "off", 3) == 0)
+ xmon_init(0);
+ if (strncmp(p, "early", 5) != 0)
+ return 0;
+ }
+ xmon_init(1);
+ debugger(NULL);
+
+ return 0;
+}
+early_param("xmon", early_xmon);
+#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 00000000000..2ebba755272
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,6 @@
+#ifndef _POWERPC_KERNEL_SETUP_H
+#define _POWERPC_KERNEL_SETUP_H
+
+void check_for_initrd(void);
+
+#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b45eedbb4b3..e5694335bf1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,6 +40,8 @@
#include <asm/xmon.h>
#include <asm/time.h>
+#include "setup.h"
+
#define DBG(fmt...)
#if defined CONFIG_KGDB
@@ -55,10 +57,6 @@ extern void power4_idle(void);
boot_infos_t *boot_infos;
struct ide_machdep_calls ppc_ide_md;
-/* XXX should go elsewhere */
-int __irq_offset_value;
-EXPORT_SYMBOL(__irq_offset_value);
-
int boot_cpuid;
EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
@@ -70,8 +68,6 @@ unsigned int DMA_MODE_WRITE;
int have_of = 1;
#ifdef CONFIG_PPC_MULTIPLATFORM
-int _machine = 0;
-
extern void prep_init(void);
extern void pmac_init(void);
extern void chrp_init(void);
@@ -279,13 +275,13 @@ arch_initcall(ppc_init);
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
- extern char *klimit;
extern void do_init_bootmem(void);
/* so udelay does something sensible, assume <= 1000 bogomips */
loops_per_jiffy = 500000000 / HZ;
unflatten_device_tree();
+ check_for_initrd();
finish_device_tree();
smp_setup_cpu_maps();
@@ -302,14 +298,9 @@ void __init setup_arch(char **cmdline_p)
pmac_feature_init(); /* New cool way */
#endif
-#ifdef CONFIG_XMON
- xmon_map_scc();
- if (strstr(cmd_line, "xmon")) {
- xmon_init(1);
- debugger(NULL);
- }
-#endif /* CONFIG_XMON */
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+#ifdef CONFIG_XMON_DEFAULT
+ xmon_init(1);
+#endif
#if defined(CONFIG_KGDB)
if (ppc_md.kgdb_map_scc)
@@ -342,7 +333,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) klimit;
+ init_mm.brk = klimit;
/* Save unparsed command line copy for /proc/cmdline */
strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6b52cce872b..608fee7c7e2 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -41,7 +41,6 @@
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/paca.h>
-#include <asm/ppcdebug.h>
#include <asm/time.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -58,8 +57,11 @@
#include <asm/lmb.h>
#include <asm/iseries/it_lp_naca.h>
#include <asm/firmware.h>
-#include <asm/systemcfg.h>
#include <asm/xmon.h>
+#include <asm/udbg.h>
+#include <asm/kexec.h>
+
+#include "setup.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -94,15 +96,6 @@ extern void udbg_init_maple_realmode(void);
do { udbg_putc = call_rtas_display_status_delay; } while(0)
#endif
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void stab_initialize(unsigned long stab);
-extern void htab_initialize(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-
int have_of = 1;
int boot_cpuid = 0;
int boot_cpuid_phys = 0;
@@ -244,12 +237,6 @@ void __init early_setup(unsigned long dt_ptr)
DBG(" -> early_setup()\n");
/*
- * Fill the default DBG level (do we want to keep
- * that old mecanism around forever ?)
- */
- ppcdbg_initialize();
-
- /*
* Do early initializations using the flattened device
* tree, like retreiving the physical memory map or
* calculating/retreiving the hash table size
@@ -260,11 +247,10 @@ void __init early_setup(unsigned long dt_ptr)
* Iterate all ppc_md structures until we find the proper
* one for the current machine type
*/
- DBG("Probing machine type for platform %x...\n",
- systemcfg->platform);
+ DBG("Probing machine type for platform %x...\n", _machine);
for (mach = machines; *mach; mach++) {
- if ((*mach)->probe(systemcfg->platform))
+ if ((*mach)->probe(_machine))
break;
}
/* What can we do if we didn't find ? */
@@ -277,20 +263,47 @@ void __init early_setup(unsigned long dt_ptr)
DBG("Found, Initializing memory management...\n");
/*
- * Initialize stab / SLB management
+ * Initialize the MMU Hash table and create the linear mapping
+ * of memory. Has to be done before stab/slb initialization as
+ * this is currently where the page size encoding is obtained
*/
- if (!firmware_has_feature(FW_FEATURE_ISERIES))
- stab_initialize(lpaca->stab_real);
+ htab_initialize();
/*
- * Initialize the MMU Hash table and create the linear mapping
- * of memory
+ * Initialize stab / SLB management except on iSeries
*/
- htab_initialize();
+ if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_real);
+ }
DBG(" <- early_setup()\n");
}
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+ struct paca_struct *lpaca = get_paca();
+
+ /* Mark enabled in PACA */
+ lpaca->proc_enabled = 0;
+
+ /* Initialize hash table for that CPU */
+ htab_initialize_secondary();
+
+ /* Initialize STAB/SLB. We use a virtual address as it works
+ * in real mode on pSeries and we want a virutal address on
+ * iSeries anyway
+ */
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_addr);
+}
+
+#endif /* CONFIG_SMP */
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
void smp_release_cpus(void)
@@ -316,7 +329,8 @@ void smp_release_cpus(void)
#endif /* CONFIG_SMP || CONFIG_KEXEC */
/*
- * Initialize some remaining members of the ppc64_caches and systemcfg structures
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
* (at least until we get rid of them completely). This is mostly some
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
@@ -341,7 +355,7 @@ static void __init initialize_cache_info(void)
const char *dc, *ic;
/* Then read cache informations */
- if (systemcfg->platform == PLATFORM_POWERMAC) {
+ if (_machine == PLATFORM_POWERMAC) {
dc = "d-cache-block-size";
ic = "i-cache-block-size";
} else {
@@ -361,9 +375,8 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->dcache_size = ppc64_caches.dsize = size;
- systemcfg->dcache_line_size =
- ppc64_caches.dline_size = lsize;
+ ppc64_caches.dsize = size;
+ ppc64_caches.dline_size = lsize;
ppc64_caches.log_dline_size = __ilog2(lsize);
ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
@@ -379,60 +392,16 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->icache_size = ppc64_caches.isize = size;
- systemcfg->icache_line_size =
- ppc64_caches.iline_size = lsize;
+ ppc64_caches.isize = size;
+ ppc64_caches.iline_size = lsize;
ppc64_caches.log_iline_size = __ilog2(lsize);
ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
}
}
- /* Add an eye catcher and the systemcfg layout version number */
- strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
- systemcfg->version.major = SYSTEMCFG_MAJOR;
- systemcfg->version.minor = SYSTEMCFG_MINOR;
- systemcfg->processor = mfspr(SPRN_PVR);
-
DBG(" <- initialize_cache_info()\n");
}
-static void __init check_for_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
- u64 *prop;
-
- DBG(" -> check_for_initrd()\n");
-
- if (of_chosen) {
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-start", NULL);
- if (prop != NULL) {
- initrd_start = (unsigned long)__va(*prop);
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-end", NULL);
- if (prop != NULL) {
- initrd_end = (unsigned long)__va(*prop);
- initrd_below_start_ok = 1;
- } else
- initrd_start = 0;
- }
- }
-
- /* If we were passed an initrd, set the ROOT_DEV properly if the values
- * look sensible. If not, clear initrd reference.
- */
- if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
- initrd_end > initrd_start)
- ROOT_DEV = Root_RAM0;
- else
- initrd_start = initrd_end = 0;
-
- if (initrd_start)
- printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
-
- DBG(" <- check_for_initrd()\n");
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
/*
* Do some initial setup of the system. The parameters are those which
@@ -447,6 +416,10 @@ void __init setup_system(void)
*/
unflatten_device_tree();
+#ifdef CONFIG_KEXEC
+ kexec_setup(); /* requires unflattened device tree. */
+#endif
+
/*
* Fill the ppc64_caches & systemcfg structures with informations
* retreived from the device-tree. Need to be called before
@@ -516,16 +489,14 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
- printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch);
- printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
- printk("systemcfg = 0x%p\n", systemcfg);
- printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
- printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
- printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
+ printk("ppc64_interrupt_controller = 0x%ld\n",
+ ppc64_interrupt_controller);
+ printk("platform = 0x%x\n", _machine);
+ printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size());
printk("ppc64_caches.dcache_line_size = 0x%x\n",
- ppc64_caches.dline_size);
+ ppc64_caches.dline_size);
printk("ppc64_caches.icache_line_size = 0x%x\n",
- ppc64_caches.iline_size);
+ ppc64_caches.iline_size);
printk("htab_address = 0x%p\n", htab_address);
printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
printk("-----------------------------------------------------\n");
@@ -552,10 +523,12 @@ static void __init irqstack_early_init(void)
* SLB misses on them.
*/
for_each_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
- hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
+ softirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
}
}
#else
@@ -583,35 +556,8 @@ static void __init emergency_stack_init(void)
limit = min(0x10000000UL, lmb.rmo_size);
for_each_cpu(i)
- paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
- limit)) + PAGE_SIZE;
-}
-
-/*
- * Called from setup_arch to initialize the bitmap of available
- * syscalls in the systemcfg page
- */
-void __init setup_syscall_map(void)
-{
- unsigned int i, count64 = 0, count32 = 0;
- extern unsigned long *sys_call_table;
- extern unsigned long sys_ni_syscall;
-
-
- for (i = 0; i < __NR_syscalls; i++) {
- if (sys_call_table[i*2] != sys_ni_syscall) {
- count64++;
- systemcfg->syscall_map_64[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- if (sys_call_table[i*2+1] != sys_ni_syscall) {
- count32++;
- systemcfg->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- }
- printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n",
- count32, count64);
+ paca[i].emergency_sp =
+ __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
@@ -655,9 +601,6 @@ void __init setup_arch(char **cmdline_p)
do_init_bootmem();
sparse_init();
- /* initialize the syscall map in systemcfg */
- setup_syscall_map();
-
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
@@ -895,26 +838,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-#ifdef CONFIG_XMON
-static int __init early_xmon(char *p)
-{
- /* ensure xmon is enabled */
- if (p) {
- if (strncmp(p, "on", 2) == 0)
- xmon_init(1);
- if (strncmp(p, "off", 3) == 0)
- xmon_init(0);
- if (strncmp(p, "early", 5) != 0)
- return 0;
- }
- xmon_init(1);
- debugger(NULL);
-
- return 0;
-}
-early_param("xmon", early_xmon);
-#endif
-
void cpu_die(void)
{
if (ppc_md.cpu_die)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 876c57c1136..5a2eba60dd3 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -42,11 +42,11 @@
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/sigcontext.h>
+#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
-#include <asm/ppcdebug.h>
#include <asm/unistd.h>
-#include <asm/vdso.h>
#else
#include <asm/ucontext.h>
#include <asm/pgtable.h>
@@ -403,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
ELF_NFPREG * sizeof(double)))
return 1;
- current->thread.fpscr.val = 0; /* turn off all fp exceptions */
-
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
@@ -809,18 +807,18 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
-#ifdef CONFIG_PPC64
if (vdso32_rt_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, frame, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, frame, __NR_rt_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->tramp;
}
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
@@ -1090,19 +1088,18 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
|| __put_user(sig, &sc->signal))
goto badframe;
-#ifdef CONFIG_PPC64
if (vdso32_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, &frame->mctx, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->mctx.tramp;
}
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ec9d0984b6a..1decf278553 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -33,7 +33,6 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/ppcdebug.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm/vdso.h>
@@ -132,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
flush_fp_to_thread(current);
- /* Make sure signal doesn't get spurrious FP exceptions */
- current->thread.fpscr.val = 0;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
@@ -424,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto badframe;
+ /* Make sure signal handler doesn't get spurious FP exceptions */
+ current->thread.fpscr.val = 0;
+
/* Set up to return from userspace. */
if (vdso64_rt_sigtramp && current->thread.vdso_base) {
regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1794a694a92..62dfc5b8d76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -40,11 +40,11 @@
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
-#include <asm/xmon.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
@@ -369,9 +369,11 @@ int generic_cpu_disable(void)
if (cpu == boot_cpuid)
return -EBUSY;
- systemcfg->processorCount--;
cpu_clear(cpu, cpu_online_map);
+#ifdef CONFIG_PPC64
+ vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
+#endif
return 0;
}
@@ -389,9 +391,11 @@ int generic_cpu_enable(unsigned int cpu)
while (!cpu_online(cpu))
cpu_relax();
+#ifdef CONFIG_PPC64
fixup_irqs(cpu_online_map);
/* counter the irq disable in fixup_irqs */
local_irq_enable();
+#endif
return 0;
}
@@ -420,7 +424,9 @@ void generic_mach_cpu_die(void)
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
+#ifdef CONFIG_PPC64
flush_tlb_pending();
+#endif
cpu_set(cpu, cpu_online_map);
local_irq_enable();
}
@@ -511,6 +517,7 @@ int __devinit start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
+ preempt_disable();
cpu_callin_map[cpu] = 1;
smp_ops->setup_cpu(cpu);
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index a8210ed5c68..9c921d1c408 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -52,7 +52,6 @@
#include <asm/semaphore.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
-#include <asm/systemcfg.h>
#include <asm/ppc-pci.h>
/* readdir & getdents */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 00000000000..0f0c3a9ae2e
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,383 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+ ssize_t ret;
+ unsigned long snooze;
+
+ ret = sscanf(buf, "%lu", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+ struct device_node *options;
+ unsigned int *val;
+ unsigned int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ options = find_path_device("/options");
+ if (!options)
+ return 1;
+
+ val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+ NULL);
+ if (!smt_snooze_cmdline && val) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = *val;
+ }
+
+ return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ int snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ smt_snooze_cmdline = 1;
+
+ if (get_option(&str, &snooze)) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+ }
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+ unsigned long (*func)(unsigned long),
+ unsigned long arg)
+{
+ cpumask_t old_affinity = current->cpus_allowed;
+ unsigned long ret;
+
+ /* should return -EINVAL to userspace */
+ if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+ return 0;
+
+ ret = func(arg);
+
+ set_cpus_allowed(current, old_affinity);
+
+ return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+ return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+ ppc64_enable_pmcs(); \
+ mtspr(ADDRESS, val); \
+ return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+ store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+ return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_create_file(s, &attr_mmcr0);
+ sysdev_create_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_create_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_create_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_create_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_create_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_create_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_create_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_create_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_create_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_create_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+ BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_remove_file(s, &attr_mmcr0);
+ sysdev_remove_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_remove_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_remove_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_remove_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_remove_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_remove_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_remove_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_remove_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_remove_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_remove_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (node_online(i)) {
+ int p_node = parent_node(i);
+ struct node *parent = NULL;
+
+ if (p_node != i)
+ parent = &node_devices[p_node];
+
+ register_node(&node_devices[i], i, parent);
+ }
+ }
+}
+#else
+static void register_nodes(void)
+{
+ return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+ struct node *parent = NULL;
+
+ register_nodes();
+
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
+#endif
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (!ppc_md.cpu_die)
+ c->no_control = 1;
+
+ if (cpu_online(cpu) || (c->no_control == 0)) {
+ register_cpu(c, cpu, parent);
+
+ sysdev_create_file(&c->sysdev, &attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+
+ return 0;
+}
+__initcall(topology_init);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6996a593dcb..070b4b458aa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -61,14 +61,16 @@
#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
+#include <asm/smp.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
#include <asm/firmware.h>
#endif
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/it_lp_queue.h>
#include <asm/iseries/hv_call_xm.h>
#endif
+#include <asm/smp.h>
/* keep track of when we need to update the rtc */
time_t last_rtc_update;
@@ -118,10 +120,6 @@ static unsigned adjusting_time = 0;
unsigned long ppc_proc_freq;
unsigned long ppc_tb_freq;
-#ifdef CONFIG_PPC32 /* XXX for now */
-#define boot_cpuid 0
-#endif
-
u64 tb_last_jiffy __cacheline_aligned_in_smp;
unsigned long tb_last_stamp;
@@ -263,7 +261,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
do_gtod.varp = temp_varp;
do_gtod.var_idx = temp_idx;
-#ifdef CONFIG_PPC64
/*
* tb_update_count is used to allow the userspace gettimeofday code
* to assure itself that it sees a consistent view of the tb_to_xs and
@@ -273,14 +270,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
*/
- ++(systemcfg->tb_update_count);
+ ++(vdso_data->tb_update_count);
smp_wmb();
- systemcfg->tb_orig_stamp = new_tb_stamp;
- systemcfg->stamp_xsec = new_stamp_xsec;
- systemcfg->tb_to_xs = new_tb_to_xs;
+ vdso_data->tb_orig_stamp = new_tb_stamp;
+ vdso_data->stamp_xsec = new_stamp_xsec;
+ vdso_data->tb_to_xs = new_tb_to_xs;
+ vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
smp_wmb();
- ++(systemcfg->tb_update_count);
-#endif
+ ++(vdso_data->tb_update_count);
}
/*
@@ -359,8 +357,8 @@ static void iSeries_tb_recal(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
do_gtod.varp->tb_to_xs = tb_to_xs;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->tb_to_xs = tb_to_xs;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -485,6 +483,8 @@ void __init smp_space_timers(unsigned int max_cpus)
unsigned long offset = tb_ticks_per_jiffy / max_cpus;
unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
+ /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
+ previous_tb -= tb_ticks_per_jiffy;
for_each_cpu(i) {
if (i != boot_cpuid) {
previous_tb += offset;
@@ -560,10 +560,8 @@ int do_settimeofday(struct timespec *tv)
new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs;
update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
-#ifdef CONFIG_PPC64
- systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
- systemcfg->tz_dsttime = sys_tz.tz_dsttime;
-#endif
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
write_sequnlock_irqrestore(&xtime_lock, flags);
clock_was_set();
@@ -712,13 +710,12 @@ void __init time_init(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
do_gtod.varp->tb_to_xs = tb_to_xs;
do_gtod.tb_to_us = tb_to_us;
-#ifdef CONFIG_PPC64
- systemcfg->tb_orig_stamp = tb_last_jiffy;
- systemcfg->tb_update_count = 0;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
- systemcfg->tb_to_xs = tb_to_xs;
-#endif
+
+ vdso_data->tb_orig_stamp = tb_last_jiffy;
+ vdso_data->tb_update_count = 0;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
+ vdso_data->tb_to_xs = tb_to_xs;
time_freq = 0;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 07e5ee40b87..1511454c469 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -39,7 +39,6 @@
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
-#include <asm/xmon.h>
#include <asm/pmc.h>
#ifdef CONFIG_PPC32
#include <asm/reg.h>
@@ -50,7 +49,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
-#include <asm/systemcfg.h>
#endif
#ifdef CONFIG_PPC64 /* XXX */
@@ -130,7 +128,7 @@ int die(const char *str, struct pt_regs *regs, long err)
nl = 1;
#endif
#ifdef CONFIG_PPC64
- switch (systemcfg->platform) {
+ switch (_machine) {
case PLATFORM_PSERIES:
printk("PSERIES ");
nl = 1;
@@ -748,22 +746,12 @@ static int check_bug_trap(struct pt_regs *regs)
return 0;
if (bug->line & BUG_WARNING_TRAP) {
/* this is a WARN_ON rather than BUG/BUG_ON */
-#ifdef CONFIG_XMON
- xmon_printf(KERN_ERR "Badness in %s at %s:%ld\n",
- bug->function, bug->file,
- bug->line & ~BUG_WARNING_TRAP);
-#endif /* CONFIG_XMON */
printk(KERN_ERR "Badness in %s at %s:%ld\n",
bug->function, bug->file,
bug->line & ~BUG_WARNING_TRAP);
dump_stack();
return 1;
}
-#ifdef CONFIG_XMON
- xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
- bug->function, bug->file, bug->line);
- xmon(regs);
-#endif /* CONFIG_XMON */
printk(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
bug->function, bug->file, bug->line);
@@ -898,10 +886,6 @@ void altivec_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
}
-#ifdef CONFIG_PPC64
-extern perf_irq_t perf_irq;
-#endif
-
#if defined(CONFIG_PPC64) || defined(CONFIG_E500)
void performance_monitor_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
new file mode 100644
index 00000000000..0d878e72fc4
--- /dev/null
+++ b/arch/powerpc/kernel/udbg.c
@@ -0,0 +1,125 @@
+/*
+ * polling mode stateless debugging stuff, originally for NS16550 Serial Ports
+ *
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdarg.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <asm/processor.h>
+
+void (*udbg_putc)(unsigned char c);
+unsigned char (*udbg_getc)(void);
+int (*udbg_getc_poll)(void);
+
+/* udbg library, used by xmon et al */
+void udbg_puts(const char *s)
+{
+ if (udbg_putc) {
+ char c;
+
+ if (s && *s != '\0') {
+ while ((c = *s++) != '\0')
+ udbg_putc(c);
+ }
+ }
+#if 0
+ else {
+ printk("%s", s);
+ }
+#endif
+}
+
+int udbg_write(const char *s, int n)
+{
+ int remain = n;
+ char c;
+
+ if (!udbg_putc)
+ return 0;
+
+ if (s && *s != '\0') {
+ while (((c = *s++) != '\0') && (remain-- > 0)) {
+ udbg_putc(c);
+ }
+ }
+
+ return n - remain;
+}
+
+int udbg_read(char *buf, int buflen)
+{
+ char c, *p = buf;
+ int i;
+
+ if (!udbg_getc)
+ return 0;
+
+ for (i = 0; i < buflen; ++i) {
+ do {
+ c = udbg_getc();
+ } while (c == 0x11 || c == 0x13);
+ if (c == 0)
+ break;
+ *p++ = c;
+ }
+
+ return i;
+}
+
+#define UDBG_BUFSIZE 256
+void udbg_printf(const char *fmt, ...)
+{
+ unsigned char buf[UDBG_BUFSIZE];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+}
+
+/*
+ * Early boot console based on udbg
+ */
+static void udbg_console_write(struct console *con, const char *s,
+ unsigned int n)
+{
+ udbg_write(s, n);
+}
+
+static struct console udbg_console = {
+ .name = "udbg",
+ .write = udbg_console_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
+static int early_console_initialized;
+
+void __init disable_early_printk(void)
+{
+ if (!early_console_initialized)
+ return;
+ unregister_console(&udbg_console);
+ early_console_initialized = 0;
+}
+
+/* called by setup_system */
+void register_early_udbg_console(void)
+{
+ early_console_initialized = 1;
+ register_console(&udbg_console);
+}
+
+#if 0 /* if you want to use this as a regular output console */
+console_initcall(register_udbg_console);
+#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
new file mode 100644
index 00000000000..9313574ab93
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -0,0 +1,123 @@
+/*
+ * udbg for for NS16550 compatable serial ports
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/io.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+struct NS16550 {
+ /* this struct must be packed */
+ unsigned char rbr; /* 0 */
+ unsigned char ier; /* 1 */
+ unsigned char fcr; /* 2 */
+ unsigned char lcr; /* 3 */
+ unsigned char mcr; /* 4 */
+ unsigned char lsr; /* 5 */
+ unsigned char msr; /* 6 */
+ unsigned char scr; /* 7 */
+};
+
+#define thr rbr
+#define iir fcr
+#define dll rbr
+#define dlm ier
+#define dlab lcr
+
+#define LSR_DR 0x01 /* Data ready */
+#define LSR_OE 0x02 /* Overrun */
+#define LSR_PE 0x04 /* Parity error */
+#define LSR_FE 0x08 /* Framing error */
+#define LSR_BI 0x10 /* Break */
+#define LSR_THRE 0x20 /* Xmit holding register empty */
+#define LSR_TEMT 0x40 /* Xmitter empty */
+#define LSR_ERR 0x80 /* Error */
+
+static volatile struct NS16550 __iomem *udbg_comport;
+
+static void udbg_550_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ out_8(&udbg_comport->thr, c);
+ if (c == '\n')
+ udbg_550_putc('\r');
+ }
+}
+
+static int udbg_550_getc_poll(void)
+{
+ if (udbg_comport) {
+ if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
+ return in_8(&udbg_comport->rbr);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_550_getc(void)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
+ /* wait for char */;
+ return in_8(&udbg_comport->rbr);
+ }
+ return 0;
+}
+
+void udbg_init_uart(void __iomem *comport, unsigned int speed)
+{
+ u16 dll = speed ? (115200 / speed) : 12;
+
+ if (comport) {
+ udbg_comport = (struct NS16550 __iomem *)comport;
+ out_8(&udbg_comport->lcr, 0x00);
+ out_8(&udbg_comport->ier, 0xff);
+ out_8(&udbg_comport->ier, 0x00);
+ out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */
+ out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600,
+ 3 = 38400, 12 = 9600 baud */
+ out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero
+ for fast rates; */
+ out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */
+ out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */
+ out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */
+ udbg_putc = udbg_550_putc;
+ udbg_getc = udbg_550_getc;
+ udbg_getc_poll = udbg_550_getc_poll;
+ }
+}
+
+#ifdef CONFIG_PPC_MAPLE
+void udbg_maple_real_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ real_writeb(c, &udbg_comport->thr); eieio();
+ if (c == '\n')
+ udbg_maple_real_putc('\r');
+ }
+}
+
+void udbg_init_maple_realmode(void)
+{
+ udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8;
+
+ udbg_putc = udbg_maple_real_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
diff --git a/arch/powerpc/kernel/udbg_scc.c b/arch/powerpc/kernel/udbg_scc.c
new file mode 100644
index 00000000000..820c5355150
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_scc.c
@@ -0,0 +1,135 @@
+/*
+ * udbg for for zilog scc ports as found on Apple PowerMacs
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pmac_feature.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define SCC_TXRDY 4
+#define SCC_RXRDY 1
+
+static volatile u8 __iomem *sccc;
+static volatile u8 __iomem *sccd;
+
+static void udbg_scc_putc(unsigned char c)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_TXRDY) == 0)
+ ;
+ out_8(sccd, c);
+ if (c == '\n')
+ udbg_scc_putc('\r');
+ }
+}
+
+static int udbg_scc_getc_poll(void)
+{
+ if (sccc) {
+ if ((in_8(sccc) & SCC_RXRDY) != 0)
+ return in_8(sccd);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_scc_getc(void)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_RXRDY) == 0)
+ ;
+ return in_8(sccd);
+ }
+ return 0;
+}
+
+static unsigned char scc_inittab[] = {
+ 13, 0, /* set baud rate divisor */
+ 12, 0,
+ 14, 1, /* baud rate gen enable, src=rtxc */
+ 11, 0x50, /* clocks = br gen */
+ 5, 0xea, /* tx 8 bits, assert DTR & RTS */
+ 4, 0x46, /* x16 clock, 1 stop */
+ 3, 0xc1, /* rx enable, 8 bits */
+};
+
+void udbg_init_scc(struct device_node *np)
+{
+ u32 *reg;
+ unsigned long addr;
+ int i, x;
+
+ if (np == NULL)
+ np = of_find_node_by_name(NULL, "escc");
+ if (np == NULL || np->parent == NULL)
+ return;
+
+ udbg_printf("found SCC...\n");
+ /* Get address within mac-io ASIC */
+ reg = (u32 *)get_property(np, "reg", NULL);
+ if (reg == NULL)
+ return;
+ addr = reg[0];
+ udbg_printf("local addr: %lx\n", addr);
+ /* Get address of mac-io PCI itself */
+ reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL);
+ if (reg == NULL)
+ return;
+ addr += reg[2];
+ udbg_printf("final addr: %lx\n", addr);
+
+ /* Setup for 57600 8N1 */
+ addr += 0x20;
+ sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ;
+ sccc += addr & ~PAGE_MASK;
+ sccd = sccc + 0x10;
+
+ udbg_printf("ioremap result sccc: %p\n", sccc);
+ mb();
+
+ for (i = 20000; i != 0; --i)
+ x = in_8(sccc);
+ out_8(sccc, 0x09); /* reset A or B side */
+ out_8(sccc, 0xc0);
+ for (i = 0; i < sizeof(scc_inittab); ++i)
+ out_8(sccc, scc_inittab[i]);
+
+ udbg_putc = udbg_scc_putc;
+ udbg_getc = udbg_scc_getc;
+ udbg_getc_poll = udbg_scc_getc_poll;
+
+ udbg_puts("Hello World !\n");
+}
+
+static void udbg_real_scc_putc(unsigned char c)
+{
+ while ((real_readb(sccc) & SCC_TXRDY) == 0)
+ ;
+ real_writeb(c, sccd);
+ if (c == '\n')
+ udbg_real_scc_putc('\r');
+}
+
+void udbg_init_pmac_realmode(void)
+{
+ sccc = (volatile u8 __iomem *)0x80013020ul;
+ sccd = (volatile u8 __iomem *)0x80013030ul;
+
+ udbg_putc = udbg_real_scc_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
new file mode 100644
index 00000000000..0d4d8bec0df
--- /dev/null
+++ b/arch/powerpc/kernel/vdso.c
@@ -0,0 +1,746 @@
+/*
+ * linux/arch/ppc64/kernel/vdso.c
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/lmb.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Max supported size for symbol names */
+#define MAX_SYMNAME 64
+
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+unsigned int vdso32_pages;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+#ifdef CONFIG_PPC64
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+unsigned int vdso64_pages;
+unsigned long vdso64_rt_sigtramp;
+#endif /* CONFIG_PPC64 */
+
+/*
+ * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
+ * Once the early boot kernel code no longer needs to muck around
+ * with it, it will become dynamically allocated
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/* Format of the patch table */
+struct vdso_patch_def
+{
+ unsigned long ftr_mask, ftr_value;
+ const char *gen_name;
+ const char *fix_name;
+};
+
+/* Table of functions to patch based on the CPU type/revision
+ *
+ * Currently, we only change sync_dicache to do nothing on processors
+ * with a coherent icache
+ */
+static struct vdso_patch_def vdso_patches[] = {
+ {
+ CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
+ "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_gettimeofday", NULL
+ },
+};
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+ Elf32_Ehdr *hdr; /* ptr to ELF */
+ Elf32_Sym *dynsym; /* ptr to .dynsym section */
+ unsigned long dynsymsize; /* size of .dynsym section */
+ char *dynstr; /* ptr to .dynstr section */
+ unsigned long text; /* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+ Elf64_Ehdr *hdr;
+ Elf64_Sym *dynsym;
+ unsigned long dynsymsize;
+ char *dynstr;
+ unsigned long text;
+};
+
+
+#ifdef __DEBUG
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+ printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+ page_count(pg),
+ pg->flags);
+ if (upg/* && pg != upg*/) {
+ printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
+ << PAGE_SHIFT),
+ page_count(upg),
+ upg->flags);
+ }
+ printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+ int i;
+
+ if (!vma || test_thread_flag(TIF_32BIT)) {
+ printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+ for (i=0; i<vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma->vm_mm, vma->vm_start +
+ i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+ if (!vma || !test_thread_flag(TIF_32BIT)) {
+ printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+ for (i=0; i<vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma->vm_mm, vma->vm_start +
+ i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+}
+#endif /* DEBUG */
+
+/*
+ * Keep a dummy vma_close for now, it will prevent VMA merging.
+ */
+static void vdso_vma_close(struct vm_area_struct * vma)
+{
+}
+
+/*
+ * Our nopage() function, maps in the actual vDSO kernel pages, they will
+ * be mapped read-only by do_no_page(), and eventually COW'ed, either
+ * right away for an initial write access, or by do_wp_page().
+ */
+static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
+ unsigned long address, int *type)
+{
+ unsigned long offset = address - vma->vm_start;
+ struct page *pg;
+#ifdef CONFIG_PPC64
+ void *vbase = test_thread_flag(TIF_32BIT) ?
+ vdso32_kbase : vdso64_kbase;
+#else
+ void *vbase = vdso32_kbase;
+#endif
+
+ DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
+ current->comm, address, offset);
+
+ if (address < vma->vm_start || address > vma->vm_end)
+ return NOPAGE_SIGBUS;
+
+ /*
+ * Last page is systemcfg.
+ */
+ if ((vma->vm_end - address) <= PAGE_SIZE)
+ pg = virt_to_page(vdso_data);
+ else
+ pg = virt_to_page(vbase + offset);
+
+ get_page(pg);
+ DBG(" ->page count: %d\n", page_count(pg));
+
+ return pg;
+}
+
+static struct vm_operations_struct vdso_vmops = {
+ .close = vdso_vma_close,
+ .nopage = vdso_vma_nopage,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+
+#ifdef CONFIG_PPC64
+ if (test_thread_flag(TIF_32BIT)) {
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+ } else {
+ vdso_pages = vdso64_pages;
+ vdso_base = VDSO64_MBASE;
+ }
+#else
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+#endif
+
+ current->thread.vdso_base = 0;
+
+ /* vDSO has a problem and was disabled, just don't "enable" it for the
+ * process
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma == NULL)
+ return -ENOMEM;
+
+ memset(vma, 0, sizeof(*vma));
+
+ /* Add a page to the vdso size for the data page */
+ vdso_pages ++;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put it
+ * at vdso_base which is the "natural" base for it, but we might fail
+ * and end up putting it elsewhere.
+ */
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ vdso_pages << PAGE_SHIFT, 0, 0);
+ if (vdso_base & ~PAGE_MASK) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return (int)vdso_base;
+ }
+
+ current->thread.vdso_base = vdso_base;
+
+ vma->vm_mm = mm;
+ vma->vm_start = current->thread.vdso_base;
+ vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though
+ */
+ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE |
+ VM_MAYEXEC | VM_RESERVED;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+ vma->vm_ops = &vdso_vmops;
+
+ down_write(&mm->mmap_sem);
+ if (insert_vm_struct(mm, vma)) {
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+ mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ up_write(&mm->mmap_sem);
+
+ return 0;
+}
+
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf32_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ *size = 0;
+ return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ Elf32_Sym *sym = find_symbol32(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO32: function %s not found !\n",
+ symname);
+ return 0;
+ }
+ return sym->st_value - VDSO32_LBASE;
+}
+
+static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf32_Sym *sym32_gen, *sym32_fix;
+
+ sym32_gen = find_symbol32(v32, orig);
+ if (sym32_gen == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym32_gen->st_name = 0;
+ return 0;
+ }
+ sym32_fix = find_symbol32(v32, fix);
+ if (sym32_fix == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym32_gen->st_value = sym32_fix->st_value;
+ sym32_gen->st_size = sym32_fix->st_size;
+ sym32_gen->st_info = sym32_fix->st_info;
+ sym32_gen->st_other = sym32_fix->st_other;
+ sym32_gen->st_shndx = sym32_fix->st_shndx;
+
+ return 0;
+}
+
+
+#ifdef CONFIG_PPC64
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf64_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ if (size)
+ *size = 0;
+ return NULL;
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ Elf64_Sym *sym = find_symbol64(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO64: function %s not found !\n",
+ symname);
+ return 0;
+ }
+#ifdef VDS64_HAS_DESCRIPTORS
+ return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
+ VDSO64_LBASE;
+#else
+ return sym->st_value - VDSO64_LBASE;
+#endif
+}
+
+static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf64_Sym *sym64_gen, *sym64_fix;
+
+ sym64_gen = find_symbol64(v64, orig);
+ if (sym64_gen == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym64_gen->st_name = 0;
+ return 0;
+ }
+ sym64_fix = find_symbol64(v64, fix);
+ if (sym64_fix == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym64_gen->st_value = sym64_fix->st_value;
+ sym64_gen->st_size = sym64_fix->st_size;
+ sym64_gen->st_info = sym64_fix->st_info;
+ sym64_gen->st_other = sym64_fix->st_other;
+ sym64_gen->st_shndx = sym64_fix->st_shndx;
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+
+static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ void *sect;
+
+ /*
+ * Locate symbol tables & text section
+ */
+
+ v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
+ v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
+ if (v32->dynsym == NULL || v32->dynstr == NULL) {
+ printk(KERN_ERR "vDSO32: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section32(v32->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO32: the .text section was not found\n");
+ return -1;
+ }
+ v32->text = sect - vdso32_kbase;
+
+#ifdef CONFIG_PPC64
+ v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
+ v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
+ if (v64->dynsym == NULL || v64->dynstr == NULL) {
+ printk(KERN_ERR "vDSO64: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section64(v64->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO64: the .text section was not found\n");
+ return -1;
+ }
+ v64->text = sect - vdso64_kbase;
+#endif /* CONFIG_PPC64 */
+
+ return 0;
+}
+
+static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ /*
+ * Find signal trampolines
+ */
+
+#ifdef CONFIG_PPC64
+ vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
+#endif
+ vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
+ vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
+}
+
+static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ Elf32_Sym *sym32;
+#ifdef CONFIG_PPC64
+ Elf64_Sym *sym64;
+
+ sym64 = find_symbol64(v64, "__kernel_datapage_offset");
+ if (sym64 == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
+ (vdso64_pages << PAGE_SHIFT) -
+ (sym64->st_value - VDSO64_LBASE);
+#endif /* CONFIG_PPC64 */
+
+ sym32 = find_symbol32(v32, "__kernel_datapage_offset");
+ if (sym32 == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
+ (vdso32_pages << PAGE_SHIFT) -
+ (sym32->st_value - VDSO32_LBASE);
+
+ return 0;
+}
+
+static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
+ struct vdso_patch_def *patch = &vdso_patches[i];
+ int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
+ == patch->ftr_value;
+ if (!match)
+ continue;
+
+ DBG("replacing %s with %s...\n", patch->gen_name,
+ patch->fix_name ? "NONE" : patch->fix_name);
+
+ /*
+ * Patch the 32 bits and 64 bits symbols. Note that we do not
+ * patch the "." symbol on 64 bits.
+ * It would be easy to do, but doesn't seem to be necessary,
+ * patching the OPD symbol is enough.
+ */
+ vdso_do_func_patch32(v32, v64, patch->gen_name,
+ patch->fix_name);
+#ifdef CONFIG_PPC64
+ vdso_do_func_patch64(v32, v64, patch->gen_name,
+ patch->fix_name);
+#endif /* CONFIG_PPC64 */
+ }
+
+ return 0;
+}
+
+
+static __init int vdso_setup(void)
+{
+ struct lib32_elfinfo v32;
+ struct lib64_elfinfo v64;
+
+ v32.hdr = vdso32_kbase;
+#ifdef CONFIG_PPC64
+ v64.hdr = vdso64_kbase;
+#endif
+ if (vdso_do_find_sections(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_datapage(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_alt_funcs(&v32, &v64))
+ return -1;
+
+ vdso_setup_trampolines(&v32, &v64);
+
+ return 0;
+}
+
+/*
+ * Called from setup_arch to initialize the bitmap of available
+ * syscalls in the systemcfg page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+ unsigned int i;
+ extern unsigned long *sys_call_table;
+ extern unsigned long sys_ni_syscall;
+
+
+ for (i = 0; i < __NR_syscalls; i++) {
+#ifdef CONFIG_PPC64
+ if (sys_call_table[i*2] != sys_ni_syscall)
+ vdso_data->syscall_map_64[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+ if (sys_call_table[i*2+1] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#else /* CONFIG_PPC64 */
+ if (sys_call_table[i] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#endif /* CONFIG_PPC64 */
+ }
+}
+
+
+void __init vdso_init(void)
+{
+ int i;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Fill up the "systemcfg" stuff for backward compatiblity
+ */
+ strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ vdso_data->version.major = SYSTEMCFG_MAJOR;
+ vdso_data->version.minor = SYSTEMCFG_MINOR;
+ vdso_data->processor = mfspr(SPRN_PVR);
+ vdso_data->platform = _machine;
+ vdso_data->physicalMemorySize = lmb_phys_mem_size();
+ vdso_data->dcache_size = ppc64_caches.dsize;
+ vdso_data->dcache_line_size = ppc64_caches.dline_size;
+ vdso_data->icache_size = ppc64_caches.isize;
+ vdso_data->icache_line_size = ppc64_caches.iline_size;
+
+ /*
+ * Calculate the size of the 64 bits vDSO
+ */
+ vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+ DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
+#endif /* CONFIG_PPC64 */
+
+
+ /*
+ * Calculate the size of the 32 bits vDSO
+ */
+ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+ DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+
+
+ /*
+ * Setup the syscall map in the vDOS
+ */
+ vdso_setup_syscall_map();
+ /*
+ * Initialize the vDSO images in memory, that is do necessary
+ * fixups of vDSO symbols, locate trampolines, etc...
+ */
+ if (vdso_setup()) {
+ printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+ vdso32_pages = 0;
+#ifdef CONFIG_PPC64
+ vdso64_pages = 0;
+#endif
+ return;
+ }
+
+ /* Make sure pages are in the correct state */
+ for (i = 0; i < vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+
+ }
+#ifdef CONFIG_PPC64
+ for (i = 0; i < vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ }
+#endif /* CONFIG_PPC64 */
+
+ get_page(virt_to_page(vdso_data));
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
new file mode 100644
index 00000000000..8a3bed5f143
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -0,0 +1,40 @@
+
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+ifeq ($(CONFIG_PPC32),y)
+CROSS32CC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1
+EXTRA_AFLAGS := -D__VDSO32__ -s
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
new file mode 100644
index 00000000000..c8db993574e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -0,0 +1,67 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
+
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
new file mode 100644
index 00000000000..f6b38472318
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -0,0 +1,85 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r4,r3
+ bl __get_datapage@local
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP32
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * void unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl __get_datapage@local
+ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+ lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
new file mode 100644
index 00000000000..0a32a41d50b
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,319 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r10,r3 /* r10 saves tv */
+ mr r11,r4 /* r11 saves tz */
+ bl __get_datapage@local /* get data page */
+ mr r9, r3 /* datapage ptr in r9 */
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 2f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TVAL32_TV_SEC(r10)
+
+ /* get remaining xsec and convert to usec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ stw r5,TVAL32_TV_USEC(r10)
+
+ cmpli cr0,r11,0 /* check if tz is NULL */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r9)
+ stw r4,TZONE_TZ_MINWEST(r11)
+ stw r5,TZONE_TZ_DSTTIME(r11)
+
+1: mtlr r12
+ li r3,0
+ blr
+
+2:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+ li r0,__NR_gettimeofday
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpli cr0,r3,CLOCK_REALTIME
+ cmpli cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl __get_datapage@local /* get data page */
+ mr r9,r3 /* datapage ptr in r9 */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TSPC32_TV_SEC(r11)
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ mulli r5,r5,1000
+ stw r5,TSPC32_TV_NSEC(r11)
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r6,r4,12,20,31
+ rlwimi r6,r3,12,0,19
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r7,r4,12,0,19
+ lis r5,1000000@h
+ ori r5,r5,1000000@l
+ mulhwu r7,r7,r5
+ mulli r7,r7,1000
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
+ * can be used
+ */
+
+ lwz r3,WTOM_CLOCK_SEC(r9)
+ lwz r4,WTOM_CLOCK_NSEC(r9)
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r5,r4,r3
+ xor r0,r5,r5
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r3,r3,r6
+ add r4,r4,r7
+ lis r5,NSEC_PER_SEC@h
+ ori r5,r5,NSEC_PER_SEC@l
+ cmpl cr0,r4,r5
+ cmpli cr1,r4,0
+ blt 1f
+ subf r4,r5,r4
+ addi r3,r3,1
+1: bge cr1,1f
+ addi r3,r3,-1
+ add r4,r4,r5
+1: stw r3,TSPC32_TV_SEC(r11)
+ stw r4,TSPC32_TV_NSEC(r11)
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ stw r3,TSPC32_TV_SEC(r4)
+ stw r5,TSPC32_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday() & friends, it returns the xsec
+ * value in r3 & r4 and expects the datapage ptr (non clobbered)
+ * in r9. clobbers r0,r4,r5,r6,r7,r8.
+ * When returning, r8 contains the counter value that can be reused
+ * by the monotonic clock implementation
+ */
+__do_get_xsec:
+ .cfi_startproc
+ /* Check for update count & load values. We use the low
+ * order 32 bits of the update count
+ */
+#ifdef CONFIG_PPC64
+1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r9,r9,r0
+
+ /* Load orig stamp (offset to TB) */
+ lwz r5,CFG_TB_ORIG_STAMP(r9)
+ lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
+
+ /* Get a stable TB value */
+2: mftbu r3
+ mftbl r4
+ mftbu r0
+ cmpl cr0,r3,r0
+ bne- 2b
+
+ /* Substract tb orig stamp. If the high part is non-zero, we jump to
+ * the slow path which call the syscall.
+ * If it's ok, then we have our 32 bits tb_ticks value in r7
+ */
+ subfc r7,r6,r4
+ subfe. r0,r5,r3
+ bne- 3f
+
+ /* Load scale factor & do multiplication */
+ lwz r5,CFG_TB_TO_XS(r9) /* load values */
+ lwz r6,(CFG_TB_TO_XS+4)(r9)
+ mulhwu r4,r7,r5
+ mulhwu r6,r7,r6
+ mullw r0,r7,r5
+ addc r6,r6,r0
+
+ /* At this point, we have the scaled xsec value in r4 + XER:CA
+ * we load & add the stamp since epoch
+ */
+ lwz r5,CFG_STAMP_XSEC(r9)
+ lwz r6,(CFG_STAMP_XSEC+4)(r9)
+ adde r4,r4,r6
+ addze r3,r5
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r6,r4,r3
+ xor r0,r6,r6
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 1b
+
+ /* Warning ! The caller expects CR:EQ to be set to indicate a
+ * successful calculation (so it won't fallback to the syscall
+ * method). We have overriden that CR bit in the counter check,
+ * but fortunately, the loop exit condition _is_ CR:EQ set, so
+ * we can exit safely here. If you change this code, be careful
+ * of that side effect.
+ */
+3: blr
+ .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S
new file mode 100644
index 00000000000..d4b5be4f3d5
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/note.S
@@ -0,0 +1,25 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
+ .section name, flags; \
+ .balign 4; \
+ .long 1f - 0f; /* name length */ \
+ .long 3f - 2f; /* data length */ \
+ .long type; /* note type */ \
+0: .asciz vendor; /* vendor name */ \
+1: .balign 4; \
+2:
+
+#define ASM_ELF_NOTE_END \
+3: .balign 4; /* pad out section */ \
+ .previous
+
+ ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+ .long LINUX_VERSION_CODE
+ ASM_ELF_NOTE_END
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S
new file mode 100644
index 00000000000..e0464278191
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,300 @@
+/*
+ * Signal trampolines for 32 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by adding a nop before
+ the real start. */
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp32)
+.Lsig_start = . - 4
+ li r0,__NR_sigreturn
+ sc
+.Lsig_end:
+V_FUNCTION_END(__kernel_sigtramp32)
+
+.Lsigrt_start:
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt32)
+
+ .section .eh_frame,"a",@progbits
+
+/* Register r1 can be found at offset 4 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. The VMX reg struct is at offset VREGS of
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 64+28
+
+/* Size of regs. */
+#define RSIZE 4
+
+/* This is the offset of the VMX regs. */
+#define VREGS 48*RSIZE+34*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 32*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 4
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsig_start - . /* PC start, length */
+ .long .Lsig_end - .Lsig_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde0_end:
+
+/* We have a different stack layout for rt_sigreturn. */
+#undef PTREGS
+#define PTREGS 64+16+128+20+28
+
+ .long .Lfde1_end - .Lfde1_start
+.Lfde1_start:
+ .long .Lfde1_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde1_end:
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 00000000000..f4bad720cb0
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,117 @@
+
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .got : { *(.got) }
+ .plt : { *(.plt) }
+
+ _end = .;
+ __end = .;
+ PROVIDE (end = .);
+
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp32;
+ __kernel_sigtramp_rt32;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 00000000000..556f0caa5d8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
new file mode 100644
index 00000000000..ab39988452c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -0,0 +1,35 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1
+EXTRA_AFLAGS := -D__VDSO64__ -s
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
new file mode 100644
index 00000000000..d4a0ad28d53
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -0,0 +1,66 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
new file mode 100644
index 00000000000..6393e4137bc
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -0,0 +1,85 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r4,r3
+ bl V_LOCAL_FUNC(__get_datapage)
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP64
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+
+/*
+ * void unsigned long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ ld r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
new file mode 100644
index 00000000000..1a89094715c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,249 @@
+
+ /*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r11,r3 /* r11 holds tv */
+ mr r10,r4 /* r10 holds tz */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ cmpldi cr0,r10,0 /* check if tz is NULL */
+ std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r3)
+ stw r4,TZONE_TZ_MINWEST(r10)
+ stw r5,TZONE_TZ_DSTTIME(r10)
+1: mtlr r12
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ mulli r0,r0,1000 /* nsec = usec * 1000 */
+ std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r6,r0,44,20
+ mulli r6,r6,1000 /* nsec = usec * 1000 */
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r5,r6 contain our sec/nsec values.
+ * can be used
+ */
+
+ lwa r4,WTOM_CLOCK_SEC(r3)
+ lwa r7,WTOM_CLOCK_NSEC(r3)
+
+ /* We now have our result in r4,r7. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r9,r4,r7
+ xor r0,r9,r9
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r4,r4,r5
+ add r7,r7,r6
+ lis r9,NSEC_PER_SEC@h
+ ori r9,r9,NSEC_PER_SEC@l
+ cmpl cr0,r7,r9
+ cmpli cr1,r7,0
+ blt 1f
+ subf r7,r9,r7
+ addi r4,r4,1
+1: bge cr1,1f
+ addi r4,r4,-1
+ add r7,r7,r9
+1: std r4,TSPC64_TV_SEC(r11)
+ std r7,TSPC64_TV_NSEC(r11)
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ std r3,TSPC64_TV_SEC(r4)
+ std r5,TSPC64_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday(), it returns the xsec
+ * value in r4 and expects the datapage ptr (non clobbered)
+ * in r3. clobbers r0,r4,r5,r6,r7,r8
+ * When returning, r8 contains the counter value that can be reused
+ */
+V_FUNCTION_BEGIN(__do_get_xsec)
+ .cfi_startproc
+ /* check for update count & load values */
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r4,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r4,r4 /* create dependency */
+ add r3,r3,r0
+
+ /* Get TB & offset it */
+ mftb r7
+ ld r9,CFG_TB_ORIG_STAMP(r3)
+ subf r7,r9,r7
+
+ /* Scale result */
+ ld r5,CFG_TB_TO_XS(r3)
+ mulhdu r7,r7,r5
+
+ /* Add stamp since epoch */
+ ld r6,CFG_STAMP_XSEC(r3)
+ add r4,r6,r7
+
+ xor r0,r4,r4
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 1b
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__do_get_xsec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
new file mode 100644
index 00000000000..dc2a509f7e8
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/note.S
@@ -0,0 +1 @@
+#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
new file mode 100644
index 00000000000..31b604ab56d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,295 @@
+/*
+ * Signal trampoline for 64 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by padding before the
+ real start. */
+ nop
+ .balign 8
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
+.Lsigrt_start = . - 4
+ addi r1, r1, __SIGNAL_FRAMESIZE
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt64)
+/* The ".balign 8" above and the following zeros mimic the old stack
+ trampoline layout. The last magic value is the ucontext pointer,
+ chosen in such a way that older libgcc unwind code returns a zero
+ for a sigcontext pointer. */
+ .long 0,0,0
+ .quad 0,-21*8
+
+/* Register r1 can be found at offset 8 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 128+168+56
+
+/* Size of regs. */
+#define RSIZE 8
+
+/* This is the offset of the VMX reg pointer. */
+#define VREGS 48*RSIZE+33*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 33*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+ .section .eh_frame,"a",@progbits
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -8 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 8
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .quad .Lsigrt_start - . /* PC start, length */
+ .quad .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+# Do we really need to describe the frame at this point? ie. will
+# we ever have some call chain that returns somewhere past the addi?
+# I don't think so, since gcc doesn't support async signals.
+# .byte 0x41 /* DW_CFA_advance_loc 1*4 */
+#undef PTREGS
+#define PTREGS 168+56
+# EH_FRAME_GEN
+# EH_FRAME_FP
+# EH_FRAME_VMX
+ .balign 8
+.Lfde0_end:
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 00000000000..4bdf224464a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,116 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.sfpr .glink)
+ } :text
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+
+ .opd ALIGN(8) : { KEEP (*(.opd)) }
+ .got ALIGN(8) : { *(.got .toc) }
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ _end = .;
+ PROVIDE (end = .);
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sectio/ns.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.branch_lt) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp_rt64;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 00000000000..0529cb9e3b9
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 97082a4203a..71a6addf9f7 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -21,6 +21,7 @@
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/vio.h>
+#include <asm/prom.h>
static const struct vio_device_id *vio_match_device(
const struct vio_device_id *, const struct vio_dev *);
@@ -265,7 +266,33 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
}
+static int vio_hotplug(struct device *dev, char **envp, int num_envp,
+ char *buffer, int buffer_size)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ char *cp;
+ int length;
+
+ if (!num_envp)
+ return -ENOMEM;
+
+ if (!vio_dev->dev.platform_data)
+ return -ENODEV;
+ cp = (char *)get_property(vio_dev->dev.platform_data, "compatible", &length);
+ if (!cp)
+ return -ENODEV;
+
+ envp[0] = buffer;
+ length = scnprintf(buffer, buffer_size, "MODALIAS=vio:T%sS%s",
+ vio_dev->type, cp);
+ if (buffer_size - length <= 0)
+ return -ENOMEM;
+ envp[1] = NULL;
+ return 0;
+}
+
struct bus_type vio_bus_type = {
.name = "vio",
+ .hotplug = vio_hotplug,
.match = vio_bus_match,
};
diff --git a/arch/powerpc/lib/bitops.c b/arch/powerpc/lib/bitops.c
index b67ce3004eb..f68ad71a018 100644
--- a/arch/powerpc/lib/bitops.c
+++ b/arch/powerpc/lib/bitops.c
@@ -41,7 +41,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
tmp = *p;
found_first:
- tmp &= (~0UL >> (64 - size));
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bb..40523b14010 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
-_GLOBAL(copy_page)
+_GLOBAL(copy_4K_page)
std r31,-8(1)
std r30,-16(1)
std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb..6d69ef39b7d 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
- beq .Lcopy_page
+ beq .Lcopy_page_4K
andi. r6,r6,7
mtcrf 0x01,r5
blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
* above (following the .Ldst_aligned label) but it runs slightly
* slower on POWER3.
*/
-.Lcopy_page:
+.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 2a912f411eb..35bd03c41dd 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
#include <asm/hvcall.h>
#include <asm/iseries/hv_call.h>
+#include <asm/smp.h>
void __spin_yield(raw_spinlock_t *lock)
{
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 841d8b6323a..93d4fbfdb72 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -389,5 +389,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
}
/* kernel has accessed a bad area */
+
+ printk(KERN_ALERT "Unable to handle kernel paging request for ");
+ switch (regs->trap) {
+ case 0x300:
+ case 0x380:
+ printk("data at address 0x%08lx\n", regs->dar);
+ break;
+ case 0x400:
+ case 0x480:
+ printk("instruction fetch\n");
+ break;
+ default:
+ printk("unknown fault\n");
+ }
+ printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
+ regs->nip);
+
die("Kernel access of bad area", regs, sig);
}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index af9ca0eb6d5..5d581bb3aa1 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -1,5 +1,5 @@
/*
- * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
* E500 Book E processors.
*
* Copyright 2004 Freescale Semiconductor, Inc
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eee..e0d02c4a261 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
/*
* ppc64 MMU hashtable management routines
*
- * (c) Copyright IBM Corp. 2003
+ * (c) Copyright IBM Corp. 2003, 2005
*
* Maintained by: Benjamin Herrenschmidt
* <benh@kernel.crashing.org>
@@ -10,6 +10,7 @@
* described in the kernel's COPYING file.
*/
+#include <linux/config.h>
#include <asm/reg.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -42,14 +43,24 @@
/* Save non-volatile offsets */
#define STK_REG(i) (112 + ((i)-14)*8)
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+/*****************************************************************************
+ * *
+ * 4K SW & 4K HW pages implementation *
+ * *
+ *****************************************************************************/
+
+
/*
- * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local)
+ * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
*
- * Adds a page to the hash table. This is the non-LPAR version for now
+ * Adds a 4K page to the hash table in a segment of 4K pages only
*/
-_GLOBAL(__hash_page)
+_GLOBAL(__hash_page_4K)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
/* If so, just bail out and refault if needed. Someone else
* is changing this PTE anyway and might hash it.
*/
- bne- bail_ok
+ bne- htab_bail_ok
+
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
/* Convert linux PTE bits into HW equivalents */
andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
@@ -158,19 +170,21 @@ htab_insert_pte:
andc r30,r30,r0
ori r30,r30,_PAGE_HASHPTE
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate primary group hash */
and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* no vflags */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert1)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
/* Now try secondary slot */
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate secondary group hash */
andc r0,r27,r28
rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,HPTE_V_SECONDARY@l /* secondary slot */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert2)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
_GLOBAL(htab_call_hpte_remove)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* Try all again */
b htab_insert_pte
-bail_ok:
+htab_bail_ok:
li r3,0
- b bail
+ b htab_bail
htab_pte_insert_ok:
/* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
ld r6,STK_PARM(r6)(r1)
std r30,0(r6)
li r3, 0
-bail:
+htab_bail:
ld r27,STK_REG(r27)(r1)
ld r28,STK_REG(r28)(r1)
ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
/* Call ppc_md.hpte_updatepp */
mr r5,r29 /* va */
- li r6,0 /* large is 0 */
+ li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARM(r8)(r1) /* get "local" param */
_GLOBAL(htab_call_hpte_updatepp)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
/* Bail out clearing reservation */
stdcx. r31,0,r6
li r3,1
- b bail
+ b htab_bail
+
+htab_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b htab_bail
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
+ */
+
+/*
+ * For now, we do NOT implement Admixed pages
+ */
+_GLOBAL(__hash_page_4K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ * r26 is the hidx mask
+ * r25 is the index in combo page
+ */
+ std r25,STK_REG(r25)(r1)
+ std r26,STK_REG(r26)(r1)
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- htab_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- htab_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Load the hidx index */
+ rldicl r25,r3,64-12,60
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
+ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
+ or r29,r3,r29 /* r29 = va
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE. We look for
+ * the bit at (1 >> (index + 32))
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ li r26,0 /* Default hidx */
+ beq htab_insert_pte
+ ld r6,STK_PARM(r6)(r1)
+ ori r26,r6,0x8000 /* Load the hidx mask */
+ ld r26,0(r26)
+ addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
+ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
+ bne htab_modify_pte
+
+htab_insert_pte:
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b htab_insert_pte
+
+htab_bail_ok:
+ li r3,0
+ b htab_bail
+
+htab_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE second half,
+ * clear _PAGE_BUSY and set approriate HPTE slot bit
+ */
+ ld r6,STK_PARM(r6)(r1)
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ /* HPTE SUB bit */
+ li r0,1
+ subfic r5,r25,27 /* Must match bit position in */
+ sld r0,r0,r5 /* pgtable.h */
+ or r30,r30,r0
+ /* hindx */
+ sldi r5,r25,2
+ sld r3,r3,r5
+ li r4,0xf
+ sld r4,r4,r5
+ andc r26,r26,r4
+ or r26,r26,r3
+ ori r5,r6,0x8000
+ std r26,0(r5)
+ lwsync
+ std r30,0(r6)
+ li r3, 0
+htab_bail:
+ ld r25,STK_REG(r25)(r1)
+ ld r26,STK_REG(r26)(r1)
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+htab_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ sldi r5,r25,2
+ srd r3,r26,r5
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r3,0x8 /* page secondary ? */
+ beq 1f
+ not r5,r5
+1: andi. r3,r3,0x7 /* extract idx alone */
+
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_4K /* page size */
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- htab_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3,0
+ b htab_bail
+
+htab_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b htab_bail
htab_pte_insert_failure:
/* Bail out restoring old PTE */
ld r6,STK_PARM(r6)(r1)
std r31,0(r6)
li r3,-1
- b bail
+ b htab_bail
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 64K HW in a 64K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+_GLOBAL(__hash_page_64K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamic patched instead ?)
+ */
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- ht64_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- ht64_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY,HASHPTE and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is re-useable, we use it for the new HPTE flags
+ */
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28
+ rldicl r3,r3,0,36
+ or r29,r3,r29
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable, in this case, we
+ * go to out-of-line code to try to modify the HPTE
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ bne ht64_modify_pte
+
+ht64_insert_pte:
+ /* Clear hpte bits in new pte (we also clear BUSY btw) and
+ * add _PAGE_HASHPTE
+ */
+ lis r0,_PAGE_HPTEFLAGS@h
+ ori r0,r0,_PAGE_HPTEFLAGS@l
+ andc r30,r30,r0
+ ori r30,r30,_PAGE_HASHPTE
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* Phyical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ mr r4,r29 /* Retreive va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(ht64_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b ht64_insert_pte
+
+ht64_bail_ok:
+ li r3,0
+ b ht64_bail
+
+ht64_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE */
+ rldimi r30,r3,12,63-15
+
+ /* Write out the PTE with a normal write
+ * (maybe add eieio may be good still ?)
+ */
+ht64_write_out_pte:
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3, 0
+ht64_bail:
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ht64_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ rlwinm r3,r31,32-12,29,31
+
+ /* Secondary group ? if yes, get a inverted hash value */
+ mr r5,r28
+ andi. r0,r31,_PAGE_F_SECOND
+ beq 1f
+ not r5,r5
+1:
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_64K
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(ht64_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- ht64_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ b ht64_write_out_pte
+
+ht64_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b ht64_bail
+
+ht64_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b ht64_bail
+
+
+#endif /* CONFIG_PPC_64K_PAGES */
+/*****************************************************************************
+ * *
+ * Huge pages implementation is in hugetlbpage.c *
+ * *
+ *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c2..d96bcfe4c6f 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG_LOW
+
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
@@ -22,11 +25,84 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
+static inline void __tlbie(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile("tlbie %0,0" : : "r" (va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile("tlbie %0,1" : : "r" (va) : "memory");
+ break;
+ }
+}
+
+static inline void __tlbiel(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ }
+
+}
+
+static inline void tlbie(unsigned long va, int psize, int local)
+{
+ unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
+ int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+
+ if (use_local)
+ use_local = mmu_psize_defs[psize].tlbiel;
+ if (lock_tlbie && !use_local)
+ spin_lock(&native_tlbie_lock);
+ asm volatile("ptesync": : :"memory");
+ if (use_local) {
+ __tlbiel(va, psize);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(va, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+ spin_unlock(&native_tlbie_lock);
+}
+
static inline void native_lock_hpte(hpte_t *hptep)
{
unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
hpte_t *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
int i;
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ " rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+ }
+
for (i = 0; i < HPTES_PER_GROUP; i++) {
if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- va &= ~(1UL << HPTE_V_AVPN_SHIFT);
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ i, hpte_v, hpte_r);
+ }
hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
int slot_offset;
unsigned long hpte_v;
+ DBG_LOW(" remove(group=%lx)\n", hpte_group);
+
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
-static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
{
- unsigned long old;
- unsigned long *p = &addr->r;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,61\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
+
+ want_v = hpte_encode_v(va, psize);
+
+ DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ va, want_v & HPTE_V_AVPN, slot, newpp);
+
+ native_lock_hpte(hptep);
+
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+ native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
+ native_unlock_hpte(hptep);
+ }
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+
+ return ret;
}
-/*
- * Only works on small pages. Yes its ugly to have to check each slot in
- * the group but we only use this during bootup.
- */
-static long native_hpte_find(unsigned long vpn)
+static long native_hpte_find(unsigned long va, int psize)
{
hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
hptep = htab_address + slot;
hpte_v = hptep->v;
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -177,127 +281,101 @@ static long native_hpte_find(unsigned long vpn)
return -1;
}
-static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
-{
- hpte_t *hptep = htab_address + slot;
- unsigned long hpte_v;
- unsigned long avpn = va >> 23;
- int ret = 0;
-
- if (large)
- avpn &= ~1;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- ret = -1;
- } else {
- set_pp_bit(newpp, hptep);
- native_unlock_hpte(hptep);
- }
-
- /* Ensure it is out of the tlb too */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
-
- return ret;
-}
-
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
- * Does not work on large pages.
*
* No need to lock here because we should be the only user.
*/
-static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
- unsigned long vsid, va, vpn, flags = 0;
+ unsigned long vsid, va;
long slot;
hpte_t *hptep;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = native_hpte_find(vpn);
+ slot = native_hpte_find(va, psize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
- set_pp_bit(newpp, hptep);
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
- /* Ensure it is out of the tlb too */
- if (lock_tlbie)
- spin_lock_irqsave(&native_tlbie_lock, flags);
- tlbie(va, 0);
- if (lock_tlbie)
- spin_unlock_irqrestore(&native_tlbie_lock, flags);
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
hpte_t *hptep = htab_address + slot;
unsigned long hpte_v;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long flags;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (large)
- avpn &= ~1;
local_irq_save(flags);
- native_lock_hpte(hptep);
+ DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- } else {
+ else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
- }
- /* Invalidate the tlb */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
+ /* Invalidate the TLB */
+ tlbie(va, psize, local);
+
local_irq_restore(flags);
}
/*
+ * XXX This need fixing based on page size. It's only used by
+ * native_hpte_clear() for now which needs fixing too so they
+ * make a good pair...
+ */
+static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+{
+ unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
+ unsigned long va;
+
+ va = avpn << 23;
+
+ if (! (hpte_v & HPTE_V_LARGE)) {
+ unsigned long vpi, pteg;
+
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+ va |= vpi << PAGE_SHIFT;
+ }
+
+ return va;
+}
+
+/*
* clear all mappings on kexec. All cpus are in real mode (or they will
* be when they isi), and we are the only one left. We rely on our kernel
* mapping being 0xC0's and the hardware ignoring those two real bits.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* athough there is the control page available...
+ *
+ * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hptep->v = 0;
- tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
}
}
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
local_irq_restore(flags);
}
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ */
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, vpn, hash, secondary, slot, flags, avpn;
- int i, j;
+ unsigned long va, hash, index, hidx, shift, slot;
hpte_t *hptep;
unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+ real_pte_t pte;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long large = batch->large;
+ unsigned long psize = batch->psize;
+ int i;
local_irq_save(flags);
- j = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[j];
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, large);
- secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
-
- hptep = htab_address + slot;
-
- avpn = va >> 23;
- if (large)
- avpn &= ~0x1UL;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- } else {
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
- }
-
- j++;
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ hptep = htab_address + slot;
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+ if (!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ hptep->v = 0;
+ } pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
+ if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbiel(batch->vaddr[i]);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbiel(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("ptesync":::"memory");
} else {
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
spin_lock(&native_tlbie_lock);
asm volatile("ptesync":::"memory");
-
- for (i = 0; i < j; i++)
- __tlbie(batch->vaddr[i], large);
-
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
+
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbie(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02..706e8a63ced 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
*/
#undef DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/spinlock.h>
@@ -32,7 +33,6 @@
#include <linux/init.h>
#include <linux/signal.h>
-#include <asm/ppcdebug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -59,6 +59,15 @@
#define DBG(fmt...)
#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -75,99 +84,302 @@
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */
+static unsigned long _SDR1;
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
hpte_t *htab_address;
unsigned long htab_hash_mask;
+int mmu_linear_psize = MMU_PAGE_4K;
+int mmu_virtual_psize = MMU_PAGE_4K;
+#ifdef CONFIG_HUGETLB_PAGE
+int mmu_huge_psize = MMU_PAGE_16M;
+unsigned int HPAGE_SHIFT;
+#endif
-unsigned long _SDR1;
-
-#define KB (1024)
-#define MB (1024*KB)
-
-static inline void loop_forever(void)
-{
- volatile unsigned long x = 1;
- for(;x;x|=1)
- ;
-}
+/* There are definitions of page sizes arrays to be used when none
+ * is provided by the firmware.
+ */
-static inline void create_pte_mapping(unsigned long start, unsigned long end,
- unsigned long mode, int large)
+/* Pre-POWER4 CPUs (4k pages only)
+ */
+struct mmu_psize_def mmu_psize_defaults_old[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/* POWER4, GPUL, POWER5
+ *
+ * Support for 16Mb large pages
+ */
+struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = 0,
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode, int psize)
{
- unsigned long addr;
- unsigned int step;
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
unsigned long tmp_mode;
- unsigned long vflags;
+ int ret = 0;
- if (large) {
- step = 16*MB;
- vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
- } else {
- step = 4*KB;
- vflags = HPTE_V_BOLTED;
- }
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
- for (addr = start; addr < end; addr += step) {
+ for (vaddr = vstart, paddr = pstart; vaddr < vend;
+ vaddr += step, paddr += step) {
unsigned long vpn, hash, hpteg;
- unsigned long vsid = get_kernel_vsid(addr);
- unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- int ret = -1;
-
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
-
+ unsigned long vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
- if (!in_kernel_text(addr))
- tmp_mode = mode | HW_NO_EXEC;
-
- hash = hpt_hash(vpn, large);
+ if (!in_kernel_text(vaddr))
+ tmp_mode = mode | HPTE_R_N;
+ hash = hpt_hash(va, shift);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ /* The crap below can be cleaned once ppd_md.probe() can
+ * set up the hash callbacks, thus we can just used the
+ * normal insert callback here.
+ */
#ifdef CONFIG_PPC_ISERIES
- if (systemcfg->platform & PLATFORM_ISERIES_LPAR)
- ret = iSeries_hpte_bolt_or_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ if (_machine == PLATFORM_ISERIES_LPAR)
+ ret = iSeries_hpte_insert(hpteg, va,
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_PSERIES
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (_machine & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_MULTIPLATFORM
ret = native_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode, HPTE_V_BOLTED,
+ psize);
#endif
+ if (ret < 0)
+ break;
+ }
+ return ret < 0 ? ret : 0;
+}
- if (ret == -1) {
- ppc64_terminate_msg(0x20, "create_pte_mapping");
- loop_forever();
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+ unsigned long size = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node,
+ "ibm,segment-page-sizes", &size);
+ if (prop != NULL) {
+ DBG("Page sizes from device-tree:\n");
+ size /= 4;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+ while(size > 0) {
+ unsigned int shift = prop[0];
+ unsigned int slbenc = prop[1];
+ unsigned int lpnum = prop[2];
+ unsigned int lpenc = 0;
+ struct mmu_psize_def *def;
+ int idx = -1;
+
+ size -= 3; prop += 3;
+ while(size > 0 && lpnum) {
+ if (prop[0] == shift)
+ lpenc = prop[1];
+ prop += 2; size -= 2;
+ lpnum--;
+ }
+ switch(shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x14:
+ idx = MMU_PAGE_1M;
+ break;
+ case 0x18:
+ idx = MMU_PAGE_16M;
+ cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+ break;
+ case 0x22:
+ idx = MMU_PAGE_16G;
+ break;
+ }
+ if (idx < 0)
+ continue;
+ def = &mmu_psize_defs[idx];
+ def->shift = shift;
+ if (shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (shift - 23)) - 1;
+ def->sllp = slbenc;
+ def->penc = lpenc;
+ /* We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+ "tlbiel=%d, penc=%d\n",
+ idx, shift, def->sllp, def->avpnm, def->tlbiel,
+ def->penc);
}
+ return 1;
+ }
+ return 0;
+}
+
+
+static void __init htab_init_page_sizes(void)
+{
+ int rc;
+
+ /* Default to 4K pages only */
+ memcpy(mmu_psize_defs, mmu_psize_defaults_old,
+ sizeof(mmu_psize_defaults_old));
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found */
+ goto found;
+
+ /*
+ * Not in the device-tree, let's fallback on known size
+ * list for 16M capable GP & GR
+ */
+ if ((_machine != PLATFORM_ISERIES_LPAR) &&
+ cpu_has_feature(CPU_FTR_16M_PAGE))
+ memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ sizeof(mmu_psize_defaults_gp));
+ found:
+ /*
+ * Pick a size for the linear mapping. Currently, we only support
+ * 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K if cache inhibited large pages are supported by the
+ * processor
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
+ mmu_virtual_psize = MMU_PAGE_64K;
+#endif
+
+ printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Init large page size. Currently, we pick 16M or 1M depending
+ * on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_huge_psize = MMU_PAGE_16M;
+ /* With 4k/4level pagetables, we can't (for now) cope with a
+ * huge page size < PMD_SIZE */
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_huge_psize = MMU_PAGE_1M;
+
+ /* Calculate HPAGE_SHIFT and sanity check it */
+ if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
+ mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
+ HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
+ else
+ HPAGE_SHIFT = 0; /* No huge pages dude ! */
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static int __init htab_dt_scan_pftsize(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ if (prop != NULL) {
+ /* pft_size[0] is the NUMA CEC cookie */
+ ppc64_pft_size = prop[1];
+ return 1;
}
+ return 0;
}
-static unsigned long get_hashtable_size(void)
+static unsigned long __init htab_get_table_size(void)
{
- unsigned long rnd_mem_size, pteg_count;
+ unsigned long mem_size, rnd_mem_size, pteg_count;
- /* If hash size wasn't obtained in prom.c, we calculate it now based on
- * the total RAM size
+ /* If hash size isn't already provided by the platform, we try to
+ * retreive it from the device-tree. If it's not there neither, we
+ * calculate it now based on the total RAM size
*/
+ if (ppc64_pft_size == 0)
+ of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
/* round mem_size up to next power of 2 */
- rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
- if (rnd_mem_size < systemcfg->physicalMemorySize)
+ mem_size = lmb_phys_mem_size();
+ rnd_mem_size = 1UL << __ilog2(mem_size);
+ if (rnd_mem_size < mem_size)
rnd_mem_size <<= 1;
/* # pages / 2 */
@@ -176,33 +388,40 @@ static unsigned long get_hashtable_size(void)
return pteg_count << 7;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+void create_section_mapping(unsigned long start, unsigned long end)
+{
+ BUG_ON(htab_bolt_mapping(start, end, start,
+ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
+ mmu_linear_psize));
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
void __init htab_initialize(void)
{
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_rw;
- int i, use_largepages = 0;
unsigned long base = 0, size = 0;
+ int i;
+
extern unsigned long tce_alloc_start, tce_alloc_end;
DBG(" -> htab_initialize()\n");
+ /* Initialize page sizes */
+ htab_init_page_sizes();
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
*/
- htab_size_bytes = get_hashtable_size();
+ htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7;
- /* For debug, make the HTAB 1/8 as big as it normally would be. */
- ifppcdebug(PPCDBG_HTABSIZE) {
- pteg_count >>= 3;
- htab_size_bytes = pteg_count << 7;
- }
-
htab_hash_mask = pteg_count - 1;
- if (systemcfg->platform & PLATFORM_LPAR) {
+ if (platform_is_lpar()) {
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
@@ -211,14 +430,11 @@ void __init htab_initialize(void)
* the absolute address space.
*/
table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ BUG_ON(table == 0);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
- if ( !table ) {
- ppc64_terminate_msg(0x20, "hpt space");
- loop_forever();
- }
htab_address = abs_to_virt(table);
/* htab absolute addr + encoded htabsize */
@@ -226,6 +442,9 @@ void __init htab_initialize(void)
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
+
+ /* Set SDR1 */
+ mtspr(SPRN_SDR1, _SDR1);
}
mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
@@ -234,8 +453,6 @@ void __init htab_initialize(void)
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- use_largepages = 1;
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +463,32 @@ void __init htab_initialize(void)
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and will
- * fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if we
- * only use 2Mb of that space. We will use more of it later for
- * AGP GART. We have to use a full 16Mb large page.
+ * in such a way that it will not cross two lmb regions and
+ * will fit within a single 16Mb page.
+ * The DART space is assumed to be a full 16Mb region even if
+ * we only use 2Mb of that space. We will use more of it later
+ * for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
if (base != dart_tablebase)
- create_pte_mapping(base, dart_tablebase, mode_rw,
- use_largepages);
+ BUG_ON(htab_bolt_mapping(base, dart_tablebase,
+ base, mode_rw,
+ mmu_linear_psize));
if ((base + size) > (dart_tablebase + 16*MB))
- create_pte_mapping(dart_tablebase + 16*MB, base + size,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
+ base + size,
+ dart_tablebase+16*MB,
+ mode_rw,
+ mmu_linear_psize));
continue;
}
#endif /* CONFIG_U3_DART */
- create_pte_mapping(base, base + size, mode_rw, use_largepages);
- }
+ BUG_ON(htab_bolt_mapping(base, base + size, base,
+ mode_rw, mmu_linear_psize));
+ }
/*
* If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +504,9 @@ void __init htab_initialize(void)
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
- create_pte_mapping(tce_alloc_start, tce_alloc_end,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ tce_alloc_start, mode_rw,
+ mmu_linear_psize));
}
DBG(" <- htab_initialize()\n");
@@ -291,6 +514,12 @@ void __init htab_initialize(void)
#undef KB
#undef MB
+void __init htab_initialize_secondary(void)
+{
+ if (!platform_is_lpar())
+ mtspr(SPRN_SDR1, _SDR1);
+}
+
/*
* Called by asm hashtable.S for doing lazy icache flush
*/
@@ -309,7 +538,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= HW_NO_EXEC;
+ pp |= HPTE_R_N;
}
return pp;
}
@@ -325,94 +554,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- int ret;
- int user_region = 0;
- int local = 0;
cpumask_t tmp;
+ int rc, user_region = 0, local = 0;
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 1;
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
+ DBG_LOW(" out of pgtable range !\n");
+ return 1;
+ }
+
+ /* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (! mm)
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
return 1;
-
+ }
vsid = get_vsid(mm->context.id, ea);
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
-#if 0
- case KERNEL_REGION_ID:
- /*
- * Should never get here - entire 0xC0... region is bolted.
- * Send the problem up to do_page_fault
- */
-#endif
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
- break;
}
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Get pgdir */
pgdir = mm->pgd;
-
if (pgdir == NULL)
return 1;
+ /* Check CPU locality */
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
- /* Is this a huge page ? */
- if (unlikely(in_hugepage_area(mm->context, ea)))
- ret = hash_huge_page(mm, access, ea, vsid, local);
- else {
- ptep = find_linux_pte(pgdir, ea);
- if (ptep == NULL)
- return 1;
- ret = __hash_page(ea, access, vsid, ptep, trap, local);
+ /* Handle hugepage regions */
+ if (unlikely(in_hugepage_area(mm->context, ea))) {
+ DBG_LOW(" -> huge page !\n");
+ return hash_huge_page(mm, access, ea, vsid, local);
+ }
+
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ return 1;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
}
- return ret;
+ /* Do actual hashing */
+#ifndef CONFIG_PPC_64K_PAGES
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+ return rc;
}
-void flush_hash_page(unsigned long va, pte_t pte, int local)
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
{
- unsigned long vpn, hash, secondary, slot;
- unsigned long huge = pte_huge(pte);
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+ cpumask_t mask;
+ unsigned long flags;
+ int local = 0;
+
+ /* We don't want huge pages prefaulted for now
+ */
+ if (unlikely(in_hugepage_area(mm->context, ea)))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
+
+ /* Get PTE, VSID, access mask */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+ ptep = find_linux_pte(pgdir, ea);
+ if (!ptep)
+ return;
+ vsid = get_vsid(mm->context.id, ea);
- if (huge)
- vpn = va >> HPAGE_SHIFT;
+ /* Hash it in */
+ local_irq_save(flags);
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (cpus_equal(mm->cpu_vm_mask, mask))
+ local = 1;
+#ifndef CONFIG_PPC_64K_PAGES
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, huge);
- secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-
- ppc_md.hpte_invalidate(slot, va, huge, local);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+ local_irq_restore(flags);
+}
+
+void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
+{
+ unsigned long hash, index, shift, hidx, slot;
+
+ DBG_LOW("flush_hash_page(va=%016x)\n", va);
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
+ ppc_md.hpte_invalidate(slot, va, psize, local);
+ } pte_iterate_hashed_end();
}
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range) {
+ if (ppc_md.flush_hash_range)
ppc_md.flush_hash_range(number, local);
- } else {
+ else {
int i;
struct ppc64_tlb_batch *batch =
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
}
}
@@ -452,6 +756,18 @@ void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
+#ifdef CONFIG_PPC_64K_PAGES
+ extern unsigned int *ht64_call_hpte_insert1;
+ extern unsigned int *ht64_call_hpte_insert2;
+ extern unsigned int *ht64_call_hpte_remove;
+ extern unsigned int *ht64_call_hpte_updatepp;
+
+ make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
+ make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+#endif /* CONFIG_PPC_64K_PAGES */
+
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed97..426c269e552 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mecanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
-#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- int i;
-
if (pte_present(*ptep)) {
- pte_clear(mm, addr, ptep);
+ /* We open-code pte_clear because we need to pass the right
+ * argument to hpte_update (huge / !huge)
+ */
+ unsigned long old = pte_update(ptep, ~0UL);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
-
- for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- ptep++;
- }
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
- int i;
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
-
- for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
- ptep[i] = __pte(0);
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+ *ptep = __pte(0);
return __pte(old);
}
@@ -196,6 +212,12 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
BUG_ON(area >= NUM_HIGH_AREAS);
+ /* Hack, so that each addresses is controlled by exactly one
+ * of the high or low area bitmaps, the first high area starts
+ * at 4GB, not 0 */
+ if (start == 0)
+ start = 0x100000000UL;
+
/* Check no VMAs are in the region */
vma = find_vma(mm, start);
if (vma && (vma->vm_start < end))
@@ -563,6 +585,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
+ if (HPAGE_SHIFT == 0)
+ return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -619,19 +643,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
- unsigned long va, vpn;
- pte_t old_pte, new_pte;
- unsigned long rflags, prpn;
+ unsigned long old_pte, new_pte;
+ unsigned long va, rflags, pa;
long slot;
int err = 1;
- spin_lock(&mm->page_table_lock);
-
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
@@ -640,8 +660,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
-/* BUG_ON(pte_bad(*ptep)); */
-
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
@@ -661,58 +679,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
- old_pte = *ptep;
- new_pte = old_pte;
-
- rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ do {
+ old_pte = pte_val(*ptep);
+ if (old_pte & _PAGE_BUSY)
+ goto out;
+ new_pte = old_pte | _PAGE_BUSY |
+ _PAGE_ACCESSED | _PAGE_HASHPTE;
+ } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
+
+ rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(vpn, 1);
- if (pte_val(old_pte) & _PAGE_SECONDARY)
+ hash = hpt_hash(va, HPAGE_SHIFT);
+ if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, 1);
+ if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
- prpn = pte_pfn(old_pte);
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- /* Update the linux pte with the HPTE slot */
- pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= _PAGE_HASHPTE;
+ /* clear HPTE slot informations in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
+ /* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE, rflags);
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- pte_val(new_pte) |= _PAGE_SECONDARY;
+ new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE |
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
- rflags);
+ mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +750,18 @@ repeat:
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
- /*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
- */
- *ptep = new_pte;
+ new_pte |= (slot << 12) & _PAGE_F_GIX;
}
+ /*
+ * No need to use ldarx/stdcx here because all who
+ * might be updating the pte will hold the
+ * page_table_lock
+ */
+ *ptep = __pte(new_pte & ~_PAGE_BUSY);
+
err = 0;
out:
- spin_unlock(&mm->page_table_lock);
-
return err;
}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 4612a79dfb6..7d4b8b5f060 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -84,9 +84,6 @@ void MMU_init(void);
/* XXX should be in current.h -- paulus */
extern struct task_struct *current_set[NR_CPUS];
-char *klimit = _end;
-struct device_node *memory_node;
-
extern int init_bootmem_done;
/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29..1134f70f231 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -20,6 +20,8 @@
*
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -57,7 +59,6 @@
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
-#include <asm/ppcdebug.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
@@ -65,6 +66,12 @@
#include <asm/vdso.h>
#include <asm/imalloc.h>
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
#if PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
@@ -73,8 +80,6 @@
#warning TASK_SIZE is smaller than it needs to be.
#endif
-unsigned long klimit = (unsigned long)_end;
-
/* max amount of RAM to use */
unsigned long __max_memory;
@@ -188,12 +193,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
-static const int pgtable_cache_size[2] = {
+#ifdef CONFIG_PPC_64K_PAGES
+static const unsigned int pgtable_cache_size[3] = {
+ PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pte_pmd_cache", "pmd_cache", "pgd_cache",
+};
+#else
+static const unsigned int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
+#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
@@ -201,19 +215,16 @@ void pgtable_cache_init(void)
{
int i;
- BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
+ DBG("Allocating page table cache %s (#%d) "
+ "for size: %08x...\n", name, i, size);
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e1..e2c95fcb805 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -46,9 +46,7 @@
#include <asm/prom.h>
#include <asm/lmb.h>
#include <asm/sections.h>
-#ifdef CONFIG_PPC64
#include <asm/vdso.h>
-#endif
#include "mmu_decl.h"
@@ -61,6 +59,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
@@ -107,6 +108,7 @@ EXPORT_SYMBOL(phys_mem_access_prot);
void online_page(struct page *page)
{
ClearPageReserved(page);
+ set_page_count(page, 0);
free_cold_page(page);
totalram_pages++;
num_physpages++;
@@ -124,6 +126,9 @@ int __devinit add_memory(u64 start, u64 size)
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ start += KERNELBASE;
+ create_section_mapping(start, start + size);
+
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones;
@@ -355,7 +360,7 @@ void __init mem_init(void)
}
codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
- datasize = (unsigned long)&__init_begin - (unsigned long)&_sdata;
+ datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
@@ -390,10 +395,8 @@ void __init mem_init(void)
mem_init_done = 1;
-#ifdef CONFIG_PPC64
/* Initialize the vDSO */
vdso_init();
-#endif
}
/*
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
- /* handle i-cache coherency */
- unsigned long pfn = pte_pfn(pte);
-#ifdef CONFIG_PPC32
- pmd_t *pmd;
-#else
- unsigned long vsid;
- void *pgdir;
- pte_t *ptep;
- int local = 0;
- cpumask_t tmp;
- unsigned long flags;
+#ifdef CONFIG_PPC_STD_MMU
+ unsigned long access = 0, trap;
#endif
+ unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
-#ifdef CONFIG_PPC32
- if (Hash == 0)
- return;
- pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
- if (!pmd_none(*pmd))
- add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
-#else
- pgdir = vma->vm_mm->pgd;
- if (pgdir == NULL)
- return;
- ptep = find_linux_pte(pgdir, address);
- if (!ptep)
+ /* We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot
+ *
+ * We also avoid filling the hash if not coming from a fault
+ */
+ if (current->thread.regs == NULL)
return;
-
- vsid = get_vsid(vma->vm_mm->context.id, address);
-
- local_irq_save(flags);
- tmp = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
- local = 1;
-
- __hash_page(address, 0, vsid, ptep, 0x300, local);
- local_irq_restore(flags);
-#endif
-#endif
+ trap = TRAP(current->thread.regs);
+ if (trap == 0x400)
+ access |= _PAGE_EXEC;
+ else if (trap != 0x300)
+ return;
+ hash_preload(vma->vm_mm, address, access, trap);
+#endif /* CONFIG_PPC_STD_MMU */
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 4035cad8d7f..bd2cf133688 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -17,55 +17,123 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <asm/sparsemem.h>
#include <asm/lmb.h>
-#include <asm/machdep.h>
-#include <asm/abs_addr.h>
#include <asm/system.h>
+#include <asm/smp.h>
static int numa_enabled = 1;
static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
-#ifdef DEBUG_NUMA
-#define ARRAY_INITIALISER -1
-#else
-#define ARRAY_INITIALISER 0
-#endif
-
-int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
- ARRAY_INITIALISER};
-char *numa_memory_lookup_table;
+int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
-int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
-
struct pglist_data *node_data[MAX_NUMNODES];
-bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_data);
+
+static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
/*
- * We need somewhere to store start/span for each node until we have
+ * We need somewhere to store start/end/node for each region until we have
* allocated the real node_data structures.
*/
+#define MAX_REGIONS (MAX_LMB_REGIONS*2)
static struct {
- unsigned long node_start_pfn;
- unsigned long node_end_pfn;
- unsigned long node_present_pages;
-} init_node_data[MAX_NUMNODES] __initdata;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ int nid;
+} init_node_data[MAX_REGIONS] __initdata;
-EXPORT_SYMBOL(node_data);
-EXPORT_SYMBOL(numa_cpu_lookup_table);
-EXPORT_SYMBOL(numa_memory_lookup_table);
-EXPORT_SYMBOL(numa_cpumask_lookup_table);
-EXPORT_SYMBOL(nr_cpus_in_node);
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+ unsigned int i;
+
+ for (i = 0; init_node_data[i].end_pfn; i++) {
+ unsigned long start_pfn = init_node_data[i].start_pfn;
+ unsigned long end_pfn = init_node_data[i].end_pfn;
+
+ if ((start_pfn <= pfn) && (pfn < end_pfn))
+ return init_node_data[i].nid;
+ }
+
+ return -1;
+}
+
+void __init add_region(unsigned int nid, unsigned long start_pfn,
+ unsigned long pages)
+{
+ unsigned int i;
+
+ dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
+ nid, start_pfn, pages);
+
+ for (i = 0; init_node_data[i].end_pfn; i++) {
+ if (init_node_data[i].nid != nid)
+ continue;
+ if (init_node_data[i].end_pfn == start_pfn) {
+ init_node_data[i].end_pfn += pages;
+ return;
+ }
+ if (init_node_data[i].start_pfn == (start_pfn + pages)) {
+ init_node_data[i].start_pfn -= pages;
+ return;
+ }
+ }
+
+ /*
+ * Leave last entry NULL so we dont iterate off the end (we use
+ * entry.end_pfn to terminate the walk).
+ */
+ if (i >= (MAX_REGIONS - 1)) {
+ printk(KERN_ERR "WARNING: too many memory regions in "
+ "numa code, truncating\n");
+ return;
+ }
+
+ init_node_data[i].start_pfn = start_pfn;
+ init_node_data[i].end_pfn = start_pfn + pages;
+ init_node_data[i].nid = nid;
+}
+
+/* We assume init_node_data has no overlapping regions */
+void __init get_region(unsigned int nid, unsigned long *start_pfn,
+ unsigned long *end_pfn, unsigned long *pages_present)
+{
+ unsigned int i;
+
+ *start_pfn = -1UL;
+ *end_pfn = *pages_present = 0;
+
+ for (i = 0; init_node_data[i].end_pfn; i++) {
+ if (init_node_data[i].nid != nid)
+ continue;
+
+ *pages_present += init_node_data[i].end_pfn -
+ init_node_data[i].start_pfn;
+
+ if (init_node_data[i].start_pfn < *start_pfn)
+ *start_pfn = init_node_data[i].start_pfn;
+
+ if (init_node_data[i].end_pfn > *end_pfn)
+ *end_pfn = init_node_data[i].end_pfn;
+ }
+
+ /* We didnt find a matching region, return start/end as 0 */
+ if (*start_pfn == -1UL)
+ start_pfn = 0;
+}
static inline void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
- if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) {
+
+ if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
cpu_set(cpu, numa_cpumask_lookup_table[node]);
- nr_cpus_in_node[node]++;
- }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -77,7 +145,6 @@ static void unmap_cpu_from_node(unsigned long cpu)
if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
cpu_clear(cpu, numa_cpumask_lookup_table[node]);
- nr_cpus_in_node[node]--;
} else {
printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
cpu, node);
@@ -85,7 +152,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static struct device_node * __devinit find_cpu_node(unsigned int cpu)
+static struct device_node *find_cpu_node(unsigned int cpu)
{
unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
struct device_node *cpu_node = NULL;
@@ -212,7 +279,7 @@ static int __init get_mem_size_cells(void)
return rc;
}
-static unsigned long read_n_cells(int n, unsigned int **buf)
+static unsigned long __init read_n_cells(int n, unsigned int **buf)
{
unsigned long result = 0;
@@ -294,7 +361,8 @@ static int cpu_numa_callback(struct notifier_block *nfb,
* or zero. If the returned value of size is 0 the region should be
* discarded as it lies wholy above the memory limit.
*/
-static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size)
+static unsigned long __init numa_enforce_memory_limit(unsigned long start,
+ unsigned long size)
{
/*
* We use lmb_end_of_DRAM() in here instead of memory_limit because
@@ -319,8 +387,7 @@ static int __init parse_numa_properties(void)
struct device_node *cpu = NULL;
struct device_node *memory = NULL;
int addr_cells, size_cells;
- int max_domain = 0;
- long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
+ int max_domain;
unsigned long i;
if (numa_enabled == 0) {
@@ -328,13 +395,6 @@ static int __init parse_numa_properties(void)
return -1;
}
- numa_memory_lookup_table =
- (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
- memset(numa_memory_lookup_table, 0, entries * sizeof(char));
-
- for (i = 0; i < entries ; i++)
- numa_memory_lookup_table[i] = ARRAY_INITIALISER;
-
min_common_depth = find_min_common_depth();
dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
@@ -386,9 +446,6 @@ new_range:
start = read_n_cells(addr_cells, &memcell_buf);
size = read_n_cells(size_cells, &memcell_buf);
- start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
- size = _ALIGN_UP(size, MEMORY_INCREMENT);
-
numa_domain = of_node_numa_domain(memory);
if (numa_domain >= MAX_NUMNODES) {
@@ -402,44 +459,15 @@ new_range:
if (max_domain < numa_domain)
max_domain = numa_domain;
- if (! (size = numa_enforce_memory_limit(start, size))) {
+ if (!(size = numa_enforce_memory_limit(start, size))) {
if (--ranges)
goto new_range;
else
continue;
}
- /*
- * Initialize new node struct, or add to an existing one.
- */
- if (init_node_data[numa_domain].node_end_pfn) {
- if ((start / PAGE_SIZE) <
- init_node_data[numa_domain].node_start_pfn)
- init_node_data[numa_domain].node_start_pfn =
- start / PAGE_SIZE;
- if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) >
- init_node_data[numa_domain].node_end_pfn)
- init_node_data[numa_domain].node_end_pfn =
- (start / PAGE_SIZE) +
- (size / PAGE_SIZE);
-
- init_node_data[numa_domain].node_present_pages +=
- size / PAGE_SIZE;
- } else {
- node_set_online(numa_domain);
-
- init_node_data[numa_domain].node_start_pfn =
- start / PAGE_SIZE;
- init_node_data[numa_domain].node_end_pfn =
- init_node_data[numa_domain].node_start_pfn +
- size / PAGE_SIZE;
- init_node_data[numa_domain].node_present_pages =
- size / PAGE_SIZE;
- }
-
- for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
- numa_domain;
+ add_region(numa_domain, start >> PAGE_SHIFT,
+ size >> PAGE_SHIFT);
if (--ranges)
goto new_range;
@@ -455,32 +483,15 @@ static void __init setup_nonnuma(void)
{
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
- unsigned long i;
printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram);
printk(KERN_INFO "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- if (!numa_memory_lookup_table) {
- long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
- numa_memory_lookup_table =
- (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
- memset(numa_memory_lookup_table, 0, entries * sizeof(char));
- for (i = 0; i < entries ; i++)
- numa_memory_lookup_table[i] = ARRAY_INITIALISER;
- }
-
map_cpu_to_node(boot_cpuid, 0);
-
+ add_region(0, 0, lmb_end_of_DRAM() >> PAGE_SHIFT);
node_set_online(0);
-
- init_node_data[0].node_start_pfn = 0;
- init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE;
- init_node_data[0].node_present_pages = total_ram / PAGE_SIZE;
-
- for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
}
static void __init dump_numa_topology(void)
@@ -498,8 +509,9 @@ static void __init dump_numa_topology(void)
count = 0;
- for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) {
- if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) {
+ for (i = 0; i < lmb_end_of_DRAM();
+ i += (1 << SECTION_SIZE_BITS)) {
+ if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
if (count == 0)
printk(" 0x%lx", i);
++count;
@@ -524,10 +536,12 @@ static void __init dump_numa_topology(void)
*
* Returns the physical address of the memory.
*/
-static unsigned long careful_allocation(int nid, unsigned long size,
- unsigned long align, unsigned long end)
+static void __init *careful_allocation(int nid, unsigned long size,
+ unsigned long align,
+ unsigned long end_pfn)
{
- unsigned long ret = lmb_alloc_base(size, align, end);
+ int new_nid;
+ unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
/* retry over all memory */
if (!ret)
@@ -541,28 +555,27 @@ static unsigned long careful_allocation(int nid, unsigned long size,
* If the memory came from a previously allocated node, we must
* retry with the bootmem allocator.
*/
- if (pa_to_nid(ret) < nid) {
- nid = pa_to_nid(ret);
- ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid),
+ new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+ if (new_nid < nid) {
+ ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
size, align, 0);
if (!ret)
panic("numa.c: cannot allocate %lu bytes on node %d",
- size, nid);
+ size, new_nid);
- ret = virt_to_abs(ret);
+ ret = __pa(ret);
dbg("alloc_bootmem %lx %lx\n", ret, size);
}
- return ret;
+ return (void *)ret;
}
void __init do_init_bootmem(void)
{
int nid;
- int addr_cells, size_cells;
- struct device_node *memory = NULL;
+ unsigned int i;
static struct notifier_block ppc64_numa_nb = {
.notifier_call = cpu_numa_callback,
.priority = 1 /* Must run before sched domains notifier. */
@@ -580,99 +593,66 @@ void __init do_init_bootmem(void)
register_cpu_notifier(&ppc64_numa_nb);
for_each_online_node(nid) {
- unsigned long start_paddr, end_paddr;
- int i;
+ unsigned long start_pfn, end_pfn, pages_present;
unsigned long bootmem_paddr;
unsigned long bootmap_pages;
- start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE;
- end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE;
+ get_region(nid, &start_pfn, &end_pfn, &pages_present);
/* Allocate the node structure node local if possible */
- NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid,
+ NODE_DATA(nid) = careful_allocation(nid,
sizeof(struct pglist_data),
- SMP_CACHE_BYTES, end_paddr);
- NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid));
+ SMP_CACHE_BYTES, end_pfn);
+ NODE_DATA(nid) = __va(NODE_DATA(nid));
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
dbg("node %d\n", nid);
dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
- NODE_DATA(nid)->node_start_pfn =
- init_node_data[nid].node_start_pfn;
- NODE_DATA(nid)->node_spanned_pages =
- end_paddr - start_paddr;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
if (NODE_DATA(nid)->node_spanned_pages == 0)
continue;
- dbg("start_paddr = %lx\n", start_paddr);
- dbg("end_paddr = %lx\n", end_paddr);
+ dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
+ dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
- bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT);
+ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+ bootmem_paddr = (unsigned long)careful_allocation(nid,
+ bootmap_pages << PAGE_SHIFT,
+ PAGE_SIZE, end_pfn);
+ memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
- bootmem_paddr = careful_allocation(nid,
- bootmap_pages << PAGE_SHIFT,
- PAGE_SIZE, end_paddr);
- memset(abs_to_virt(bootmem_paddr), 0,
- bootmap_pages << PAGE_SHIFT);
dbg("bootmap_paddr = %lx\n", bootmem_paddr);
init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
- start_paddr >> PAGE_SHIFT,
- end_paddr >> PAGE_SHIFT);
+ start_pfn, end_pfn);
- /*
- * We need to do another scan of all memory sections to
- * associate memory with the correct node.
- */
- addr_cells = get_mem_addr_cells();
- size_cells = get_mem_size_cells();
- memory = NULL;
- while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
- unsigned long mem_start, mem_size;
- int numa_domain, ranges;
- unsigned int *memcell_buf;
- unsigned int len;
-
- memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
- if (!memcell_buf || len <= 0)
- continue;
+ /* Add free regions on this node */
+ for (i = 0; init_node_data[i].end_pfn; i++) {
+ unsigned long start, end;
- ranges = memory->n_addrs; /* ranges in cell */
-new_range:
- mem_start = read_n_cells(addr_cells, &memcell_buf);
- mem_size = read_n_cells(size_cells, &memcell_buf);
- if (numa_enabled) {
- numa_domain = of_node_numa_domain(memory);
- if (numa_domain >= MAX_NUMNODES)
- numa_domain = 0;
- } else
- numa_domain = 0;
-
- if (numa_domain != nid)
+ if (init_node_data[i].nid != nid)
continue;
- mem_size = numa_enforce_memory_limit(mem_start, mem_size);
- if (mem_size) {
- dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
- free_bootmem_node(NODE_DATA(nid), mem_start, mem_size);
- }
+ start = init_node_data[i].start_pfn << PAGE_SHIFT;
+ end = init_node_data[i].end_pfn << PAGE_SHIFT;
- if (--ranges) /* process all ranges in cell */
- goto new_range;
+ dbg("free_bootmem %lx %lx\n", start, end - start);
+ free_bootmem_node(NODE_DATA(nid), start, end - start);
}
- /*
- * Mark reserved regions on this node
- */
+ /* Mark reserved regions on this node */
for (i = 0; i < lmb.reserved.cnt; i++) {
unsigned long physbase = lmb.reserved.region[i].base;
unsigned long size = lmb.reserved.region[i].size;
+ unsigned long start_paddr = start_pfn << PAGE_SHIFT;
+ unsigned long end_paddr = end_pfn << PAGE_SHIFT;
- if (pa_to_nid(physbase) != nid &&
- pa_to_nid(physbase+size-1) != nid)
+ if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
+ early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
continue;
if (physbase < end_paddr &&
@@ -692,46 +672,19 @@ new_range:
size);
}
}
- /*
- * This loop may look famaliar, but we have to do it again
- * after marking our reserved memory to mark memory present
- * for sparsemem.
- */
- addr_cells = get_mem_addr_cells();
- size_cells = get_mem_size_cells();
- memory = NULL;
- while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
- unsigned long mem_start, mem_size;
- int numa_domain, ranges;
- unsigned int *memcell_buf;
- unsigned int len;
-
- memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
- if (!memcell_buf || len <= 0)
- continue;
- ranges = memory->n_addrs; /* ranges in cell */
-new_range2:
- mem_start = read_n_cells(addr_cells, &memcell_buf);
- mem_size = read_n_cells(size_cells, &memcell_buf);
- if (numa_enabled) {
- numa_domain = of_node_numa_domain(memory);
- if (numa_domain >= MAX_NUMNODES)
- numa_domain = 0;
- } else
- numa_domain = 0;
-
- if (numa_domain != nid)
+ /* Add regions into sparsemem */
+ for (i = 0; init_node_data[i].end_pfn; i++) {
+ unsigned long start, end;
+
+ if (init_node_data[i].nid != nid)
continue;
- mem_size = numa_enforce_memory_limit(mem_start, mem_size);
- memory_present(numa_domain, mem_start >> PAGE_SHIFT,
- (mem_start + mem_size) >> PAGE_SHIFT);
+ start = init_node_data[i].start_pfn;
+ end = init_node_data[i].end_pfn;
- if (--ranges) /* process all ranges in cell */
- goto new_range2;
+ memory_present(nid, start, end);
}
-
}
}
@@ -745,21 +698,18 @@ void __init paging_init(void)
memset(zholes_size, 0, sizeof(zholes_size));
for_each_online_node(nid) {
- unsigned long start_pfn;
- unsigned long end_pfn;
+ unsigned long start_pfn, end_pfn, pages_present;
- start_pfn = init_node_data[nid].node_start_pfn;
- end_pfn = init_node_data[nid].node_end_pfn;
+ get_region(nid, &start_pfn, &end_pfn, &pages_present);
zones_size[ZONE_DMA] = end_pfn - start_pfn;
- zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
- init_node_data[nid].node_present_pages;
+ zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
- free_area_init_node(nid, NODE_DATA(nid), zones_size,
- start_pfn, zholes_size);
+ free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
+ zholes_size);
}
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a7820613..c7f7bb6f30b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -59,7 +59,6 @@
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
-#include <asm/ppcdebug.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
@@ -101,7 +100,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
@@ -117,27 +115,17 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
- unsigned long va, vpn, hash, hpteg;
-
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
+ *
*/
- vsid = get_kernel_vsid(ea);
- va = (vsid << 28) | (ea & 0xFFFFFFF);
- vpn = va >> PAGE_SHIFT;
-
- hash = hpt_hash(vpn, 0);
-
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- /* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
- HPTE_V_BOLTED,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
- == -1) {
- panic("map_io_page: could not insert mapping");
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_virtual_psize)) {
+ printk(KERN_ERR "Failed to do bolted mapping IO "
+ "memory at %016lx !\n", pa);
+ return -ENOMEM;
}
}
return 0;
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e..ed7fcfe5fd3 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -179,6 +179,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
}
/*
+ * Preload a translation in the hash table
+ */
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
+{
+ pmd_t *pmd;
+
+ if (Hash == 0)
+ return;
+ pmd = pmd_offset(pgd_offset(mm, ea), ea);
+ if (!pmd_none(*pmd))
+ add_hash_page(mm->context, ea, pmd_val(*pmd));
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a3..60e852f2f8e 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
-extern void slb_allocate(unsigned long ea);
+extern void slb_allocate_realmode(unsigned long ea);
+extern void slb_allocate_user(unsigned long ea);
+
+static void slb_allocate(unsigned long ea)
+{
+ /* Currently, we do real mode for all SLBs including user, but
+ * that will change if we bring back dynamic VSIDs
+ */
+ slb_allocate_realmode(ea);
+}
static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
@@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void)
{
/* If you change this make sure you change SLB_NUM_BOLTED
* appropriately too. */
- unsigned long ksp_flags = SLB_VSID_KERNEL;
+ unsigned long linear_llp, virtual_llp, lflags, vflags;
unsigned long ksp_esid_data;
WARN_ON(!irqs_disabled());
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- ksp_flags |= SLB_VSID_L;
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
@@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void)
/* Slot 2 - kernel stack */
"slbmte %2,%3\n"
"isync"
- :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
+ :: "r"(mk_vsid_data(VMALLOCBASE, vflags)),
"r"(mk_esid_data(VMALLOCBASE, 1)),
- "r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
+ "r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
}
@@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
get_paca()->slb_cache_ptr = 0;
get_paca()->context = mm->context;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
/*
* preload some userspace segments into the SLB.
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
slb_allocate(unmapped_base);
}
+static inline void patch_slb_encoding(unsigned int *insn_addr,
+ unsigned int immed)
+{
+ /* Assume the instruction had a "0" immediate value, just
+ * "or" in the new value
+ */
+ *insn_addr |= immed;
+ flush_icache_range((unsigned long)insn_addr, 4+
+ (unsigned long)insn_addr);
+}
+
void slb_initialize(void)
{
+ unsigned long linear_llp, virtual_llp;
+ static int slb_encoding_inited;
+ extern unsigned int *slb_miss_kernel_load_linear;
+ extern unsigned int *slb_miss_kernel_load_virtual;
+ extern unsigned int *slb_miss_user_load_normal;
+#ifdef CONFIG_HUGETLB_PAGE
+ extern unsigned int *slb_miss_user_load_huge;
+ unsigned long huge_llp;
+
+ huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
+#endif
+
+ /* Prepare our SLB miss handler based on our page size */
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+ if (!slb_encoding_inited) {
+ slb_encoding_inited = 1;
+ patch_slb_encoding(slb_miss_kernel_load_linear,
+ SLB_VSID_KERNEL | linear_llp);
+ patch_slb_encoding(slb_miss_kernel_load_virtual,
+ SLB_VSID_KERNEL | virtual_llp);
+ patch_slb_encoding(slb_miss_user_load_normal,
+ SLB_VSID_USER | virtual_llp);
+
+ DBG("SLB: linear LLP = %04x\n", linear_llp);
+ DBG("SLB: virtual LLP = %04x\n", virtual_llp);
+#ifdef CONFIG_HUGETLB_PAGE
+ patch_slb_encoding(slb_miss_user_load_huge,
+ SLB_VSID_USER | huge_llp);
+ DBG("SLB: huge LLP = %04x\n", huge_llp);
+#endif
+ }
+
/* On iSeries the bolted entries have already been set up by
* the hypervisor from the lparMap data in head.S */
#ifndef CONFIG_PPC_ISERIES
- unsigned long flags = SLB_VSID_KERNEL;
+ {
+ unsigned long lflags, vflags;
- /* Invalidate the entire SLB (even slot 0) & all the ERATS */
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- flags |= SLB_VSID_L;
+ lflags = SLB_VSID_KERNEL | linear_llp;
+ vflags = SLB_VSID_KERNEL | virtual_llp;
- asm volatile("isync":::"memory");
- asm volatile("slbmte %0,%0"::"r" (0) : "memory");
+ /* Invalidate the entire SLB (even slot 0) & all the ERATS */
+ asm volatile("isync":::"memory");
+ asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
- create_slbe(KERNELBASE, flags, 0);
- create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
+ create_slbe(KERNELBASE, lflags, 0);
+
+ /* VMALLOC space has 4K pages always for now */
+ create_slbe(VMALLOCBASE, vflags, 1);
+
/* We don't bolt the stack for the time being - we're in boot,
* so the stack is in the bolted segment. By the time it goes
* elsewhere, we'll call _switch() which will bolt in the new
* one. */
asm volatile("isync":::"memory");
-#endif
+ }
+#endif /* CONFIG_PPC_ISERIES */
get_paca()->stab_rr = SLB_NUM_BOLTED;
}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a3a03da503b..950ffc5848c 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -18,61 +18,28 @@
#include <linux/config.h>
#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
-/* void slb_allocate(unsigned long ea);
+/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate)
- /*
- * First find a slot, round robin. Previously we tried to find
- * a free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-#ifdef CONFIG_PPC_ISERIES
- /*
- * On iSeries, the "bolted" stack segment can be cast out on
- * shared processor switch so we need to check for a miss on
- * it and restore it to the right slot.
- */
- ld r9,PACAKSAVE(r13)
- clrrdi r9,r9,28
- clrrdi r11,r3,28
- li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
- cmpld r9,r11
- beq 3f
-#endif /* CONFIG_PPC_ISERIES */
-
- ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-3:
- /* r3 = faulting address, r10 = entry */
+_GLOBAL(slb_allocate_realmode)
+ /* r3 = faulting address */
srdi r9,r3,60 /* get region */
- srdi r3,r3,28 /* get esid */
+ srdi r10,r3,28 /* get esid */
cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
- rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
- oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
-
- /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
-
+ /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */
blt cr7,0f /* user or kernel? */
/* kernel address: proto-VSID = ESID */
@@ -81,43 +48,166 @@ _GLOBAL(slb_allocate)
* top segment. That's ok, the scramble below will translate
* it to VSID 0, which is reserved as a bad VSID - one which
* will never have any pages in it. */
- li r11,SLB_VSID_KERNEL
-BEGIN_FTR_SECTION
- bne cr7,9f
- li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
- b 9f
-0: /* user address: proto-VSID = context<<15 | ESID */
- srdi. r9,r3,USER_ESID_BITS
+ /* Check if hitting the linear mapping of the vmalloc/ioremap
+ * kernel space
+ */
+ bne cr7,1f
+
+ /* Linear mapping encoding bits, the "li" instruction below will
+ * be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_linear)
+ li r11,0
+ b slb_finish_load
+
+1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below
+ * will be patched by the kernel at boot
+ */
+_GLOBAL(slb_miss_kernel_load_virtual)
+ li r11,0
+ b slb_finish_load
+
+
+0: /* user address: proto-VSID = context << 15 | ESID. First check
+ * if the address is within the boundaries of the user region
+ */
+ srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
+ /* Figure out if the segment contains huge pages */
#ifdef CONFIG_HUGETLB_PAGE
BEGIN_FTR_SECTION
+ b 1f
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+ cmpldi r10,16
+
+ lhz r9,PACALOWHTLBAREAS(r13)
+ mr r11,r10
+ blt 5f
+
lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
- srd r9,r9,r11
- lhz r11,PACALOWHTLBAREAS(r13)
- srd r11,r11,r3
- or r9,r9,r11
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+ srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
+
+5: srd r9,r9,r11
+ andi. r9,r9,1
+ beq 1f
+_GLOBAL(slb_miss_user_load_huge)
+ li r11,0
+ b 2f
+1:
#endif /* CONFIG_HUGETLB_PAGE */
- li r11,SLB_VSID_USER
+_GLOBAL(slb_miss_user_load_normal)
+ li r11,0
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-#endif /* CONFIG_HUGETLB_PAGE */
+2:
+ ld r9,PACACONTEXTID(r13)
+ rldimi r10,r9,USER_ESID_BITS,0
+ b slb_finish_load
+8: /* invalid EA */
+ li r10,0 /* BAD_VSID */
+ li r11,SLB_VSID_USER /* flags don't much matter */
+ b slb_finish_load
+
+#ifdef __DISABLED__
+
+/* void slb_allocate_user(unsigned long ea);
+ *
+ * Create an SLB entry for the given EA (user or kernel).
+ * r3 = faulting address, r13 = PACA
+ * r9, r10, r11 are clobbered by this function
+ * No other registers are examined or changed.
+ *
+ * It is called with translation enabled in order to be able to walk the
+ * page tables. This is not currently used.
+ */
+_GLOBAL(slb_allocate_user)
+ /* r3 = faulting address */
+ srdi r10,r3,28 /* get esid */
+
+ crset 4*cr7+lt /* set "user" flag for later */
+
+ /* check if we fit in the range covered by the pagetables*/
+ srdi. r9,r3,PGTABLE_EADDR_SIZE
+ crnot 4*cr0+eq,4*cr0+eq
+ beqlr
+
+ /* now we need to get to the page tables in order to get the page
+ * size encoding from the PMD. In the future, we'll be able to deal
+ * with 1T segments too by getting the encoding from the PGD instead
+ */
+ ld r9,PACAPGDIR(r13)
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,8,25,28
+ ldx r9,r9,r11 /* get pgd_t */
+ cmpldi cr0,r9,0
+ beqlr
+ rlwinm r11,r10,3,17,28
+ ldx r9,r9,r11 /* get pmd_t */
+ cmpldi cr0,r9,0
+ beqlr
+
+ /* build vsid flags */
+ andi. r11,r9,SLB_VSID_LLP
+ ori r11,r11,SLB_VSID_USER
+
+ /* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
- rldimi r3,r9,USER_ESID_BITS,0
+ rldimi r10,r9,USER_ESID_BITS,0
+
+ /* fall through slb_finish_load */
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * Finish loading of an SLB entry and return
+ *
+ * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE
+ */
+slb_finish_load:
+ ASM_VSID_SCRAMBLE(r10,r9)
+ rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+
+ /* r3 = EA, r11 = VSID data */
+ /*
+ * Find a slot, round robin. Previously we tried to find a
+ * free slot first but that took too long. Unfortunately we
+ * dont have any LRU information to help us choose a slot.
+ */
+#ifdef CONFIG_PPC_ISERIES
+ /*
+ * On iSeries, the "bolted" stack segment can be cast out on
+ * shared processor switch so we need to check for a miss on
+ * it and restore it to the right slot.
+ */
+ ld r9,PACAKSAVE(r13)
+ clrrdi r9,r9,28
+ clrrdi r3,r3,28
+ li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
+ cmpld r9,r3
+ beq 3f
+#endif /* CONFIG_PPC_ISERIES */
+
+ ld r10,PACASTABRR(r13)
+ addi r10,r10,1
+ /* use a cpu feature mask if we ever change our slb size */
+ cmpldi r10,SLB_NUM_ENTRIES
+
+ blt+ 4f
+ li r10,SLB_NUM_BOLTED
-9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
- ASM_VSID_SCRAMBLE(r3,r9)
+4:
+ std r10,PACASTABRR(r13)
+
+3:
+ rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
+ oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
- rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
+ /* r3 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -125,7 +215,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
*/
slbmte r11,r10
- bgelr cr7 /* we're done for kernel addresses */
+ /* we're done for kernel addresses */
+ crclr 4*cr0+eq /* set result to "success" */
+ bgelr cr7
/* Update the slb cache */
lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
@@ -143,9 +235,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
li r3,SLB_CACHE_ENTRIES+1
2:
sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ crclr 4*cr0+eq /* set result to "success" */
blr
-8: /* invalid EA */
- li r3,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b 9b
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 1b83f002bf2..cfbb4e1f966 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -20,13 +20,13 @@
#include <asm/cputable.h>
#include <asm/lmb.h>
#include <asm/abs_addr.h>
+#include <asm/firmware.h>
struct stab_entry {
unsigned long esid_data;
unsigned long vsid_data;
};
-/* Both the segment table and SLB code uses the following cache */
#define NR_STAB_CACHE_ENTRIES 8
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -186,7 +186,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
/* Never flush the first entry. */
ste += 1;
for (entry = 1;
- entry < (PAGE_SIZE / sizeof(struct stab_entry));
+ entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
entry++, ste++) {
unsigned long ea;
ea = ste->esid_data & ESID_MASK;
@@ -200,6 +200,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
__get_cpu_var(stab_cache_ptr) = 0;
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = mm->pgd;
+#endif /* CONFIG_PPC_64K_PAGES */
+
/* Now preload some entries for the new task */
if (test_tsk_thread_flag(tsk, TIF_32BIT))
unmapped_base = TASK_UNMAPPED_BASE_USER32;
@@ -223,8 +227,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("sync" : : : "memory");
}
-extern void slb_initialize(void);
-
/*
* Allocate segment tables for secondary CPUs. These must all go in
* the first (bolted) segment, so that do_stab_bolted won't get a
@@ -243,18 +245,21 @@ void stabs_alloc(void)
if (cpu == 0)
continue; /* stab for CPU 0 is statically allocated */
- newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
+ newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+ 1<<SID_SHIFT);
if (! newstab)
panic("Unable to allocate segment table for CPU %d.\n",
cpu);
newstab += KERNELBASE;
- memset((void *)newstab, 0, PAGE_SIZE);
+ memset((void *)newstab, 0, HW_PAGE_SIZE);
paca[cpu].stab_addr = newstab;
paca[cpu].stab_real = virt_to_abs(newstab);
- printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
+ printk(KERN_INFO "Segment table for CPU %d at 0x%lx "
+ "virtual, 0x%lx absolute\n",
+ cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
}
}
@@ -266,14 +271,28 @@ void stabs_alloc(void)
void stab_initialize(unsigned long stab)
{
unsigned long vsid = get_kernel_vsid(KERNELBASE);
+ unsigned long stabreal;
- if (cpu_has_feature(CPU_FTR_SLB)) {
- slb_initialize();
- } else {
- asm volatile("isync; slbia; isync":::"memory");
- make_ste(stab, GET_ESID(KERNELBASE), vsid);
+ asm volatile("isync; slbia; isync":::"memory");
+ make_ste(stab, GET_ESID(KERNELBASE), vsid);
- /* Order update */
- asm volatile("sync":::"memory");
+ /* Order update */
+ asm volatile("sync":::"memory");
+
+ /* Set ASR */
+ stabreal = get_paca()->stab_real | 0x1ul;
+
+#ifdef CONFIG_PPC_ISERIES
+ if (firmware_has_feature(FW_FEATURE_ISERIES)) {
+ HvCall1(HvCallBaseSetASR, stabreal);
+ return;
+ }
+#endif /* CONFIG_PPC_ISERIES */
+#ifdef CONFIG_PPC_PSERIES
+ if (platform_is_lpar()) {
+ plpar_hcall_norets(H_SET_ASR, stabreal);
+ return;
}
+#endif
+ mtspr(SPRN_ASR, stabreal);
}
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 09ab81a10f4..53e31b834ac 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -21,6 +21,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -30,7 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include <linux/highmem.h>
+#include <asm/bug.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
* (if we remove it we should clear the _PTE_HPTEFLAGS bits).
*/
void hpte_update(struct mm_struct *mm, unsigned long addr,
- unsigned long pte, int wrprot)
+ pte_t *ptep, unsigned long pte, int huge)
{
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
unsigned long vsid;
+ unsigned int psize = mmu_virtual_psize;
int i;
i = batch->index;
+ /* We mask the address for the base page size. Huge pages will
+ * have applied their own masking already
+ */
+ addr &= PAGE_MASK;
+
+ /* Get page size (maybe move back to caller) */
+ if (huge) {
+#ifdef CONFIG_HUGETLB_PAGE
+ psize = mmu_huge_psize;
+#else
+ BUG();
+#endif
+ }
+
/*
* This can happen when we are in the middle of a TLB batch and
* we encounter memory pressure (eg copy_page_range when it tries
* to allocate a new pte). If we have to reclaim memory and end
* up scanning and resetting referenced bits then our batch context
* will change mid stream.
+ *
+ * We also need to ensure only one page size is present in a given
+ * batch
*/
- if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) {
+ if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
flush_tlb_pending();
i = 0;
}
if (i == 0) {
batch->mm = mm;
- batch->large = pte_huge(pte);
+ batch->psize = psize;
}
if (addr < KERNELBASE) {
vsid = get_vsid(mm->context.id, addr);
@@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
} else
vsid = get_kernel_vsid(addr);
batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff);
- batch->pte[i] = __pte(pte);
+ batch->pte[i] = __real_pte(__pte(pte), ptep);
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
flush_tlb_pending();
@@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
local = 1;
if (i == 1)
- flush_hash_page(batch->vaddr[0], batch->pte[0], local);
+ flush_hash_page(batch->vaddr[0], batch->pte[0],
+ batch->psize, local);
else
flush_hash_range(i, local);
batch->index = 0;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
index 19d37730b66..eb2dece76a5 100644
--- a/arch/powerpc/oprofile/Kconfig
+++ b/arch/powerpc/oprofile/Kconfig
@@ -1,7 +1,3 @@
-
-menu "Profiling support"
- depends on EXPERIMENTAL
-
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
@@ -19,5 +15,3 @@ config OPROFILE
If unsure, say N.
-endmenu
-
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c
index 86124a94c9a..26539cda602 100644
--- a/arch/powerpc/oprofile/op_model_fsl_booke.c
+++ b/arch/powerpc/oprofile/op_model_fsl_booke.c
@@ -7,7 +7,7 @@
* Copyright (c) 2004 Freescale Semiconductor, Inc
*
* Author: Andy Fleming
- * Maintainer: Kumar Gala <Kumar.Gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index 88644931584..a3401b46f3b 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -14,9 +14,9 @@
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
-#include <asm/systemcfg.h>
#include <asm/rtas.h>
#include <asm/oprofile_impl.h>
+#include <asm/reg.h>
#define dbg(args...)
@@ -81,6 +81,26 @@ static void power4_reg_setup(struct op_counter_config *ctr,
extern void ppc64_enable_pmcs(void);
+/*
+ * Older CPUs require the MMCRA sample bit to be always set, but newer
+ * CPUs only want it set for some groups. Eventually we will remove all
+ * knowledge of this bit in the kernel, oprofile userspace should be
+ * setting it when required.
+ *
+ * In order to keep current installations working we force the bit for
+ * those older CPUs. Once everyone has updated their oprofile userspace we
+ * can remove this hack.
+ */
+static inline int mmcra_must_set_sample(void)
+{
+ if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
+ __is_processor(PV_970) || __is_processor(PV_970FX) ||
+ __is_processor(PV_970MP))
+ return 1;
+
+ return 0;
+}
+
static void power4_cpu_setup(void *unused)
{
unsigned int mmcr0 = mmcr0_val;
@@ -98,7 +118,8 @@ static void power4_cpu_setup(void *unused)
mtspr(SPRN_MMCR1, mmcr1_val);
- mmcra |= MMCRA_SAMPLE_ENABLE;
+ if (mmcra_must_set_sample())
+ mmcra |= MMCRA_SAMPLE_ENABLE;
mtspr(SPRN_MMCRA, mmcra);
dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
@@ -211,8 +232,7 @@ static unsigned long get_pc(struct pt_regs *regs)
mmcra = mfspr(SPRN_MMCRA);
/* Were we in the hypervisor? */
- if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) &&
- (mmcra & MMCRA_SIHV))
+ if (platform_is_lpar() && (mmcra & MMCRA_SIHV))
/* function descriptor madness */
return *((unsigned long *)hypervisor_bucket);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index ecd32d5d85f..4099ddab920 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -361,7 +361,9 @@ static void __init chrp_find_openpic(void)
printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
- prom_get_irq_senses(init_senses, NUM_8259_INTERRUPTS, NR_IRQS - 4);
+ prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4);
+ /* i8259 cascade is always positive level */
+ init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE;
iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
if (iranges == NULL)
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c3374ca..30bdcf3925d 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot)
spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
}
-static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
- unsigned long arpn;
long slot;
hpte_t lhpte;
int secondary = 0;
+ BUG_ON(psize != MMU_PAGE_4K);
+
/*
* The hypervisor tries both primary and secondary.
* If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
iSeries_hlock(hpte_group);
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
- BUG_ON(lhpte.v & HPTE_V_VALID);
+ slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
+ if (unlikely(lhpte.v & HPTE_V_VALID)) {
+ if (vflags & HPTE_V_BOLTED) {
+ HvCallHpt_setSwBits(slot, 0x10, 0);
+ HvCallHpt_setPp(slot, PP_RWXX);
+ iSeries_hunlock(hpte_group);
+ if (slot < 0)
+ return 0x8 | (slot & 7);
+ else
+ return slot & 7;
+ }
+ BUG();
+ }
if (slot == -1) { /* No available entry found in either group */
iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
slot &= 0x7fffffffffffffff;
}
- arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
- lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+ lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
+ lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
/* Now fill in the actual HPTE */
HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
return (secondary << 3) | (slot & 7);
}
-long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
-{
- long slot;
- hpte_t lhpte;
-
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-
- if (lhpte.v & HPTE_V_VALID) {
- /* Bolt the existing HPTE */
- HvCallHpt_setSwBits(slot, 0x10, 0);
- HvCallHpt_setPp(slot, PP_RWXX);
- return 0;
- }
-
- return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
-}
-
static unsigned long iSeries_hpte_getword0(unsigned long slot)
{
hpte_t hpte;
@@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
* bits 61..63 : PP2,PP1,PP0
*/
static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+ unsigned long va, int psize, int local)
{
hpte_t hpte;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
iSeries_hlock(slot);
HvCallHpt_get(&hpte, slot);
- if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
+ want_v = hpte_encode_v(va, MMU_PAGE_4K);
+
+ if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
/*
* Hypervisor expects bits as NPPP, which is
* different from how they are mapped in our PP.
@@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn)
*
* No need to lock here because we should be the only user.
*/
-static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
unsigned long vsid,va,vpn;
long slot;
+ BUG_ON(psize != MMU_PAGE_4K);
+
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
+ vpn = va >> HW_PAGE_SHIFT;
slot = iSeries_hpte_find(vpn);
if (slot == -1)
panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
}
static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
unsigned long hpte_v;
unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec7347968..f476d71194f 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
while (len) {
hv_buf.addr = cur;
- left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
+ left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
if (left_this_page > len)
left_this_page = len;
hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len)
HvCall2(HvCallBaseWriteLogBuffer,
virt_to_abs(&hv_buf),
left_this_page);
- cur = (cur & PAGE_MASK) + PAGE_SIZE;
+ cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
}
}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b5c5a..bf081b34582 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
u64 rc;
union tce_entry tce;
+ index <<= TCE_PAGE_FACTOR;
+ npages <<= TCE_PAGE_FACTOR;
+
while (npages--) {
tce.te_word = 0;
- tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
+ tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
@@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
rc);
index++;
- uaddr += PAGE_SIZE;
+ uaddr += TCE_PAGE_SIZE;
}
}
@@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
{
u64 rc;
+ npages <<= TCE_PAGE_FACTOR;
+ index <<= TCE_PAGE_FACTOR;
+
while (npages--) {
rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
if (rc)
@@ -83,27 +89,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
}
}
-#ifdef CONFIG_PCI
-/*
- * This function compares the known tables to find an iommu_table
- * that has already been built for hardware TCEs.
- */
-static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
-{
- struct pci_dn *pdn;
-
- list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
- struct iommu_table *it = pdn->iommu_table;
- if ((it != NULL) &&
- (it->it_type == TCE_PCI) &&
- (it->it_offset == tbl->it_offset) &&
- (it->it_index == tbl->it_index) &&
- (it->it_size == tbl->it_size))
- return it;
- }
- return NULL;
-}
-
/*
* Call Hv with the architected data structure to get TCE table info.
* info. Put the returned data into the Linux representation of the
@@ -113,8 +98,10 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
* 2. TCE table per Bus.
* 3. TCE Table per IOA.
*/
-static void iommu_table_getparms(struct pci_dn *pdn,
- struct iommu_table* tbl)
+void iommu_table_getparms_iSeries(unsigned long busno,
+ unsigned char slotno,
+ unsigned char virtbus,
+ struct iommu_table* tbl)
{
struct iommu_table_cb *parms;
@@ -124,9 +111,9 @@ static void iommu_table_getparms(struct pci_dn *pdn,
memset(parms, 0, sizeof(*parms));
- parms->itc_busno = pdn->busno;
- parms->itc_slotno = pdn->LogicalSlot;
- parms->itc_virtbus = 0;
+ parms->itc_busno = busno;
+ parms->itc_slotno = slotno;
+ parms->itc_virtbus = virtbus;
HvCallXm_getTceTableParms(iseries_hv_addr(parms));
@@ -134,17 +121,40 @@ static void iommu_table_getparms(struct pci_dn *pdn,
panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
/* itc_size is in pages worth of table, it_size is in # of entries */
- tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
+ tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
+ sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
tbl->it_busno = parms->itc_busno;
- tbl->it_offset = parms->itc_offset;
+ tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
tbl->it_index = parms->itc_index;
tbl->it_blocksize = 1;
- tbl->it_type = TCE_PCI;
+ tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
kfree(parms);
}
+#ifdef CONFIG_PCI
+/*
+ * This function compares the known tables to find an iommu_table
+ * that has already been built for hardware TCEs.
+ */
+static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
+{
+ struct pci_dn *pdn;
+
+ list_for_each_entry(pdn, &iSeries_Global_Device_List, Device_List) {
+ struct iommu_table *it = pdn->iommu_table;
+ if ((it != NULL) &&
+ (it->it_type == TCE_PCI) &&
+ (it->it_offset == tbl->it_offset) &&
+ (it->it_index == tbl->it_index) &&
+ (it->it_size == tbl->it_size))
+ return it;
+ }
+ return NULL;
+}
+
+
void iommu_devnode_init_iSeries(struct device_node *dn)
{
struct iommu_table *tbl;
@@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn)
tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
- iommu_table_getparms(pdn, tbl);
+ iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
/* Look for existing tce table */
pdn->iommu_table = iommu_table_find(tbl);
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index c1135912cc0..a58daa15368 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -35,7 +35,6 @@
#include <linux/irq.h>
#include <linux/spinlock.h>
-#include <asm/ppcdebug.h>
#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/iseries/hv_call_xm.h>
@@ -43,13 +42,6 @@
#include "irq.h"
#include "call_pci.h"
-/* This maps virtual irq numbers to real irqs */
-unsigned int virt_irq_to_real_map[NR_IRQS];
-
-/* The next available virtual irq number */
-/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */
-static int next_virtual_irq = 2;
-
static long Pci_Interrupt_Count;
static long Pci_Event_Count;
@@ -104,6 +96,9 @@ static void intReceived(struct XmPciLpEvent *eventParm,
struct pt_regs *regsParm)
{
int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
++Pci_Interrupt_Count;
@@ -111,7 +106,20 @@ static void intReceived(struct XmPciLpEvent *eventParm,
case XmPciLpEvent_SlotInterrupt:
irq = eventParm->hvLpEvent.xCorrelationToken;
/* Dispatch the interrupt handlers for this irq */
- ppc_irq_dispatch_handler(regsParm, irq);
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regsParm, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regsParm);
HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber,
eventParm->eventData.slotInterrupt.subBusNumber,
eventParm->eventData.slotInterrupt.deviceId);
@@ -227,8 +235,6 @@ static void iSeries_enable_IRQ(unsigned int irq)
/* Unmask secondary INTA */
mask = 0x80000000;
HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask);
- PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
- bus, subBus, deviceId, irq);
}
/* This is called by iSeries_activate_IRQs */
@@ -310,15 +316,11 @@ static void iSeries_disable_IRQ(unsigned int irq)
/* Mask secondary INTA */
mask = 0x80000000;
HvCallPci_maskInterrupts(bus, subBus, deviceId, mask);
- PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
- bus, subBus, deviceId, irq);
}
/*
- * Need to define this so ppc_irq_dispatch_handler will NOT call
- * enable_IRQ at the end of interrupt handling. However, this does
- * nothing because there is not enough information provided to do
- * the EOI HvCall. This is done by XmPciLpEvent.c
+ * This does nothing because there is not enough information
+ * provided to do the EOI HvCall. This is done by XmPciLpEvent.c
*/
static void iSeries_end_IRQ(unsigned int irq)
{
@@ -341,26 +343,14 @@ static hw_irq_controller iSeries_IRQ_handler = {
int __init iSeries_allocate_IRQ(HvBusNumber busNumber,
HvSubBusNumber subBusNumber, HvAgentId deviceId)
{
- unsigned int realirq, virtirq;
+ int virtirq;
+ unsigned int realirq;
u8 idsel = (deviceId >> 4);
u8 function = deviceId & 7;
- virtirq = next_virtual_irq++;
realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function;
- virt_irq_to_real_map[virtirq] = realirq;
+ virtirq = virt_irq_create_mapping(realirq);
irq_desc[virtirq].handler = &iSeries_IRQ_handler;
return virtirq;
}
-
-int virt_irq_create_mapping(unsigned int real_irq)
-{
- BUG(); /* Don't call this on iSeries, yet */
-
- return 0;
-}
-
-void virt_irq_init(void)
-{
- return;
-}
diff --git a/arch/powerpc/platforms/iseries/misc.S b/arch/powerpc/platforms/iseries/misc.S
index 09f14522e17..dfe7aa1ba09 100644
--- a/arch/powerpc/platforms/iseries/misc.S
+++ b/arch/powerpc/platforms/iseries/misc.S
@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
.text
diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c
index 7d7d5884343..4b75131773a 100644
--- a/arch/powerpc/platforms/iseries/pci.c
+++ b/arch/powerpc/platforms/iseries/pci.c
@@ -32,7 +32,6 @@
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/abs_addr.h>
@@ -207,10 +206,6 @@ static struct device_node *build_device_node(HvBusNumber Bus,
struct device_node *node;
struct pci_dn *pdn;
- PPCDBG(PPCDBG_BUSWALK,
- "-build_device_node 0x%02X.%02X.%02X Function: %02X\n",
- Bus, SubBus, AgentId, Function);
-
node = kmalloc(sizeof(struct device_node), GFP_KERNEL);
if (node == NULL)
return NULL;
@@ -243,8 +238,6 @@ unsigned long __init find_and_init_phbs(void)
struct pci_controller *phb;
HvBusNumber bus;
- PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n");
-
/* Check all possible buses. */
for (bus = 0; bus < 256; bus++) {
int ret = HvCallXm_testBus(bus);
@@ -261,9 +254,6 @@ unsigned long __init find_and_init_phbs(void)
phb->last_busno = bus;
phb->ops = &iSeries_pci_ops;
- PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n",
- phb, bus);
-
/* Find and connect the devices. */
scan_PHB_slots(phb);
}
@@ -285,11 +275,9 @@ unsigned long __init find_and_init_phbs(void)
*/
void iSeries_pcibios_init(void)
{
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n");
iomm_table_initialize();
find_and_init_phbs();
io_page_mask = -1;
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n");
}
/*
@@ -301,8 +289,6 @@ void __init iSeries_pci_final_fixup(void)
struct device_node *node;
int DeviceCount = 0;
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n");
-
/* Fix up at the device node and pci_dev relationship */
mf_display_src(0xC9000100);
@@ -316,9 +302,6 @@ void __init iSeries_pci_final_fixup(void)
++DeviceCount;
pdev->sysdata = (void *)node;
PCI_DN(node)->pcidev = pdev;
- PPCDBG(PPCDBG_BUSWALK,
- "pdev 0x%p <==> DevNode 0x%p\n",
- pdev, node);
allocate_device_bars(pdev);
iSeries_Device_Information(pdev, DeviceCount);
iommu_devnode_init_iSeries(node);
@@ -333,13 +316,10 @@ void __init iSeries_pci_final_fixup(void)
void pcibios_fixup_bus(struct pci_bus *PciBus)
{
- PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",
- PciBus->number);
}
void pcibios_fixup_resources(struct pci_dev *pdev)
{
- PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev);
}
/*
@@ -401,9 +381,6 @@ static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
printk("found device at bus %d idsel %d func %d (AgentId %x)\n",
bus, IdSel, Function, AgentId);
/* Connect EADs: 0x18.00.12 = 0x00 */
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:Connect EADs: 0x%02X.%02X.%02X\n",
- bus, SubBus, AgentId);
HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId,
iseries_hv_addr(BridgeInfo),
sizeof(struct HvCallPci_BridgeInfo));
@@ -414,14 +391,6 @@ static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
BridgeInfo->maxAgents,
BridgeInfo->maxSubBusNumber,
BridgeInfo->logicalSlotNumber);
- PPCDBG(PPCDBG_BUSWALK,
- "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n",
- BridgeInfo->busUnitInfo.deviceType,
- BridgeInfo->subBusNumber,
- BridgeInfo->maxAgents,
- BridgeInfo->maxSubBusNumber,
- BridgeInfo->logicalSlotNumber);
-
if (BridgeInfo->busUnitInfo.deviceType ==
HvCallPci_BridgeDevice) {
/* Scan_Bridge_Slot...: 0x18.00.12 */
@@ -454,9 +423,6 @@ static int scan_bridge_slot(HvBusNumber Bus,
/* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */
Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel);
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n",
- Bus, 0, EADsIdSel, Irq);
/*
* Connect all functions of any device found.
@@ -482,9 +448,6 @@ static int scan_bridge_slot(HvBusNumber Bus,
printk("read vendor ID: %x\n", VendorId);
/* FoundDevice: 0x18.28.10 = 0x12AE */
- PPCDBG(PPCDBG_BUSWALK,
- "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n",
- Bus, SubBus, AgentId, VendorId, Irq);
HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId,
PCI_INTERRUPT_LINE, Irq);
if (HvRc != 0)
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fda712b4216..da26639190d 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -39,7 +39,7 @@
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/firmware.h>
-
+#include <asm/system.h>
#include <asm/time.h>
#include <asm/paca.h>
#include <asm/cache.h>
@@ -71,9 +71,7 @@ extern void hvlog(char *fmt, ...);
#endif
/* Function Prototypes */
-extern void ppcdbg_initialize(void);
-
-static void build_iSeries_Memory_Map(void);
+static unsigned long build_iSeries_Memory_Map(void);
static void iseries_shared_idle(void);
static void iseries_dedicated_idle(void);
#ifdef CONFIG_PCI
@@ -86,7 +84,6 @@ static void iSeries_pci_final_fixup(void) { }
int piranha_simulator;
extern int rd_size; /* Defined in drivers/block/rd.c */
-extern unsigned long klimit;
extern unsigned long embedded_sysmap_start;
extern unsigned long embedded_sysmap_end;
@@ -309,8 +306,6 @@ static void __init iSeries_init_early(void)
ppc64_firmware_features = FW_FEATURE_ISERIES;
- ppcdbg_initialize();
-
ppc64_interrupt_controller = IC_ISERIES;
#if defined(CONFIG_BLK_DEV_INITRD)
@@ -320,11 +315,11 @@ static void __init iSeries_init_early(void)
*/
if (naca.xRamDisk) {
initrd_start = (unsigned long)__va(naca.xRamDisk);
- initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
+ initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
initrd_below_start_ok = 1; // ramdisk in kernel space
ROOT_DEV = Root_RAM0;
- if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
- rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
+ if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
+ rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
} else
#endif /* CONFIG_BLK_DEV_INITRD */
{
@@ -407,9 +402,11 @@ void mschunks_alloc(unsigned long num_chunks)
* a table used to translate Linux's physical addresses to these
* absolute addresses. Absolute addresses are needed when
* communicating with the hypervisor (e.g. to build HPT entries)
+ *
+ * Returns the physical memory size
*/
-static void __init build_iSeries_Memory_Map(void)
+static unsigned long __init build_iSeries_Memory_Map(void)
{
u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize;
u32 nextPhysChunk;
@@ -470,13 +467,14 @@ static void __init build_iSeries_Memory_Map(void)
*/
hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
hptSizePages = (u32)HvCallHpt_getHptPages();
- hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
+ hptSizeChunks = hptSizePages >>
+ (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
printk("HPT absolute addr = %016lx, size = %dK\n",
chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
- ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE);
+ ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
/*
* The actual hashed page table is in the hypervisor,
@@ -541,7 +539,7 @@ static void __init build_iSeries_Memory_Map(void)
* which should be equal to
* nextPhysChunk
*/
- systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk);
+ return chunk_to_addr(nextPhysChunk);
}
/*
@@ -549,8 +547,6 @@ static void __init build_iSeries_Memory_Map(void)
*/
static void __init iSeries_setup_arch(void)
{
- unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
-
if (get_paca()->lppaca.shared_proc) {
ppc_md.idle_loop = iseries_shared_idle;
printk(KERN_INFO "Using shared processor idle loop\n");
@@ -566,9 +562,6 @@ static void __init iSeries_setup_arch(void)
itVpdAreas.xSlicMaxLogicalProcs);
printk("Max physical processors = %d\n",
itVpdAreas.xSlicMaxPhysicalProcs);
-
- systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR;
- printk("Processor version = %x\n", systemcfg->processor);
}
static void iSeries_show_cpuinfo(struct seq_file *m)
@@ -629,7 +622,7 @@ static void __init iSeries_fixup_klimit(void)
*/
if (naca.xRamDisk)
klimit = KERNELBASE + (u64)naca.xRamDisk +
- (naca.xRamDiskSize * PAGE_SIZE);
+ (naca.xRamDiskSize * HW_PAGE_SIZE);
else {
/*
* No ram disk was included - check and see if there
@@ -697,20 +690,18 @@ static void iseries_shared_idle(void)
if (hvlpevent_is_pending())
process_iSeries_events();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
static void iseries_dedicated_idle(void)
{
- long oldval;
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
while (!need_resched()) {
ppc64_runlatch_off();
HMT_low();
@@ -723,13 +714,12 @@ static void iseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
@@ -934,7 +924,7 @@ void dt_cpus(struct iseries_flat_dt *dt)
dt_end_node(dt);
}
-void build_flat_dt(struct iseries_flat_dt *dt)
+void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size)
{
u64 tmp[2];
@@ -950,7 +940,7 @@ void build_flat_dt(struct iseries_flat_dt *dt)
dt_prop_str(dt, "name", "memory");
dt_prop_str(dt, "device_type", "memory");
tmp[0] = 0;
- tmp[1] = systemcfg->physicalMemorySize;
+ tmp[1] = phys_mem_size;
dt_prop_u64_list(dt, "reg", tmp, 2);
dt_end_node(dt);
@@ -970,13 +960,15 @@ void build_flat_dt(struct iseries_flat_dt *dt)
void * __init iSeries_early_setup(void)
{
+ unsigned long phys_mem_size;
+
iSeries_fixup_klimit();
/*
* Initialize the table which translate Linux physical addresses to
* AS/400 absolute addresses
*/
- build_iSeries_Memory_Map();
+ phys_mem_size = build_iSeries_Memory_Map();
iSeries_get_cmdline();
@@ -986,7 +978,7 @@ void * __init iSeries_early_setup(void)
/* Parse early parameters, in particular mem=x */
parse_early_param();
- build_flat_dt(&iseries_dt);
+ build_flat_dt(&iseries_dt, phys_mem_size);
return (void *) __pa(&iseries_dt);
}
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 3336bad6772..fcb094ec6ae 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -40,7 +40,6 @@
#include <asm/paca.h>
#include <asm/iseries/hv_call.h>
#include <asm/time.h>
-#include <asm/ppcdebug.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/system.h>
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a66876c2..384360ee06e 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table;
static void __init iommu_vio_init(void)
{
- struct iommu_table *t;
- struct iommu_table_cb cb;
- unsigned long cbp;
- unsigned long itc_entries;
+ iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
+ veth_iommu_table.it_size /= 2;
+ vio_iommu_table = veth_iommu_table;
+ vio_iommu_table.it_offset += veth_iommu_table.it_size;
- cb.itc_busno = 255; /* Bus 255 is the virtual bus */
- cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
- cbp = virt_to_abs(&cb);
- HvCallXm_getTceTableParms(cbp);
-
- itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
- veth_iommu_table.it_size = itc_entries / 2;
- veth_iommu_table.it_busno = cb.itc_busno;
- veth_iommu_table.it_offset = cb.itc_offset;
- veth_iommu_table.it_index = cb.itc_index;
- veth_iommu_table.it_type = TCE_VB;
- veth_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&veth_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&veth_iommu_table))
printk("Virtual Bus VETH TCE table failed.\n");
-
- vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
- vio_iommu_table.it_busno = cb.itc_busno;
- vio_iommu_table.it_offset = cb.itc_offset +
- veth_iommu_table.it_size;
- vio_iommu_table.it_index = cb.itc_index;
- vio_iommu_table.it_type = TCE_VB;
- vio_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&vio_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&vio_iommu_table))
printk("Virtual Bus VIO TCE table failed.\n");
}
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfbf746..84267269559 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock);
* For each kind of event we allocate a buffer that is
* guaranteed not to cross a page boundary
*/
-static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
+static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
+ __attribute__((__aligned__(4096)));
static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
static int event_buffer_initialised;
@@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v)
HvLpEvent_Rc hvrc;
DECLARE_MUTEX_LOCKED(Semaphore);
- buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
if (!buf)
return 0;
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, HW_PAGE_SIZE);
- handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
+ handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
viopath_sourceinst(viopath_hostLp),
viopath_targetinst(viopath_hostLp),
(u64)(unsigned long)&Semaphore, VIOVERSION << 16,
- ((u64)handle) << 32, PAGE_SIZE, 0, 0);
+ ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
if (hvrc != HvLpEvent_Rc_Good)
printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v)
vlanMap = HvLpConfig_getVirtualLanIndexMap();
- buf[PAGE_SIZE-1] = '\0';
+ buf[HW_PAGE_SIZE-1] = '\0';
seq_printf(m, "%s", buf);
seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v)
e2a(xItExtVpdPanel.systemSerial[4]),
e2a(xItExtVpdPanel.systemSerial[5]));
- dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
+ DMA_FROM_DEVICE);
kfree(buf);
return 0;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 340c21caeae..895aeb3f75d 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -380,9 +380,6 @@ void __init maple_pcibios_fixup(void)
for_each_pci_dev(dev)
pci_read_irq_line(dev);
- /* Do the mapping of the IO space */
- phbs_remap_io();
-
DBG(" <- maple_pcibios_fixup\n");
}
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4369676f1d5..c9df44fcf57 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,7 +1,8 @@
obj-y += pic.o setup.o time.o feature.o pci.o \
sleep.o low_i2c.o cache.o
obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
-obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o
+obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o
obj-$(CONFIG_NVRAM) += nvram.o
# ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff
obj-$(CONFIG_PPC64) += nvram.o
diff --git a/arch/powerpc/platforms/powermac/cpufreq.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index c47f8b69725..56fd4e05fed 100644
--- a/arch/powerpc/platforms/powermac/cpufreq.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -397,18 +397,16 @@ static int pmac_cpufreq_target( struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int newstate = 0;
+ int rc;
if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
target_freq, relation, &newstate))
return -EINVAL;
- return do_set_cpu_speed(newstate, 1);
-}
+ rc = do_set_cpu_speed(newstate, 1);
-unsigned int pmac_get_one_cpufreq(int i)
-{
- /* Supports only one CPU for now */
- return (i == 0) ? cur_freq : 0;
+ ppc_proc_freq = cur_freq * 1000ul;
+ return rc;
}
static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -474,6 +472,8 @@ static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
do_set_cpu_speed(sleep_freq == low_freq ?
CPUFREQ_LOW : CPUFREQ_HIGH, 0);
+ ppc_proc_freq = cur_freq * 1000ul;
+
no_schedule = 0;
return 0;
}
@@ -547,7 +547,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
*/
if (low_freq < 98000000)
low_freq = 101000000;
-
+
/* Convert those to CPU core clocks */
low_freq = (low_freq * (*ratio)) / 2000;
hi_freq = (hi_freq * (*ratio)) / 2000;
@@ -714,6 +714,7 @@ out:
pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
+ ppc_proc_freq = cur_freq * 1000ul;
printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
new file mode 100644
index 00000000000..39150342c6f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs,
+ * that is iMac G5 and latest single CPU desktop.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/smu.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* see 970FX user manual */
+
+#define SCOM_PCR 0x0aa001 /* PCR scom addr */
+
+#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */
+#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */
+#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */
+#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */
+#define PCR_SPEED_MASK 0x000e0000U /* speed mask */
+#define PCR_SPEED_SHIFT 17
+#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */
+#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */
+#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */
+#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */
+#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */
+#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */
+
+#define SCOM_PSR 0x408001 /* PSR scom addr */
+/* warning: PSR is a 64 bits register */
+#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */
+#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */
+#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */
+#define PSR_CUR_SPEED_SHIFT (56)
+
+/*
+ * The G5 only supports two frequencies (Quarter speed is not supported)
+ */
+#define CPUFREQ_HIGH 0
+#define CPUFREQ_LOW 1
+
+static struct cpufreq_frequency_table g5_cpu_freqs[] = {
+ {CPUFREQ_HIGH, 0},
+ {CPUFREQ_LOW, 0},
+ {0, CPUFREQ_TABLE_END},
+};
+
+static struct freq_attr* g5_cpu_freqs_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+};
+
+/* Power mode data is an array of the 32 bits PCR values to use for
+ * the various frequencies, retreived from the device-tree
+ */
+static u32 *g5_pmode_data;
+static int g5_pmode_max;
+static int g5_pmode_cur;
+
+static DECLARE_MUTEX(g5_switch_mutex);
+
+
+static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */
+static int g5_fvt_count; /* number of op. points */
+static int g5_fvt_cur; /* current op. point */
+
+/* ----------------- real hardware interface */
+
+static void g5_switch_volt(int speed_mode)
+{
+ struct smu_simple_cmd cmd;
+
+ DECLARE_COMPLETION(comp);
+ smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete,
+ &comp, 'V', 'S', 'L', 'E', 'W',
+ 0xff, g5_fvt_cur+1, speed_mode);
+ wait_for_completion(&comp);
+}
+
+static int g5_switch_freq(int speed_mode)
+{
+ struct cpufreq_freqs freqs;
+ int to;
+
+ if (g5_pmode_cur == speed_mode)
+ return 0;
+
+ down(&g5_switch_mutex);
+
+ freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
+ freqs.new = g5_cpu_freqs[speed_mode].frequency;
+ freqs.cpu = 0;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+ /* If frequency is going up, first ramp up the voltage */
+ if (speed_mode < g5_pmode_cur)
+ g5_switch_volt(speed_mode);
+
+ /* Clear PCR high */
+ scom970_write(SCOM_PCR, 0);
+ /* Clear PCR low */
+ scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
+ /* Set PCR low */
+ scom970_write(SCOM_PCR, PCR_HILO_SELECT |
+ g5_pmode_data[speed_mode]);
+
+ /* Wait for completion */
+ for (to = 0; to < 10; to++) {
+ unsigned long psr = scom970_read(SCOM_PSR);
+
+ if ((psr & PSR_CMD_RECEIVED) == 0 &&
+ (((psr >> PSR_CUR_SPEED_SHIFT) ^
+ (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
+ == 0)
+ break;
+ if (psr & PSR_CMD_COMPLETED)
+ break;
+ udelay(100);
+ }
+
+ /* If frequency is going down, last ramp the voltage */
+ if (speed_mode > g5_pmode_cur)
+ g5_switch_volt(speed_mode);
+
+ g5_pmode_cur = speed_mode;
+ ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
+
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ up(&g5_switch_mutex);
+
+ return 0;
+}
+
+static int g5_query_freq(void)
+{
+ unsigned long psr = scom970_read(SCOM_PSR);
+ int i;
+
+ for (i = 0; i <= g5_pmode_max; i++)
+ if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
+ (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
+ break;
+ return i;
+}
+
+/* ----------------- cpufreq bookkeeping */
+
+static int g5_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ return cpufreq_frequency_table_verify(policy, g5_cpu_freqs);
+}
+
+static int g5_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq, unsigned int relation)
+{
+ unsigned int newstate = 0;
+
+ if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
+ target_freq, relation, &newstate))
+ return -EINVAL;
+
+ return g5_switch_freq(newstate);
+}
+
+static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
+{
+ return g5_cpu_freqs[g5_pmode_cur].frequency;
+}
+
+static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ if (policy->cpu != 0)
+ return -ENODEV;
+
+ policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cur = g5_cpu_freqs[g5_query_freq()].frequency;
+ cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
+
+ return cpufreq_frequency_table_cpuinfo(policy,
+ g5_cpu_freqs);
+}
+
+
+static struct cpufreq_driver g5_cpufreq_driver = {
+ .name = "powermac",
+ .owner = THIS_MODULE,
+ .flags = CPUFREQ_CONST_LOOPS,
+ .init = g5_cpufreq_cpu_init,
+ .verify = g5_cpufreq_verify,
+ .target = g5_cpufreq_target,
+ .get = g5_cpufreq_get_speed,
+ .attr = g5_cpu_freqs_attr,
+};
+
+
+static int __init g5_cpufreq_init(void)
+{
+ struct device_node *cpunode;
+ unsigned int psize, ssize;
+ struct smu_sdbp_header *shdr;
+ unsigned long max_freq;
+ u32 *valp;
+ int rc = -ENODEV;
+
+ /* Look for CPU and SMU nodes */
+ cpunode = of_find_node_by_type(NULL, "cpu");
+ if (!cpunode) {
+ DBG("No CPU node !\n");
+ return -ENODEV;
+ }
+
+ /* Check 970FX for now */
+ valp = (u32 *)get_property(cpunode, "cpu-version", NULL);
+ if (!valp) {
+ DBG("No cpu-version property !\n");
+ goto bail_noprops;
+ }
+ if (((*valp) >> 16) != 0x3c) {
+ DBG("Wrong CPU version: %08x\n", *valp);
+ goto bail_noprops;
+ }
+
+ /* Look for the powertune data in the device-tree */
+ g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize);
+ if (!g5_pmode_data) {
+ DBG("No power-mode-data !\n");
+ goto bail_noprops;
+ }
+ g5_pmode_max = psize / sizeof(u32) - 1;
+
+ /* Look for the FVT table */
+ shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
+ if (!shdr)
+ goto bail_noprops;
+ g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1];
+ ssize = (shdr->len * sizeof(u32)) - sizeof(struct smu_sdbp_header);
+ g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt);
+ g5_fvt_cur = 0;
+
+ /* Sanity checking */
+ if (g5_fvt_count < 1 || g5_pmode_max < 1)
+ goto bail_noprops;
+
+ /*
+ * From what I see, clock-frequency is always the maximal frequency.
+ * The current driver can not slew sysclk yet, so we really only deal
+ * with powertune steps for now. We also only implement full freq and
+ * half freq in this version. So far, I haven't yet seen a machine
+ * supporting anything else.
+ */
+ valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+ if (!valp)
+ return -ENODEV;
+ max_freq = (*valp)/1000;
+ g5_cpu_freqs[0].frequency = max_freq;
+ g5_cpu_freqs[1].frequency = max_freq/2;
+
+ /* Check current frequency */
+ g5_pmode_cur = g5_query_freq();
+ if (g5_pmode_cur > 1)
+ /* We don't support anything but 1:1 and 1:2, fixup ... */
+ g5_pmode_cur = 1;
+
+ /* Force apply current frequency to make sure everything is in
+ * sync (voltage is right for example). Firmware may leave us with
+ * a strange setting ...
+ */
+ g5_switch_freq(g5_pmode_cur);
+
+ printk(KERN_INFO "Registering G5 CPU frequency driver\n");
+ printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+ g5_cpu_freqs[1].frequency/1000,
+ g5_cpu_freqs[0].frequency/1000,
+ g5_cpu_freqs[g5_pmode_cur].frequency/1000);
+
+ rc = cpufreq_register_driver(&g5_cpufreq_driver);
+
+ /* We keep the CPU node on hold... hopefully, Apple G5 don't have
+ * hotplug CPU with a dynamic device-tree ...
+ */
+ return rc;
+
+ bail_noprops:
+ of_node_put(cpunode);
+
+ return rc;
+}
+
+module_init(g5_cpufreq_init);
+
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 8f818d092e2..dfd41b9781a 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -918,9 +918,6 @@ void __init pmac_pci_init(void)
PCI_DN(np)->busno = 0xf0;
}
- /* map in PCI I/O space */
- phbs_remap_io();
-
/* pmac_check_ht_link(); */
/* Tell pci.c to not use the common resource allocation mechanism */
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 83a49e80ac2..90040c49494 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -74,6 +74,9 @@ static DEFINE_SPINLOCK(pmac_pic_lock);
#define GATWICK_IRQ_POOL_SIZE 10
static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
+
/*
* Mark an irq as "lost". This is only used on the pmac
* since it can lose interrupts (see pmac_set_irq_mask).
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 80b58c1ec41..7acb0546671 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -193,18 +193,6 @@ static void pmac_show_cpuinfo(struct seq_file *m)
pmac_newworld ? "NewWorld" : "OldWorld");
}
-static void pmac_show_percpuinfo(struct seq_file *m, int i)
-{
-#ifdef CONFIG_CPU_FREQ_PMAC
- extern unsigned int pmac_get_one_cpufreq(int i);
- unsigned int freq = pmac_get_one_cpufreq(i);
- if (freq != 0) {
- seq_printf(m, "clock\t\t: %dMHz\n", freq/1000);
- return;
- }
-#endif /* CONFIG_CPU_FREQ_PMAC */
-}
-
#ifndef CONFIG_ADB_CUDA
int find_via_cuda(void)
{
@@ -767,7 +755,6 @@ struct machdep_calls __initdata pmac_md = {
.setup_arch = pmac_setup_arch,
.init_early = pmac_init_early,
.show_cpuinfo = pmac_show_cpuinfo,
- .show_percpuinfo = pmac_show_percpuinfo,
.init_IRQ = pmac_pic_init,
.get_irq = mpic_get_irq, /* changed later */
.pcibios_fixup = pmac_pcibios_fixup,
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index e1f9443cc87..957b0910342 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -305,9 +305,19 @@ static int __init smp_psurge_probe(void)
psurge_start = ioremap(PSURGE_START, 4);
psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
- /* this is not actually strictly necessary -- paulus. */
- for (i = 1; i < ncpus; ++i)
- smp_hw_index[i] = i;
+ /*
+ * This is necessary because OF doesn't know about the
+ * secondary cpu(s), and thus there aren't nodes in the
+ * device tree for them, and smp_setup_cpu_maps hasn't
+ * set their bits in cpu_possible_map and cpu_present_map.
+ */
+ if (ncpus > NR_CPUS)
+ ncpus = NR_CPUS;
+ for (i = 1; i < ncpus ; ++i) {
+ cpu_set(i, cpu_present_map);
+ cpu_set(i, cpu_possible_map);
+ set_hard_smp_processor_id(i, i);
+ }
if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
@@ -348,6 +358,7 @@ static void __init psurge_dual_sync_tb(int cpu_nr)
int t;
set_dec(tb_ticks_per_jiffy);
+ /* XXX fixme */
set_tb(0, 0);
last_jiffy_stamp(cpu_nr) = 0;
@@ -363,8 +374,6 @@ static void __init psurge_dual_sync_tb(int cpu_nr)
/* now interrupt the secondary, starting both TBs */
psurge_set_ipi(1);
-
- smp_tb_synchronized = 1;
}
static struct irqaction psurge_irqaction = {
@@ -625,9 +634,8 @@ void smp_core99_give_timebase(void)
for (t = 100000; t > 0 && sec_tb_reset; --t)
udelay(10);
if (sec_tb_reset)
+ /* XXX BUG_ON here? */
printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n");
- else
- smp_tb_synchronized = 1;
/* Now, restart the timebase by leaving the GPIO to an open collector */
pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0);
@@ -810,19 +818,9 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
}
-/* Core99 Macs (dual G4s and G5s) */
-struct smp_ops_t core99_smp_ops = {
- .message_pass = smp_mpic_message_pass,
- .probe = smp_core99_probe,
- .kick_cpu = smp_core99_kick_cpu,
- .setup_cpu = smp_core99_setup_cpu,
- .give_timebase = smp_core99_give_timebase,
- .take_timebase = smp_core99_take_timebase,
-};
-
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)
-int __cpu_disable(void)
+int smp_core99_cpu_disable(void)
{
cpu_clear(smp_processor_id(), cpu_online_map);
@@ -846,7 +844,7 @@ void cpu_die(void)
low_cpu_die();
}
-void __cpu_die(unsigned int cpu)
+void smp_core99_cpu_die(unsigned int cpu)
{
int timeout;
@@ -858,8 +856,21 @@ void __cpu_die(unsigned int cpu)
}
msleep(1);
}
- cpu_callin_map[cpu] = 0;
cpu_dead[cpu] = 0;
}
#endif
+
+/* Core99 Macs (dual G4s and G5s) */
+struct smp_ops_t core99_smp_ops = {
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_core99_probe,
+ .kick_cpu = smp_core99_kick_cpu,
+ .setup_cpu = smp_core99_setup_cpu,
+ .give_timebase = smp_core99_give_timebase,
+ .take_timebase = smp_core99_take_timebase,
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)
+ .cpu_disable = smp_core99_cpu_disable,
+ .cpu_die = smp_core99_cpu_die,
+#endif
+};
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 5947b21a858..feb0a94e781 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -102,7 +102,7 @@ static unsigned long from_rtc_time(struct rtc_time *tm)
static unsigned long cuda_get_time(void)
{
struct adb_request req;
- unsigned long now;
+ unsigned int now;
if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
return 0;
@@ -113,7 +113,7 @@ static unsigned long cuda_get_time(void)
req.reply_len);
now = (req.reply[3] << 24) + (req.reply[4] << 16)
+ (req.reply[5] << 8) + req.reply[6];
- return now - RTC_OFFSET;
+ return ((unsigned long)now) - RTC_OFFSET;
}
#define cuda_get_rtc_time(tm) to_rtc_time(cuda_get_time(), (tm))
@@ -146,7 +146,7 @@ static int cuda_set_rtc_time(struct rtc_time *tm)
static unsigned long pmu_get_time(void)
{
struct adb_request req;
- unsigned long now;
+ unsigned int now;
if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
return 0;
@@ -156,7 +156,7 @@ static unsigned long pmu_get_time(void)
req.reply_len);
now = (req.reply[0] << 24) + (req.reply[1] << 16)
+ (req.reply[2] << 8) + req.reply[3];
- return now - RTC_OFFSET;
+ return ((unsigned long)now) - RTC_OFFSET;
}
#define pmu_get_rtc_time(tm) to_rtc_time(pmu_get_time(), (tm))
@@ -199,6 +199,7 @@ static unsigned long smu_get_time(void)
#define smu_set_rtc_time(tm, spin) 0
#endif
+/* Can't be __init, it's called when suspending and resuming */
unsigned long pmac_get_boot_time(void)
{
/* Get the time from the RTC, used only at boot time */
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index b9938fece78..06d5ef50121 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,3 +3,8 @@ obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_XICS) += xics.o
+obj-$(CONFIG_SCANLOG) += scanlog.o
+obj-$(CONFIG_EEH) += eeh.o eeh_event.o
+
+obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
+obj-$(CONFIG_HVCS) += hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
new file mode 100644
index 00000000000..79de2310e70
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -0,0 +1,1212 @@
+/*
+ * eeh.c
+ * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+#undef DEBUG
+
+/** Overview:
+ * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * dealing with PCI bus errors that can't be dealt with within the
+ * usual PCI framework, except by check-stopping the CPU. Systems
+ * that are designed for high-availability/reliability cannot afford
+ * to crash due to a "mere" PCI error, thus the need for EEH.
+ * An EEH-capable bridge operates by converting a detected error
+ * into a "slot freeze", taking the PCI adapter off-line, making
+ * the slot behave, from the OS'es point of view, as if the slot
+ * were "empty": all reads return 0xff's and all writes are silently
+ * ignored. EEH slot isolation events can be triggered by parity
+ * errors on the address or data busses (e.g. during posted writes),
+ * which in turn might be caused by low voltage on the bus, dust,
+ * vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ * Note, however, that one of the leading causes of EEH slot
+ * freeze events are buggy device drivers, buggy device microcode,
+ * or buggy device hardware. This is because any attempt by the
+ * device to bus-master data to a memory address that is not
+ * assigned to the device will trigger a slot freeze. (The idea
+ * is to prevent devices-gone-wild from corrupting system memory).
+ * Buggy hardware/drivers will have a miserable time co-existing
+ * with EEH.
+ *
+ * Ideally, a PCI device driver, when suspecting that an isolation
+ * event has occured (e.g. by reading 0xff's), will then ask EEH
+ * whether this is the case, and then take appropriate steps to
+ * reset the PCI slot, the PCI device, and then resume operations.
+ * However, until that day, the checking is done here, with the
+ * eeh_check_failure() routine embedded in the MMIO macros. If
+ * the slot is found to be isolated, an "EEH Event" is synthesized
+ * and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event has occurred, we assume it
+ * is broken and panic. This sets the threshold for how many read
+ * attempts we allow before panicking.
+ */
+#define EEH_MAX_FAILS 100000
+
+/* Misc forward declaraions */
+static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn);
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+
+static int eeh_subsystem_enabled;
+
+/* Lock to avoid races due to multiple reports of an error */
+static DEFINE_SPINLOCK(confirm_error_lock);
+
+/* Buffer for reporting slot-error-detail rtas calls */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/* System monitoring statistics */
+static DEFINE_PER_CPU(unsigned long, no_device);
+static DEFINE_PER_CPU(unsigned long, no_dn);
+static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
+static DEFINE_PER_CPU(unsigned long, ignored_check);
+static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
+static DEFINE_PER_CPU(unsigned long, false_positives);
+static DEFINE_PER_CPU(unsigned long, ignored_failures);
+static DEFINE_PER_CPU(unsigned long, slot_resets);
+
+/**
+ * The pci address cache subsystem. This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range
+{
+ struct rb_node rb_node;
+ unsigned long addr_lo;
+ unsigned long addr_hi;
+ struct pci_dev *pcidev;
+ unsigned int flags;
+};
+
+static struct pci_io_addr_cache
+{
+ struct rb_root rb_root;
+ spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
+{
+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (addr < piar->addr_lo) {
+ n = n->rb_left;
+ } else {
+ if (addr > piar->addr_hi) {
+ n = n->rb_right;
+ } else {
+ pci_dev_get(piar->pcidev);
+ return piar->pcidev;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * pci_get_device_by_addr - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address. Be sure to pci_dev_put the device
+ * when finished. I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
+{
+ struct pci_dev *dev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ dev = __pci_get_device_by_addr(addr);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+ return dev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+ struct rb_node *n;
+ int cnt = 0;
+
+ n = rb_first(&cache->rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+ printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+ piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ cnt++;
+ n = rb_next(n);
+ }
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+ unsigned long ahi, unsigned int flags)
+{
+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct pci_io_addr_range *piar;
+
+ /* Walk tree, find a place to insert into tree */
+ while (*p) {
+ parent = *p;
+ piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+ if (ahi < piar->addr_lo) {
+ p = &parent->rb_left;
+ } else if (alo > piar->addr_hi) {
+ p = &parent->rb_right;
+ } else {
+ if (dev != piar->pcidev ||
+ alo != piar->addr_lo || ahi != piar->addr_hi) {
+ printk(KERN_WARNING "PIAR: overlapping address range\n");
+ }
+ return piar;
+ }
+ }
+ piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+ if (!piar)
+ return NULL;
+
+ piar->addr_lo = alo;
+ piar->addr_hi = ahi;
+ piar->pcidev = dev;
+ piar->flags = flags;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
+ alo, ahi, pci_name (dev));
+#endif
+
+ rb_link_node(&piar->rb_node, parent, p);
+ rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+ return piar;
+}
+
+static void __pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+ int i;
+ int inserted = 0;
+
+ dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));
+ return;
+ }
+
+ /* Skip any devices for which EEH is not enabled. */
+ pdn = PCI_DN(dn);
+ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ pdn->eeh_mode & EEH_MODE_NOCHECK) {
+#ifdef DEBUG
+ printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
+ pci_name(dev), pdn->node->full_name);
+#endif
+ return;
+ }
+
+ /* The cache holds a reference to the device... */
+ pci_dev_get(dev);
+
+ /* Walk resources on this device, poke them into the tree */
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long start = pci_resource_start(dev,i);
+ unsigned long end = pci_resource_end(dev,i);
+ unsigned int flags = pci_resource_flags(dev,i);
+
+ /* We are interested only bus addresses, not dma or other stuff */
+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+ continue;
+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+ continue;
+ pci_addr_cache_insert(dev, start, end, flags);
+ inserted = 1;
+ }
+
+ /* If there was nothing to add, the cache has no reference... */
+ if (!inserted)
+ pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_insert_device - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+static void pci_addr_cache_insert_device(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_insert_device(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+ struct rb_node *n;
+ int removed = 0;
+
+restart:
+ n = rb_first(&pci_io_addr_cache_root.rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (piar->pcidev == dev) {
+ rb_erase(n, &pci_io_addr_cache_root.rb_root);
+ removed = 1;
+ kfree(piar);
+ goto restart;
+ }
+ n = rb_next(n);
+ }
+
+ /* The cache no longer holds its reference to this device... */
+ if (removed)
+ pci_dev_put(dev);
+}
+
+/**
+ * pci_addr_cache_remove_device - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+static void pci_addr_cache_remove_device(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __pci_addr_cache_remove_device(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * pci_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses. This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scaned for devices (after all device resources are known).
+ */
+void __init pci_addr_cache_build(void)
+{
+ struct device_node *dn;
+ struct pci_dev *dev = NULL;
+
+ if (!eeh_subsystem_enabled)
+ return;
+
+ spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ /* Ignore PCI bridges ( XXX why ??) */
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
+ continue;
+ }
+ pci_addr_cache_insert_device(dev);
+
+ /* Save the BAR's; firmware doesn't restore these after EEH reset */
+ dn = pci_device_to_OF_node(dev);
+ eeh_save_bars(dev, PCI_DN(dn));
+ }
+
+#ifdef DEBUG
+ /* Verify tree built up above, echo back the list of addrs. */
+ pci_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
+
+/* --------------------------------------------------------------- */
+/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
+
+void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
+{
+ unsigned long flags;
+ int rc;
+
+ /* Log the error with the rtas logger */
+ spin_lock_irqsave(&slot_errbuf_lock, flags);
+ memset(slot_errbuf, 0, eeh_error_buf_size);
+
+ rc = rtas_call(ibm_slot_error_detail,
+ 8, 1, NULL, pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), NULL, 0,
+ virt_to_phys(slot_errbuf),
+ eeh_error_buf_size,
+ severity);
+
+ if (rc == 0)
+ log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+ spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+}
+
+/**
+ * read_slot_reset_state - Read the reset state of a device node's slot
+ * @dn: device node to read
+ * @rets: array to return results in
+ */
+static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
+{
+ int token, outputs;
+
+ if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+ token = ibm_read_slot_reset_state2;
+ outputs = 4;
+ } else {
+ token = ibm_read_slot_reset_state;
+ rets[2] = 0; /* fake PE Unavailable info */
+ outputs = 3;
+ }
+
+ return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
+}
+
+/**
+ * eeh_token_to_phys - convert EEH address token to phys address
+ * @token i/o token, should be address in the form 0xA....
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+ pte_t *ptep;
+ unsigned long pa;
+
+ ptep = find_linux_pte(init_mm.pgd, token);
+ if (!ptep)
+ return token;
+ pa = pte_pfn(*ptep) << PAGE_SHIFT;
+
+ return pa | (token & (PAGE_SIZE-1));
+}
+
+/**
+ * Return the "partitionable endpoint" (pe) under which this device lies
+ */
+static struct device_node * find_device_pe(struct device_node *dn)
+{
+ while ((dn->parent) && PCI_DN(dn->parent) &&
+ (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ dn = dn->parent;
+ }
+ return dn;
+}
+
+/** Mark all devices that are peers of this device as failed.
+ * Mark the device driver too, so that it can see the failure
+ * immediately; this is critical, since some drivers poll
+ * status registers in interrupts ... If a driver is polling,
+ * and the slot is frozen, then the driver can deadlock in
+ * an interrupt context, which is bad.
+ */
+
+static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ while (dn) {
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+
+ if (dn->child)
+ __eeh_mark_slot (dn->child, mode_flag);
+ }
+ dn = dn->sibling;
+ }
+}
+
+void eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+ __eeh_mark_slot (dn->child, mode_flag);
+}
+
+static void __eeh_clear_slot (struct device_node *dn, int mode_flag)
+{
+ while (dn) {
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ if (dn->child)
+ __eeh_clear_slot (dn->child, mode_flag);
+ }
+ dn = dn->sibling;
+ }
+}
+
+void eeh_clear_slot (struct device_node *dn, int mode_flag)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&confirm_error_lock, flags);
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ __eeh_clear_slot (dn->child, mode_flag);
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+}
+
+/**
+ * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
+ * @dn device node
+ * @dev pci device, if known
+ *
+ * Check for an EEH failure for the given device node. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze. This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * a non-zero value and queues up a slot isolation event notification.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+{
+ int ret;
+ int rets[3];
+ unsigned long flags;
+ struct pci_dn *pdn;
+ int rc = 0;
+
+ __get_cpu_var(total_mmio_ffs)++;
+
+ if (!eeh_subsystem_enabled)
+ return 0;
+
+ if (!dn) {
+ __get_cpu_var(no_dn)++;
+ return 0;
+ }
+ pdn = PCI_DN(dn);
+
+ /* Access to IO BARs might get this far and still not want checking. */
+ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
+ pdn->eeh_mode & EEH_MODE_NOCHECK) {
+ __get_cpu_var(ignored_check)++;
+#ifdef DEBUG
+ printk ("EEH:ignored check (%x) for %s %s\n",
+ pdn->eeh_mode, pci_name (dev), dn->full_name);
+#endif
+ return 0;
+ }
+
+ if (!pdn->eeh_config_addr) {
+ __get_cpu_var(no_cfg_addr)++;
+ return 0;
+ }
+
+ /* If we already have a pending isolation event for this
+ * slot, we know it's bad already, we don't need to check.
+ * Do this checking under a lock; as multiple PCI devices
+ * in one slot might report errors simultaneously, and we
+ * only want one error recovery routine running.
+ */
+ spin_lock_irqsave(&confirm_error_lock, flags);
+ rc = 1;
+ if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
+ pdn->eeh_check_count ++;
+ if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
+ printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
+ pdn->eeh_check_count);
+ dump_stack();
+
+ /* re-read the slot reset state */
+ if (read_slot_reset_state(pdn, rets) != 0)
+ rets[0] = -1; /* reset state unknown */
+
+ /* If we are here, then we hit an infinite loop. Stop. */
+ panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
+ }
+ goto dn_unlock;
+ }
+
+ /*
+ * Now test for an EEH failure. This is VERY expensive.
+ * Note that the eeh_config_addr may be a parent device
+ * in the case of a device behind a bridge, or it may be
+ * function zero of a multi-function device.
+ * In any case they must share a common PHB.
+ */
+ ret = read_slot_reset_state(pdn, rets);
+
+ /* If the call to firmware failed, punt */
+ if (ret != 0) {
+ printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
+ ret, dn->full_name);
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* If EEH is not supported on this device, punt. */
+ if (rets[1] != 1) {
+ printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
+ ret, dn->full_name);
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* If not the kind of error we know about, punt. */
+ if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /* Note that config-io to empty slots may fail;
+ * we recognize empty because they don't have children. */
+ if ((rets[0] == 5) && (dn->child == NULL)) {
+ __get_cpu_var(false_positives)++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ __get_cpu_var(slot_resets)++;
+
+ /* Avoid repeated reports of this failure, including problems
+ * with other functions on this device, and functions under
+ * bridges. */
+ eeh_mark_slot (dn, EEH_MODE_ISOLATED);
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+
+ eeh_send_failure_event (dn, dev, rets[0], rets[2]);
+
+ /* Most EEH events are due to device driver bugs. Having
+ * a stack trace will help the device-driver authors figure
+ * out what happened. So print that out. */
+ if (rets[0] != 5) dump_stack();
+ return 1;
+
+dn_unlock:
+ spin_unlock_irqrestore(&confirm_error_lock, flags);
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
+
+/**
+ * eeh_check_failure - check if all 1's data is due to EEH slot freeze
+ * @token i/o token, should be address in the form 0xA....
+ * @val value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an EEH failure at the given token address. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+ unsigned long addr;
+ struct pci_dev *dev;
+ struct device_node *dn;
+
+ /* Finding the phys addr + pci device; this is pretty quick. */
+ addr = eeh_token_to_phys((unsigned long __force) token);
+ dev = pci_get_device_by_addr(addr);
+ if (!dev) {
+ __get_cpu_var(no_device)++;
+ return val;
+ }
+
+ dn = pci_device_to_OF_node(dev);
+ eeh_dn_check_failure (dn, dev);
+
+ pci_dev_put(dev);
+ return val;
+}
+
+EXPORT_SYMBOL(eeh_check_failure);
+
+/* ------------------------------------------------------------- */
+/* The code below deals with error recovery */
+
+/** Return negative value if a permanent error, else return
+ * a number of milliseconds to wait until the PCI slot is
+ * ready to be used.
+ */
+static int
+eeh_slot_availability(struct pci_dn *pdn)
+{
+ int rc;
+ int rets[3];
+
+ rc = read_slot_reset_state(pdn, rets);
+
+ if (rc) return rc;
+
+ if (rets[1] == 0) return -1; /* EEH is not supported */
+ if (rets[0] == 0) return 0; /* Oll Korrect */
+ if (rets[0] == 5) {
+ if (rets[2] == 0) return -1; /* permanently unavailable */
+ return rets[2]; /* number of millisecs to wait */
+ }
+ return -1;
+}
+
+/** rtas_pci_slot_reset raises/lowers the pci #RST line
+ * state: 1/0 to raise/lower the #RST
+ *
+ * Clear the EEH-frozen condition on a slot. This routine
+ * asserts the PCI #RST line if the 'state' argument is '1',
+ * and drops the #RST line if 'state is '0'. This routine is
+ * safe to call in an interrupt context.
+ *
+ */
+
+static void
+rtas_pci_slot_reset(struct pci_dn *pdn, int state)
+{
+ int rc;
+
+ BUG_ON (pdn==NULL);
+
+ if (!pdn->phb) {
+ printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
+ pdn->node->full_name);
+ return;
+ }
+
+ rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+ pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid),
+ state);
+ if (rc) {
+ printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n",
+ rc, state, pdn->node->full_name);
+ return;
+ }
+}
+
+/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
+ * dn -- device node to be reset.
+ */
+
+void
+rtas_set_slot_reset(struct pci_dn *pdn)
+{
+ int i, rc;
+
+ rtas_pci_slot_reset (pdn, 1);
+
+ /* The PCI bus requires that the reset be held high for at least
+ * a 100 milliseconds. We wait a bit longer 'just in case'. */
+
+#define PCI_BUS_RST_HOLD_TIME_MSEC 250
+ msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now. */
+ eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+
+ rtas_pci_slot_reset (pdn, 0);
+
+ /* After a PCI slot has been reset, the PCI Express spec requires
+ * a 1.5 second idle time for the bus to stabilize, before starting
+ * up traffic. */
+#define PCI_BUS_SETTLE_TIME_MSEC 1800
+ msleep (PCI_BUS_SETTLE_TIME_MSEC);
+
+ /* Now double check with the firmware to make sure the device is
+ * ready to be used; if not, wait for recovery. */
+ for (i=0; i<10; i++) {
+ rc = eeh_slot_availability (pdn);
+ if (rc <= 0) break;
+
+ msleep (rc+100);
+ }
+}
+
+/* ------------------------------------------------------- */
+/** Save and restore of PCI BARs
+ *
+ * Although firmware will set up BARs during boot, it doesn't
+ * set up device BAR's after a device reset, although it will,
+ * if requested, set up bridge configuration. Thus, we need to
+ * configure the PCI devices ourselves.
+ */
+
+/**
+ * __restore_bars - Restore the Base Address Registers
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static inline void __restore_bars (struct pci_dn *pdn)
+{
+ int i;
+
+ if (NULL==pdn->phb) return;
+ for (i=4; i<10; i++) {
+ rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
+ }
+
+ /* 12 == Expansion ROM Address */
+ rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
+
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
+
+ rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+
+ rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+
+ /* max latency, min grant, interrupt pin and line */
+ rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
+}
+
+/**
+ * eeh_restore_bars - restore the PCI config space info
+ *
+ * This routine performs a recursive walk to the children
+ * of this device as well.
+ */
+void eeh_restore_bars(struct pci_dn *pdn)
+{
+ struct device_node *dn;
+ if (!pdn)
+ return;
+
+ if (! pdn->eeh_is_bridge)
+ __restore_bars (pdn);
+
+ dn = pdn->node->child;
+ while (dn) {
+ eeh_restore_bars (PCI_DN(dn));
+ dn = dn->sibling;
+ }
+}
+
+/**
+ * eeh_save_bars - save device bars
+ *
+ * Save the values of the device bars. Unlike the restore
+ * routine, this routine is *not* recursive. This is because
+ * PCI devices are added individuallly; but, for the restore,
+ * an entire slot is reset at a time.
+ */
+static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn)
+{
+ int i;
+
+ if (!pdev || !pdn )
+ return;
+
+ for (i = 0; i < 16; i++)
+ pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]);
+
+ if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+ pdn->eeh_is_bridge = 1;
+}
+
+void
+rtas_configure_bridge(struct pci_dn *pdn)
+{
+ int token = rtas_token ("ibm,configure-bridge");
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return;
+ rc = rtas_call(token,3,1, NULL,
+ pdn->eeh_config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid));
+ if (rc) {
+ printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
+ rc, pdn->node->full_name);
+ }
+}
+
+/* ------------------------------------------------------------- */
+/* The code below deals with enabling EEH for devices during the
+ * early boot sequence. EEH must be enabled before any PCI probing
+ * can be done.
+ */
+
+#define EEH_ENABLE 1
+
+struct eeh_early_enable_info {
+ unsigned int buid_hi;
+ unsigned int buid_lo;
+};
+
+/* Enable eeh for the given device node. */
+static void *early_enable_eeh(struct device_node *dn, void *data)
+{
+ struct eeh_early_enable_info *info = data;
+ int ret;
+ char *status = get_property(dn, "status", NULL);
+ u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
+ u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
+ u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
+ u32 *regs;
+ int enable;
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ pdn->eeh_mode = 0;
+ pdn->eeh_check_count = 0;
+ pdn->eeh_freeze_count = 0;
+
+ if (status && strcmp(status, "ok") != 0)
+ return NULL; /* ignore devices with bad status */
+
+ /* Ignore bad nodes. */
+ if (!class_code || !vendor_id || !device_id)
+ return NULL;
+
+ /* There is nothing to check on PCI to ISA bridges */
+ if (dn->type && !strcmp(dn->type, "isa")) {
+ pdn->eeh_mode |= EEH_MODE_NOCHECK;
+ return NULL;
+ }
+
+ /*
+ * Now decide if we are going to "Disable" EEH checking
+ * for this device. We still run with the EEH hardware active,
+ * but we won't be checking for ff's. This means a driver
+ * could return bad data (very bad!), an interrupt handler could
+ * hang waiting on status bits that won't change, etc.
+ * But there are a few cases like display devices that make sense.
+ */
+ enable = 1; /* i.e. we will do checking */
+ if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
+ enable = 0;
+
+ if (!enable)
+ pdn->eeh_mode |= EEH_MODE_NOCHECK;
+
+ /* Ok... see if this device supports EEH. Some do, some don't,
+ * and the only way to find out is to check each and every one. */
+ regs = (u32 *)get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ /* Try to enable eeh */
+ ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+ regs[0], info->buid_hi, info->buid_lo,
+ EEH_ENABLE);
+
+ if (ret == 0) {
+ eeh_subsystem_enabled = 1;
+ pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+ pdn->eeh_config_addr = regs[0];
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
+#endif
+ } else {
+
+ /* This device doesn't support EEH, but it may have an
+ * EEH parent, in which case we mark it as supported. */
+ if (dn->parent && PCI_DN(dn->parent)
+ && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+ /* Parent supports EEH. */
+ pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+ pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
+ return NULL;
+ }
+ }
+ } else {
+ printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
+ dn->full_name);
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
+ * As a side effect we can determine here if eeh is supported at all.
+ * Note that we leave EEH on so failed config cycles won't cause a machine
+ * check. If a user turns off EEH for a particular adapter they are really
+ * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
+ * grant access to a slot if EEH isn't enabled, and so we always enable
+ * EEH for all slots/all devices.
+ *
+ * The eeh-force-off option disables EEH checking globally, for all slots.
+ * Even if force-off is set, the EEH hardware is still enabled, so that
+ * newer systems can boot.
+ */
+void __init eeh_init(void)
+{
+ struct device_node *phb, *np;
+ struct eeh_early_enable_info info;
+
+ spin_lock_init(&confirm_error_lock);
+ spin_lock_init(&slot_errbuf_lock);
+
+ np = of_find_node_by_path("/rtas");
+ if (np == NULL)
+ return;
+
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
+ return;
+
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ eeh_error_buf_size = 1024;
+ }
+ if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
+ "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Enable EEH for all adapters. Note that eeh requires buid's */
+ for (phb = of_find_node_by_name(NULL, "pci"); phb;
+ phb = of_find_node_by_name(phb, "pci")) {
+ unsigned long buid;
+
+ buid = get_phb_buid(phb);
+ if (buid == 0 || PCI_DN(phb) == NULL)
+ continue;
+
+ info.buid_lo = BUID_LO(buid);
+ info.buid_hi = BUID_HI(buid);
+ traverse_pci_devices(phb, early_enable_eeh, &info);
+ }
+
+ if (eeh_subsystem_enabled)
+ printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ printk(KERN_WARNING "EEH: No capable adapters found\n");
+}
+
+/**
+ * eeh_add_device_early - enable EEH for the indicated device_node
+ * @dn: device node for which to set up EEH
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * This routine must be called before any i/o is performed to the
+ * adapter (inluding any config-space i/o).
+ * Whether this actually enables EEH or not for this device depends
+ * on the CEC architecture, type of the device, on earlier boot
+ * command-line arguments & etc.
+ */
+void eeh_add_device_early(struct device_node *dn)
+{
+ struct pci_controller *phb;
+ struct eeh_early_enable_info info;
+
+ if (!dn || !PCI_DN(dn))
+ return;
+ phb = PCI_DN(dn)->phb;
+ if (NULL == phb || 0 == phb->buid) {
+ printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
+ dn->full_name);
+ dump_stack();
+ return;
+ }
+
+ info.buid_hi = BUID_HI(phb->buid);
+ info.buid_lo = BUID_LO(phb->buid);
+ early_enable_eeh(dn, &info);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_early);
+
+/**
+ * eeh_add_device_late - perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ */
+void eeh_add_device_late(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
+#endif
+
+ pci_dev_get (dev);
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ pdn->pcidev = dev;
+
+ pci_addr_cache_insert_device (dev);
+ eeh_save_bars(dev, pdn);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_late);
+
+/**
+ * eeh_remove_device - undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be when a device is removed from a running
+ * system (e.g. by hotplug or dlpar).
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+
+ /* Unregister the device with the EEH/PCI address search system */
+#ifdef DEBUG
+ printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
+#endif
+ pci_addr_cache_remove_device(dev);
+
+ dn = pci_device_to_OF_node(dev);
+ PCI_DN(dn)->pcidev = NULL;
+ pci_dev_put (dev);
+}
+EXPORT_SYMBOL_GPL(eeh_remove_device);
+
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+ unsigned int cpu;
+ unsigned long ffs = 0, positives = 0, failures = 0;
+ unsigned long resets = 0;
+ unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
+
+ for_each_cpu(cpu) {
+ ffs += per_cpu(total_mmio_ffs, cpu);
+ positives += per_cpu(false_positives, cpu);
+ failures += per_cpu(ignored_failures, cpu);
+ resets += per_cpu(slot_resets, cpu);
+ no_dev += per_cpu(no_device, cpu);
+ no_dn += per_cpu(no_dn, cpu);
+ no_cfg += per_cpu(no_cfg_addr, cpu);
+ no_check += per_cpu(ignored_check, cpu);
+ }
+
+ if (0 == eeh_subsystem_enabled) {
+ seq_printf(m, "EEH Subsystem is globally disabled\n");
+ seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
+ } else {
+ seq_printf(m, "EEH Subsystem is enabled\n");
+ seq_printf(m,
+ "no device=%ld\n"
+ "no device node=%ld\n"
+ "no config address=%ld\n"
+ "check not wanted=%ld\n"
+ "eeh_total_mmio_ffs=%ld\n"
+ "eeh_false_positives=%ld\n"
+ "eeh_ignored_failures=%ld\n"
+ "eeh_slot_resets=%ld\n",
+ no_dev, no_dn, no_cfg, no_check,
+ ffs, positives, failures, resets);
+ }
+
+ return 0;
+}
+
+static int proc_eeh_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_eeh_show, NULL);
+}
+
+static struct file_operations proc_eeh_operations = {
+ .open = proc_eeh_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init eeh_init_proc(void)
+{
+ struct proc_dir_entry *e;
+
+ if (platform_is_pseries()) {
+ e = create_proc_entry("ppc64/eeh", 0, NULL);
+ if (e)
+ e->proc_fops = &proc_eeh_operations;
+ }
+
+ return 0;
+}
+__initcall(eeh_init_proc);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
new file mode 100644
index 00000000000..92497333c2b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -0,0 +1,155 @@
+/*
+ * eeh_event.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <asm/eeh_event.h>
+
+/** Overview:
+ * EEH error states may be detected within exception handlers;
+ * however, the recovery processing needs to occur asynchronously
+ * in a normal kernel context and not an interrupt context.
+ * This pair of routines creates an event and queues it onto a
+ * work-queue, where a worker thread can drive recovery.
+ */
+
+/* EEH event workqueue setup. */
+static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(eeh_eventlist);
+static void eeh_thread_launcher(void *);
+DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL);
+
+/**
+ * eeh_panic - call panic() for an eeh event that cannot be handled.
+ * The philosophy of this routine is that it is better to panic and
+ * halt the OS than it is to risk possible data corruption by
+ * oblivious device drivers that don't know better.
+ *
+ * @dev pci device that had an eeh event
+ * @reset_state current reset state of the device slot
+ */
+static void eeh_panic(struct pci_dev *dev, int reset_state)
+{
+ /*
+ * Since the panic_on_oops sysctl is used to halt the system
+ * in light of potential corruption, we can use it here.
+ */
+ if (panic_on_oops) {
+ panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,
+ pci_name(dev));
+ }
+ else {
+ printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",
+ reset_state, pci_name(dev));
+ }
+}
+
+/**
+ * eeh_event_handler - dispatch EEH events. The detection of a frozen
+ * slot can occur inside an interrupt, where it can be hard to do
+ * anything about it. The goal of this routine is to pull these
+ * detection events out of the context of the interrupt handler, and
+ * re-dispatch them for processing at a later time in a normal context.
+ *
+ * @dummy - unused
+ */
+static int eeh_event_handler(void * dummy)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ daemonize ("eehd");
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ event = NULL;
+ if (!list_empty(&eeh_eventlist)) {
+ event = list_entry(eeh_eventlist.next, struct eeh_event, list);
+ list_del(&event->list);
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+ if (event == NULL)
+ break;
+
+ printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
+ pci_name(event->dev));
+
+ eeh_panic (event->dev, event->state);
+
+ kfree(event);
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_thread_launcher
+ *
+ * @dummy - unused
+ */
+static void eeh_thread_launcher(void *dummy)
+{
+ if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0)
+ printk(KERN_ERR "Failed to start EEH daemon\n");
+}
+
+/**
+ * eeh_send_failure_event - generate a PCI error event
+ * @dev pci device
+ *
+ * This routine can be called within an interrupt context;
+ * the actual event will be delivered in a normal context
+ * (from a workqueue).
+ */
+int eeh_send_failure_event (struct device_node *dn,
+ struct pci_dev *dev,
+ int state,
+ int time_unavail)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ event = kmalloc(sizeof(*event), GFP_ATOMIC);
+ if (event == NULL) {
+ printk (KERN_ERR "EEH: out of memory, event not handled\n");
+ return 1;
+ }
+
+ if (dev)
+ pci_dev_get(dev);
+
+ event->dn = dn;
+ event->dev = dev;
+ event->state = state;
+ event->time_unavail = time_unavail;
+
+ /* We may or may not be called in an interrupt context */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_add(&event->list, &eeh_eventlist);
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+
+ schedule_work(&eeh_event_wq);
+
+ return 0;
+}
+
+/********************** END OF FILE ******************************/
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
new file mode 100644
index 00000000000..138e128a388
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -0,0 +1,74 @@
+/*
+ * hvconsole.c
+ * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Additional Author(s):
+ * Ryan S. Arnold <rsa@us.ibm.com>
+ *
+ * LPAR console support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/hvcall.h>
+#include <asm/hvconsole.h>
+
+/**
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
+ * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
+ * data.
+ * @buf: The character buffer into which to put the character data fetched from
+ * firmware.
+ * @count: not used?
+ */
+int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+{
+ unsigned long got;
+
+ if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
+ (unsigned long *)buf, (unsigned long *)buf+1) == H_Success)
+ return got;
+ return 0;
+}
+
+EXPORT_SYMBOL(hvc_get_chars);
+
+
+/**
+ * hvc_put_chars: send characters to firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ * originated.
+ * @buf: The character buffer that contains the character data to send to
+ * firmware.
+ * @count: Send this number of characters.
+ */
+int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+ unsigned long *lbuf = (unsigned long *) buf;
+ long ret;
+
+ ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
+ lbuf[1]);
+ if (ret == H_Success)
+ return count;
+ if (ret == H_Busy)
+ return 0;
+ return -EIO;
+}
+
+EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
new file mode 100644
index 00000000000..4d584172055
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -0,0 +1,251 @@
+/*
+ * hvcserver.c
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/hvcall.h>
+#include <asm/hvcserver.h>
+#include <asm/io.h>
+
+#define HVCS_ARCH_VERSION "1.0.0"
+
+MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
+MODULE_DESCRIPTION("IBM hvcs ppc64 API");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HVCS_ARCH_VERSION);
+
+/*
+ * Convert arch specific return codes into relevant errnos. The hvcs
+ * functions aren't performance sensitive, so this conversion isn't an
+ * issue.
+ */
+int hvcs_convert(long to_convert)
+{
+ switch (to_convert) {
+ case H_Success:
+ return 0;
+ case H_Parameter:
+ return -EINVAL;
+ case H_Hardware:
+ return -EIO;
+ case H_Busy:
+ case H_LongBusyOrder1msec:
+ case H_LongBusyOrder10msec:
+ case H_LongBusyOrder100msec:
+ case H_LongBusyOrder1sec:
+ case H_LongBusyOrder10sec:
+ case H_LongBusyOrder100sec:
+ return -EBUSY;
+ case H_Function: /* fall through */
+ default:
+ return -EPERM;
+ }
+}
+
+/**
+ * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
+ * @head: list_head pointer for an allocated list of partner info structs to
+ * free.
+ *
+ * This function is used to free the partner info list that was returned by
+ * calling hvcs_get_partner_info().
+ */
+int hvcs_free_partner_info(struct list_head *head)
+{
+ struct hvcs_partner_info *pi;
+ struct list_head *element;
+
+ if (!head)
+ return -EINVAL;
+
+ while (!list_empty(head)) {
+ element = head->next;
+ pi = list_entry(element, struct hvcs_partner_info, node);
+ list_del(element);
+ kfree(pi);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hvcs_free_partner_info);
+
+/* Helper function for hvcs_get_partner_info */
+int hvcs_next_partner(uint32_t unit_address,
+ unsigned long last_p_partition_ID,
+ unsigned long last_p_unit_address, unsigned long *pi_buff)
+
+{
+ long retval;
+ retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
+ last_p_partition_ID,
+ last_p_unit_address, virt_to_phys(pi_buff));
+ return hvcs_convert(retval);
+}
+
+/**
+ * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
+ * @unit_address: The unit_address of the vty-server adapter for which this
+ * function is fetching partner info.
+ * @head: An initialized list_head pointer to an empty list to use to return the
+ * list of partner info fetched from the hypervisor to the caller.
+ * @pi_buff: A page sized buffer pre-allocated prior to calling this function
+ * that is to be used to be used by firmware as an iterator to keep track
+ * of the partner info retrieval.
+ *
+ * This function returns non-zero on success, or if there is no partner info.
+ *
+ * The pi_buff is pre-allocated prior to calling this function because this
+ * function may be called with a spin_lock held and kmalloc of a page is not
+ * recommended as GFP_ATOMIC.
+ *
+ * The first long of this buffer is used to store a partner unit address. The
+ * second long is used to store a partner partition ID and starting at
+ * pi_buff[2] is the 79 character Converged Location Code (diff size than the
+ * unsigned longs, hence the casting mumbo jumbo you see later).
+ *
+ * Invocation of this function should always be followed by an invocation of
+ * hvcs_free_partner_info() using a pointer to the SAME list head instance
+ * that was passed as a parameter to this function.
+ */
+int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
+ unsigned long *pi_buff)
+{
+ /*
+ * Dealt with as longs because of the hcall interface even though the
+ * values are uint32_t.
+ */
+ unsigned long last_p_partition_ID;
+ unsigned long last_p_unit_address;
+ struct hvcs_partner_info *next_partner_info = NULL;
+ int more = 1;
+ int retval;
+
+ memset(pi_buff, 0x00, PAGE_SIZE);
+ /* invalid parameters */
+ if (!head || !pi_buff)
+ return -EINVAL;
+
+ last_p_partition_ID = last_p_unit_address = ~0UL;
+ INIT_LIST_HEAD(head);
+
+ do {
+ retval = hvcs_next_partner(unit_address, last_p_partition_ID,
+ last_p_unit_address, pi_buff);
+ if (retval) {
+ /*
+ * Don't indicate that we've failed if we have
+ * any list elements.
+ */
+ if (!list_empty(head))
+ return 0;
+ return retval;
+ }
+
+ last_p_partition_ID = pi_buff[0];
+ last_p_unit_address = pi_buff[1];
+
+ /* This indicates that there are no further partners */
+ if (last_p_partition_ID == ~0UL
+ && last_p_unit_address == ~0UL)
+ break;
+
+ /* This is a very small struct and will be freed soon in
+ * hvcs_free_partner_info(). */
+ next_partner_info = kmalloc(sizeof(struct hvcs_partner_info),
+ GFP_ATOMIC);
+
+ if (!next_partner_info) {
+ printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
+ " allocate partner info struct.\n");
+ hvcs_free_partner_info(head);
+ return -ENOMEM;
+ }
+
+ next_partner_info->unit_address
+ = (unsigned int)last_p_unit_address;
+ next_partner_info->partition_ID
+ = (unsigned int)last_p_partition_ID;
+
+ /* copy the Null-term char too */
+ strncpy(&next_partner_info->location_code[0],
+ (char *)&pi_buff[2],
+ strlen((char *)&pi_buff[2]) + 1);
+
+ list_add_tail(&(next_partner_info->node), head);
+ next_partner_info = NULL;
+
+ } while (more);
+
+ return 0;
+}
+EXPORT_SYMBOL(hvcs_get_partner_info);
+
+/**
+ * hvcs_register_connection - establish a connection between this vty-server and
+ * a vty.
+ * @unit_address: The unit address of the vty-server adapter that is to be
+ * establish a connection.
+ * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
+ * @p_unit_address: The unit address of the vty adapter to which the vty-server
+ * is to be connected.
+ *
+ * If this function is called once and -EINVAL is returned it may
+ * indicate that the partner info needs to be refreshed for the
+ * target unit address at which point the caller must invoke
+ * hvcs_get_partner_info() and then call this function again. If,
+ * for a second time, -EINVAL is returned then it indicates that
+ * there is probably already a partner connection registered to a
+ * different vty-server adapter. It is also possible that a second
+ * -EINVAL may indicate that one of the parms is not valid, for
+ * instance if the link was removed between the vty-server adapter
+ * and the vty adapter that you are trying to open. Don't shoot the
+ * messenger. Firmware implemented it this way.
+ */
+int hvcs_register_connection( uint32_t unit_address,
+ uint32_t p_partition_ID, uint32_t p_unit_address)
+{
+ long retval;
+ retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
+ p_partition_ID, p_unit_address);
+ return hvcs_convert(retval);
+}
+EXPORT_SYMBOL(hvcs_register_connection);
+
+/**
+ * hvcs_free_connection - free the connection between a vty-server and vty
+ * @unit_address: The unit address of the vty-server that is to have its
+ * connection severed.
+ *
+ * This function is used to free the partner connection between a vty-server
+ * adapter and a vty adapter.
+ *
+ * If -EBUSY is returned continue to call this function until 0 is returned.
+ */
+int hvcs_free_connection(uint32_t unit_address)
+{
+ long retval;
+ retval = plpar_hcall_norets(H_FREE_VTERM, unit_address);
+ return hvcs_convert(retval);
+}
+EXPORT_SYMBOL(hvcs_free_connection);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 513e2723149..97ba5214417 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -37,16 +37,15 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/pSeries_reconfig.h>
-#include <asm/systemcfg.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
+#include <asm/udbg.h>
#include "plpar_wrappers.h"
@@ -582,7 +581,7 @@ void iommu_init_early_pSeries(void)
return;
}
- if (systemcfg->platform & PLATFORM_LPAR) {
+ if (platform_is_lpar()) {
if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
ppc_md.tce_build = tce_buildmulti_pSeriesLP;
ppc_md.tce_free = tce_freemulti_pSeriesLP;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e384a5a9179..a50e5f3f396 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#define DEBUG
+#undef DEBUG_LOW
#include <linux/config.h>
#include <linux/kernel.h>
@@ -31,20 +31,21 @@
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/mmu_context.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/cputable.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
#include "plpar_wrappers.h"
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
#else
-#define DBG(fmt...)
+#define DBG_LOW(fmt...) do { } while(0)
#endif
/* in pSeries_hvCall.S */
@@ -276,8 +277,9 @@ void vpa_init(int cpu)
}
long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn,
- unsigned long vflags, unsigned long rflags)
+ unsigned long va, unsigned long pa,
+ unsigned long rflags, unsigned long vflags,
+ int psize)
{
unsigned long lpar_rc;
unsigned long flags;
@@ -285,11 +287,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long hpte_v, hpte_r;
unsigned long dummy0, dummy1;
- hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
-
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+ "rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+#if 1
+ {
+ int i;
+ for (i=0;i<8;i++) {
+ unsigned long w0, w1;
+ plpar_pte_read(0, hpte_group, &w0, &w1);
+ BUG_ON (HPTE_V_COMPARE(hpte_v, w0)
+ && (w0 & HPTE_V_VALID));
+ }
+ }
+#endif
/* Now fill in the actual HPTE */
/* Set CEC cookie to 0 */
@@ -299,23 +318,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Exact = 0 */
flags = 0;
- /* XXX why is this here? - Anton */
+ /* Make pHyp happy */
if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
hpte_r &= ~_PAGE_COHERENT;
lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
hpte_r, &slot, &dummy0, &dummy1);
-
- if (unlikely(lpar_rc == H_PTEG_Full))
+ if (unlikely(lpar_rc == H_PTEG_Full)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" full\n");
return -1;
+ }
/*
* Since we try and ioremap PHBs we don't own, the pte insert
* will fail. However we must catch the failure in hash_page
* or we will loop forever, so return -2 in this case.
*/
- if (unlikely(lpar_rc != H_Success))
+ if (unlikely(lpar_rc != H_Success)) {
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" lpar err %d\n", lpar_rc);
return -2;
+ }
+ if (!(vflags & HPTE_V_BOLTED))
+ DBG_LOW(" -> slot: %d\n", slot & 7);
/* Because of iSeries, we have to pass down the secondary
* bucket bit here as well
@@ -340,10 +366,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
(0x1UL << 4), &dummy1, &dummy2);
-
if (lpar_rc == H_Success)
return i;
-
BUG_ON(lpar_rc != H_Not_Found);
slot_offset++;
@@ -371,20 +395,28 @@ static void pSeries_lpar_hptab_clear(void)
* We can probably optimize here and assume the high bits of newpp are
* already zero. For now I am paranoid.
*/
-static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+static long pSeries_lpar_hpte_updatepp(unsigned long slot,
+ unsigned long newpp,
+ unsigned long va,
+ int psize, int local)
{
unsigned long lpar_rc;
unsigned long flags = (newpp & 7) | H_AVPN;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
- if (large)
- avpn &= ~0x1UL;
+ want_v = hpte_encode_v(va, psize);
- lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
+ DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
+ want_v & HPTE_V_AVPN, slot, flags, psize);
- if (lpar_rc == H_Not_Found)
+ lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN);
+
+ if (lpar_rc == H_Not_Found) {
+ DBG_LOW("not found !\n");
return -1;
+ }
+
+ DBG_LOW("ok\n");
BUG_ON(lpar_rc != H_Success);
@@ -410,21 +442,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
return dword0;
}
-static long pSeries_lpar_hpte_find(unsigned long vpn)
+static long pSeries_lpar_hpte_find(unsigned long va, int psize)
{
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hpte_v = pSeries_lpar_hpte_getword0(slot);
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -441,17 +474,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
- unsigned long ea)
+ unsigned long ea,
+ int psize)
{
- unsigned long lpar_rc;
- unsigned long vsid, va, vpn, flags;
- long slot;
+ unsigned long lpar_rc, slot, vsid, va, flags;
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = pSeries_lpar_hpte_find(vpn);
+ slot = pSeries_lpar_hpte_find(va, psize);
BUG_ON(slot == -1);
flags = newpp & 7;
@@ -461,18 +492,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long lpar_rc;
unsigned long dummy1, dummy2;
- if (large)
- avpn &= ~0x1UL;
-
- lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
- &dummy2);
+ DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d",
+ slot, va, psize, local);
+ want_v = hpte_encode_v(va, psize);
+ lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN,
+ &dummy1, &dummy2);
if (lpar_rc == H_Not_Found)
return;
@@ -494,7 +525,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index c198656a3bb..999a9620b5c 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -107,7 +107,6 @@ static void __init pSeries_request_regions(void)
void __init pSeries_final_fixup(void)
{
- phbs_remap_io();
pSeries_request_regions();
pci_addr_cache_build();
@@ -123,7 +122,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
int i;
unsigned int reg;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (!platform_is_pseries())
return;
printk("Using INTC for W82c105 IDE controller.\n");
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 382f8c5b0e7..3bd1b3e0600 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -107,14 +107,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
lbuf[1]);
}
-static inline long plpar_set_xdabr(unsigned long address, unsigned long flags)
-{
- return plpar_hcall_norets(H_SET_XDABR, address, flags);
-}
-
-static inline long plpar_set_dabr(unsigned long val)
-{
- return plpar_hcall_norets(H_SET_DABR, val);
-}
-
#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 6562ff4b0a8..fbd214d68b0 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -48,7 +48,7 @@
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
-#include <asm/ppcdebug.h>
+#include <asm/udbg.h>
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 58c61219d08..d8864164dbe 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -286,10 +286,8 @@ static struct property *new_property(const char *name, const int length,
return new;
cleanup:
- if (new->name)
- kfree(new->name);
- if (new->value)
- kfree(new->value);
+ kfree(new->name);
+ kfree(new->value);
kfree(new);
return NULL;
}
@@ -410,7 +408,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (!platform_is_pseries())
return 0;
ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index e26b0420b6d..a6f628d4c9d 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -27,7 +27,6 @@
#include <asm/prom.h>
#include <asm/nvram.h>
#include <asm/atomic.h>
-#include <asm/systemcfg.h>
#if 0
#define DEBUG(A...) printk(KERN_ERR A)
@@ -482,10 +481,12 @@ static int __init rtas_init(void)
{
struct proc_dir_entry *entry;
- /* No RTAS, only warn if we are on a pSeries box */
+ if (!platform_is_pseries())
+ return 0;
+
+ /* No RTAS */
if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) {
- if (systemcfg->platform & PLATFORM_PSERIES)
- printk(KERN_INFO "rtasd: no event-scan on system\n");
+ printk(KERN_INFO "rtasd: no event-scan on system\n");
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
new file mode 100644
index 00000000000..2edc947f7c4
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -0,0 +1,235 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
+ *
+ * When ppc64 hardware fails the service processor dumps internal state
+ * of the system. After a reboot the operating system can access a dump
+ * of this data using this driver. A dump exists if the device-tree
+ * /chosen/ibm,scan-log-data property exists.
+ *
+ * This driver exports /proc/ppc64/scan-log-dump which can be read.
+ * The driver supports only sequential reads.
+ *
+ * The driver looks at a write to the driver for the single word "reset".
+ * If given, the driver will reset the scanlog so the platform can free it.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "scanlog"
+
+/* Status returns from ibm,scan-log-dump */
+#define SCANLOG_COMPLETE 0
+#define SCANLOG_HWERROR -1
+#define SCANLOG_CONTINUE 1
+
+#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0)
+
+static int scanlog_debug;
+static unsigned int ibm_scan_log_dump; /* RTAS token */
+static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */
+
+static ssize_t scanlog_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode * inode = file->f_dentry->d_inode;
+ struct proc_dir_entry *dp;
+ unsigned int *data;
+ int status;
+ unsigned long len, off;
+ unsigned int wait_time;
+
+ dp = PDE(inode);
+ data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: read failed no data\n");
+ return -EIO;
+ }
+
+ if (count > RTAS_DATA_BUF_SIZE)
+ count = RTAS_DATA_BUF_SIZE;
+
+ if (count < 1024) {
+ /* This is the min supported by this RTAS call. Rather
+ * than do all the buffering we insist the user code handle
+ * larger reads. As long as cp works... :)
+ */
+ printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
+ return -EINVAL;
+ }
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ for (;;) {
+ wait_time = 500; /* default wait if no data */
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
+ status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
+ (u32) __pa(rtas_data_buf), (u32) count);
+ memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+ spin_unlock(&rtas_data_buf_lock);
+
+ DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n",
+ status, data[0], data[1], data[2]);
+ switch (status) {
+ case SCANLOG_COMPLETE:
+ DEBUG("hit eof\n");
+ return 0;
+ case SCANLOG_HWERROR:
+ DEBUG("hardware error reading scan log data\n");
+ return -EIO;
+ case SCANLOG_CONTINUE:
+ /* We may or may not have data yet */
+ len = data[1];
+ off = data[2];
+ if (len > 0) {
+ if (copy_to_user(buf, ((char *)data)+off, len))
+ return -EFAULT;
+ return len;
+ }
+ /* Break to sleep default time */
+ break;
+ default:
+ if (status > 9900 && status <= 9905) {
+ wait_time = rtas_extended_busy_delay_time(status);
+ } else {
+ printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status);
+ return -EIO;
+ }
+ }
+ /* Apparently no data yet. Wait and try again. */
+ msleep_interruptible(wait_time);
+ }
+ /*NOTREACHED*/
+}
+
+static ssize_t scanlog_write(struct file * file, const char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ char stkbuf[20];
+ int status;
+
+ if (count > 19) count = 19;
+ if (copy_from_user (stkbuf, buf, count)) {
+ return -EFAULT;
+ }
+ stkbuf[count] = 0;
+
+ if (buf) {
+ if (strncmp(stkbuf, "reset", 5) == 0) {
+ DEBUG("reset scanlog\n");
+ status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
+ DEBUG("rtas returns %d\n", status);
+ } else if (strncmp(stkbuf, "debugon", 7) == 0) {
+ printk(KERN_ERR "scanlog: debug on\n");
+ scanlog_debug = 1;
+ } else if (strncmp(stkbuf, "debugoff", 8) == 0) {
+ printk(KERN_ERR "scanlog: debug off\n");
+ scanlog_debug = 0;
+ }
+ }
+ return count;
+}
+
+static int scanlog_open(struct inode * inode, struct file * file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+ unsigned int *data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: open failed no data\n");
+ return -EIO;
+ }
+
+ if (data[0] != 0) {
+ /* This imperfect test stops a second copy of the
+ * data (or a reset while data is being copied)
+ */
+ return -EBUSY;
+ }
+
+ data[0] = 0; /* re-init so we restart the scan */
+
+ return 0;
+}
+
+static int scanlog_release(struct inode * inode, struct file * file)
+{
+ struct proc_dir_entry *dp = PDE(inode);
+ unsigned int *data = (unsigned int *)dp->data;
+
+ if (!data) {
+ printk(KERN_ERR "scanlog: release failed no data\n");
+ return -EIO;
+ }
+ data[0] = 0;
+
+ return 0;
+}
+
+struct file_operations scanlog_fops = {
+ .owner = THIS_MODULE,
+ .read = scanlog_read,
+ .write = scanlog_write,
+ .open = scanlog_open,
+ .release = scanlog_release,
+};
+
+int __init scanlog_init(void)
+{
+ struct proc_dir_entry *ent;
+
+ ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
+ if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) {
+ printk(KERN_ERR "scan-log-dump not implemented on this system\n");
+ return -EIO;
+ }
+
+ ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL);
+ if (ent) {
+ ent->proc_fops = &scanlog_fops;
+ /* Ideally we could allocate a buffer < 4G */
+ ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR "Failed to allocate a buffer\n");
+ remove_proc_entry("scan-log-dump", ent->parent);
+ return -ENOMEM;
+ }
+ ((unsigned int *)ent->data)[0] = 0;
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n");
+ return -EIO;
+ }
+ proc_ppc64_scan_log_dump = ent;
+
+ return 0;
+}
+
+void __exit scanlog_cleanup(void)
+{
+ if (proc_ppc64_scan_log_dump) {
+ kfree(proc_ppc64_scan_log_dump->data);
+ remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent);
+ }
+}
+
+module_init(scanlog_init);
+module_exit(scanlog_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 65bee939eec..b9d9732b2e0 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -65,6 +65,7 @@
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
+#include <asm/smp.h>
#include "plpar_wrappers.h"
@@ -199,14 +200,12 @@ static void __init pSeries_setup_arch(void)
if (ppc64_interrupt_controller == IC_OPEN_PIC) {
ppc_md.init_IRQ = pSeries_init_mpic;
ppc_md.get_irq = mpic_get_irq;
- ppc_md.cpu_irq_down = mpic_teardown_this_cpu;
/* Allocate the mpic now, so that find_and_init_phbs() can
* fill the ISUs */
pSeries_setup_mpic();
} else {
ppc_md.init_IRQ = xics_init_IRQ;
ppc_md.get_irq = xics_get_irq;
- ppc_md.cpu_irq_down = xics_teardown_cpu;
}
#ifdef CONFIG_SMP
@@ -248,7 +247,7 @@ static void __init pSeries_setup_arch(void)
ppc_md.idle_loop = default_idle;
}
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
else
ppc_md.enable_pmcs = power4_enable_pmcs;
@@ -305,9 +304,7 @@ static void __init fw_feature_init(void)
}
of_node_put(dn);
- no_rtas:
- printk(KERN_INFO "firmware_features = 0x%lx\n",
- ppc64_firmware_features);
+no_rtas:
DBG(" <- fw_feature_init()\n");
}
@@ -353,14 +350,15 @@ static void pSeries_mach_cpu_die(void)
static int pseries_set_dabr(unsigned long dabr)
{
- if (firmware_has_feature(FW_FEATURE_XDABR)) {
- /* We want to catch accesses from kernel and userspace */
- return plpar_set_xdabr(dabr, H_DABRX_KERNEL | H_DABRX_USER);
- }
-
- return plpar_set_dabr(dabr);
+ return plpar_hcall_norets(H_SET_DABR, dabr);
}
+static int pseries_set_xdabr(unsigned long dabr)
+{
+ /* We want to catch accesses from kernel and userspace */
+ return plpar_hcall_norets(H_SET_XDABR, dabr,
+ H_DABRX_KERNEL | H_DABRX_USER);
+}
/*
* Early initialization. Relocation is on but do not reference unbolted pages
@@ -376,7 +374,7 @@ static void __init pSeries_init_early(void)
fw_feature_init();
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
hpte_init_lpar();
else {
hpte_init_native();
@@ -386,7 +384,7 @@ static void __init pSeries_init_early(void)
generic_find_legacy_serial_ports(&physport, &default_speed);
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
find_udbg_vterm();
else if (physport) {
/* Map the uart for udbg. */
@@ -396,8 +394,10 @@ static void __init pSeries_init_early(void)
DBG("Hello World !\n");
}
- if (firmware_has_feature(FW_FEATURE_XDABR | FW_FEATURE_DABR))
+ if (firmware_has_feature(FW_FEATURE_DABR))
ppc_md.set_dabr = pseries_set_dabr;
+ else if (firmware_has_feature(FW_FEATURE_XDABR))
+ ppc_md.set_dabr = pseries_set_xdabr;
iommu_init_early_pSeries();
@@ -465,6 +465,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
* more.
*/
clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
/*
* SMT dynamic mode. Cede will result in this thread going
@@ -477,6 +478,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
cede_processor();
else
local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
} else {
/*
* Give the HV an opportunity at the processor, since we are
@@ -488,11 +490,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
static void pseries_dedicated_idle(void)
{
- long oldval;
struct paca_struct *lpaca = get_paca();
unsigned int cpu = smp_processor_id();
unsigned long start_snooze;
unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+ set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
/*
@@ -501,10 +503,7 @@ static void pseries_dedicated_idle(void)
*/
lpaca->lppaca.idle = 1;
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
-
+ if (!need_resched()) {
start_snooze = __get_tb() +
*smt_snooze_delay * tb_ticks_per_usec;
@@ -527,15 +526,14 @@ static void pseries_dedicated_idle(void)
}
HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
}
lpaca->lppaca.idle = 0;
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
cpu_die();
@@ -579,7 +577,9 @@ static void pseries_shared_idle(void)
lpaca->lppaca.idle = 0;
ppc64_runlatch_on();
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
cpu_die();
@@ -588,11 +588,32 @@ static void pseries_shared_idle(void)
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
- if (systemcfg->platform & PLATFORM_LPAR)
+ if (platform_is_lpar())
return PCI_PROBE_DEVTREE;
return PCI_PROBE_NORMAL;
}
+#ifdef CONFIG_KEXEC
+static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ /* Don't risk a hypervisor call if we're crashing */
+ if (!crash_shutdown) {
+ unsigned long vpa = __pa(&get_paca()->lppaca);
+
+ if (unregister_vpa(hard_smp_processor_id(), vpa)) {
+ printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
+ "failed\n", smp_processor_id(),
+ hard_smp_processor_id());
+ }
+ }
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC)
+ mpic_teardown_this_cpu(secondary);
+ else
+ xics_teardown_cpu(secondary);
+}
+#endif
+
struct machdep_calls __initdata pSeries_md = {
.probe = pSeries_probe,
.setup_arch = pSeries_setup_arch,
@@ -615,4 +636,7 @@ struct machdep_calls __initdata pSeries_md = {
.check_legacy_ioport = pSeries_check_legacy_ioport,
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_exception = pSeries_machine_check_exception,
+#ifdef CONFIG_KEXEC
+ .kexec_cpu_down = pseries_kexec_cpu_down,
+#endif
};
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 7a243e8ccd7..5800cde7d5a 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -46,6 +46,7 @@
#include <asm/rtas.h>
#include <asm/pSeries_reconfig.h>
#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
#include "plpar_wrappers.h"
@@ -96,7 +97,7 @@ int pSeries_cpu_disable(void)
int cpu = smp_processor_id();
cpu_clear(cpu, cpu_online_map);
- systemcfg->processorCount--;
+ vdso_data->processorCount--;
/*fix boot_cpuid here*/
if (cpu == boot_cpuid)
@@ -441,7 +442,7 @@ void __init smp_init_pSeries(void)
smp_ops->cpu_die = pSeries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (platform_is_lpar())
pSeries_reconfig_notifier_register(&pSeries_smp_nb);
#endif
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index c72c86f05cb..72ac18067ec 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -545,7 +545,9 @@ nextnode:
of_node_put(np);
}
- if (systemcfg->platform == PLATFORM_PSERIES) {
+ if (platform_is_lpar())
+ ops = &pSeriesLP_ops;
+ else {
#ifdef CONFIG_SMP
for_each_cpu(i) {
int hard_id;
@@ -561,12 +563,11 @@ nextnode:
#else
xics_per_cpu[0] = ioremap(intr_base, intr_size);
#endif /* CONFIG_SMP */
- } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
- ops = &pSeriesLP_ops;
}
xics_8259_pic.enable = i8259_pic.enable;
xics_8259_pic.disable = i8259_pic.disable;
+ xics_8259_pic.end = i8259_pic.end;
for (i = 0; i < 16; ++i)
get_irq_desc(i)->handler = &xics_8259_pic;
for (; i < NR_IRQS; ++i)
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 90bce6e0c19..b7ac32fdd77 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -207,6 +207,9 @@ void __init i8259_init(unsigned long intack_addr, int offset)
spin_unlock_irqrestore(&i8259_lock, flags);
+ for (i = 0; i < NUM_ISA_INTERRUPTS; ++i)
+ irq_desc[offset + i].handler = &i8259_pic;
+
/* reserve our resources */
setup_irq(offset + 2, &i8259_irqaction);
request_resource(&ioport_resource, &pic1_iores);
@@ -216,6 +219,4 @@ void __init i8259_init(unsigned long intack_addr, int offset)
if (intack_addr != 0)
pci_intack = ioremap(intack_addr, 1);
- for (i = 0; i < NUM_ISA_INTERRUPTS; ++i)
- irq_desc[offset + i].handler = &i8259_pic;
}
diff --git a/arch/powerpc/sysdev/u3_iommu.c b/arch/powerpc/sysdev/u3_iommu.c
index 607722178c1..f32baf7f469 100644
--- a/arch/powerpc/sysdev/u3_iommu.c
+++ b/arch/powerpc/sysdev/u3_iommu.c
@@ -37,7 +37,6 @@
#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/prom.h>
-#include <asm/ppcdebug.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
@@ -227,7 +226,7 @@ static void iommu_table_u3_setup(void)
iommu_table_u3.it_busno = 0;
iommu_table_u3.it_offset = 0;
/* it_size is in number of entries */
- iommu_table_u3.it_size = dart_tablesize / sizeof(u32);
+ iommu_table_u3.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR;
/* Initialize the common IOMMU code */
iommu_table_u3.it_base = (unsigned long)dart_vbase;
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 79a784f0e7a..b20312e5ed2 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_8xx) += start_8xx.o
obj-$(CONFIG_6xx) += start_32.o
obj-$(CONFIG_4xx) += start_32.o
obj-$(CONFIG_PPC64) += start_64.o
-obj-y += xmon.o ppc-dis.o ppc-opc.o subr_prf.o setjmp.o
+obj-y += xmon.o ppc-dis.o ppc-opc.o setjmp.o nonstdio.o
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
new file mode 100644
index 00000000000..78765833f4c
--- /dev/null
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <asm/time.h>
+#include "nonstdio.h"
+
+int xmon_putchar(int c)
+{
+ char ch = c;
+
+ if (c == '\n')
+ xmon_putchar('\r');
+ return xmon_write(&ch, 1) == 1? c: -1;
+}
+
+static char line[256];
+static char *lineptr;
+static int lineleft;
+
+int xmon_expect(const char *str, unsigned long timeout)
+{
+ int c;
+ unsigned long t0;
+
+ /* assume 25MHz default timebase if tb_ticks_per_sec not set yet */
+ timeout *= tb_ticks_per_sec? tb_ticks_per_sec: 25000000;
+ t0 = get_tbl();
+ do {
+ lineptr = line;
+ for (;;) {
+ c = xmon_read_poll();
+ if (c == -1) {
+ if (get_tbl() - t0 > timeout)
+ return 0;
+ continue;
+ }
+ if (c == '\n')
+ break;
+ if (c != '\r' && lineptr < &line[sizeof(line) - 1])
+ *lineptr++ = c;
+ }
+ *lineptr = 0;
+ } while (strstr(line, str) == NULL);
+ return 1;
+}
+
+int xmon_getchar(void)
+{
+ int c;
+
+ if (lineleft == 0) {
+ lineptr = line;
+ for (;;) {
+ c = xmon_readchar();
+ if (c == -1 || c == 4)
+ break;
+ if (c == '\r' || c == '\n') {
+ *lineptr++ = '\n';
+ xmon_putchar('\n');
+ break;
+ }
+ switch (c) {
+ case 0177:
+ case '\b':
+ if (lineptr > line) {
+ xmon_putchar('\b');
+ xmon_putchar(' ');
+ xmon_putchar('\b');
+ --lineptr;
+ }
+ break;
+ case 'U' & 0x1F:
+ while (lineptr > line) {
+ xmon_putchar('\b');
+ xmon_putchar(' ');
+ xmon_putchar('\b');
+ --lineptr;
+ }
+ break;
+ default:
+ if (lineptr >= &line[sizeof(line) - 1])
+ xmon_putchar('\a');
+ else {
+ xmon_putchar(c);
+ *lineptr++ = c;
+ }
+ }
+ }
+ lineleft = lineptr - line;
+ lineptr = line;
+ }
+ if (lineleft == 0)
+ return -1;
+ --lineleft;
+ return *lineptr++;
+}
+
+char *xmon_gets(char *str, int nb)
+{
+ char *p;
+ int c;
+
+ for (p = str; p < str + nb - 1; ) {
+ c = xmon_getchar();
+ if (c == -1) {
+ if (p == str)
+ return NULL;
+ break;
+ }
+ *p++ = c;
+ if (c == '\n')
+ break;
+ }
+ *p = 0;
+ return str;
+}
+
+void xmon_printf(const char *format, ...)
+{
+ va_list args;
+ int n;
+ static char xmon_outbuf[1024];
+
+ va_start(args, format);
+ n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args);
+ va_end(args);
+ xmon_write(xmon_outbuf, n);
+}
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 84211a21c6f..47cebbd2b1b 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -1,22 +1,14 @@
-typedef int FILE;
-extern FILE *xmon_stdin, *xmon_stdout;
#define EOF (-1)
-#define stdin xmon_stdin
-#define stdout xmon_stdout
+
#define printf xmon_printf
-#define fprintf xmon_fprintf
-#define fputs xmon_fputs
-#define fgets xmon_fgets
#define putchar xmon_putchar
-#define getchar xmon_getchar
-#define putc xmon_putc
-#define getc xmon_getc
-#define fopen(n, m) NULL
-#define fflush(f) do {} while (0)
-#define fclose(f) do {} while (0)
-extern char *fgets(char *, int, void *);
-extern void xmon_printf(const char *, ...);
-extern void xmon_fprintf(void *, const char *, ...);
-extern void xmon_sprintf(char *, const char *, ...);
-#define perror(s) printf("%s: no files!\n", (s))
+extern int xmon_putchar(int c);
+extern int xmon_getchar(void);
+extern char *xmon_gets(char *, int);
+extern void xmon_printf(const char *, ...);
+extern void xmon_map_scc(void);
+extern int xmon_expect(const char *str, unsigned long timeout);
+extern int xmon_write(void *ptr, int nb);
+extern int xmon_readchar(void);
+extern int xmon_read_poll(void);
diff --git a/arch/powerpc/xmon/setjmp.S b/arch/powerpc/xmon/setjmp.S
index f8e40dfd2bf..96a91f10e2e 100644
--- a/arch/powerpc/xmon/setjmp.S
+++ b/arch/powerpc/xmon/setjmp.S
@@ -14,61 +14,61 @@
_GLOBAL(xmon_setjmp)
mflr r0
- STL r0,0(r3)
- STL r1,SZL(r3)
- STL r2,2*SZL(r3)
+ PPC_STL r0,0(r3)
+ PPC_STL r1,SZL(r3)
+ PPC_STL r2,2*SZL(r3)
mfcr r0
- STL r0,3*SZL(r3)
- STL r13,4*SZL(r3)
- STL r14,5*SZL(r3)
- STL r15,6*SZL(r3)
- STL r16,7*SZL(r3)
- STL r17,8*SZL(r3)
- STL r18,9*SZL(r3)
- STL r19,10*SZL(r3)
- STL r20,11*SZL(r3)
- STL r21,12*SZL(r3)
- STL r22,13*SZL(r3)
- STL r23,14*SZL(r3)
- STL r24,15*SZL(r3)
- STL r25,16*SZL(r3)
- STL r26,17*SZL(r3)
- STL r27,18*SZL(r3)
- STL r28,19*SZL(r3)
- STL r29,20*SZL(r3)
- STL r30,21*SZL(r3)
- STL r31,22*SZL(r3)
+ PPC_STL r0,3*SZL(r3)
+ PPC_STL r13,4*SZL(r3)
+ PPC_STL r14,5*SZL(r3)
+ PPC_STL r15,6*SZL(r3)
+ PPC_STL r16,7*SZL(r3)
+ PPC_STL r17,8*SZL(r3)
+ PPC_STL r18,9*SZL(r3)
+ PPC_STL r19,10*SZL(r3)
+ PPC_STL r20,11*SZL(r3)
+ PPC_STL r21,12*SZL(r3)
+ PPC_STL r22,13*SZL(r3)
+ PPC_STL r23,14*SZL(r3)
+ PPC_STL r24,15*SZL(r3)
+ PPC_STL r25,16*SZL(r3)
+ PPC_STL r26,17*SZL(r3)
+ PPC_STL r27,18*SZL(r3)
+ PPC_STL r28,19*SZL(r3)
+ PPC_STL r29,20*SZL(r3)
+ PPC_STL r30,21*SZL(r3)
+ PPC_STL r31,22*SZL(r3)
li r3,0
blr
_GLOBAL(xmon_longjmp)
- CMPI r4,0
+ PPC_LCMPI r4,0
bne 1f
li r4,1
-1: LDL r13,4*SZL(r3)
- LDL r14,5*SZL(r3)
- LDL r15,6*SZL(r3)
- LDL r16,7*SZL(r3)
- LDL r17,8*SZL(r3)
- LDL r18,9*SZL(r3)
- LDL r19,10*SZL(r3)
- LDL r20,11*SZL(r3)
- LDL r21,12*SZL(r3)
- LDL r22,13*SZL(r3)
- LDL r23,14*SZL(r3)
- LDL r24,15*SZL(r3)
- LDL r25,16*SZL(r3)
- LDL r26,17*SZL(r3)
- LDL r27,18*SZL(r3)
- LDL r28,19*SZL(r3)
- LDL r29,20*SZL(r3)
- LDL r30,21*SZL(r3)
- LDL r31,22*SZL(r3)
- LDL r0,3*SZL(r3)
+1: PPC_LL r13,4*SZL(r3)
+ PPC_LL r14,5*SZL(r3)
+ PPC_LL r15,6*SZL(r3)
+ PPC_LL r16,7*SZL(r3)
+ PPC_LL r17,8*SZL(r3)
+ PPC_LL r18,9*SZL(r3)
+ PPC_LL r19,10*SZL(r3)
+ PPC_LL r20,11*SZL(r3)
+ PPC_LL r21,12*SZL(r3)
+ PPC_LL r22,13*SZL(r3)
+ PPC_LL r23,14*SZL(r3)
+ PPC_LL r24,15*SZL(r3)
+ PPC_LL r25,16*SZL(r3)
+ PPC_LL r26,17*SZL(r3)
+ PPC_LL r27,18*SZL(r3)
+ PPC_LL r28,19*SZL(r3)
+ PPC_LL r29,20*SZL(r3)
+ PPC_LL r30,21*SZL(r3)
+ PPC_LL r31,22*SZL(r3)
+ PPC_LL r0,3*SZL(r3)
mtcrf 0x38,r0
- LDL r0,0(r3)
- LDL r1,SZL(r3)
- LDL r2,2*SZL(r3)
+ PPC_LL r0,0(r3)
+ PPC_LL r1,SZL(r3)
+ PPC_LL r2,2*SZL(r3)
mtlr r0
mr r3,r4
blr
@@ -84,52 +84,52 @@ _GLOBAL(xmon_longjmp)
* different ABIs, though).
*/
_GLOBAL(xmon_save_regs)
- STL r0,0*SZL(r3)
- STL r2,2*SZL(r3)
- STL r3,3*SZL(r3)
- STL r4,4*SZL(r3)
- STL r5,5*SZL(r3)
- STL r6,6*SZL(r3)
- STL r7,7*SZL(r3)
- STL r8,8*SZL(r3)
- STL r9,9*SZL(r3)
- STL r10,10*SZL(r3)
- STL r11,11*SZL(r3)
- STL r12,12*SZL(r3)
- STL r13,13*SZL(r3)
- STL r14,14*SZL(r3)
- STL r15,15*SZL(r3)
- STL r16,16*SZL(r3)
- STL r17,17*SZL(r3)
- STL r18,18*SZL(r3)
- STL r19,19*SZL(r3)
- STL r20,20*SZL(r3)
- STL r21,21*SZL(r3)
- STL r22,22*SZL(r3)
- STL r23,23*SZL(r3)
- STL r24,24*SZL(r3)
- STL r25,25*SZL(r3)
- STL r26,26*SZL(r3)
- STL r27,27*SZL(r3)
- STL r28,28*SZL(r3)
- STL r29,29*SZL(r3)
- STL r30,30*SZL(r3)
- STL r31,31*SZL(r3)
+ PPC_STL r0,0*SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ PPC_STL r3,3*SZL(r3)
+ PPC_STL r4,4*SZL(r3)
+ PPC_STL r5,5*SZL(r3)
+ PPC_STL r6,6*SZL(r3)
+ PPC_STL r7,7*SZL(r3)
+ PPC_STL r8,8*SZL(r3)
+ PPC_STL r9,9*SZL(r3)
+ PPC_STL r10,10*SZL(r3)
+ PPC_STL r11,11*SZL(r3)
+ PPC_STL r12,12*SZL(r3)
+ PPC_STL r13,13*SZL(r3)
+ PPC_STL r14,14*SZL(r3)
+ PPC_STL r15,15*SZL(r3)
+ PPC_STL r16,16*SZL(r3)
+ PPC_STL r17,17*SZL(r3)
+ PPC_STL r18,18*SZL(r3)
+ PPC_STL r19,19*SZL(r3)
+ PPC_STL r20,20*SZL(r3)
+ PPC_STL r21,21*SZL(r3)
+ PPC_STL r22,22*SZL(r3)
+ PPC_STL r23,23*SZL(r3)
+ PPC_STL r24,24*SZL(r3)
+ PPC_STL r25,25*SZL(r3)
+ PPC_STL r26,26*SZL(r3)
+ PPC_STL r27,27*SZL(r3)
+ PPC_STL r28,28*SZL(r3)
+ PPC_STL r29,29*SZL(r3)
+ PPC_STL r30,30*SZL(r3)
+ PPC_STL r31,31*SZL(r3)
/* go up one stack frame for SP */
- LDL r4,0(r1)
- STL r4,1*SZL(r3)
+ PPC_LL r4,0(r1)
+ PPC_STL r4,1*SZL(r3)
/* get caller's LR */
- LDL r0,LRSAVE(r4)
- STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
- STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ PPC_LL r0,LRSAVE(r4)
+ PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
mfmsr r0
- STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
mfctr r0
- STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
mfxer r0
- STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
mfcr r0
- STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
li r0,0
- STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
blr
diff --git a/arch/powerpc/xmon/start_32.c b/arch/powerpc/xmon/start_32.c
index 69b658c0f76..c2464df4217 100644
--- a/arch/powerpc/xmon/start_32.c
+++ b/arch/powerpc/xmon/start_32.c
@@ -11,7 +11,6 @@
#include <linux/cuda.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/sysrq.h>
#include <linux/bitops.h>
#include <asm/xmon.h>
#include <asm/prom.h>
@@ -22,10 +21,11 @@
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/btext.h>
+#include <asm/time.h>
+#include "nonstdio.h"
static volatile unsigned char __iomem *sccc, *sccd;
unsigned int TXRDY, RXRDY, DLAB;
-static int xmon_expect(const char *str, unsigned int timeout);
static int use_serial;
static int use_screen;
@@ -33,16 +33,6 @@ static int via_modem;
static int xmon_use_sccb;
static struct device_node *channel_node;
-#define TB_SPEED 25000000
-
-static inline unsigned int readtb(void)
-{
- unsigned int ret;
-
- asm volatile("mftb %0" : "=r" (ret) :);
- return ret;
-}
-
void buf_access(void)
{
if (DLAB)
@@ -91,23 +81,7 @@ static unsigned long chrp_find_phys_io_base(void)
}
#endif /* CONFIG_PPC_CHRP */
-#ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_xmon(int key, struct pt_regs *regs,
- struct tty_struct *tty)
-{
- xmon(regs);
-}
-
-static struct sysrq_key_op sysrq_xmon_op =
-{
- .handler = sysrq_handle_xmon,
- .help_msg = "Xmon",
- .action_msg = "Entering xmon",
-};
-#endif
-
-void
-xmon_map_scc(void)
+void xmon_map_scc(void)
{
#ifdef CONFIG_PPC_MULTIPLATFORM
volatile unsigned char __iomem *base;
@@ -217,8 +191,6 @@ xmon_map_scc(void)
RXRDY = 1;
DLAB = 0x80;
#endif /* platform */
-
- register_sysrq_key('x', &sysrq_xmon_op);
}
static int scc_initialized = 0;
@@ -238,8 +210,7 @@ static inline void do_poll_adb(void)
#endif /* CONFIG_ADB_CUDA */
}
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
char *p = ptr;
int i, c, ct;
@@ -311,8 +282,7 @@ static unsigned char xmon_shift_keytab[128] =
"\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */
"\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */
-static int
-xmon_get_adb_key(void)
+static int xmon_get_adb_key(void)
{
int k, t, on;
@@ -350,32 +320,21 @@ xmon_get_adb_key(void)
}
#endif /* CONFIG_BOOTX_TEXT */
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- char *p = ptr;
- int i;
-
#ifdef CONFIG_BOOTX_TEXT
- if (use_screen) {
- for (i = 0; i < nb; ++i)
- *p++ = xmon_get_adb_key();
- return i;
- }
+ if (use_screen)
+ return xmon_get_adb_key();
#endif
- if (!scc_initialized)
- xmon_init_scc();
- for (i = 0; i < nb; ++i) {
+ if (!scc_initialized)
+ xmon_init_scc();
while ((*sccc & RXRDY) == 0)
- do_poll_adb();
+ do_poll_adb();
buf_access();
- *p++ = *sccd;
- }
- return i;
+ return *sccd;
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
if ((*sccc & RXRDY) == 0) {
do_poll_adb();
@@ -395,8 +354,7 @@ static unsigned char scc_inittab[] = {
3, 0xc1, /* rx enable, 8 bits */
};
-void
-xmon_init_scc(void)
+void xmon_init_scc(void)
{
if ( _machine == _MACH_chrp )
{
@@ -410,6 +368,7 @@ xmon_init_scc(void)
else if ( _machine == _MACH_Pmac )
{
int i, x;
+ unsigned long timeout;
if (channel_node != 0)
pmac_call_feature(
@@ -424,8 +383,12 @@ xmon_init_scc(void)
PMAC_FTR_MODEM_ENABLE,
channel_node, 0, 1);
printk(KERN_INFO "Modem powered up by debugger !\n");
- t0 = readtb();
- while (readtb() - t0 < 3*TB_SPEED)
+ t0 = get_tbl();
+ timeout = 3 * tb_ticks_per_sec;
+ if (timeout == 0)
+ /* assume 25MHz if tb_ticks_per_sec not set */
+ timeout = 75000000;
+ while (get_tbl() - t0 < timeout)
eieio();
}
/* use the B channel if requested */
@@ -447,164 +410,19 @@ xmon_init_scc(void)
scc_initialized = 1;
if (via_modem) {
for (;;) {
- xmon_write(NULL, "ATE1V1\r", 7);
+ xmon_write("ATE1V1\r", 7);
if (xmon_expect("OK", 5)) {
- xmon_write(NULL, "ATA\r", 4);
+ xmon_write("ATA\r", 4);
if (xmon_expect("CONNECT", 40))
break;
}
- xmon_write(NULL, "+++", 3);
+ xmon_write("+++", 3);
xmon_expect("OK", 3);
}
}
}
-void *xmon_stdin;
-void *xmon_stdout;
-void *xmon_stderr;
-
-int xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int xmon_expect(const char *str, unsigned int timeout)
-{
- int c;
- unsigned int t0;
-
- timeout *= TB_SPEED;
- t0 = readtb();
- do {
- lineptr = line;
- for (;;) {
- c = xmon_read_poll();
- if (c == -1) {
- if (readtb() - t0 > timeout)
- return 0;
- continue;
- }
- if (c == '\n')
- break;
- if (c != '\r' && lineptr < &line[sizeof(line) - 1])
- *lineptr++ = c;
- }
- *lineptr = 0;
- } while (strstr(line, str) == NULL);
- return 1;
-}
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return NULL;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
-
-void
-xmon_enter(void)
+void xmon_enter(void)
{
#ifdef CONFIG_ADB_PMU
if (_machine == _MACH_Pmac) {
@@ -613,8 +431,7 @@ xmon_enter(void)
#endif
}
-void
-xmon_leave(void)
+void xmon_leave(void)
{
#ifdef CONFIG_ADB_PMU
if (_machine == _MACH_Pmac) {
diff --git a/arch/powerpc/xmon/start_64.c b/arch/powerpc/xmon/start_64.c
index e50c158191e..712552c4f24 100644
--- a/arch/powerpc/xmon/start_64.c
+++ b/arch/powerpc/xmon/start_64.c
@@ -6,182 +6,29 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/config.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sysrq.h>
-#include <linux/init.h>
#include <asm/machdep.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/prom.h>
-#include <asm/processor.h>
#include <asm/udbg.h>
-#include <asm/system.h>
#include "nonstdio.h"
-#ifdef CONFIG_MAGIC_SYSRQ
-
-static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
- struct tty_struct *tty)
-{
- /* ensure xmon is enabled */
- xmon_init(1);
- debugger(pt_regs);
-}
-
-static struct sysrq_key_op sysrq_xmon_op =
+void xmon_map_scc(void)
{
- .handler = sysrq_handle_xmon,
- .help_msg = "Xmon",
- .action_msg = "Entering xmon",
-};
-
-static int __init setup_xmon_sysrq(void)
-{
- register_sysrq_key('x', &sysrq_xmon_op);
- return 0;
}
-__initcall(setup_xmon_sysrq);
-#endif /* CONFIG_MAGIC_SYSRQ */
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
return udbg_write(ptr, nb);
}
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- return udbg_read(ptr, nb);
+ if (udbg_getc)
+ return udbg_getc();
+ return -1;
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
if (udbg_getc_poll)
return udbg_getc_poll();
return -1;
}
-
-FILE *xmon_stdin;
-FILE *xmon_stdout;
-
-int
-xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int
-xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int
-xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return NULL;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
diff --git a/arch/powerpc/xmon/start_8xx.c b/arch/powerpc/xmon/start_8xx.c
index a48bd594cf6..4c17b0486ad 100644
--- a/arch/powerpc/xmon/start_8xx.c
+++ b/arch/powerpc/xmon/start_8xx.c
@@ -15,273 +15,30 @@
#include <asm/8xx_immap.h>
#include <asm/mpc8xx.h>
#include <asm/commproc.h>
+#include "nonstdio.h"
-extern void xmon_printf(const char *fmt, ...);
extern int xmon_8xx_write(char *str, int nb);
extern int xmon_8xx_read_poll(void);
extern int xmon_8xx_read_char(void);
-void prom_drawhex(uint);
-void prom_drawstring(const char *str);
-static int use_screen = 1; /* default */
-
-#define TB_SPEED 25000000
-
-static inline unsigned int readtb(void)
-{
- unsigned int ret;
-
- asm volatile("mftb %0" : "=r" (ret) :);
- return ret;
-}
-
-void buf_access(void)
-{
-}
-
-void
-xmon_map_scc(void)
+void xmon_map_scc(void)
{
-
cpmp = (cpm8xx_t *)&(((immap_t *)IMAP_ADDR)->im_cpm);
- use_screen = 0;
-
- prom_drawstring("xmon uses serial port\n");
}
-static int scc_initialized = 0;
-
void xmon_init_scc(void);
-int
-xmon_write(void *handle, void *ptr, int nb)
+int xmon_write(void *ptr, int nb)
{
- char *p = ptr;
- int i, c, ct;
-
- if (!scc_initialized)
- xmon_init_scc();
-
return(xmon_8xx_write(ptr, nb));
}
-int xmon_wants_key;
-
-int
-xmon_read(void *handle, void *ptr, int nb)
+int xmon_readchar(void)
{
- char *p = ptr;
- int i;
-
- if (!scc_initialized)
- xmon_init_scc();
-
- for (i = 0; i < nb; ++i) {
- *p++ = xmon_8xx_read_char();
- }
- return i;
+ return xmon_8xx_read_char();
}
-int
-xmon_read_poll(void)
+int xmon_read_poll(void)
{
return(xmon_8xx_read_poll());
}
-
-void
-xmon_init_scc()
-{
- scc_initialized = 1;
-}
-
-#if 0
-extern int (*prom_entry)(void *);
-
-int
-xmon_exit(void)
-{
- struct prom_args {
- char *service;
- } args;
-
- for (;;) {
- args.service = "exit";
- (*prom_entry)(&args);
- }
-}
-#endif
-
-void *xmon_stdin;
-void *xmon_stdout;
-void *xmon_stderr;
-
-void
-xmon_init(void)
-{
-}
-
-int
-xmon_putc(int c, void *f)
-{
- char ch = c;
-
- if (c == '\n')
- xmon_putc('\r', f);
- return xmon_write(f, &ch, 1) == 1? c: -1;
-}
-
-int
-xmon_putchar(int c)
-{
- return xmon_putc(c, xmon_stdout);
-}
-
-int
-xmon_fputs(char *str, void *f)
-{
- int n = strlen(str);
-
- return xmon_write(f, str, n) == n? 0: -1;
-}
-
-int
-xmon_readchar(void)
-{
- char ch;
-
- for (;;) {
- switch (xmon_read(xmon_stdin, &ch, 1)) {
- case 1:
- return ch;
- case -1:
- xmon_printf("read(stdin) returned -1\r\n", 0, 0);
- return -1;
- }
- }
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-#if 0
-int xmon_expect(const char *str, unsigned int timeout)
-{
- int c;
- unsigned int t0;
-
- timeout *= TB_SPEED;
- t0 = readtb();
- do {
- lineptr = line;
- for (;;) {
- c = xmon_read_poll();
- if (c == -1) {
- if (readtb() - t0 > timeout)
- return 0;
- continue;
- }
- if (c == '\n')
- break;
- if (c != '\r' && lineptr < &line[sizeof(line) - 1])
- *lineptr++ = c;
- }
- *lineptr = 0;
- } while (strstr(line, str) == NULL);
- return 1;
-}
-#endif
-
-int
-xmon_getchar(void)
-{
- int c;
-
- if (lineleft == 0) {
- lineptr = line;
- for (;;) {
- c = xmon_readchar();
- if (c == -1 || c == 4)
- break;
- if (c == '\r' || c == '\n') {
- *lineptr++ = '\n';
- xmon_putchar('\n');
- break;
- }
- switch (c) {
- case 0177:
- case '\b':
- if (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- case 'U' & 0x1F:
- while (lineptr > line) {
- xmon_putchar('\b');
- xmon_putchar(' ');
- xmon_putchar('\b');
- --lineptr;
- }
- break;
- default:
- if (lineptr >= &line[sizeof(line) - 1])
- xmon_putchar('\a');
- else {
- xmon_putchar(c);
- *lineptr++ = c;
- }
- }
- }
- lineleft = lineptr - line;
- lineptr = line;
- }
- if (lineleft == 0)
- return -1;
- --lineleft;
- return *lineptr++;
-}
-
-char *
-xmon_fgets(char *str, int nb, void *f)
-{
- char *p;
- int c;
-
- for (p = str; p < str + nb - 1; ) {
- c = xmon_getchar();
- if (c == -1) {
- if (p == str)
- return 0;
- break;
- }
- *p++ = c;
- if (c == '\n')
- break;
- }
- *p = 0;
- return str;
-}
-
-void
-prom_drawhex(uint val)
-{
- unsigned char buf[10];
-
- int i;
- for (i = 7; i >= 0; i--)
- {
- buf[i] = "0123456789abcdef"[val & 0x0f];
- val >>= 4;
- }
- buf[8] = '\0';
- xmon_fputs(buf, xmon_stdout);
-}
-
-void
-prom_drawstring(const char *str)
-{
- xmon_fputs(str, xmon_stdout);
-}
diff --git a/arch/powerpc/xmon/subr_prf.c b/arch/powerpc/xmon/subr_prf.c
deleted file mode 100644
index b48738c6dd3..00000000000
--- a/arch/powerpc/xmon/subr_prf.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Written by Cort Dougan to replace the version originally used
- * by Paul Mackerras, which came from NetBSD and thus had copyright
- * conflicts with Linux.
- *
- * This file makes liberal use of the standard linux utility
- * routines to reduce the size of the binary. We assume we can
- * trust some parts of Linux inside the debugger.
- * -- Cort (cort@cs.nmt.edu)
- *
- * Copyright (C) 1999 Cort Dougan.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <stdarg.h>
-#include "nonstdio.h"
-
-extern int xmon_write(void *, void *, int);
-
-void xmon_vfprintf(void *f, const char *fmt, va_list ap)
-{
- static char xmon_buf[2048];
- int n;
-
- n = vsprintf(xmon_buf, fmt, ap);
- xmon_write(f, xmon_buf, n);
-}
-
-void xmon_printf(const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- xmon_vfprintf(stdout, fmt, ap);
- va_end(ap);
-}
-EXPORT_SYMBOL(xmon_printf);
-
-void xmon_fprintf(void *f, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- xmon_vfprintf(f, fmt, ap);
- va_end(ap);
-}
-
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1124f114620..c45a6ad5f3b 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1,7 +1,7 @@
/*
* Routines providing a simple monitor for use on the PowerMac.
*
- * Copyright (C) 1996 Paul Mackerras.
+ * Copyright (C) 1996-2005 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -18,6 +18,8 @@
#include <linux/kallsyms.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/sysrq.h>
+#include <linux/interrupt.h>
#include <asm/ptrace.h>
#include <asm/string.h>
@@ -144,15 +146,10 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
static const char *getvecname(unsigned long vec);
extern int print_insn_powerpc(unsigned long, unsigned long, int);
-extern void printf(const char *fmt, ...);
-extern void xmon_vfprintf(void *f, const char *fmt, va_list ap);
-extern int xmon_putc(int c, void *f);
-extern int putchar(int ch);
extern void xmon_enter(void);
extern void xmon_leave(void);
-extern int xmon_read_poll(void);
extern long setjmp(long *);
extern void longjmp(long *, long);
extern void xmon_save_regs(struct pt_regs *);
@@ -748,7 +745,6 @@ cmds(struct pt_regs *excp)
printf("%x:", smp_processor_id());
#endif /* CONFIG_SMP */
printf("mon> ");
- fflush(stdout);
flush_input();
termch = 0;
cmd = skipbl();
@@ -1472,17 +1468,23 @@ read_spr(int n)
{
unsigned int instrs[2];
unsigned long (*code)(void);
- unsigned long opd[3];
unsigned long ret = -1UL;
+#ifdef CONFIG_PPC64
+ unsigned long opd[3];
- instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
- instrs[1] = 0x4e800020;
opd[0] = (unsigned long)instrs;
opd[1] = 0;
opd[2] = 0;
+ code = (unsigned long (*)(void)) opd;
+#else
+ code = (unsigned long (*)(void)) instrs;
+#endif
+
+ /* mfspr r3,n; blr */
+ instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
+ instrs[1] = 0x4e800020;
store_inst(instrs);
store_inst(instrs+1);
- code = (unsigned long (*)(void)) opd;
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
@@ -1504,16 +1506,21 @@ write_spr(int n, unsigned long val)
{
unsigned int instrs[2];
unsigned long (*code)(unsigned long);
+#ifdef CONFIG_PPC64
unsigned long opd[3];
- instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
- instrs[1] = 0x4e800020;
opd[0] = (unsigned long)instrs;
opd[1] = 0;
opd[2] = 0;
+ code = (unsigned long (*)(unsigned long)) opd;
+#else
+ code = (unsigned long (*)(unsigned long)) instrs;
+#endif
+
+ instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
+ instrs[1] = 0x4e800020;
store_inst(instrs);
store_inst(instrs+1);
- code = (unsigned long (*)(unsigned long)) opd;
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
@@ -1797,7 +1804,7 @@ memex(void)
for(;;){
if (!mnoread)
n = mread(adrs, val, size);
- printf("%.16x%c", adrs, brev? 'r': ' ');
+ printf(REG"%c", adrs, brev? 'r': ' ');
if (!mnoread) {
if (brev)
byterev(val, size);
@@ -1976,17 +1983,18 @@ prdump(unsigned long adrs, long ndump)
nr = mread(adrs, temp, r);
adrs += nr;
for (m = 0; m < r; ++m) {
- if ((m & 7) == 0 && m > 0)
- putchar(' ');
+ if ((m & (sizeof(long) - 1)) == 0 && m > 0)
+ putchar(' ');
if (m < nr)
printf("%.2x", temp[m]);
else
printf("%s", fault_chars[fault_type]);
}
- if (m <= 8)
- printf(" ");
- for (; m < 16; ++m)
+ for (; m < 16; ++m) {
+ if ((m & (sizeof(long) - 1)) == 0)
+ putchar(' ');
printf(" ");
+ }
printf(" |");
for (m = 0; m < r; ++m) {
if (m < nr) {
@@ -2151,7 +2159,6 @@ memzcan(void)
ok = mread(a, &v, 1);
if (ok && !ook) {
printf("%.8x .. ", a);
- fflush(stdout);
} else if (!ok && ook)
printf("%.8x\n", a - mskip);
ook = ok;
@@ -2372,7 +2379,7 @@ int
inchar(void)
{
if (lineptr == NULL || *lineptr == 0) {
- if (fgets(line, sizeof(line), stdin) == NULL) {
+ if (xmon_gets(line, sizeof(line)) == NULL) {
lineptr = NULL;
return EOF;
}
@@ -2526,4 +2533,29 @@ void xmon_init(int enable)
__debugger_dabr_match = NULL;
__debugger_fault_handler = NULL;
}
+ xmon_map_scc();
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
+ struct tty_struct *tty)
+{
+ /* ensure xmon is enabled */
+ xmon_init(1);
+ debugger(pt_regs);
+}
+
+static struct sysrq_key_op sysrq_xmon_op =
+{
+ .handler = sysrq_handle_xmon,
+ .help_msg = "Xmon",
+ .action_msg = "Entering xmon",
+};
+
+static int __init setup_xmon_sysrq(void)
+{
+ register_sysrq_key('x', &sysrq_xmon_op);
+ return 0;
}
+__initcall(setup_xmon_sysrq);
+#endif /* CONFIG_MAGIC_SYSRQ */