aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile27
-rw-r--r--arch/powerpc/kernel/align.c530
-rw-r--r--arch/powerpc/kernel/asm-offsets.c6
-rw-r--r--arch/powerpc/kernel/dma_64.c151
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/idle_64.c121
-rw-r--r--arch/powerpc/kernel/ioctl32.c4
-rw-r--r--arch/powerpc/kernel/iomap.c146
-rw-r--r--arch/powerpc/kernel/iommu.c572
-rw-r--r--arch/powerpc/kernel/irq.c9
-rw-r--r--arch/powerpc/kernel/kprobes.c459
-rw-r--r--arch/powerpc/kernel/lparcfg.c51
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c358
-rw-r--r--arch/powerpc/kernel/misc_32.S8
-rw-r--r--arch/powerpc/kernel/module_64.c455
-rw-r--r--arch/powerpc/kernel/nvram_64.c742
-rw-r--r--arch/powerpc/kernel/pci_64.c1381
-rw-r--r--arch/powerpc/kernel/pci_direct_iommu.c94
-rw-r--r--arch/powerpc/kernel/pci_dn.c230
-rw-r--r--arch/powerpc/kernel/pci_iommu.c128
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c7
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c105
-rw-r--r--arch/powerpc/kernel/rtas_pci.c68
-rw-r--r--arch/powerpc/kernel/setup-common.c1
-rw-r--r--arch/powerpc/kernel/setup_32.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c5
-rw-r--r--arch/powerpc/kernel/signal_32.c7
-rw-r--r--arch/powerpc/kernel/signal_64.c6
-rw-r--r--arch/powerpc/kernel/smp.c7
-rw-r--r--arch/powerpc/kernel/time.c28
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S2
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S6
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S16
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S2
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S4
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S35
37 files changed, 5632 insertions, 147 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9a74b7ab03a..9ed551b6c17 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,12 +12,12 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
- irq.o signal_32.o pmc.o vdso.o
+ irq.o align.o signal_32.o pmc.o vdso.o
obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
signal_64.o ptrace32.o systbl.o \
paca.o ioctl32.o cpu_setup_power4.o \
- firmware.o sysfs.o udbg.o
+ firmware.o sysfs.o udbg.o idle_64.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
@@ -25,7 +25,7 @@ obj-$(CONFIG_PPC_OF) += of_device.o
procfs-$(CONFIG_PPC64) := proc_ppc64.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64) := rtas_pci.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y)
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
@@ -35,6 +35,7 @@ obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
udbgscc-$(CONFIG_PPC64) := udbg_scc.o
obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
+obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o
ifeq ($(CONFIG_PPC_MERGE),y)
@@ -49,12 +50,23 @@ extra-y += vmlinux.lds
obj-y += process.o init_task.o time.o \
prom.o traps.o setup-common.o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o
-obj-$(CONFIG_PPC64) += misc_64.o
+obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
obj-$(CONFIG_PPC_OF) += prom_init.o
obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_6xx) += idle_6xx.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+module-$(CONFIG_PPC64) += module_64.o
+obj-$(CONFIG_MODULES) += $(module-y)
+
+pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
+ pci_direct_iommu.o iomap.o
+obj-$(CONFIG_PCI) += $(pci64-y)
+
+kexec64-$(CONFIG_PPC64) += machine_kexec_64.o
+obj-$(CONFIG_KEXEC) += $(kexec64-y)
ifeq ($(CONFIG_PPC_ISERIES),y)
$(obj)/head_64.o: $(obj)/lparmap.s
@@ -62,13 +74,12 @@ AFLAGS_head_64.o += -I$(obj)
endif
else
-# stuff used from here for ARCH=ppc or ARCH=ppc64
+# stuff used from here for ARCH=ppc
smpobj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \
- setup-common.o $(smpobj-y)
-
endif
+obj-$(CONFIG_PPC64) += $(obj64-y)
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
new file mode 100644
index 00000000000..faaec9c6f78
--- /dev/null
+++ b/arch/powerpc/kernel/align.c
@@ -0,0 +1,530 @@
+/* align.c - handle alignment exceptions for the Power PC.
+ *
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright (c) 2001-2002 PPC64 team, IBM Corp
+ * 64-bit and Power4 support
+ * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * Merge ppc32 and ppc64 implementations
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+
+struct aligninfo {
+ unsigned char len;
+ unsigned char flags;
+};
+
+#define IS_XFORM(inst) (((inst) >> 26) == 31)
+#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
+
+#define INVALID { 0, 0 }
+
+#define LD 1 /* load */
+#define ST 2 /* store */
+#define SE 4 /* sign-extend value */
+#define F 8 /* to/from fp regs */
+#define U 0x10 /* update index register */
+#define M 0x20 /* multiple load/store */
+#define SW 0x40 /* byte swap int or ... */
+#define S 0x40 /* ... single-precision fp */
+#define SX 0x40 /* byte count in XER */
+#define HARD 0x80 /* string, stwcx. */
+
+#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
+
+#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
+
+/*
+ * The PowerPC stores certain bits of the instruction that caused the
+ * alignment exception in the DSISR register. This array maps those
+ * bits to information about the operand length and what the
+ * instruction would do.
+ */
+static struct aligninfo aligninfo[128] = {
+ { 4, LD }, /* 00 0 0000: lwz / lwarx */
+ INVALID, /* 00 0 0001 */
+ { 4, ST }, /* 00 0 0010: stw */
+ INVALID, /* 00 0 0011 */
+ { 2, LD }, /* 00 0 0100: lhz */
+ { 2, LD+SE }, /* 00 0 0101: lha */
+ { 2, ST }, /* 00 0 0110: sth */
+ { 4, LD+M }, /* 00 0 0111: lmw */
+ { 4, LD+F+S }, /* 00 0 1000: lfs */
+ { 8, LD+F }, /* 00 0 1001: lfd */
+ { 4, ST+F+S }, /* 00 0 1010: stfs */
+ { 8, ST+F }, /* 00 0 1011: stfd */
+ INVALID, /* 00 0 1100 */
+ { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
+ INVALID, /* 00 0 1110 */
+ { 8, ST }, /* 00 0 1111: std/stdu */
+ { 4, LD+U }, /* 00 1 0000: lwzu */
+ INVALID, /* 00 1 0001 */
+ { 4, ST+U }, /* 00 1 0010: stwu */
+ INVALID, /* 00 1 0011 */
+ { 2, LD+U }, /* 00 1 0100: lhzu */
+ { 2, LD+SE+U }, /* 00 1 0101: lhau */
+ { 2, ST+U }, /* 00 1 0110: sthu */
+ { 4, ST+M }, /* 00 1 0111: stmw */
+ { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
+ { 8, LD+F+U }, /* 00 1 1001: lfdu */
+ { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
+ { 8, ST+F+U }, /* 00 1 1011: stfdu */
+ INVALID, /* 00 1 1100 */
+ INVALID, /* 00 1 1101 */
+ INVALID, /* 00 1 1110 */
+ INVALID, /* 00 1 1111 */
+ { 8, LD }, /* 01 0 0000: ldx */
+ INVALID, /* 01 0 0001 */
+ { 8, ST }, /* 01 0 0010: stdx */
+ INVALID, /* 01 0 0011 */
+ INVALID, /* 01 0 0100 */
+ { 4, LD+SE }, /* 01 0 0101: lwax */
+ INVALID, /* 01 0 0110 */
+ INVALID, /* 01 0 0111 */
+ { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
+ { 4, LD+M+HARD }, /* 01 0 1001: lswi */
+ { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
+ { 4, ST+M+HARD }, /* 01 0 1011: stswi */
+ INVALID, /* 01 0 1100 */
+ { 8, LD+U }, /* 01 0 1101: ldu */
+ INVALID, /* 01 0 1110 */
+ { 8, ST+U }, /* 01 0 1111: stdu */
+ { 8, LD+U }, /* 01 1 0000: ldux */
+ INVALID, /* 01 1 0001 */
+ { 8, ST+U }, /* 01 1 0010: stdux */
+ INVALID, /* 01 1 0011 */
+ INVALID, /* 01 1 0100 */
+ { 4, LD+SE+U }, /* 01 1 0101: lwaux */
+ INVALID, /* 01 1 0110 */
+ INVALID, /* 01 1 0111 */
+ INVALID, /* 01 1 1000 */
+ INVALID, /* 01 1 1001 */
+ INVALID, /* 01 1 1010 */
+ INVALID, /* 01 1 1011 */
+ INVALID, /* 01 1 1100 */
+ INVALID, /* 01 1 1101 */
+ INVALID, /* 01 1 1110 */
+ INVALID, /* 01 1 1111 */
+ INVALID, /* 10 0 0000 */
+ INVALID, /* 10 0 0001 */
+ INVALID, /* 10 0 0010: stwcx. */
+ INVALID, /* 10 0 0011 */
+ INVALID, /* 10 0 0100 */
+ INVALID, /* 10 0 0101 */
+ INVALID, /* 10 0 0110 */
+ INVALID, /* 10 0 0111 */
+ { 4, LD+SW }, /* 10 0 1000: lwbrx */
+ INVALID, /* 10 0 1001 */
+ { 4, ST+SW }, /* 10 0 1010: stwbrx */
+ INVALID, /* 10 0 1011 */
+ { 2, LD+SW }, /* 10 0 1100: lhbrx */
+ { 4, LD+SE }, /* 10 0 1101 lwa */
+ { 2, ST+SW }, /* 10 0 1110: sthbrx */
+ INVALID, /* 10 0 1111 */
+ INVALID, /* 10 1 0000 */
+ INVALID, /* 10 1 0001 */
+ INVALID, /* 10 1 0010 */
+ INVALID, /* 10 1 0011 */
+ INVALID, /* 10 1 0100 */
+ INVALID, /* 10 1 0101 */
+ INVALID, /* 10 1 0110 */
+ INVALID, /* 10 1 0111 */
+ INVALID, /* 10 1 1000 */
+ INVALID, /* 10 1 1001 */
+ INVALID, /* 10 1 1010 */
+ INVALID, /* 10 1 1011 */
+ INVALID, /* 10 1 1100 */
+ INVALID, /* 10 1 1101 */
+ INVALID, /* 10 1 1110 */
+ { 0, ST+HARD }, /* 10 1 1111: dcbz */
+ { 4, LD }, /* 11 0 0000: lwzx */
+ INVALID, /* 11 0 0001 */
+ { 4, ST }, /* 11 0 0010: stwx */
+ INVALID, /* 11 0 0011 */
+ { 2, LD }, /* 11 0 0100: lhzx */
+ { 2, LD+SE }, /* 11 0 0101: lhax */
+ { 2, ST }, /* 11 0 0110: sthx */
+ INVALID, /* 11 0 0111 */
+ { 4, LD+F+S }, /* 11 0 1000: lfsx */
+ { 8, LD+F }, /* 11 0 1001: lfdx */
+ { 4, ST+F+S }, /* 11 0 1010: stfsx */
+ { 8, ST+F }, /* 11 0 1011: stfdx */
+ INVALID, /* 11 0 1100 */
+ { 8, LD+M }, /* 11 0 1101: lmd */
+ INVALID, /* 11 0 1110 */
+ { 8, ST+M }, /* 11 0 1111: stmd */
+ { 4, LD+U }, /* 11 1 0000: lwzux */
+ INVALID, /* 11 1 0001 */
+ { 4, ST+U }, /* 11 1 0010: stwux */
+ INVALID, /* 11 1 0011 */
+ { 2, LD+U }, /* 11 1 0100: lhzux */
+ { 2, LD+SE+U }, /* 11 1 0101: lhaux */
+ { 2, ST+U }, /* 11 1 0110: sthux */
+ INVALID, /* 11 1 0111 */
+ { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
+ { 8, LD+F+U }, /* 11 1 1001: lfdux */
+ { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
+ { 8, ST+F+U }, /* 11 1 1011: stfdux */
+ INVALID, /* 11 1 1100 */
+ INVALID, /* 11 1 1101 */
+ INVALID, /* 11 1 1110 */
+ INVALID, /* 11 1 1111 */
+};
+
+/*
+ * Create a DSISR value from the instruction
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+ unsigned dsisr;
+
+
+ /* bits 6:15 --> 22:31 */
+ dsisr = (instr & 0x03ff0000) >> 16;
+
+ if (IS_XFORM(instr)) {
+ /* bits 29:30 --> 15:16 */
+ dsisr |= (instr & 0x00000006) << 14;
+ /* bit 25 --> 17 */
+ dsisr |= (instr & 0x00000040) << 8;
+ /* bits 21:24 --> 18:21 */
+ dsisr |= (instr & 0x00000780) << 3;
+ } else {
+ /* bit 5 --> 17 */
+ dsisr |= (instr & 0x04000000) >> 12;
+ /* bits 1: 4 --> 18:21 */
+ dsisr |= (instr & 0x78000000) >> 17;
+ /* bits 30:31 --> 12:13 */
+ if (IS_DSFORM(instr))
+ dsisr |= (instr & 0x00000003) << 18;
+ }
+
+ return dsisr;
+}
+
+/*
+ * The dcbz (data cache block zero) instruction
+ * gives an alignment fault if used on non-cacheable
+ * memory. We handle the fault mainly for the
+ * case when we are running with the cache disabled
+ * for debugging.
+ */
+static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
+{
+ long __user *p;
+ int i, size;
+
+#ifdef __powerpc64__
+ size = ppc64_caches.dline_size;
+#else
+ size = L1_CACHE_BYTES;
+#endif
+ p = (long __user *) (regs->dar & -size);
+ if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
+ return -EFAULT;
+ for (i = 0; i < size / sizeof(long); ++i)
+ if (__put_user(0, p+i))
+ return -EFAULT;
+ return 1;
+}
+
+/*
+ * Emulate load & store multiple instructions
+ * On 64-bit machines, these instructions only affect/use the
+ * bottom 4 bytes of each register, and the loads clear the
+ * top 4 bytes of the affected register.
+ */
+#ifdef CONFIG_PPC64
+#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
+#else
+#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
+#endif
+
+static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int nb,
+ unsigned int flags, unsigned int instr)
+{
+ unsigned long *rptr;
+ unsigned int nb0, i;
+
+ /*
+ * We do not try to emulate 8 bytes multiple as they aren't really
+ * available in our operating environments and we don't try to
+ * emulate multiples operations in kernel land as they should never
+ * be used/generated there at least not on unaligned boundaries
+ */
+ if (unlikely((nb > 4) || !user_mode(regs)))
+ return 0;
+
+ /* lmw, stmw, lswi/x, stswi/x */
+ nb0 = 0;
+ if (flags & HARD) {
+ if (flags & SX) {
+ nb = regs->xer & 127;
+ if (nb == 0)
+ return 1;
+ } else {
+ if (__get_user(instr,
+ (unsigned int __user *)regs->nip))
+ return -EFAULT;
+ nb = (instr >> 11) & 0x1f;
+ if (nb == 0)
+ nb = 32;
+ }
+ if (nb + reg * 4 > 128) {
+ nb0 = nb + reg * 4 - 128;
+ nb = 128 - reg * 4;
+ }
+ } else {
+ /* lwm, stmw */
+ nb = (32 - reg) * 4;
+ }
+
+ if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
+ return -EFAULT; /* bad address */
+
+ rptr = &regs->gpr[reg];
+ if (flags & LD) {
+ /*
+ * This zeroes the top 4 bytes of the affected registers
+ * in 64-bit mode, and also zeroes out any remaining
+ * bytes of the last register for lsw*.
+ */
+ memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
+ if (nb0 > 0)
+ memset(&regs->gpr[0], 0,
+ ((nb0 + 3) / 4) * sizeof(unsigned long));
+
+ for (i = 0; i < nb; ++i)
+ if (__get_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i)
+ if (__get_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ }
+
+ } else {
+ for (i = 0; i < nb; ++i)
+ if (__put_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ if (nb0 > 0) {
+ rptr = &regs->gpr[0];
+ addr += nb;
+ for (i = 0; i < nb0; ++i)
+ if (__put_user(REG_BYTE(rptr, i), addr + i))
+ return -EFAULT;
+ }
+ }
+ return 1;
+}
+
+
+/*
+ * Called on alignment exception. Attempts to fixup
+ *
+ * Return 1 on success
+ * Return 0 if unable to handle the interrupt
+ * Return -EFAULT if data address is bad
+ */
+
+int fix_alignment(struct pt_regs *regs)
+{
+ unsigned int instr, nb, flags;
+ unsigned int reg, areg;
+ unsigned int dsisr;
+ unsigned char __user *addr;
+ unsigned char __user *p;
+ int ret, t;
+ union {
+ u64 ll;
+ double dd;
+ unsigned char v[8];
+ struct {
+ unsigned hi32;
+ int low32;
+ } x32;
+ struct {
+ unsigned char hi48[6];
+ short low16;
+ } x16;
+ } data;
+
+ /*
+ * We require a complete register set, if not, then our assembly
+ * is broken
+ */
+ CHECK_FULL_REGS(regs);
+
+ dsisr = regs->dsisr;
+
+ /* Some processors don't provide us with a DSISR we can use here,
+ * let's make one up from the instruction
+ */
+ if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
+ unsigned int real_instr;
+ if (unlikely(__get_user(real_instr,
+ (unsigned int __user *)regs->nip)))
+ return -EFAULT;
+ dsisr = make_dsisr(real_instr);
+ }
+
+ /* extract the operation and registers from the dsisr */
+ reg = (dsisr >> 5) & 0x1f; /* source/dest register */
+ areg = dsisr & 0x1f; /* register to update */
+ instr = (dsisr >> 10) & 0x7f;
+ instr |= (dsisr >> 13) & 0x60;
+
+ /* Lookup the operation in our table */
+ nb = aligninfo[instr].len;
+ flags = aligninfo[instr].flags;
+
+ /* DAR has the operand effective address */
+ addr = (unsigned char __user *)regs->dar;
+
+ /* A size of 0 indicates an instruction we don't support, with
+ * the exception of DCBZ which is handled as a special case here
+ */
+ if (instr == DCBZ)
+ return emulate_dcbz(regs, addr);
+ if (unlikely(nb == 0))
+ return 0;
+
+ /* Load/Store Multiple instructions are handled in their own
+ * function
+ */
+ if (flags & M)
+ return emulate_multiple(regs, addr, reg, nb, flags, instr);
+
+ /* Verify the address of the operand */
+ if (unlikely(user_mode(regs) &&
+ !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+ addr, nb)))
+ return -EFAULT;
+
+ /* Force the fprs into the save area so we can reference them */
+ if (flags & F) {
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+ flush_fp_to_thread(current);
+ }
+
+ /* If we are loading, get the data from user space, else
+ * get it from register values
+ */
+ if (flags & LD) {
+ data.ll = 0;
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __get_user(data.v[0], p++);
+ ret |= __get_user(data.v[1], p++);
+ ret |= __get_user(data.v[2], p++);
+ ret |= __get_user(data.v[3], p++);
+ case 4:
+ ret |= __get_user(data.v[4], p++);
+ ret |= __get_user(data.v[5], p++);
+ case 2:
+ ret |= __get_user(data.v[6], p++);
+ ret |= __get_user(data.v[7], p++);
+ if (unlikely(ret))
+ return -EFAULT;
+ }
+ } else if (flags & F)
+ data.dd = current->thread.fpr[reg];
+ else
+ data.ll = regs->gpr[reg];
+
+ /* Perform other misc operations like sign extension, byteswap,
+ * or floating point single precision conversion
+ */
+ switch (flags & ~U) {
+ case LD+SE: /* sign extend */
+ if ( nb == 2 )
+ data.ll = data.x16.low16;
+ else /* nb must be 4 */
+ data.ll = data.x32.low32;
+ break;
+ case LD+S: /* byte-swap */
+ case ST+S:
+ if (nb == 2) {
+ SWAP(data.v[6], data.v[7]);
+ } else {
+ SWAP(data.v[4], data.v[7]);
+ SWAP(data.v[5], data.v[6]);
+ }
+ break;
+
+ /* Single-precision FP load and store require conversions... */
+ case LD+F+S:
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ break;
+ case ST+F+S:
+#ifdef CONFIG_PPC_FPU
+ preempt_disable();
+ enable_kernel_fp();
+ cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+ preempt_enable();
+#else
+ return 0;
+#endif
+ break;
+ }
+
+ /* Store result to memory or update registers */
+ if (flags & ST) {
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __put_user(data.v[0], p++);
+ ret |= __put_user(data.v[1], p++);
+ ret |= __put_user(data.v[2], p++);
+ ret |= __put_user(data.v[3], p++);
+ case 4:
+ ret |= __put_user(data.v[4], p++);
+ ret |= __put_user(data.v[5], p++);
+ case 2:
+ ret |= __put_user(data.v[6], p++);
+ ret |= __put_user(data.v[7], p++);
+ }
+ if (unlikely(ret))
+ return -EFAULT;
+ } else if (flags & F)
+ current->thread.fpr[reg] = data.dd;
+ else
+ regs->gpr[reg] = data.ll;
+
+ /* Update RA as needed */
+ if (flags & U)
+ regs->gpr[areg] = regs->dar;
+
+ return 1;
+}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4550eb4f4fb..91538d2445b 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -270,13 +270,15 @@ int main(void)
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+ DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
#else
DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
- DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
- DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
#endif
/* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 00000000000..7c3419656cc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Implements the generic device dma API for ppc64. Handles
+ * the pci and vio busses
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+/* Include the busses we support */
+#include <linux/pci.h>
+#include <asm/vio.h>
+#include <asm/scatterlist.h>
+#include <asm/bug.h>
+
+static struct dma_mapping_ops *get_dma_ops(struct device *dev)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return &pci_dma_ops;
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return &vio_dma_ops;
+#endif
+ return NULL;
+}
+
+int dma_supported(struct device *dev, u64 mask)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->dma_supported(dev, mask);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+#ifdef CONFIG_PCI
+ if (dev->bus == &pci_bus_type)
+ return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+#ifdef CONFIG_IBMVIO
+ if (dev->bus == &vio_bus_type)
+ return -EIO;
+#endif /* CONFIG_IBMVIO */
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+ BUG();
+ return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev, cpu_addr, size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_addr, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_single(dev,
+ (page_address(page) + offset), size, direction);
+ BUG();
+ return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_single(dev, dma_address, size, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ return dma_ops->map_sg(dev, sg, nents, direction);
+ BUG();
+ return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+ enum dma_data_direction direction)
+{
+ struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+ if (dma_ops)
+ dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+ else
+ BUG();
+}
+EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 5063c603fad..8d60fa99fc4 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
* Copyright 2002-2004 MontaVista Software, Inc.
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
- * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle_64.c
new file mode 100644
index 00000000000..b879d3057ef
--- /dev/null
+++ b/arch/powerpc/kernel/idle_64.c
@@ -0,0 +1,121 @@
+/*
+ * Idle daemon for PowerPC. Idle daemon will handle any action
+ * that needs to be taken when the system becomes idle.
+ *
+ * Originally Written by Cort Dougan (cort@cs.nmt.edu)
+ *
+ * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * Additional shared processor, SMT, and firmware support
+ * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/sysctl.h>
+
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+extern void power4_idle(void);
+
+void default_idle(void)
+{
+ unsigned int cpu = smp_processor_id();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while (1) {
+ if (!need_resched()) {
+ while (!need_resched() && !cpu_is_offline(cpu)) {
+ ppc64_runlatch_off();
+
+ /*
+ * Go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
+ }
+
+ HMT_medium();
+ }
+
+ ppc64_runlatch_on();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+}
+
+void native_idle(void)
+{
+ while (1) {
+ ppc64_runlatch_off();
+
+ if (!need_resched())
+ power4_idle();
+
+ if (need_resched()) {
+ ppc64_runlatch_on();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ }
+
+ if (cpu_is_offline(smp_processor_id()) &&
+ system_state == SYSTEM_RUNNING)
+ cpu_die();
+ }
+}
+
+void cpu_idle(void)
+{
+ BUG_ON(NULL == ppc_md.idle_loop);
+ ppc_md.idle_loop();
+}
+
+int powersave_nap;
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register the sysctl to set/clear powersave_nap.
+ */
+static ctl_table powersave_nap_ctl_table[]={
+ {
+ .ctl_name = KERN_PPC_POWERSAVE_NAP,
+ .procname = "powersave-nap",
+ .data = &powersave_nap,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { 0, },
+};
+static ctl_table powersave_nap_sysctl_root[] = {
+ { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
+ { 0,},
+};
+
+static int __init
+register_powersave_nap_sysctl(void)
+{
+ register_sysctl_table(powersave_nap_sysctl_root, 0);
+
+ return 0;
+}
+__initcall(register_powersave_nap_sysctl);
+#endif
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
index 3fa6a93adbd..0fa3d27fef0 100644
--- a/arch/powerpc/kernel/ioctl32.c
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -40,10 +40,6 @@ IOCTL_TABLE_START
#define DECLARES
#include "compat_ioctl.c"
-/* Little p (/dev/rtc, /dev/envctrl, etc.) */
-COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
-COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
-
IOCTL_TABLE_END
int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 00000000000..6160c8dbb7c
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
+/*
+ * arch/ppc64/kernel/iomap.c
+ *
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+/*
+ * Here comes the ppc64 implementation of the IOMAP
+ * interfaces.
+ */
+unsigned int fastcall ioread8(void __iomem *addr)
+{
+ return readb(addr);
+}
+unsigned int fastcall ioread16(void __iomem *addr)
+{
+ return readw(addr);
+}
+unsigned int fastcall ioread16be(void __iomem *addr)
+{
+ return in_be16(addr);
+}
+unsigned int fastcall ioread32(void __iomem *addr)
+{
+ return readl(addr);
+}
+unsigned int fastcall ioread32be(void __iomem *addr)
+{
+ return in_be32(addr);
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+void fastcall iowrite8(u8 val, void __iomem *addr)
+{
+ writeb(val, addr);
+}
+void fastcall iowrite16(u16 val, void __iomem *addr)
+{
+ writew(val, addr);
+}
+void fastcall iowrite16be(u16 val, void __iomem *addr)
+{
+ out_be16(addr, val);
+}
+void fastcall iowrite32(u32 val, void __iomem *addr)
+{
+ writel(val, addr);
+}
+void fastcall iowrite32be(u32 val, void __iomem *addr)
+{
+ out_be32(addr, val);
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat read/write" functions. Note the
+ * non-CPU byte order. We do things in "IO byteorder"
+ * here.
+ *
+ * FIXME! We could make these do EEH handling if we really
+ * wanted. Not clear if we do.
+ */
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insb((u8 __iomem *) addr, dst, count);
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insw_ns((u16 __iomem *) addr, dst, count);
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ _insl_ns((u32 __iomem *) addr, dst, count);
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsb((u8 __iomem *) addr, src, count);
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsw_ns((u16 __iomem *) addr, src, count);
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ _outsl_ns((u32 __iomem *) addr, src, count);
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+ if (!_IO_IS_VALID(port))
+ return NULL;
+ return (void __iomem *) (port+pci_io_base);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+ unsigned long start = pci_resource_start(dev, bar);
+ unsigned long len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len)
+ return NULL;
+ if (max && len > max)
+ len = max;
+ if (flags & IORESOURCE_IO)
+ return ioport_map(start, len);
+ if (flags & IORESOURCE_MEM)
+ return ioremap(start, len);
+ /* What? */
+ return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 00000000000..4d9b4388918
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
+/*
+ * arch/ppc64/kernel/iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes, virtual merging:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ * and Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#define DBG(...)
+
+#ifdef CONFIG_IOMMU_VMERGE
+static int novmerge = 0;
+#else
+static int novmerge = 1;
+#endif
+
+static int __init setup_iommu(char *str)
+{
+ if (!strcmp(str, "novmerge"))
+ novmerge = 1;
+ else if (!strcmp(str, "vmerge"))
+ novmerge = 0;
+ return 1;
+}
+
+__setup("iommu=", setup_iommu);
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+ unsigned long npages,
+ unsigned long *handle,
+ unsigned int align_order)
+{
+ unsigned long n, end, i, start;
+ unsigned long limit;
+ int largealloc = npages > 15;
+ int pass = 0;
+ unsigned long align_mask;
+
+ align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages) == 0) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ /* Use only half of the table for small allocs (15 pages or less) */
+ limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+
+ if (largealloc && start < tbl->it_halfpoint)
+ start = tbl->it_halfpoint;
+
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the initial start.
+ */
+ if (start >= limit)
+ start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+ again:
+
+ n = find_next_zero_bit(tbl->it_map, limit, start);
+
+ /* Align allocation */
+ n = (n + align_mask) & ~align_mask;
+
+ end = n + npages;
+
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 2)) {
+ /* First failure, just rescan the half of the table.
+ * Second failure, rescan the other half of the table.
+ */
+ start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+ limit = pass ? tbl->it_size : limit;
+ pass++;
+ goto again;
+ } else {
+ /* Third failure, give up */
+ return DMA_ERROR_CODE;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ if (test_bit(i, tbl->it_map)) {
+ start = i+1;
+ goto again;
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, tbl->it_map);
+
+ /* Bump the hint to a new block for small allocs. */
+ if (largealloc) {
+ /* Don't bump to new block to avoid fragmentation */
+ tbl->it_largehint = end;
+ } else {
+ /* Overflow will be taken care of at the next allocation */
+ tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ ~(tbl->it_blocksize - 1);
+ }
+
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
+ return n;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
+ unsigned int npages, enum dma_data_direction direction,
+ unsigned int align_order)
+{
+ unsigned long entry, flags;
+ dma_addr_t ret = DMA_ERROR_CODE;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ entry = iommu_range_alloc(tbl, npages, NULL, align_order);
+
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
+
+ entry += tbl->it_offset; /* Offset into real TCE table */
+ ret = entry << PAGE_SHIFT; /* Set the return dma address */
+
+ /* Put the TCEs in the HW table */
+ ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
+ direction);
+
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ return ret;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long i;
+
+ entry = dma_addr >> PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ if (((free_entry + npages) > tbl->it_size) ||
+ (entry < tbl->it_offset)) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_free: invalid entry\n");
+ printk(KERN_INFO "\tentry = 0x%lx\n", entry);
+ printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ ppc_md.tce_free(tbl, entry, npages);
+
+ for (i = 0; i < npages; i++)
+ __clear_bit(free_entry+i, tbl->it_map);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ __iommu_free(tbl, dma_addr, npages);
+
+ /* Make sure TLB cache is flushed if the HW needs it. We do
+ * not do an mb() here on purpose, it is not needed on any of
+ * the current platforms.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned long flags;
+ struct scatterlist *s, *outs, *segstart;
+ int outcount, incount;
+ unsigned long handle;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if ((nelems == 0) || !tbl)
+ return 0;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ DBG("mapping %d elements:\n", nelems);
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ for (s = outs; nelems; nelems--, s++) {
+ unsigned long vaddr, npages, entry, slen;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ vaddr = (unsigned long)page_address(s->page) + s->offset;
+ npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+ entry = iommu_range_alloc(tbl, npages, &handle, 0);
+
+ DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
+ " npages %lx\n", tbl, vaddr, npages);
+ goto failure;
+ }
+
+ /* Convert entry to a dma_addr_t */
+ entry += tbl->it_offset;
+ dma_addr = entry << PAGE_SHIFT;
+ dma_addr |= s->offset;
+
+ DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
+ npages, entry, dma_addr);
+
+ /* Insert into HW table */
+ ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ DBG(" - trying merge...\n");
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if (novmerge || (dma_addr != dma_next)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++; outs++;
+ DBG(" can't merge, new segment.\n");
+ } else {
+ outs->dma_length += s->length;
+ DBG(" merged, new len: %lx\n", outs->dma_length);
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ DBG(" - filling new segment.\n");
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
+
+ DBG(" - dma next is: %lx\n", dma_next);
+ }
+
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+
+ DBG("mapped %d elements:\n", outcount);
+
+ /* For the sake of iommu_unmap_sg, we clear out the length in the
+ * next entry of the sglist if we didn't fill the list completely
+ */
+ if (outcount < incount) {
+ outs++;
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+ return outcount;
+
+ failure:
+ for (s = &sglist[0]; s <= outs; s++) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & PAGE_MASK;
+ npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
+ >> PAGE_SHIFT;
+ __iommu_free(tbl, vaddr, npages);
+ }
+ }
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ unsigned long flags;
+
+ BUG_ON(direction == DMA_NONE);
+
+ if (!tbl)
+ return;
+
+ spin_lock_irqsave(&(tbl->it_lock), flags);
+
+ while (nelems--) {
+ unsigned int npages;
+ dma_addr_t dma_handle = sglist->dma_address;
+
+ if (sglist->dma_length == 0)
+ break;
+ npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
+ - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
+ __iommu_free(tbl, dma_handle, npages);
+ sglist++;
+ }
+
+ /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+ * do not do an mb() here, the affected platforms do not need it
+ * when freeing.
+ */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl)
+{
+ unsigned long sz;
+ static int welcomed = 0;
+
+ /* Set aside 1/4 of the table for large allocations. */
+ tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+ /* number of bytes needed for the bitmap */
+ sz = (tbl->it_size + 7) >> 3;
+
+ tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
+ if (!tbl->it_map)
+ panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+
+ memset(tbl->it_map, 0, sz);
+
+ tbl->it_hint = 0;
+ tbl->it_largehint = tbl->it_halfpoint;
+ spin_lock_init(&tbl->it_lock);
+
+ /* Clear the hardware table in case firmware left allocations in it */
+ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+
+ if (!welcomed) {
+ printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+ novmerge ? "disabled" : "enabled");
+ welcomed = 1;
+ }
+
+ return tbl;
+}
+
+void iommu_free_table(struct device_node *dn)
+{
+ struct pci_dn *pdn = dn->data;
+ struct iommu_table *tbl = pdn->iommu_table;
+ unsigned long bitmap_sz, i;
+ unsigned int order;
+
+ if (!tbl || !tbl->it_map) {
+ printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
+ dn->full_name);
+ return;
+ }
+
+ /* verify that table contains no entries */
+ /* it_size is in entries, and we're examining 64 at a time */
+ for (i = 0; i < (tbl->it_size/64); i++) {
+ if (tbl->it_map[i] != 0) {
+ printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
+ __FUNCTION__, dn->full_name);
+ break;
+ }
+ }
+
+ /* calculate bitmap size in bytes */
+ bitmap_sz = (tbl->it_size + 7) / 8;
+
+ /* free bitmap */
+ order = get_order(bitmap_sz);
+ free_pages((unsigned long) tbl->it_map, order);
+
+ /* free table */
+ kfree(tbl);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_addr_t dma_handle = DMA_ERROR_CODE;
+ unsigned long uaddr;
+ unsigned int npages;
+
+ BUG_ON(direction == DMA_NONE);
+
+ uaddr = (unsigned long)vaddr;
+ npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
+ npages >>= PAGE_SHIFT;
+
+ if (tbl) {
+ dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
+ if (dma_handle == DMA_ERROR_CODE) {
+ if (printk_ratelimit()) {
+ printk(KERN_INFO "iommu_alloc failed, "
+ "tbl %p vaddr %p npages %d\n",
+ tbl, vaddr, npages);
+ }
+ } else
+ dma_handle |= (uaddr & ~PAGE_MASK);
+ }
+
+ return dma_handle;
+}
+
+void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+
+ if (tbl)
+ iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
+ (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret = NULL;
+ dma_addr_t mapping;
+ unsigned int npages, order;
+
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ order = get_order(size);
+
+ /*
+ * Client asked for way too much space. This is checked later
+ * anyway. It is easier to debug here for the drivers than in
+ * the tce tables.
+ */
+ if (order >= IOMAP_MAX_ORDER) {
+ printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+ return NULL;
+ }
+
+ if (!tbl)
+ return NULL;
+
+ /* Alloc enough pages (and possibly more) */
+ ret = (void *)__get_free_pages(flag, order);
+ if (!ret)
+ return NULL;
+ memset(ret, 0, size);
+
+ /* Set up tces to cover the allocated range */
+ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
+ if (mapping == DMA_ERROR_CODE) {
+ free_pages((unsigned long)ret, order);
+ ret = NULL;
+ } else
+ *dma_handle = mapping;
+ return ret;
+}
+
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ unsigned int npages;
+
+ if (tbl) {
+ size = PAGE_ALIGN(size);
+ npages = size >> PAGE_SHIFT;
+ iommu_free(tbl, dma_handle, npages);
+ free_pages((unsigned long)vaddr, get_order(size));
+ }
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4b7940693f3..5a71ed9612f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -71,6 +71,11 @@
#include <asm/paca.h>
#endif
+int __irq_offset_value;
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(__irq_offset_value);
+#endif
+
static int ppc_spurious_interrupts;
#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
@@ -98,7 +103,6 @@ extern atomic_t ipi_sent;
EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
-int __irq_offset_value;
u64 ppc64_interrupt_controller;
#endif /* CONFIG_PPC64 */
@@ -311,7 +315,6 @@ void __init init_IRQ(void)
}
#ifdef CONFIG_PPC64
-#ifndef CONFIG_PPC_ISERIES
/*
* Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
*/
@@ -420,8 +423,6 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
}
-#endif /* CONFIG_PPC_ISERIES */
-
#ifdef CONFIG_IRQSTACKS
struct thread_info *softirq_ctx[NR_CPUS];
struct thread_info *hardirq_ctx[NR_CPUS];
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 00000000000..511af54e623
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
+/*
+ * Kernel Probes (KProbes)
+ * arch/ppc64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes contributions from
+ * Rusty Russell).
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ * for PPC64
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sstep.h>
+
+static DECLARE_MUTEX(kprobe_mutex);
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ int ret = 0;
+ kprobe_opcode_t insn = *p->addr;
+
+ if ((unsigned long)p->addr & 0x03) {
+ printk("Attempt to register kprobe at an unaligned address\n");
+ ret = -EINVAL;
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+ printk("Cannot register a kprobe on rfid or mtmsrd\n");
+ ret = -EINVAL;
+ }
+
+ /* insn must be on a special executable page on ppc64 */
+ if (!ret) {
+ down(&kprobe_mutex);
+ p->ainsn.insn = get_insn_slot();
+ up(&kprobe_mutex);
+ if (!p->ainsn.insn)
+ ret = -ENOMEM;
+ }
+ return ret;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+ memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ *p->addr = p->opcode;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ down(&kprobe_mutex);
+ free_insn_slot(p->ainsn.insn);
+ up(&kprobe_mutex);
+}
+
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ kprobe_opcode_t insn = *p->ainsn.insn;
+
+ regs->msr |= MSR_SE;
+
+ /* single step inline if it is a trap variant */
+ if (is_trap(insn))
+ regs->nip = (unsigned long)p->addr;
+ else
+ regs->nip = (unsigned long)p->ainsn.insn;
+}
+
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_msr = regs->msr;
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
+}
+
+static inline int kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ unsigned int *addr = (unsigned int *)regs->nip;
+ struct kprobe_ctlblk *kcb;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ is_trap(insn)) {
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+ /* We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_saved_msr = regs->msr;
+ p->nmissed++;
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ goto ss_probe;
+ }
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != BREAKPOINT_INSTRUCTION) {
+ /*
+ * PowerPC has multiple variants of the "trap"
+ * instruction. If the current instruction is a
+ * trap variant, it could belong to someone else
+ */
+ kprobe_opcode_t cur_insn = *addr;
+ if (is_trap(cur_insn))
+ goto no_kprobe;
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it */
+ goto no_kprobe;
+ }
+
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ set_current_kprobe(p, regs, kcb);
+ if (p->pre_handler && p->pre_handler(p, regs))
+ /* handler has already set things up, so skip ss setup */
+ return 1;
+
+ss_probe:
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->nip = orig_ret_address;
+
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ int ret;
+ unsigned int insn = *p->ainsn.insn;
+
+ regs->nip = (unsigned long)p->addr;
+ ret = emulate_step(regs, insn);
+ if (ret == 0)
+ regs->nip = (unsigned long)p->addr + 4;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs);
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, msr
+ * will have SE set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (regs->msr & MSR_SE)
+ return 0;
+
+ return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(cur, regs);
+ regs->msr &= ~MSR_SE;
+ regs->msr |= kcb->kprobe_saved_msr;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id() */
+ preempt_disable();
+ if (kprobe_running() &&
+ kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+ /* setup return addr to the jprobe handler routine */
+ regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+ regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ asm volatile("trap" ::: "memory");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+};
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+ * FIXME - we should ideally be validating that we got here 'cos
+ * of the "trap" in jprobe_return() above, before restoring the
+ * saved regs...
+ */
+ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ preempt_enable_no_resched();
+ return 1;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 1b3ba8a440a..9dda16ccde7 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -42,32 +42,6 @@
/* #define LPARCFG_DEBUG */
-/* find a better place for this function... */
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
-{
- if (rc == 0) /* success, return */
- return;
-/* check for null tag ? */
- if (rc == H_Hardware)
- printk(KERN_INFO
- "plpar-hcall (%s) failed with hardware fault\n", tag);
- else if (rc == H_Function)
- printk(KERN_INFO
- "plpar-hcall (%s) failed; function not allowed\n", tag);
- else if (rc == H_Authority)
- printk(KERN_INFO
- "plpar-hcall (%s) failed; not authorized to this function\n",
- tag);
- else if (rc == H_Parameter)
- printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
- tag);
- else
- printk(KERN_INFO
- "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
- tag, rc);
-
-}
-
static struct proc_dir_entry *proc_ppc64_lparcfg;
#define LPARCFG_BUFF_SIZE 4096
@@ -172,6 +146,31 @@ static int lparcfg_data(struct seq_file *m, void *v)
/*
* Methods used to fetch LPAR data when running on a pSeries platform.
*/
+/* find a better place for this function... */
+static void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
/*
* H_GET_PPP hcall returns info in 4 parms.
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 00000000000..97c51e452be
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h> /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+
+#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need to have a crash shutdown in here
+ * and if what it will achieve. Letting it be now to compile the code
+ * in generic kexec environment
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* do nothing right now */
+ /* smp_relase_cpus() if we want smp on panic kernel */
+ /* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ int i;
+ unsigned long begin, end; /* limits of segment */
+ unsigned long low, high; /* limits of blocked memory range */
+ struct device_node *node;
+ unsigned long *basep;
+ unsigned int *sizep;
+
+ if (!ppc_md.hpte_clear_all)
+ return -ENOENT;
+
+ /*
+ * Since we use the kernel fault handlers and paging code to
+ * handle the virtual mode, we must make sure no destination
+ * overlaps kernel static data or bss.
+ */
+ for (i = 0; i < image->nr_segments; i++)
+ if (image->segment[i].mem < __pa(_end))
+ return -ETXTBSY;
+
+ /*
+ * For non-LPAR, we absolutely can not overwrite the mmu hash
+ * table, since we are still using the bolted entries in it to
+ * do the copy. Check that here.
+ *
+ * It is safe if the end is below the start of the blocked
+ * region (end <= low), or if the beginning is after the
+ * end of the blocked region (begin >= high). Use the
+ * boolean identity !(a || b) === (!a && !b).
+ */
+ if (htab_address) {
+ low = __pa(htab_address);
+ high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ /* We also should not overwrite the tce tables */
+ for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+ node = of_find_node_by_type(node, "pci")) {
+ basep = (unsigned long *)get_property(node, "linux,tce-base",
+ NULL);
+ sizep = (unsigned int *)get_property(node, "linux,tce-size",
+ NULL);
+ if (basep == NULL || sizep == NULL)
+ continue;
+
+ low = *basep;
+ high = low + (*sizep);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ begin = image->segment[i].mem;
+ end = begin + image->segment[i].memsz;
+
+ if ((begin < high) && (end > low))
+ return -ETXTBSY;
+ }
+ }
+
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+ /* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+ unsigned long entry;
+ unsigned long *ptr;
+ void *dest;
+ void *addr;
+
+ /*
+ * We rely on kexec_load to create a lists that properly
+ * initializes these pointers before they are used.
+ * We will still crash if the list is wrong, but at least
+ * the compiler will be quiet.
+ */
+ ptr = NULL;
+ dest = NULL;
+
+ for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+ addr = __va(entry & PAGE_MASK);
+
+ switch (entry & IND_FLAGS) {
+ case IND_DESTINATION:
+ dest = addr;
+ break;
+ case IND_INDIRECTION:
+ ptr = addr;
+ break;
+ case IND_SOURCE:
+ copy_page(dest, addr);
+ dest += PAGE_SIZE;
+ }
+ }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+ long i, nr_segments = image->nr_segments;
+ struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+ /* save the ranges on the stack to efficiently flush the icache */
+ memcpy(ranges, image->segment, sizeof(ranges));
+
+ /*
+ * After this call we may not use anything allocated in dynamic
+ * memory, including *image.
+ *
+ * Only globals and the stack are allowed.
+ */
+ copy_segments(image->head);
+
+ /*
+ * we need to clear the icache for all dest pages sometime,
+ * including ones that were in place on the original copy
+ */
+ for (i = 0; i < nr_segments; i++)
+ flush_icache_range(ranges[i].mem + KERNELBASE,
+ ranges[i].mem + KERNELBASE +
+ ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 1);
+
+ local_irq_disable();
+ kexec_smp_wait();
+ /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+ int my_cpu, i, notified=-1;
+
+ smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ my_cpu = get_cpu();
+
+ /* check the others cpus are now down (via paca hw cpu id == -1) */
+ for (i=0; i < NR_CPUS; i++) {
+ if (i == my_cpu)
+ continue;
+
+ while (paca[i].hw_cpu_id != -1) {
+ barrier();
+ if (!cpu_possible(i)) {
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " possible, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (!cpu_online(i)) {
+ /* Fixme: this can be spinning in
+ * pSeries_secondary_wait with a paca
+ * waiting for it to go online.
+ */
+ printk("kexec: cpu %d hw_cpu_id %d is not"
+ " online, ignoring\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
+ if (i != notified) {
+ printk( "kexec: waiting for cpu %d (physical"
+ " %d) to go down\n",
+ i, paca[i].hw_cpu_id);
+ notified = i;
+ }
+ }
+ }
+
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
+ put_cpu();
+
+ local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+ /*
+ * move the secondarys to us so that we can copy
+ * the new kernel 0-0x100 safely
+ *
+ * do this if kexec in setup.c ?
+ *
+ * We need to release the cpus if we are ever going from an
+ * UP to an SMP kernel.
+ */
+ smp_release_cpus();
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+ local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image. We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+ __attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+ /* prepare control code if any */
+
+ /* shutdown other cpus into our wait loop and quiesce interrupts */
+ kexec_prepare_cpus();
+
+ /* switch to a staticly allocated stack. Based on irq stack code.
+ * XXX: the task struct will likely be invalid once we do the copy!
+ */
+ kexec_stack.thread_info.task = current_thread_info()->task;
+ kexec_stack.thread_info.flags = 0;
+
+ /* Some things are best done in assembly. Finding globals with
+ * a toc is easier in C, so pass in what we can.
+ */
+ kexec_sequence(&kexec_stack, image->start, image,
+ page_address(image->control_code_page),
+ ppc_md.hpte_clear_all);
+ /* NOTREACHED */
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base, htab_size, kernel_end;
+
+static struct property htab_base_prop = {
+ .name = "linux,htab-base",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_base,
+};
+
+static struct property htab_size_prop = {
+ .name = "linux,htab-size",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&htab_size,
+};
+
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(unsigned long),
+ .value = (unsigned char *)&kernel_end,
+};
+
+static void __init export_htab_values(void)
+{
+ struct device_node *node;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return;
+
+ kernel_end = __pa(_end);
+ prom_add_property(node, &kernel_end_prop);
+
+ /* On machines with no htab htab_address is NULL */
+ if (NULL == htab_address)
+ goto out;
+
+ htab_base = __pa(htab_address);
+ prom_add_property(node, &htab_base_prop);
+
+ htab_size = 1UL << ppc64_pft_size;
+ prom_add_property(node, &htab_size_prop);
+
+ out:
+ of_node_put(node);
+}
+
+void __init kexec_setup(void)
+{
+ export_htab_values();
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index f6d84a75ed2..624a983a967 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -27,14 +27,6 @@
.text
- .align 5
-_GLOBAL(__delay)
- cmpwi 0,r3,0
- mtctr r3
- beqlr
-1: bdnz 1b
- blr
-
/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 00000000000..928b8581fcb
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
+/* Kernel module help for PPC64.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+
+/* FIXME: We don't do .init separately. To do this, we'd need to have
+ a separate r2 value in the init and core section, and stub between
+ them, too.
+
+ Using a magic allocator which places modules within 32MB solves
+ this, and makes other things simpler. Anton?
+ --RR. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* There's actually a third entry here, but it's unused */
+struct ppc64_opd_entry
+{
+ unsigned long funcaddr;
+ unsigned long r2;
+};
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+ the kernel itself). But on PPC64, these need to be used for every
+ jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry
+{
+ /* 28 byte jump instruction sequence (7 instructions) */
+ unsigned char jump[28];
+ unsigned char unused[4];
+ /* Data for the above code */
+ struct ppc64_opd_entry opd;
+};
+
+/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
+ function which may be more than 24-bits away. We could simply
+ patch the new r2 value and function pointer into the stub, but it's
+ significantly shorter to put these values at the end of the stub
+ code, and patch the stub address (32-bits relative to the TOC ptr,
+ r2) into the stub. */
+static struct ppc64_stub_entry ppc64_stub =
+{ .jump = {
+ 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
+ 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
+ /* Save current r2 value in magic place on the stack. */
+ 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
+ 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
+ 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
+ 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
+ 0x4e, 0x80, 0x04, 0x20 /* bctr */
+} };
+
+/* Count how many different 24-bit relocations (different symbol,
+ different addend) */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+{
+ unsigned int i, j, ret = 0;
+
+ /* FIXME: Only count external ones --RR */
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
+ continue;
+ for (j = 0; j < i; j++) {
+ /* If this addend appeared before, it's
+ already been counted */
+ if (rela[i].r_info == rela[j].r_info
+ && rela[i].r_addend == rela[j].r_addend)
+ break;
+ }
+ if (j == i) ret++;
+ }
+ return ret;
+}
+
+void *module_alloc(unsigned long size)
+{
+ if (size == 0)
+ return NULL;
+
+ return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+ /* FIXME: If module_region == mod->init_region, trim exception
+ table entries. */
+}
+
+/* Get size of potential trampolines required. */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs)
+{
+ /* One extra reloc so it's always 0-funcaddr terminated */
+ unsigned long relocs = 1;
+ unsigned i;
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ DEBUGP("Found relocations in section %u\n", i);
+ DEBUGP("Ptr: %p. Number: %lu\n",
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela));
+ }
+ }
+
+ DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+ return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+ unsigned long size)
+{
+ struct modversion_info *end;
+
+ for (end = (void *)vers + size; vers < end; vers++)
+ if (vers->name[0] == '.')
+ memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+ unsigned int i;
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF) {
+ char *name = strtab + syms[i].st_name;
+ if (name[0] == '.')
+ memmove(name, name+1, strlen(name));
+ }
+ }
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+ Elf64_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
+{
+ unsigned int i;
+
+ /* Find .toc and .stubs sections, symtab and strtab */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ char *p;
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+ me->arch.stubs_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+ me->arch.toc_section = i;
+ else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+ dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size);
+
+ /* We don't handle .init for the moment: rename to _init */
+ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+ p[0] = '_';
+
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
+ dedotify((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf64_Sym),
+ (void *)hdr
+ + sechdrs[sechdrs[i].sh_link].sh_offset);
+ }
+ if (!me->arch.stubs_section || !me->arch.toc_section) {
+ printk("%s: doesn't contain .toc or .stubs.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the stubs size */
+ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+ return 0;
+}
+
+int apply_relocate(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
+ return -ENOEXEC;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+ gives the value maximum span in an instruction which uses a signed
+ offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+ return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if low
+ 16 bits has high bit set, high 16 bits must be adjusted. These
+ macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value. */
+static inline int create_stub(Elf64_Shdr *sechdrs,
+ struct ppc64_stub_entry *entry,
+ struct ppc64_opd_entry *opd,
+ struct module *me)
+{
+ Elf64_Half *loc1, *loc2;
+ long reladdr;
+
+ *entry = ppc64_stub;
+
+ loc1 = (Elf64_Half *)&entry->jump[2];
+ loc2 = (Elf64_Half *)&entry->jump[6];
+
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ printk("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ *loc1 = PPC_HA(reladdr);
+ *loc2 = PPC_LO(reladdr);
+ entry->opd.funcaddr = opd->funcaddr;
+ entry->opd.r2 = opd->r2;
+ return 1;
+}
+
+/* Create stub to jump to function described in this OPD: we need the
+ stub to set up the TOC ptr (r2) for the function. */
+static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+ unsigned long opdaddr,
+ struct module *me)
+{
+ struct ppc64_stub_entry *stubs;
+ struct ppc64_opd_entry *opd = (void *)opdaddr;
+ unsigned int i, num_stubs;
+
+ num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+ /* Find this stub, or if that fails, the next avail. entry */
+ stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ for (i = 0; stubs[i].opd.funcaddr; i++) {
+ BUG_ON(i >= num_stubs);
+
+ if (stubs[i].opd.funcaddr == opd->funcaddr)
+ return (unsigned long)&stubs[i];
+ }
+
+ if (!create_stub(sechdrs, &stubs[i], opd, me))
+ return 0;
+
+ return (unsigned long)&stubs[i];
+}
+
+/* We expect a noop next: if it is, replace it with instruction to
+ restore r2. */
+static int restore_r2(u32 *instruction, struct module *me)
+{
+ if (*instruction != 0x60000000) {
+ printk("%s: Expect noop after relocate, got %08x\n",
+ me->name, *instruction);
+ return 0;
+ }
+ *instruction = 0xe8410028; /* ld r2,40(r1) */
+ return 1;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+ Elf64_Sym *sym;
+ unsigned long *location;
+ unsigned long value;
+
+ DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rela[i].r_offset;
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+ location, (long)ELF64_R_TYPE(rela[i].r_info),
+ strtab + sym->st_name, (unsigned long)sym->st_value,
+ (long)rela[i].r_addend);
+
+ /* `Everything is relative'. */
+ value = sym->st_value + rela[i].r_addend;
+
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_PPC64_ADDR32:
+ /* Simply set it */
+ *(u32 *)location = value;
+ break;
+
+ case R_PPC64_ADDR64:
+ /* Simply set it */
+ *(unsigned long *)location = value;
+ break;
+
+ case R_PPC64_TOC:
+ *(unsigned long *)location = my_r2(sechdrs, me);
+ break;
+
+ case R_PPC64_TOC16:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if (value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16 relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_TOC16_DS:
+ /* Subtact TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+ printk("%s: bad TOC16_DS relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC_REL24:
+ /* FIXME: Handle weak symbols here --RR */
+ if (sym->st_shndx == SHN_UNDEF) {
+ /* External: go via stub */
+ value = stub_for_addr(sechdrs, value, me);
+ if (!value)
+ return -ENOENT;
+ if (!restore_r2((u32 *)location + 1, me))
+ return -ENOEXEC;
+ }
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+ printk("%s: REL24 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+
+ /* Only replace bits 2 through 26 */
+ *(uint32_t *)location
+ = (*(uint32_t *)location & ~0x03fffffc)
+ | (value & 0x03fffffc);
+ break;
+
+ default:
+ printk("%s: Unknown ADD relocation: %lu\n",
+ me->name,
+ (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+
+ return 0;
+}
+
+LIST_HEAD(module_bug_list);
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ char *secstrings;
+ unsigned int i;
+
+ me->arch.bug_table = NULL;
+ me->arch.num_bugs = 0;
+
+ /* Find the __bug_table section, if present */
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+ continue;
+ me->arch.bug_table = (void *) sechdrs[i].sh_addr;
+ me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+ break;
+ }
+
+ /*
+ * Strictly speaking this should have a spinlock to protect against
+ * traversals, but since we only traverse on BUG()s, a spinlock
+ * could potentially lead to deadlock and thus be counter-productive.
+ */
+ list_add(&me->arch.bug_list, &module_bug_list);
+
+ return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+ list_del(&mod->arch.bug_list);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ struct mod_arch_specific *mod;
+ unsigned int i;
+ struct bug_entry *bug;
+
+ list_for_each_entry(mod, &module_bug_list, bug_list) {
+ bug = mod->bug_table;
+ for (i = 0; i < mod->num_bugs; ++i, ++bug)
+ if (bugaddr == bug->bug_addr)
+ return bug;
+ }
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
new file mode 100644
index 00000000000..c0fcd29918c
--- /dev/null
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -0,0 +1,742 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * /dev/nvram driver for PPC64
+ *
+ * This perhaps should live in drivers/char
+ *
+ * TODO: Split the /dev/nvram part (that one can use
+ * drivers/char/generic_nvram.c) from the arch & partition
+ * parsing code.
+ */
+
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/fcntl.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+
+#undef DEBUG_NVRAM
+
+static int nvram_scan_partitions(void);
+static int nvram_setup_partition(void);
+static int nvram_create_os_partition(void);
+static int nvram_remove_os_partition(void);
+
+static struct nvram_partition * nvram_part;
+static long nvram_error_log_index = -1;
+static long nvram_error_log_size = 0;
+
+int no_logging = 1; /* Until we initialize everything,
+ * make sure we don't try logging
+ * anything */
+
+extern volatile int error_log_cnt;
+
+struct err_log_info {
+ int error_type;
+ unsigned int seq_num;
+};
+
+static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
+{
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+ break;
+ case 2:
+ offset += size;
+ break;
+ }
+ if (offset < 0)
+ return -EINVAL;
+ file->f_pos = offset;
+ return file->f_pos;
+}
+
+
+static ssize_t dev_nvram_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t len;
+ char *tmp_buffer;
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+ if (*ppos >= size)
+ return 0;
+ if (count > size)
+ count = size;
+
+ tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+ if (!tmp_buffer) {
+ printk(KERN_ERR "dev_read_nvram: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ len = ppc_md.nvram_read(tmp_buffer, count, ppos);
+ if ((long)len <= 0) {
+ kfree(tmp_buffer);
+ return len;
+ }
+
+ if (copy_to_user(buf, tmp_buffer, len)) {
+ kfree(tmp_buffer);
+ return -EFAULT;
+ }
+
+ kfree(tmp_buffer);
+ return len;
+
+}
+
+static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t len;
+ char * tmp_buffer;
+ int size;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ size = ppc_md.nvram_size();
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+ if (*ppos >= size)
+ return 0;
+ if (count > size)
+ count = size;
+
+ tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+ if (!tmp_buffer) {
+ printk(KERN_ERR "dev_nvram_write: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(tmp_buffer, buf, count)) {
+ kfree(tmp_buffer);
+ return -EFAULT;
+ }
+
+ len = ppc_md.nvram_write(tmp_buffer, count, ppos);
+ if ((long)len <= 0) {
+ kfree(tmp_buffer);
+ return len;
+ }
+
+ kfree(tmp_buffer);
+ return len;
+}
+
+static int dev_nvram_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ switch(cmd) {
+#ifdef CONFIG_PPC_PMAC
+ case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
+ printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
+ case IOC_NVRAM_GET_OFFSET: {
+ int part, offset;
+
+ if (_machine != PLATFORM_POWERMAC)
+ return -EINVAL;
+ if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
+ return -EFAULT;
+ if (part < pmac_nvram_OF || part > pmac_nvram_NR)
+ return -EINVAL;
+ offset = pmac_get_partition(part);
+ if (offset < 0)
+ return offset;
+ if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
+ return -EFAULT;
+ return 0;
+ }
+#endif /* CONFIG_PPC_PMAC */
+ }
+ return -EINVAL;
+}
+
+struct file_operations nvram_fops = {
+ .owner = THIS_MODULE,
+ .llseek = dev_nvram_llseek,
+ .read = dev_nvram_read,
+ .write = dev_nvram_write,
+ .ioctl = dev_nvram_ioctl,
+};
+
+static struct miscdevice nvram_dev = {
+ NVRAM_MINOR,
+ "nvram",
+ &nvram_fops
+};
+
+
+#ifdef DEBUG_NVRAM
+static void nvram_print_partitions(char * label)
+{
+ struct list_head * p;
+ struct nvram_partition * tmp_part;
+
+ printk(KERN_WARNING "--------%s---------\n", label);
+ printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
+ list_for_each(p, &nvram_part->partition) {
+ tmp_part = list_entry(p, struct nvram_partition, partition);
+ printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n",
+ tmp_part->index, tmp_part->header.signature,
+ tmp_part->header.checksum, tmp_part->header.length,
+ tmp_part->header.name);
+ }
+}
+#endif
+
+
+static int nvram_write_header(struct nvram_partition * part)
+{
+ loff_t tmp_index;
+ int rc;
+
+ tmp_index = part->index;
+ rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
+
+ return rc;
+}
+
+
+static unsigned char nvram_checksum(struct nvram_header *p)
+{
+ unsigned int c_sum, c_sum2;
+ unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
+ c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
+
+ /* The sum may have spilled into the 3rd byte. Fold it back. */
+ c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
+ /* The sum cannot exceed 2 bytes. Fold it into a checksum */
+ c_sum2 = (c_sum >> 8) + (c_sum << 8);
+ c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
+ return c_sum;
+}
+
+
+/*
+ * Find an nvram partition, sig can be 0 for any
+ * partition or name can be NULL for any name, else
+ * tries to match both
+ */
+struct nvram_partition *nvram_find_partition(int sig, const char *name)
+{
+ struct nvram_partition * part;
+ struct list_head * p;
+
+ list_for_each(p, &nvram_part->partition) {
+ part = list_entry(p, struct nvram_partition, partition);
+
+ if (sig && part->header.signature != sig)
+ continue;
+ if (name && 0 != strncmp(name, part->header.name, 12))
+ continue;
+ return part;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(nvram_find_partition);
+
+
+static int nvram_remove_os_partition(void)
+{
+ struct list_head *i;
+ struct list_head *j;
+ struct nvram_partition * part;
+ struct nvram_partition * cur_part;
+ int rc;
+
+ list_for_each(i, &nvram_part->partition) {
+ part = list_entry(i, struct nvram_partition, partition);
+ if (part->header.signature != NVRAM_SIG_OS)
+ continue;
+
+ /* Make os partition a free partition */
+ part->header.signature = NVRAM_SIG_FREE;
+ sprintf(part->header.name, "wwwwwwwwwwww");
+ part->header.checksum = nvram_checksum(&part->header);
+
+ /* Merge contiguous free partitions backwards */
+ list_for_each_prev(j, &part->partition) {
+ cur_part = list_entry(j, struct nvram_partition, partition);
+ if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+ break;
+ }
+
+ part->header.length += cur_part->header.length;
+ part->header.checksum = nvram_checksum(&part->header);
+ part->index = cur_part->index;
+
+ list_del(&cur_part->partition);
+ kfree(cur_part);
+ j = &part->partition; /* fixup our loop */
+ }
+
+ /* Merge contiguous free partitions forwards */
+ list_for_each(j, &part->partition) {
+ cur_part = list_entry(j, struct nvram_partition, partition);
+ if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+ break;
+ }
+
+ part->header.length += cur_part->header.length;
+ part->header.checksum = nvram_checksum(&part->header);
+
+ list_del(&cur_part->partition);
+ kfree(cur_part);
+ j = &part->partition; /* fixup our loop */
+ }
+
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+
+ }
+
+ return 0;
+}
+
+/* nvram_create_os_partition
+ *
+ * Create a OS linux partition to buffer error logs.
+ * Will create a partition starting at the first free
+ * space found if space has enough room.
+ */
+static int nvram_create_os_partition(void)
+{
+ struct nvram_partition *part;
+ struct nvram_partition *new_part;
+ struct nvram_partition *free_part = NULL;
+ int seq_init[2] = { 0, 0 };
+ loff_t tmp_index;
+ long size = 0;
+ int rc;
+
+ /* Find a free partition that will give us the maximum needed size
+ If can't find one that will give us the minimum size needed */
+ list_for_each_entry(part, &nvram_part->partition, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE)
+ continue;
+
+ if (part->header.length >= NVRAM_MAX_REQ) {
+ size = NVRAM_MAX_REQ;
+ free_part = part;
+ break;
+ }
+ if (!size && part->header.length >= NVRAM_MIN_REQ) {
+ size = NVRAM_MIN_REQ;
+ free_part = part;
+ }
+ }
+ if (!size)
+ return -ENOSPC;
+
+ /* Create our OS partition */
+ new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
+ if (!new_part) {
+ printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
+ return -ENOMEM;
+ }
+
+ new_part->index = free_part->index;
+ new_part->header.signature = NVRAM_SIG_OS;
+ new_part->header.length = size;
+ strcpy(new_part->header.name, "ppc64,linux");
+ new_part->header.checksum = nvram_checksum(&new_part->header);
+
+ rc = nvram_write_header(new_part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \
+ failed (%d)\n", rc);
+ return rc;
+ }
+
+ /* make sure and initialize to zero the sequence number and the error
+ type logged */
+ tmp_index = new_part->index + NVRAM_HEADER_LEN;
+ rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+
+ nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
+ nvram_error_log_size = ((part->header.length - 1) *
+ NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+
+ list_add_tail(&new_part->partition, &free_part->partition);
+
+ if (free_part->header.length <= size) {
+ list_del(&free_part->partition);
+ kfree(free_part);
+ return 0;
+ }
+
+ /* Adjust the partition we stole the space from */
+ free_part->index += size * NVRAM_BLOCK_LEN;
+ free_part->header.length -= size;
+ free_part->header.checksum = nvram_checksum(&free_part->header);
+
+ rc = nvram_write_header(free_part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+
+/* nvram_setup_partition
+ *
+ * This will setup the partition we need for buffering the
+ * error logs and cleanup partitions if needed.
+ *
+ * The general strategy is the following:
+ * 1.) If there is ppc64,linux partition large enough then use it.
+ * 2.) If there is not a ppc64,linux partition large enough, search
+ * for a free partition that is large enough.
+ * 3.) If there is not a free partition large enough remove
+ * _all_ OS partitions and consolidate the space.
+ * 4.) Will first try getting a chunk that will satisfy the maximum
+ * error log size (NVRAM_MAX_REQ).
+ * 5.) If the max chunk cannot be allocated then try finding a chunk
+ * that will satisfy the minum needed (NVRAM_MIN_REQ).
+ */
+static int nvram_setup_partition(void)
+{
+ struct list_head * p;
+ struct nvram_partition * part;
+ int rc;
+
+ /* For now, we don't do any of this on pmac, until I
+ * have figured out if it's worth killing some unused stuffs
+ * in our nvram, as Apple defined partitions use pretty much
+ * all of the space
+ */
+ if (_machine == PLATFORM_POWERMAC)
+ return -ENOSPC;
+
+ /* see if we have an OS partition that meets our needs.
+ will try getting the max we need. If not we'll delete
+ partitions and try again. */
+ list_for_each(p, &nvram_part->partition) {
+ part = list_entry(p, struct nvram_partition, partition);
+ if (part->header.signature != NVRAM_SIG_OS)
+ continue;
+
+ if (strcmp(part->header.name, "ppc64,linux"))
+ continue;
+
+ if (part->header.length >= NVRAM_MIN_REQ) {
+ /* found our partition */
+ nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
+ nvram_error_log_size = ((part->header.length - 1) *
+ NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+ return 0;
+ }
+ }
+
+ /* try creating a partition with the free space we have */
+ rc = nvram_create_os_partition();
+ if (!rc) {
+ return 0;
+ }
+
+ /* need to free up some space */
+ rc = nvram_remove_os_partition();
+ if (rc) {
+ return rc;
+ }
+
+ /* create a partition in this new space */
+ rc = nvram_create_os_partition();
+ if (rc) {
+ printk(KERN_ERR "nvram_create_os_partition: Could not find a "
+ "NVRAM partition large enough\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+
+static int nvram_scan_partitions(void)
+{
+ loff_t cur_index = 0;
+ struct nvram_header phead;
+ struct nvram_partition * tmp_part;
+ unsigned char c_sum;
+ char * header;
+ int total_size;
+ int err;
+
+ if (ppc_md.nvram_size == NULL)
+ return -ENODEV;
+ total_size = ppc_md.nvram_size();
+
+ header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+ if (!header) {
+ printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
+ return -ENOMEM;
+ }
+
+ while (cur_index < total_size) {
+
+ err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
+ if (err != NVRAM_HEADER_LEN) {
+ printk(KERN_ERR "nvram_scan_partitions: Error parsing "
+ "nvram partitions\n");
+ goto out;
+ }
+
+ cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
+
+ memcpy(&phead, header, NVRAM_HEADER_LEN);
+
+ err = 0;
+ c_sum = nvram_checksum(&phead);
+ if (c_sum != phead.checksum) {
+ printk(KERN_WARNING "WARNING: nvram partition checksum"
+ " was %02x, should be %02x!\n",
+ phead.checksum, c_sum);
+ printk(KERN_WARNING "Terminating nvram partition scan\n");
+ goto out;
+ }
+ if (!phead.length) {
+ printk(KERN_WARNING "WARNING: nvram corruption "
+ "detected: 0-length partition\n");
+ goto out;
+ }
+ tmp_part = (struct nvram_partition *)
+ kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!tmp_part) {
+ printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+ goto out;
+ }
+
+ memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
+ tmp_part->index = cur_index;
+ list_add_tail(&tmp_part->partition, &nvram_part->partition);
+
+ cur_index += phead.length * NVRAM_BLOCK_LEN;
+ }
+ err = 0;
+
+ out:
+ kfree(header);
+ return err;
+}
+
+static int __init nvram_init(void)
+{
+ int error;
+ int rc;
+
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
+ return -ENODEV;
+
+ rc = misc_register(&nvram_dev);
+ if (rc != 0) {
+ printk(KERN_ERR "nvram_init: failed to register device\n");
+ return rc;
+ }
+
+ /* initialize our anchor for the nvram partition list */
+ nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ if (!nvram_part) {
+ printk(KERN_ERR "nvram_init: Failed kmalloc\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&nvram_part->partition);
+
+ /* Get all the NVRAM partitions */
+ error = nvram_scan_partitions();
+ if (error) {
+ printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
+ return error;
+ }
+
+ if(nvram_setup_partition())
+ printk(KERN_WARNING "nvram_init: Could not find nvram partition"
+ " for nvram buffered error logging.\n");
+
+#ifdef DEBUG_NVRAM
+ nvram_print_partitions("NVRAM Partitions");
+#endif
+
+ return rc;
+}
+
+void __exit nvram_cleanup(void)
+{
+ misc_deregister( &nvram_dev );
+}
+
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* nvram_write_error_log
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode. If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk. For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that is would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will looks like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name | data |
+ * |0 |1 |2 3|4 15|16 length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log |
+ * |0 3|4 7|8 nvram_error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_error_log(char * buff, int length, unsigned int err_type)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (no_logging) {
+ return -EPERM;
+ }
+
+ if (nvram_error_log_index == -1) {
+ return -ESPIPE;
+ }
+
+ if (length > nvram_error_log_size) {
+ length = nvram_error_log_size;
+ }
+
+ info.error_type = err_type;
+ info.seq_num = error_log_cnt;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_write(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ if (length > nvram_error_log_size)
+ length = nvram_error_log_size;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_read(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ error_log_cnt = info.seq_num;
+ *err_type = info.error_type;
+
+ return 0;
+}
+
+/* This doesn't actually zero anything, but it sets the event_logged
+ * word to tell that this event is safely in syslog.
+ */
+int nvram_clear_error_log(void)
+{
+ loff_t tmp_index;
+ int clear_word = ERR_FLAG_ALREADY_LOGGED;
+ int rc;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+module_init(nvram_init);
+module_exit(nvram_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 00000000000..5a5b2468508
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1381 @@
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+unsigned long pci_probe_only = 1;
+unsigned long pci_assign_all_buses = 0;
+
+/*
+ * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch
+ * devices we don't have access to.
+ */
+unsigned long io_page_mask;
+
+EXPORT_SYMBOL(io_page_mask);
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static void fixup_resource(struct resource *res, struct pci_dev *dev);
+static void do_bus_setup(struct pci_bus *bus);
+#endif
+
+unsigned int pcibios_assign_all_busses(void)
+{
+ return pci_assign_all_buses;
+}
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
+ * page is mapped and isa_io_limit prevents access to it.
+ */
+unsigned long isa_io_base; /* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+unsigned long pci_io_base;
+EXPORT_SYMBOL(pci_io_base);
+
+void iSeries_pcibios_init(void);
+
+LIST_HEAD(hose_list);
+
+struct dma_mapping_ops pci_dma_ops;
+EXPORT_SYMBOL(pci_dma_ops);
+
+int global_phb_number; /* Global phb counter */
+
+/* Cached ISA bridge dev. */
+struct pci_dev *ppc64_isabridge_dev = NULL;
+
+static void fixup_broken_pcnet32(struct pci_dev* dev)
+{
+ if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
+ dev->vendor = PCI_VENDOR_ID_AMD;
+ pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+ struct resource *res)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ region->start = res->start - offset;
+ region->end = res->end - offset;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+ struct pci_bus_region *region)
+{
+ unsigned long offset = 0;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ if (!hose)
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+
+ res->start = region->start + offset;
+ res->end = region->end + offset;
+}
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+#endif
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ struct pci_dev *dev = data;
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start = res->start;
+ unsigned long alignto;
+
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long offset = (unsigned long)hose->io_base_virt -
+ pci_io_base;
+ /* Make sure we start at our min on all hoses */
+ if (start - offset < PCIBIOS_MIN_IO)
+ start = PCIBIOS_MIN_IO + offset;
+
+ /*
+ * Put everything into 0x00-0xff region modulo 0x400
+ */
+ if (start & 0x300)
+ start = (start + 0x3ff) & ~0x3ff;
+
+ } else if (res->flags & IORESOURCE_MEM) {
+ /* Make sure we start at our min on all hoses */
+ if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
+ start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
+
+ /* Align to multiple of size of minimum base. */
+ alignto = max(0x1000UL, align);
+ start = ALIGN(start, alignto);
+ }
+
+ res->start = start;
+}
+
+static DEFINE_SPINLOCK(hose_spinlock);
+
+/*
+ * pci_controller(phb) initialized common variables.
+ */
+static void __devinit pci_setup_pci_controller(struct pci_controller *hose)
+{
+ memset(hose, 0, sizeof(struct pci_controller));
+
+ spin_lock(&hose_spinlock);
+ hose->global_number = global_phb_number++;
+ list_add_tail(&hose->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
+}
+
+static void add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ struct property *of_prop;
+ unsigned int size;
+
+ of_prop = (struct property *)
+ get_property(dev, "linux,pci-domain", &size);
+ if (of_prop != NULL)
+ return;
+ WARN_ON(of_prop && size < sizeof(int));
+ if (of_prop && size < sizeof(int))
+ of_prop = NULL;
+ size = sizeof(struct property) + sizeof(int);
+ if (of_prop == NULL) {
+ if (mem_init_done)
+ of_prop = kmalloc(size, GFP_KERNEL);
+ else
+ of_prop = alloc_bootmem(size);
+ }
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(int);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ *((int *)of_prop->value) = phb->global_number;
+ prom_add_property(dev, of_prop);
+}
+
+struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
+{
+ struct pci_controller *phb;
+
+ if (mem_init_done)
+ phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
+ else
+ phb = alloc_bootmem(sizeof (struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+ pci_setup_pci_controller(phb);
+ phb->arch_data = dev;
+ phb->is_dynamic = mem_init_done;
+ if (dev)
+ add_linux_pci_domain(dev, phb);
+ return phb;
+}
+
+void pcibios_free_controller(struct pci_controller *phb)
+{
+ if (phb->arch_data) {
+ struct device_node *np = phb->arch_data;
+ int *domain = (int *)get_property(np,
+ "linux,pci-domain", NULL);
+ if (domain)
+ *domain = -1;
+ }
+ if (phb->is_dynamic)
+ kfree(phb);
+}
+
+static void __init pcibios_claim_one_bus(struct pci_bus *b)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+
+ list_for_each_entry(dev, &b->devices, bus_list) {
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->parent || !r->start || !r->flags)
+ continue;
+ pci_claim_resource(dev, i);
+ }
+ }
+
+ list_for_each_entry(child_bus, &b->children, node)
+ pcibios_claim_one_bus(child_bus);
+}
+
+#ifndef CONFIG_PPC_ISERIES
+static void __init pcibios_claim_of_setup(void)
+{
+ struct pci_bus *b;
+
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_claim_one_bus(b);
+}
+#endif
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+ u32 *prop;
+ int len;
+
+ prop = (u32 *) get_property(np, name, &len);
+ if (prop && len >= 4)
+ return *prop;
+ return def;
+}
+
+static unsigned int pci_parse_of_flags(u32 addr0)
+{
+ unsigned int flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ return flags;
+}
+
+#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
+{
+ u64 base, size;
+ unsigned int flags;
+ struct resource *res;
+ u32 *addrs, i;
+ int proplen;
+
+ addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ for (; proplen >= 20; proplen -= 20, addrs += 5) {
+ flags = pci_parse_of_flags(addrs[0]);
+ if (!flags)
+ continue;
+ base = GET_64BIT(addrs, 1);
+ size = GET_64BIT(addrs, 3);
+ if (!size)
+ continue;
+ i = addrs[0] & 0xff;
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->start = base;
+ res->end = base + size - 1;
+ res->flags = flags;
+ res->name = pci_name(dev);
+ fixup_resource(res, dev);
+ }
+}
+
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct pci_dev *dev;
+ const char *type;
+
+ dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ type = get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ memset(dev, 0, sizeof(struct pci_dev));
+ dev->bus = bus;
+ dev->sysdata = node;
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+
+ dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+ dev->device = get_int_prop(node, "device-id", 0xffff);
+ dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+ dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+
+ sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dev->class = get_int_prop(node, "class-code", 0);
+
+ dev->current_state = 4; /* unknown power state */
+
+ if (!strcmp(type, "pci")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+ dev->irq = NO_IRQ;
+ if (node->n_intrs > 0) {
+ dev->irq = node->intrs[0].line;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+ dev->irq);
+ }
+ }
+
+ pci_parse_of_addrs(node, dev);
+
+ pci_device_add(dev, bus);
+
+ /* XXX pci_scan_msi_device(dev); */
+
+ return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+void __devinit of_scan_bus(struct device_node *node,
+ struct pci_bus *bus)
+{
+ struct device_node *child = NULL;
+ u32 *reg;
+ int reglen, devfn;
+ struct pci_dev *dev;
+
+ while ((child = of_get_next_child(node, child)) != NULL) {
+ reg = (u32 *) get_property(child, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ continue;
+ devfn = (reg[0] >> 8) & 0xff;
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(child, bus, devfn);
+ if (!dev)
+ continue;
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ of_scan_pci_bridge(child, dev);
+ }
+
+ do_bus_setup(bus);
+}
+EXPORT_SYMBOL(of_scan_bus);
+
+void __devinit of_scan_pci_bridge(struct device_node *node,
+ struct pci_dev *dev)
+{
+ struct pci_bus *bus;
+ u32 *busrange, *ranges;
+ int len, i, mode;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ /* parse bus-range property */
+ busrange = (u32 *) get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = (u32 *) get_property(node, "ranges", &len);
+ if (ranges == NULL) {
+ printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus->primary = dev->bus->number;
+ bus->subordinate = busrange[1];
+ bus->bridge_ctl = 0;
+ bus->sysdata = node;
+
+ /* parse ranges property */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ flags = pci_parse_of_flags(ranges[0]);
+ size = GET_64BIT(ranges, 6);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ }
+ res->start = GET_64BIT(ranges, 1);
+ res->end = res->start + size - 1;
+ res->flags = flags;
+ fixup_resource(res, dev);
+ }
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+ else if (mode == PCI_PROBE_NORMAL)
+ pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void __devinit scan_phb(struct pci_controller *hose)
+{
+ struct pci_bus *bus;
+ struct device_node *node = hose->arch_data;
+ int i, mode;
+ struct resource *res;
+
+ bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
+ if (bus == NULL) {
+ printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
+ hose->global_number);
+ return;
+ }
+ bus->secondary = hose->first_busno;
+ hose->bus = bus;
+
+ bus->resource[0] = res = &hose->io_resource;
+ if (res->flags && request_resource(&ioport_resource, res))
+ printk(KERN_ERR "Failed to request PCI IO region "
+ "on PCI domain %04x\n", hose->global_number);
+
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+ bus->resource[i+1] = res;
+ if (res->flags && request_resource(&iomem_resource, res))
+ printk(KERN_ERR "Failed to request PCI memory region "
+ "on PCI domain %04x\n", hose->global_number);
+ }
+
+ mode = PCI_PROBE_NORMAL;
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ if (mode == PCI_PROBE_DEVTREE) {
+ bus->subordinate = hose->last_busno;
+ of_scan_bus(node, bus);
+ }
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+ if (mode == PCI_PROBE_NORMAL)
+ hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+ pci_bus_add_devices(bus);
+}
+
+static int __init pcibios_init(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ /* For now, override phys_mem_access_prot. If we need it,
+ * later, we may move that initialization to each ppc_md
+ */
+ ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+#ifdef CONFIG_PPC_ISERIES
+ iSeries_pcibios_init();
+#endif
+
+ printk("PCI: Probing PCI hardware\n");
+
+ /* Scan all of the recorded PCI controllers. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ scan_phb(hose);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (pci_probe_only)
+ pcibios_claim_of_setup();
+ else
+ /* FIXME: `else' will be removed when
+ pci_assign_unassigned_resources() is able to work
+ correctly with [partially] allocated PCI tree. */
+ pci_assign_unassigned_resources();
+#endif /* !CONFIG_PPC_ISERIES */
+
+ /* Call machine dependent final fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
+ /* Cache the location of the ISA bridge (if we have one) */
+ ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+ if (ppc64_isabridge_dev != NULL)
+ printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /* map in PCI I/O space */
+ phbs_remap_io();
+#endif
+
+ printk("PCI: Probing PCI hardware done\n");
+
+ return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+char __init *pcibios_setup(char *str)
+{
+ return str;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, oldcmd;
+ int i;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ oldcmd = cmd;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *res = &dev->resource[i];
+
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<i)))
+ continue;
+
+ if (res->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (res->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ if (cmd != oldcmd) {
+ printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+ pci_name(dev), cmd);
+ /* Enable the appropriate bits in the PCI command register. */
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ return hose->global_number;
+#endif
+}
+
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+ return 0;
+#else
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ return hose->buid;
+#endif
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ * -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+ unsigned long *offset,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long io_offset = 0;
+ int i, res_bit;
+
+ if (hose == 0)
+ return NULL; /* should never happen */
+
+ /* If memory, add on the PCI bridge address offset */
+ if (mmap_state == pci_mmap_mem) {
+ *offset += hose->pci_mem_offset;
+ res_bit = IORESOURCE_MEM;
+ } else {
+ io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+
+ /*
+ * Check that the offset requested corresponds to one of the
+ * resources of the device.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &dev->resource[i];
+ int flags = rp->flags;
+
+ /* treat ROM as memory (should be already) */
+ if (i == PCI_ROM_RESOURCE)
+ flags |= IORESOURCE_MEM;
+
+ /* Active and same type? */
+ if ((flags & res_bit) == 0)
+ continue;
+
+ /* In the range of this resource? */
+ if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+ continue;
+
+ /* found it! construct the final physical address */
+ if (mmap_state == pci_mmap_io)
+ *offset += hose->io_base_phys - io_offset;
+ return rp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+ pgprot_t protection,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long prot = pgprot_val(protection);
+
+ /* Write combine is always 0 on non-memory space mappings. On
+ * memory space, if the user didn't pass 1, we check for a
+ * "prefetchable" resource. This is a bit hackish, but we use
+ * this to workaround the inability of /sysfs to provide a write
+ * combine bit
+ */
+ if (mmap_state != pci_mmap_mem)
+ write_combine = 0;
+ else if (write_combine == 0) {
+ if (rp->flags & IORESOURCE_PREFETCH)
+ write_combine = 1;
+ }
+
+ /* XXX would be nice to have a way to ask for write-through */
+ prot |= _PAGE_NO_CACHE;
+ if (write_combine)
+ prot &= ~_PAGE_GUARDED;
+ else
+ prot |= _PAGE_GUARDED;
+
+ printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
+ prot);
+
+ return __pgprot(prot);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t protection)
+{
+ struct pci_dev *pdev = NULL;
+ struct resource *found = NULL;
+ unsigned long prot = pgprot_val(protection);
+ unsigned long offset = pfn << PAGE_SHIFT;
+ int i;
+
+ if (page_is_ram(pfn))
+ return __pgprot(prot);
+
+ prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+
+ for_each_pci_dev(pdev) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ int flags = rp->flags;
+
+ /* Active and same type? */
+ if ((flags & IORESOURCE_MEM) == 0)
+ continue;
+ /* In the range of this resource? */
+ if (offset < (rp->start & PAGE_MASK) ||
+ offset > rp->end)
+ continue;
+ found = rp;
+ break;
+ }
+ if (found)
+ break;
+ }
+ if (found) {
+ if (found->flags & IORESOURCE_PREFETCH)
+ prot &= ~_PAGE_GUARDED;
+ pci_dev_put(pdev);
+ }
+
+ DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+
+ return __pgprot(prot);
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture. The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ struct resource *rp;
+ int ret;
+
+ rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+ if (rp == NULL)
+ return -EINVAL;
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
+ vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+ vma->vm_page_prot,
+ mmap_state, write_combine);
+
+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev;
+ struct device_node *np;
+
+ pdev = to_pci_dev (dev);
+ np = pci_device_to_OF_node(pdev);
+ if (np == NULL || np->full_name == NULL)
+ return 0;
+ return sprintf(buf, "%s", np->full_name);
+}
+static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void pcibios_add_platform_entries(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ device_create_file(&pdev->dev, &dev_attr_devspec);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys,
+ void __iomem * phb_io_base_virt)
+{
+ struct isa_range *range;
+ unsigned long pci_addr;
+ unsigned int isa_addr;
+ unsigned int size;
+ int rlen = 0;
+
+ range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+ if (range == NULL || (rlen < sizeof(struct isa_range))) {
+ printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+ "mapping 64k\n");
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long)phb_io_base_virt,
+ 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ return;
+ }
+
+ /* From "ISA Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 1: an ISA address
+ * cells 2 - 4: a PCI address
+ * (size depending on dev->n_addr_cells)
+ * cell 5: the size of the range
+ */
+ if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+ isa_addr = range->isa_addr.a_lo;
+ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+ range->pci_addr.a_lo;
+
+ /* Assume these are both zero */
+ if ((pci_addr != 0) || (isa_addr != 0)) {
+ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+ __FUNCTION__);
+ return;
+ }
+
+ size = PAGE_ALIGN(range->size);
+
+ __ioremap_explicit(phb_io_base_phys,
+ (unsigned long) phb_io_base_virt,
+ size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+ }
+}
+
+void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int prim)
+{
+ unsigned int *ranges, pci_space;
+ unsigned long size;
+ int rlen = 0;
+ int memno = 0;
+ struct resource *res;
+ int np, na = prom_n_addr_cells(dev);
+ unsigned long pci_addr, cpu_phys_addr;
+
+ np = na + 5;
+
+ /* From "PCI Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 2: a PCI address
+ * cells 3 or 3+4: a CPU physical address
+ * (size depending on dev->n_addr_cells)
+ * cells 4+5 or 5+6: the size of the range
+ */
+ ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+ if (ranges == NULL)
+ return;
+ hose->io_base_phys = 0;
+ while ((rlen -= np * sizeof(unsigned int)) >= 0) {
+ res = NULL;
+ pci_space = ranges[0];
+ pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+
+ cpu_phys_addr = ranges[3];
+ if (na >= 2)
+ cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
+
+ size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
+ ranges += np;
+ if (size == 0)
+ continue;
+
+ /* Now consume following elements while they are contiguous */
+ while (rlen >= np * sizeof(unsigned int)) {
+ unsigned long addr, phys;
+
+ if (ranges[0] != pci_space)
+ break;
+ addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+ phys = ranges[3];
+ if (na >= 2)
+ phys = (phys << 32) | ranges[4];
+ if (addr != pci_addr + size ||
+ phys != cpu_phys_addr + size)
+ break;
+
+ size += ((unsigned long)ranges[na+3] << 32)
+ | ranges[na+4];
+ ranges += np;
+ rlen -= np * sizeof(unsigned int);
+ }
+
+ switch ((pci_space >> 24) & 0x3) {
+ case 1: /* I/O space */
+ hose->io_base_phys = cpu_phys_addr;
+ hose->pci_io_size = size;
+
+ res = &hose->io_resource;
+ res->flags = IORESOURCE_IO;
+ res->start = pci_addr;
+ DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ break;
+ case 2: /* memory space */
+ memno = 0;
+ while (memno < 3 && hose->mem_resources[memno].flags)
+ ++memno;
+
+ if (memno == 0)
+ hose->pci_mem_offset = cpu_phys_addr - pci_addr;
+ if (memno < 3) {
+ res = &hose->mem_resources[memno];
+ res->flags = IORESOURCE_MEM;
+ res->start = cpu_phys_addr;
+ DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
+ res->start, res->start + size - 1);
+ }
+ break;
+ }
+ if (res != NULL) {
+ res->name = dev->full_name;
+ res->end = res->start + size - 1;
+ res->parent = NULL;
+ res->sibling = NULL;
+ res->child = NULL;
+ }
+ }
+}
+
+void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+ struct device_node *isa_dn;
+
+ hose->io_base_virt = reserve_phb_iospace(size);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary) {
+ pci_io_base = (unsigned long)hose->io_base_virt;
+ isa_dn = of_find_node_by_type(NULL, "isa");
+ if (isa_dn) {
+ isa_io_base = pci_io_base;
+ pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+ hose->io_base_virt);
+ of_node_put(isa_dn);
+ /* Allow all IO */
+ io_page_mask = -1;
+ }
+ }
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+ int primary)
+{
+ unsigned long size = hose->pci_io_size;
+ unsigned long io_virt_offset;
+ struct resource *res;
+
+ hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED);
+ DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+ hose->global_number, hose->io_base_phys,
+ (unsigned long) hose->io_base_virt);
+
+ if (primary)
+ pci_io_base = (unsigned long)hose->io_base_virt;
+
+ io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+ res = &hose->io_resource;
+ res->start += io_virt_offset;
+ res->end += io_virt_offset;
+}
+
+
+static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+ unsigned long *start_virt, unsigned long *size)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pci_bus_region region;
+ struct resource *res;
+
+ if (bus->self) {
+ res = bus->resource[0];
+ pcibios_resource_to_bus(bus->self, &region, res);
+ *start_phys = hose->io_base_phys + region.start;
+ *start_virt = (unsigned long) hose->io_base_virt +
+ region.start;
+ if (region.end > region.start)
+ *size = region.end - region.start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, region.start, region.end);
+ return 1;
+ }
+
+ } else {
+ /* Root Bus */
+ res = &hose->io_resource;
+ *start_phys = hose->io_base_phys;
+ *start_virt = (unsigned long) hose->io_base_virt;
+ if (res->end > res->start)
+ *size = res->end - res->start + 1;
+ else {
+ printk("%s(): unexpected region 0x%lx->0x%lx\n",
+ __FUNCTION__, res->start, res->end);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int unmap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ if (iounmap_explicit((void __iomem *) start_virt, size))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(unmap_bus_range);
+
+int remap_bus_range(struct pci_bus *bus)
+{
+ unsigned long start_phys;
+ unsigned long start_virt;
+ unsigned long size;
+
+ if (!bus) {
+ printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+ return 1;
+ }
+
+
+ if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+ return 1;
+ if (start_phys == 0)
+ return 1;
+ printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+ if (__ioremap_explicit(start_phys, start_virt, size,
+ _PAGE_NO_CACHE | _PAGE_GUARDED))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(remap_bus_range);
+
+void phbs_remap_io(void)
+{
+ struct pci_controller *hose, *tmp;
+
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ remap_bus_range(hose->bus);
+}
+
+/*
+ * ppc64 can have multifunction devices that do not respond to function 0.
+ * In this case we must scan all functions.
+ * XXX this can go now, we use the OF device tree in all the
+ * cases that caused problems. -- paulus
+ */
+int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
+{
+ return 0;
+}
+
+static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long start, end, mask, offset;
+
+ if (res->flags & IORESOURCE_IO) {
+ offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+ start = res->start += offset;
+ end = res->end += offset;
+
+ /* Need to allow IO access to pages that are in the
+ ISA range */
+ if (start < MAX_ISA_PORT) {
+ if (end > MAX_ISA_PORT)
+ end = MAX_ISA_PORT;
+
+ start >>= PAGE_SHIFT;
+ end >>= PAGE_SHIFT;
+
+ /* get the range of pages for the map */
+ mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
+ io_page_mask |= mask;
+ }
+ } else if (res->flags & IORESOURCE_MEM) {
+ res->start += hose->pci_mem_offset;
+ res->end += hose->pci_mem_offset;
+ }
+}
+
+void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
+ struct pci_bus *bus)
+{
+ /* Update device resources. */
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++)
+ if (dev->resource[i].flags)
+ fixup_resource(&dev->resource[i], dev);
+}
+EXPORT_SYMBOL(pcibios_fixup_device_resources);
+
+static void __devinit do_bus_setup(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ ppc_md.iommu_bus_setup(bus);
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ ppc_md.iommu_dev_setup(dev);
+
+ if (ppc_md.irq_bus_setup)
+ ppc_md.irq_bus_setup(bus);
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev = bus->self;
+
+ if (dev && pci_probe_only &&
+ (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ /* This is a subordinate bridge */
+
+ pci_read_bridge_bases(bus);
+ pcibios_fixup_device_resources(dev, bus);
+ }
+
+ do_bus_setup(bus);
+
+ if (!pci_probe_only)
+ return;
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+ if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+ pcibios_fixup_device_resources(dev, bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+ u8 intpin;
+ struct device_node *node;
+
+ pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
+ if (intpin == 0)
+ return 0;
+
+ node = pci_device_to_OF_node(pci_dev);
+ if (node == NULL)
+ return -1;
+
+ if (node->n_intrs == 0)
+ return -1;
+
+ pci_dev->irq = node->intrs[0].line;
+
+ pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
+
+ return 0;
+}
+EXPORT_SYMBOL(pci_read_irq_line);
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ u64 *start, u64 *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = pci_io_base - (unsigned long)hose->io_base_virt +
+ hose->io_base_phys;
+
+ *start = rsrc->start + offset;
+ *end = rsrc->end + offset;
+}
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+
+#define IOBASE_BRIDGE_NUMBER 0
+#define IOBASE_MEMORY 1
+#define IOBASE_IO 2
+#define IOBASE_ISA_IO 3
+#define IOBASE_ISA_MEM 4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+ unsigned long in_devfn)
+{
+ struct pci_controller* hose;
+ struct list_head *ln;
+ struct pci_bus *bus = NULL;
+ struct device_node *hose_node;
+
+ /* Argh ! Please forgive me for that hack, but that's the
+ * simplest way to get existing XFree to not lockup on some
+ * G5 machines... So when something asks for bus 0 io base
+ * (bus 0 is HT root), we return the AGP one instead.
+ */
+ if (machine_is_compatible("MacRISC4"))
+ if (in_bus == 0)
+ in_bus = 0xf0;
+
+ /* That syscall isn't quite compatible with PCI domains, but it's
+ * used on pre-domains setup. We return the first match
+ */
+
+ for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
+ bus = pci_bus_b(ln);
+ if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
+ break;
+ bus = NULL;
+ }
+ if (bus == NULL || bus->sysdata == NULL)
+ return -ENODEV;
+
+ hose_node = (struct device_node *)bus->sysdata;
+ hose = PCI_DN(hose_node)->phb;
+
+ switch (which) {
+ case IOBASE_BRIDGE_NUMBER:
+ return (long)hose->first_busno;
+ case IOBASE_MEMORY:
+ return (long)hose->pci_mem_offset;
+ case IOBASE_IO:
+ return (long)hose->io_base_phys;
+ case IOBASE_ISA_IO:
+ return (long)isa_io_base;
+ case IOBASE_ISA_MEM:
+ return -EINVAL;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 00000000000..e1a32f802c0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
+/*
+ * Support for DMA from PCI devices to main memory on
+ * machines without an iommu or with directly addressable
+ * RAM (typically a pmac with 2Gb of RAM or less)
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/abs_addr.h>
+#include <asm/ppc-pci.h>
+
+static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ void *ret;
+
+ ret = (void *)__get_free_pages(flag, get_order(size));
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_abs(ret);
+ }
+ return ret;
+}
+
+static void pci_direct_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
+ size_t size, enum dma_data_direction direction)
+{
+ return virt_to_abs(ptr);
+}
+
+static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ int i;
+
+ for (i = 0; i < nents; i++, sg++) {
+ sg->dma_address = page_to_phys(sg->page) + sg->offset;
+ sg->dma_length = sg->length;
+ }
+
+ return nents;
+}
+
+static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_dma_supported(struct device *dev, u64 mask)
+{
+ return mask < 0x100000000ull;
+}
+
+void __init pci_direct_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_direct_free_coherent;
+ pci_dma_ops.map_single = pci_direct_map_single;
+ pci_dma_ops.unmap_single = pci_direct_unmap_single;
+ pci_dma_ops.map_sg = pci_direct_map_sg;
+ pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
+ pci_dma_ops.dma_supported = pci_direct_dma_supported;
+}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 00000000000..12c4c9e9bbc
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * Traverse_func that inits the PCI fields of the device node.
+ * NOTE: this *must* be done before read/write config to the device.
+ */
+static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
+ u32 *regs;
+ struct pci_dn *pdn;
+
+ if (mem_init_done)
+ pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
+ else
+ pdn = alloc_bootmem(sizeof(*pdn));
+ if (pdn == NULL)
+ return NULL;
+ memset(pdn, 0, sizeof(*pdn));
+ dn->data = pdn;
+ pdn->node = dn;
+ pdn->phb = phb;
+ regs = (u32 *)get_property(dn, "reg", NULL);
+ if (regs) {
+ /* First register entry is addr (00BBSS00) */
+ pdn->busno = (regs[0] >> 16) & 0xff;
+ pdn->devfn = (regs[0] >> 8) & 0xff;
+ }
+
+ pdn->pci_ext_config_space = (type && *type == 1);
+ return NULL;
+}
+
+/*
+ * Traverse a device tree stopping each PCI device in the tree.
+ * This is done depth first. As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value. If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+ void *data)
+{
+ struct device_node *dn, *nextdn;
+ void *ret;
+
+ /* We started with a phb, iterate all childs */
+ for (dn = start->child; dn; dn = nextdn) {
+ u32 *classp, class;
+
+ nextdn = NULL;
+ classp = (u32 *)get_property(dn, "class-code", NULL);
+ class = classp ? *classp : 0;
+
+ if (pre && ((ret = pre(dn, data)) != NULL))
+ return ret;
+
+ /* If we are a PCI bridge, go down */
+ if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+ (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+ /* Depth first...do children */
+ nextdn = dn->child;
+ else if (dn->sibling)
+ /* ok, try next sibling instead. */
+ nextdn = dn->sibling;
+ if (!nextdn) {
+ /* Walk up to next valid sibling. */
+ do {
+ dn = dn->parent;
+ if (dn == start)
+ return NULL;
+ } while (dn->sibling == NULL);
+ nextdn = dn->sibling;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node * dn = (struct device_node *) phb->arch_data;
+ struct pci_dn *pdn;
+
+ /* PHB nodes themselves must not match */
+ update_dn_pci_info(dn, phb);
+ pdn = dn->data;
+ if (pdn) {
+ pdn->devfn = pdn->busno = -1;
+ pdn->phb = phb;
+ }
+
+ /* Update dn->phb ptrs for new phb and children devices */
+ traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/*
+ * Traversal func that looks for a <busno,devfcn> value.
+ * If found, the pci_dn is returned (thus terminating the traversal).
+ */
+static void *is_devfn_node(struct device_node *dn, void *data)
+{
+ int busno = ((unsigned long)data >> 8) & 0xff;
+ int devfn = ((unsigned long)data) & 0xff;
+ struct pci_dn *pci = dn->data;
+
+ if (pci && (devfn == pci->devfn) && (busno == pci->busno))
+ return dn;
+ return NULL;
+}
+
+/*
+ * This is the "slow" path for looking up a device_node from a
+ * pci_dev. It will hunt for the device under its parent's
+ * phb and then update sysdata for a future fastpath.
+ *
+ * It may also do fixups on the actual device since this happens
+ * on the first read/write.
+ *
+ * Note that it also must deal with devices that don't exist.
+ * In this case it may probe for real hardware ("just in case")
+ * and add a device_node to the device tree if necessary.
+ *
+ */
+struct device_node *fetch_dev_dn(struct pci_dev *dev)
+{
+ struct device_node *orig_dn = dev->sysdata;
+ struct device_node *dn;
+ unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
+
+ dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
+ if (dn)
+ dev->sysdata = dn;
+ return dn;
+}
+EXPORT_SYMBOL(fetch_dev_dn);
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
+{
+ struct device_node *np = node;
+ struct pci_dn *pci = NULL;
+ int err = NOTIFY_OK;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ pci = np->parent->data;
+ if (pci)
+ update_dn_pci_info(np, pci->phb);
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+ .notifier_call = pci_dn_reconfig_notifier,
+};
+
+/**
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ *
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures
+ * (including pci info in the device node structs) for each
+ * pci device found underneath. This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ /* This must be done first so the device nodes have valid pci info! */
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ pci_devs_phb_init_dynamic(phb);
+
+ pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 00000000000..bdf15dbbf4f
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
+/*
+ * arch/ppc64/kernel/pci_iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *
+ * Dynamic DMA mapping support, platform-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * We can use ->sysdata directly and avoid the extra work in
+ * pci_device_to_OF_node since ->sysdata will have been initialised
+ * in the iommu init code for all devices.
+ */
+#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
+
+static inline struct iommu_table *devnode_table(struct device *dev)
+{
+ struct pci_dev *pdev;
+
+ if (!dev) {
+ pdev = ppc64_isabridge_dev;
+ if (!pdev)
+ return NULL;
+ } else
+ pdev = to_pci_dev(dev);
+
+ return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
+}
+
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
+ flag);
+}
+
+static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
+ size_t size, enum dma_data_direction direction)
+{
+ return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
+}
+
+
+static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
+}
+
+
+static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ return iommu_map_sg(pdev, devnode_table(pdev), sglist,
+ nelems, direction);
+}
+
+static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
+{
+ iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int pci_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+void pci_iommu_init(void)
+{
+ pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
+ pci_dma_ops.free_coherent = pci_iommu_free_coherent;
+ pci_dma_ops.map_single = pci_iommu_map_single;
+ pci_dma_ops.unmap_single = pci_iommu_unmap_single;
+ pci_dma_ops.map_sg = pci_iommu_map_sg;
+ pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
+ pci_dma_ops.dma_supported = pci_iommu_dma_supported;
+}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 5dcf4ba05ee..59846b40d52 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -105,6 +105,13 @@ EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
+#ifndef __powerpc64__
+EXPORT_SYMBOL(__ide_mm_insl);
+EXPORT_SYMBOL(__ide_mm_outsw);
+EXPORT_SYMBOL(__ide_mm_insw);
+EXPORT_SYMBOL(__ide_mm_outsl);
+#endif
+
EXPORT_SYMBOL(_insb);
EXPORT_SYMBOL(_outsb);
EXPORT_SYMBOL(_insw);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6a5b468edb4..3bf968e7409 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1368,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np)
/* No #address-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_addr_cells);
int
prom_n_size_cells(struct device_node* np)
@@ -1383,6 +1384,7 @@ prom_n_size_cells(struct device_node* np)
/* No #size-cells property for the root node, default to 1 */
return 1;
}
+EXPORT_SYMBOL(prom_n_size_cells);
/**
* Work out the sense (active-low level / active-high edge)
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 00000000000..635d3b9a881
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,105 @@
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+
+
+#define MAX_RTC_WAIT 5000 /* 5 sec */
+#define RTAS_CLOCK_BUSY (-2)
+unsigned long __init rtas_get_boot_time(void)
+{
+ int ret[8];
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ wait_time = rtas_extended_busy_delay_time(error);
+ /* This is boot time so we spin. */
+ udelay(wait_time*1000);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return 0;
+ }
+
+ return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+}
+
+/* NOTE: get_rtc_time will get an error if executed in interrupt context
+ * and if a delay is needed to read the clock. In this case we just
+ * silently return without updating rtc_tm.
+ */
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
+{
+ int ret[8];
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt() && printk_ratelimit()) {
+ memset(&rtc_tm, 0, sizeof(struct rtc_time));
+ printk(KERN_WARNING "error: reading clock"
+ " would delay interrupt\n");
+ return; /* delay not allowed */
+ }
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit()) {
+ printk(KERN_WARNING "error: reading the clock failed (%d)\n",
+ error);
+ return;
+ }
+
+ rtc_tm->tm_sec = ret[5];
+ rtc_tm->tm_min = ret[4];
+ rtc_tm->tm_hour = ret[3];
+ rtc_tm->tm_mday = ret[2];
+ rtc_tm->tm_mon = ret[1] - 1;
+ rtc_tm->tm_year = ret[0] - 1900;
+}
+
+int rtas_set_rtc_time(struct rtc_time *tm)
+{
+ int error, wait_time;
+ u64 max_wait_tb;
+
+ max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+ do {
+ error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ tm->tm_year + 1900, tm->tm_mon + 1,
+ tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec, 0);
+ if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
+ if (in_interrupt())
+ return 1; /* probably decrementer */
+ wait_time = rtas_extended_busy_delay_time(error);
+ msleep(wait_time);
+ error = RTAS_CLOCK_BUSY;
+ }
+ } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
+
+ if (error != 0 && printk_ratelimit())
+ printk(KERN_WARNING "error: setting the clock failed (%d)\n",
+ error);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 0e5a8e11665..60dec2401c2 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -304,75 +304,18 @@ static int __devinit setup_phb(struct device_node *dev,
struct pci_controller *phb,
unsigned int addr_size_words)
{
- pci_setup_pci_controller(phb);
-
if (is_python(dev))
python_countermeasures(dev, addr_size_words);
if (phb_set_bus_ranges(dev, phb))
return 1;
- phb->arch_data = dev;
phb->ops = &rtas_pci_ops;
phb->buid = get_phb_buid(dev);
return 0;
}
-static void __devinit add_linux_pci_domain(struct device_node *dev,
- struct pci_controller *phb,
- struct property *of_prop)
-{
- memset(of_prop, 0, sizeof(struct property));
- of_prop->name = "linux,pci-domain";
- of_prop->length = sizeof(phb->global_number);
- of_prop->value = (unsigned char *)&of_prop[1];
- memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
- prom_add_property(dev, of_prop);
-}
-
-static struct pci_controller * __init alloc_phb(struct device_node *dev,
- unsigned int addr_size_words)
-{
- struct pci_controller *phb;
- struct property *of_prop;
-
- phb = alloc_bootmem(sizeof(struct pci_controller));
- if (phb == NULL)
- return NULL;
-
- of_prop = alloc_bootmem(sizeof(struct property) +
- sizeof(phb->global_number));
- if (!of_prop)
- return NULL;
-
- if (setup_phb(dev, phb, addr_size_words))
- return NULL;
-
- add_linux_pci_domain(dev, phb, of_prop);
-
- return phb;
-}
-
-static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
-{
- struct pci_controller *phb;
-
- phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
- GFP_KERNEL);
- if (phb == NULL)
- return NULL;
-
- if (setup_phb(dev, phb, addr_size_words))
- return NULL;
-
- phb->is_dynamic = 1;
-
- /* TODO: linux,pci-domain? */
-
- return phb;
-}
-
unsigned long __init find_and_init_phbs(void)
{
struct device_node *node;
@@ -397,10 +340,10 @@ unsigned long __init find_and_init_phbs(void)
if (node->type == NULL || strcmp(node->type, "pci") != 0)
continue;
- phb = alloc_phb(node, root_size_cells);
+ phb = pcibios_alloc_controller(node);
if (!phb)
continue;
-
+ setup_phb(node, phb, root_size_cells);
pci_process_bridge_OF_ranges(phb, node, 0);
pci_setup_phb_io(phb, index == 0);
#ifdef CONFIG_PPC_PSERIES
@@ -446,10 +389,10 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
root_size_cells = prom_n_size_cells(root);
primary = list_empty(&hose_list);
- phb = alloc_phb_dynamic(dn, root_size_cells);
+ phb = pcibios_alloc_controller(dn);
if (!phb)
return NULL;
-
+ setup_phb(dn, phb, root_size_cells);
pci_process_bridge_OF_ranges(phb, dn, primary);
pci_setup_phb_io_dynamic(phb, primary);
@@ -505,8 +448,7 @@ int pcibios_remove_root_bus(struct pci_controller *phb)
}
list_del(&phb->list_node);
- if (phb->is_dynamic)
- kfree(phb);
+ pcibios_free_controller(phb);
return 0;
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 33e7f2c7f19..bd3eb4292b5 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -59,6 +59,7 @@
#undef DEBUG
#ifdef DEBUG
+#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c98cfcc9cd9..e5694335bf1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -57,10 +57,6 @@ extern void power4_idle(void);
boot_infos_t *boot_infos;
struct ide_machdep_calls ppc_ide_md;
-/* XXX should go elsewhere */
-int __irq_offset_value;
-EXPORT_SYMBOL(__irq_offset_value);
-
int boot_cpuid;
EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fdbd9f9122f..608fee7c7e2 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -59,6 +59,7 @@
#include <asm/firmware.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
+#include <asm/kexec.h>
#include "setup.h"
@@ -415,6 +416,10 @@ void __init setup_system(void)
*/
unflatten_device_tree();
+#ifdef CONFIG_KEXEC
+ kexec_setup(); /* requires unflattened device tree. */
+#endif
+
/*
* Fill the ppc64_caches & systemcfg structures with informations
* retreived from the device-tree. Need to be called before
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 8bdf95b7e42..5a2eba60dd3 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -403,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
ELF_NFPREG * sizeof(double)))
return 1;
- current->thread.fpscr.val = 0; /* turn off all fp exceptions */
-
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
@@ -818,6 +816,9 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
goto badframe;
regs->link = (unsigned long) frame->tramp;
}
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
@@ -1097,6 +1098,8 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
regs->link = (unsigned long) frame->mctx.tramp;
}
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 58194e15071..1decf278553 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -131,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
flush_fp_to_thread(current);
- /* Make sure signal doesn't get spurrious FP exceptions */
- current->thread.fpscr.val = 0;
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
@@ -423,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto badframe;
+ /* Make sure signal handler doesn't get spurious FP exceptions */
+ current->thread.fpscr.val = 0;
+
/* Set up to return from userspace. */
if (vdso64_rt_sigtramp && current->thread.vdso_base) {
regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 62dfc5b8d76..30374d2f88e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -49,15 +49,16 @@
#include <asm/paca.h>
#endif
-int smp_hw_index[NR_CPUS];
-struct thread_info *secondary_ti;
-
#ifdef DEBUG
+#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
+int smp_hw_index[NR_CPUS];
+struct thread_info *secondary_ti;
+
cpumask_t cpu_possible_map = CPU_MASK_NONE;
cpumask_t cpu_online_map = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 070b4b458aa..de8479769bb 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -130,6 +130,34 @@ unsigned long tb_last_stamp;
*/
DEFINE_PER_CPU(unsigned long, last_jiffy);
+void __delay(unsigned long loops)
+{
+ unsigned long start;
+ int diff;
+
+ if (__USE_RTC()) {
+ start = get_rtcl();
+ do {
+ /* the RTCL register wraps at 1000000000 */
+ diff = get_rtcl() - start;
+ if (diff < 0)
+ diff += 1000000000;
+ } while (diff < loops);
+ } else {
+ start = get_tbl();
+ while (get_tbl() - start < loops)
+ HMT_low();
+ HMT_medium();
+ }
+}
+EXPORT_SYMBOL(__delay);
+
+void udelay(unsigned long usecs)
+{
+ __delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
+
static __inline__ void timer_check_rtc(void)
{
/*
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index c8db993574e..09629aea3e4 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -35,6 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
srwi. r8,r8,7 /* compute line count */
+ crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
mr r3,r6
@@ -58,6 +59,7 @@ V_FUNCTION_END(__kernel_sync_dicache)
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
.cfi_startproc
+ crclr cr0*4+so
sync
isync
li r3,0
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index a08c26e8783..4709f1d9542 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -54,7 +54,6 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
-
mr r4,r3
bl __get_datapage@local
mtlr r12
@@ -63,6 +62,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
beqlr
li r0,__NR_syscalls
stw r0,0(r4)
+ crclr cr0*4+so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_syscall_map)
@@ -77,8 +77,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
mflr r12
.cfi_register lr,r12
bl __get_datapage@local
- lwz r3,CFG_TB_TICKS_PER_SEC(r3)
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+ lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
+ crclr cr0*4+so
+ blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index aeb5fc9b87b..7eebff03a04 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -59,6 +59,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
stw r5,TZONE_TZ_DSTTIME(r11)
1: mtlr r12
+ crclr cr0*4+so
li r3,0
blr
@@ -83,7 +84,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/* Check for supported clock IDs */
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
- cror cr0,cr0,cr1
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
mflr r12 /* r12 saves lr */
@@ -91,7 +92,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mr r10,r3 /* r10 saves id */
mr r11,r4 /* r11 saves tp */
bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
+ mr r9,r3 /* datapage ptr in r9 */
beq cr1,50f /* if monotonic -> jump there */
/*
@@ -117,6 +118,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mulli r5,r5,1000
stw r5,TSPC32_TV_NSEC(r11)
mtlr r12
+ crclr cr0*4+so
li r3,0
blr
@@ -173,14 +175,19 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r4,r4,r7
lis r5,NSEC_PER_SEC@h
ori r5,r5,NSEC_PER_SEC@l
- cmpli cr0,r4,r5
+ cmpl cr0,r4,r5
+ cmpli cr1,r4,0
blt 1f
subf r4,r5,r4
addi r3,r3,1
+1: bge cr1,1f
+ addi r3,r3,-1
+ add r4,r4,r5
1: stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
+ crclr cr0*4+so
li r3,0
blr
@@ -210,11 +217,12 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0,cr0,cr1
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
li r3,0
cmpli cr0,r4,0
+ crclr cr0*4+so
beqlr
lis r5,CLOCK_REALTIME_RES@h
ori r5,r5,CLOCK_REALTIME_RES@l
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index d4a0ad28d53..cb4ae0a5edd 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -35,6 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
srwi. r8,r8,7 /* compute line count */
+ crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
mr r3,r6
@@ -58,6 +59,7 @@ V_FUNCTION_END(__kernel_sync_dicache)
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
.cfi_startproc
+ crclr cr0*4+so
sync
isync
li r3,0
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index e67eda0f8cd..3b2dd7d0c1e 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -54,12 +54,12 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
-
mr r4,r3
bl V_LOCAL_FUNC(__get_datapage)
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP64
cmpli cr0,r4,0
+ crclr cr0*4+so
beqlr
li r0,__NR_syscalls
stw r0,0(r4)
@@ -80,5 +80,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
bl V_LOCAL_FUNC(__get_datapage)
ld r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
+ crclr cr0*4+so
+ blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index d371c02a8c0..ccaeda5136d 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -1,4 +1,5 @@
-/*
+
+ /*
* Userland implementation of gettimeofday() for 64 bits processes in a
* ppc64 kernel for use in the vDSO
*
@@ -51,6 +52,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
stw r4,TZONE_TZ_MINWEST(r10)
stw r5,TZONE_TZ_DSTTIME(r10)
1: mtlr r12
+ crclr cr0*4+so
li r3,0 /* always success */
blr
.cfi_endproc
@@ -68,7 +70,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0,cr0,cr1
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
mflr r12 /* r12 saves lr */
@@ -84,19 +86,21 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
- lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
- ori r7,r7,0xca00
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
- mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r0,r0,44,20
+ mulli r0,r0,1000 /* nsec = usec * 1000 */
std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
mtlr r12
+ crclr cr0*4+so
li r3,0
blr
@@ -106,15 +110,16 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
- lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
- ori r7,r7,0xca00
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
- mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r6,r0,44,20
+ mulli r6,r6,1000 /* nsec = usec * 1000 */
/* now we must fixup using wall to monotonic. We need to snapshot
* that value and do the counter trick again. Fortunately, we still
@@ -123,8 +128,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
* can be used
*/
- lwz r4,WTOM_CLOCK_SEC(r9)
- lwz r7,WTOM_CLOCK_NSEC(r9)
+ lwa r4,WTOM_CLOCK_SEC(r3)
+ lwa r7,WTOM_CLOCK_NSEC(r3)
/* We now have our result in r4,r7. We create a fake dependency
* on that result and re-check the counter
@@ -144,14 +149,19 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r7,r7,r6
lis r9,NSEC_PER_SEC@h
ori r9,r9,NSEC_PER_SEC@l
- cmpli cr0,r7,r9
+ cmpl cr0,r7,r9
+ cmpli cr1,r7,0
blt 1f
subf r7,r9,r7
addi r4,r4,1
+1: bge cr1,1f
+ addi r4,r4,-1
+ add r7,r7,r9
1: std r4,TSPC64_TV_SEC(r11)
std r7,TSPC64_TV_NSEC(r11)
mtlr r12
+ crclr cr0*4+so
li r3,0
blr
@@ -181,11 +191,12 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0,cr0,cr1
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
li r3,0
cmpli cr0,r4,0
+ crclr cr0*4+so
beqlr
lis r5,CLOCK_REALTIME_RES@h
ori r5,r5,CLOCK_REALTIME_RES@l