From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@ppc970.osdl.org>
Date: Sat, 16 Apr 2005 15:20:36 -0700
Subject: Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
---
 arch/mips/mm/Makefile          |   44 +
 arch/mips/mm/c-r3k.c           |  349 ++++++++
 arch/mips/mm/c-r4k.c           | 1260 ++++++++++++++++++++++++++++
 arch/mips/mm/c-sb1.c           |  558 ++++++++++++
 arch/mips/mm/c-tx39.c          |  493 +++++++++++
 arch/mips/mm/cache.c           |  157 ++++
 arch/mips/mm/cerr-sb1.c        |  543 ++++++++++++
 arch/mips/mm/cex-gen.S         |   42 +
 arch/mips/mm/cex-sb1.S         |  170 ++++
 arch/mips/mm/dma-coherent.c    |  255 ++++++
 arch/mips/mm/dma-ip27.c        |  257 ++++++
 arch/mips/mm/dma-ip32.c        |  382 +++++++++
 arch/mips/mm/dma-noncoherent.c |  400 +++++++++
 arch/mips/mm/extable.c         |   21 +
 arch/mips/mm/fault.c           |  236 ++++++
 arch/mips/mm/highmem.c         |  103 +++
 arch/mips/mm/init.c            |  304 +++++++
 arch/mips/mm/ioremap.c         |  202 +++++
 arch/mips/mm/pg-r4k.c          |  489 +++++++++++
 arch/mips/mm/pg-sb1.c          |  287 +++++++
 arch/mips/mm/pgtable-32.c      |   97 +++
 arch/mips/mm/pgtable-64.c      |   58 ++
 arch/mips/mm/pgtable.c         |   36 +
 arch/mips/mm/sc-ip22.c         |  177 ++++
 arch/mips/mm/sc-r5k.c          |  108 +++
 arch/mips/mm/sc-rm7k.c         |  193 +++++
 arch/mips/mm/tlb-andes.c       |  257 ++++++
 arch/mips/mm/tlb-r3k.c         |  289 +++++++
 arch/mips/mm/tlb-r4k.c         |  419 ++++++++++
 arch/mips/mm/tlb-r8k.c         |  250 ++++++
 arch/mips/mm/tlb-sb1.c         |  376 +++++++++
 arch/mips/mm/tlbex-fault.S     |   28 +
 arch/mips/mm/tlbex.c           | 1815 ++++++++++++++++++++++++++++++++++++++++
 33 files changed, 10655 insertions(+)
 create mode 100644 arch/mips/mm/Makefile
 create mode 100644 arch/mips/mm/c-r3k.c
 create mode 100644 arch/mips/mm/c-r4k.c
 create mode 100644 arch/mips/mm/c-sb1.c
 create mode 100644 arch/mips/mm/c-tx39.c
 create mode 100644 arch/mips/mm/cache.c
 create mode 100644 arch/mips/mm/cerr-sb1.c
 create mode 100644 arch/mips/mm/cex-gen.S
 create mode 100644 arch/mips/mm/cex-sb1.S
 create mode 100644 arch/mips/mm/dma-coherent.c
 create mode 100644 arch/mips/mm/dma-ip27.c
 create mode 100644 arch/mips/mm/dma-ip32.c
 create mode 100644 arch/mips/mm/dma-noncoherent.c
 create mode 100644 arch/mips/mm/extable.c
 create mode 100644 arch/mips/mm/fault.c
 create mode 100644 arch/mips/mm/highmem.c
 create mode 100644 arch/mips/mm/init.c
 create mode 100644 arch/mips/mm/ioremap.c
 create mode 100644 arch/mips/mm/pg-r4k.c
 create mode 100644 arch/mips/mm/pg-sb1.c
 create mode 100644 arch/mips/mm/pgtable-32.c
 create mode 100644 arch/mips/mm/pgtable-64.c
 create mode 100644 arch/mips/mm/pgtable.c
 create mode 100644 arch/mips/mm/sc-ip22.c
 create mode 100644 arch/mips/mm/sc-r5k.c
 create mode 100644 arch/mips/mm/sc-rm7k.c
 create mode 100644 arch/mips/mm/tlb-andes.c
 create mode 100644 arch/mips/mm/tlb-r3k.c
 create mode 100644 arch/mips/mm/tlb-r4k.c
 create mode 100644 arch/mips/mm/tlb-r8k.c
 create mode 100644 arch/mips/mm/tlb-sb1.c
 create mode 100644 arch/mips/mm/tlbex-fault.S
 create mode 100644 arch/mips/mm/tlbex.c

(limited to 'arch/mips/mm')

diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
new file mode 100644
index 00000000000..f61e038b444
--- /dev/null
+++ b/arch/mips/mm/Makefile
@@ -0,0 +1,44 @@
+#
+# Makefile for the Linux/MIPS-specific parts of the memory manager.
+#
+
+obj-y				+= cache.o extable.o fault.o init.o pgtable.o \
+				   tlbex.o tlbex-fault.o
+
+obj-$(CONFIG_MIPS32)		+= ioremap.o pgtable-32.o
+obj-$(CONFIG_MIPS64)		+= pgtable-64.o
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
+
+obj-$(CONFIG_CPU_MIPS32)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_MIPS64)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_NEVADA)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_R10000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-andes.o
+obj-$(CONFIG_CPU_R3000)		+= c-r3k.o tlb-r3k.o pg-r4k.o
+obj-$(CONFIG_CPU_R4300)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_R4X00)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5000)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5432)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_R8000)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o
+obj-$(CONFIG_CPU_RM7000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_RM9000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_SB1)		+= c-sb1.o cerr-sb1.o cex-sb1.o pg-sb1.o \
+				   tlb-sb1.o
+obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o pg-r4k.o tlb-r3k.o
+obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+
+obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
+obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o
+obj-$(CONFIG_RM7000_CPU_SCACHE)	+= sc-rm7k.o
+
+#
+# Choose one DMA coherency model
+#
+ifndef CONFIG_OWN_DMA
+obj-$(CONFIG_DMA_COHERENT)	+= dma-coherent.o
+obj-$(CONFIG_DMA_NONCOHERENT)	+= dma-noncoherent.o
+endif
+obj-$(CONFIG_DMA_IP27)		+= dma-ip27.o
+obj-$(CONFIG_DMA_IP32)		+= dma-ip32.o
+
+EXTRA_AFLAGS := $(CFLAGS)
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
new file mode 100644
index 00000000000..c659f99eb39
--- /dev/null
+++ b/arch/mips/mm/c-r3k.c
@@ -0,0 +1,349 @@
+/*
+ * r2300.c: R2000 and R3000 specific mmu/cache code.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ *
+ * with a lot of changes to make this thing work for R3000s
+ * Tx39XX R4k style caches added. HK
+ * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
+ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
+ * Copyright (C) 2001, 2004  Maciej W. Rozycki
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/isadep.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+static unsigned long icache_size, dcache_size;		/* Size in bytes */
+static unsigned long icache_lsize, dcache_lsize;	/* Size in bytes */
+
+#undef DEBUG_CACHE
+
+unsigned long __init r3k_cache_size(unsigned long ca_flags)
+{
+	unsigned long flags, status, dummy, size;
+	volatile unsigned long *p;
+
+	p = (volatile unsigned long *) KSEG0;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ca_flags|flags)&~ST0_IEC);
+
+	*p = 0xa5a55a5a;
+	dummy = *p;
+	status = read_c0_status();
+
+	if (dummy != 0xa5a55a5a || (status & ST0_CM)) {
+		size = 0;
+	} else {
+		for (size = 128; size <= 0x40000; size <<= 1)
+			*(p + size) = 0;
+		*p = -1;
+		for (size = 128;
+		     (size <= 0x40000) && (*(p + size) == 0);
+		     size <<= 1)
+			;
+		if (size > 0x40000)
+			size = 0;
+	}
+
+	write_c0_status(flags);
+
+	return size * sizeof(*p);
+}
+
+unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
+{
+	unsigned long flags, status, lsize, i;
+	volatile unsigned long *p;
+
+	p = (volatile unsigned long *) KSEG0;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ca_flags|flags)&~ST0_IEC);
+
+	for (i = 0; i < 128; i++)
+		*(p + i) = 0;
+	*(volatile unsigned char *)p = 0;
+	for (lsize = 1; lsize < 128; lsize <<= 1) {
+		*(p + lsize);
+		status = read_c0_status();
+		if (!(status & ST0_CM))
+			break;
+	}
+	for (i = 0; i < 128; i += lsize)
+		*(volatile unsigned char *)(p + i) = 0;
+
+	write_c0_status(flags);
+
+	return lsize * sizeof(*p);
+}
+
+static void __init r3k_probe_cache(void)
+{
+	dcache_size = r3k_cache_size(ST0_ISC);
+	if (dcache_size)
+		dcache_lsize = r3k_cache_lsize(ST0_ISC);
+
+	icache_size = r3k_cache_size(ST0_ISC|ST0_SWC);
+	if (icache_size)
+		icache_lsize = r3k_cache_lsize(ST0_ISC|ST0_SWC);
+}
+
+static void r3k_flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, i, flags;
+	volatile unsigned char *p;
+
+	size = end - start;
+	if (size > icache_size || KSEGX(start) != KSEG0) {
+		start = KSEG0;
+		size = icache_size;
+	}
+	p = (char *)start;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
+
+	for (i = 0; i < size; i += 0x080) {
+		asm ( 	"sb\t$0, 0x000(%0)\n\t"
+			"sb\t$0, 0x004(%0)\n\t"
+			"sb\t$0, 0x008(%0)\n\t"
+			"sb\t$0, 0x00c(%0)\n\t"
+			"sb\t$0, 0x010(%0)\n\t"
+			"sb\t$0, 0x014(%0)\n\t"
+			"sb\t$0, 0x018(%0)\n\t"
+			"sb\t$0, 0x01c(%0)\n\t"
+		 	"sb\t$0, 0x020(%0)\n\t"
+			"sb\t$0, 0x024(%0)\n\t"
+			"sb\t$0, 0x028(%0)\n\t"
+			"sb\t$0, 0x02c(%0)\n\t"
+			"sb\t$0, 0x030(%0)\n\t"
+			"sb\t$0, 0x034(%0)\n\t"
+			"sb\t$0, 0x038(%0)\n\t"
+			"sb\t$0, 0x03c(%0)\n\t"
+			"sb\t$0, 0x040(%0)\n\t"
+			"sb\t$0, 0x044(%0)\n\t"
+			"sb\t$0, 0x048(%0)\n\t"
+			"sb\t$0, 0x04c(%0)\n\t"
+			"sb\t$0, 0x050(%0)\n\t"
+			"sb\t$0, 0x054(%0)\n\t"
+			"sb\t$0, 0x058(%0)\n\t"
+			"sb\t$0, 0x05c(%0)\n\t"
+		 	"sb\t$0, 0x060(%0)\n\t"
+			"sb\t$0, 0x064(%0)\n\t"
+			"sb\t$0, 0x068(%0)\n\t"
+			"sb\t$0, 0x06c(%0)\n\t"
+			"sb\t$0, 0x070(%0)\n\t"
+			"sb\t$0, 0x074(%0)\n\t"
+			"sb\t$0, 0x078(%0)\n\t"
+			"sb\t$0, 0x07c(%0)\n\t"
+			: : "r" (p) );
+		p += 0x080;
+	}
+
+	write_c0_status(flags);
+}
+
+static void r3k_flush_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long size, i, flags;
+	volatile unsigned char *p;
+
+	size = end - start;
+	if (size > dcache_size || KSEGX(start) != KSEG0) {
+		start = KSEG0;
+		size = dcache_size;
+	}
+	p = (char *)start;
+
+	flags = read_c0_status();
+
+	/* isolate cache space */
+	write_c0_status((ST0_ISC|flags)&~ST0_IEC);
+
+	for (i = 0; i < size; i += 0x080) {
+		asm ( 	"sb\t$0, 0x000(%0)\n\t"
+			"sb\t$0, 0x004(%0)\n\t"
+			"sb\t$0, 0x008(%0)\n\t"
+			"sb\t$0, 0x00c(%0)\n\t"
+		 	"sb\t$0, 0x010(%0)\n\t"
+			"sb\t$0, 0x014(%0)\n\t"
+			"sb\t$0, 0x018(%0)\n\t"
+			"sb\t$0, 0x01c(%0)\n\t"
+		 	"sb\t$0, 0x020(%0)\n\t"
+			"sb\t$0, 0x024(%0)\n\t"
+			"sb\t$0, 0x028(%0)\n\t"
+			"sb\t$0, 0x02c(%0)\n\t"
+		 	"sb\t$0, 0x030(%0)\n\t"
+			"sb\t$0, 0x034(%0)\n\t"
+			"sb\t$0, 0x038(%0)\n\t"
+			"sb\t$0, 0x03c(%0)\n\t"
+		 	"sb\t$0, 0x040(%0)\n\t"
+			"sb\t$0, 0x044(%0)\n\t"
+			"sb\t$0, 0x048(%0)\n\t"
+			"sb\t$0, 0x04c(%0)\n\t"
+		 	"sb\t$0, 0x050(%0)\n\t"
+			"sb\t$0, 0x054(%0)\n\t"
+			"sb\t$0, 0x058(%0)\n\t"
+			"sb\t$0, 0x05c(%0)\n\t"
+		 	"sb\t$0, 0x060(%0)\n\t"
+			"sb\t$0, 0x064(%0)\n\t"
+			"sb\t$0, 0x068(%0)\n\t"
+			"sb\t$0, 0x06c(%0)\n\t"
+		 	"sb\t$0, 0x070(%0)\n\t"
+			"sb\t$0, 0x074(%0)\n\t"
+			"sb\t$0, 0x078(%0)\n\t"
+			"sb\t$0, 0x07c(%0)\n\t"
+			: : "r" (p) );
+		p += 0x080;
+	}
+
+	write_c0_status(flags);
+}
+
+static inline unsigned long get_phys_page (unsigned long addr,
+					   struct mm_struct *mm)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long physpage;
+
+	pgd = pgd_offset(mm, addr);
+	pmd = pmd_offset(pgd, addr);
+	pte = pte_offset(pmd, addr);
+
+	if ((physpage = pte_val(*pte)) & _PAGE_VALID)
+		return KSEG0ADDR(physpage & PAGE_MASK);
+
+	return 0;
+}
+
+static inline void r3k_flush_cache_all(void)
+{
+}
+
+static inline void r3k___flush_cache_all(void)
+{
+	r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size);
+	r3k_flush_icache_range(KSEG0, KSEG0 + icache_size);
+}
+
+static void r3k_flush_cache_mm(struct mm_struct *mm)
+{
+}
+
+static void r3k_flush_cache_range(struct vm_area_struct *vma,
+	unsigned long start, unsigned long end)
+{
+}
+
+static void r3k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn)
+{
+}
+
+static void r3k_flush_data_cache_page(unsigned long addr)
+{
+}
+
+static void r3k_flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long physpage;
+
+	if (cpu_context(smp_processor_id(), mm) == 0)
+		return;
+
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+#ifdef DEBUG_CACHE
+	printk("cpage[%d,%08lx]", cpu_context(smp_processor_id(), mm), page);
+#endif
+
+	physpage = (unsigned long) page_address(page);
+	if (physpage)
+		r3k_flush_icache_range(physpage, physpage + PAGE_SIZE);
+}
+
+static void r3k_flush_cache_sigtramp(unsigned long addr)
+{
+	unsigned long flags;
+
+#ifdef DEBUG_CACHE
+	printk("csigtramp[%08lx]", addr);
+#endif
+
+	flags = read_c0_status();
+
+	write_c0_status(flags&~ST0_IEC);
+
+	/* Fill the TLB to avoid an exception with caches isolated. */
+	asm ( 	"lw\t$0, 0x000(%0)\n\t"
+		"lw\t$0, 0x004(%0)\n\t"
+		: : "r" (addr) );
+
+	write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
+
+	asm ( 	"sb\t$0, 0x000(%0)\n\t"
+		"sb\t$0, 0x004(%0)\n\t"
+		: : "r" (addr) );
+
+	write_c0_status(flags);
+}
+
+static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)
+{
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	iob();
+	r3k_flush_dcache_range(start, start + size);
+}
+
+void __init ld_mmu_r23000(void)
+{
+	extern void build_clear_page(void);
+	extern void build_copy_page(void);
+
+	r3k_probe_cache();
+
+	flush_cache_all = r3k_flush_cache_all;
+	__flush_cache_all = r3k___flush_cache_all;
+	flush_cache_mm = r3k_flush_cache_mm;
+	flush_cache_range = r3k_flush_cache_range;
+	flush_cache_page = r3k_flush_cache_page;
+	flush_icache_page = r3k_flush_icache_page;
+	flush_icache_range = r3k_flush_icache_range;
+
+	flush_cache_sigtramp = r3k_flush_cache_sigtramp;
+	flush_data_cache_page = r3k_flush_data_cache_page;
+
+	_dma_cache_wback_inv = r3k_dma_cache_wback_inv;
+	_dma_cache_wback = r3k_dma_cache_wback_inv;
+	_dma_cache_inv = r3k_dma_cache_wback_inv;
+
+	printk("Primary instruction cache %ldkB, linesize %ld bytes.\n",
+		icache_size >> 10, icache_lsize);
+	printk("Primary data cache %ldkB, linesize %ld bytes.\n",
+		dcache_size >> 10, dcache_lsize);
+
+	build_clear_page();
+	build_copy_page();
+}
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
new file mode 100644
index 00000000000..a03ebb2cba6
--- /dev/null
+++ b/arch/mips/mm/c-r4k.c
@@ -0,0 +1,1260 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+
+#include <asm/bcache.h>
+#include <asm/bootinfo.h>
+#include <asm/cacheops.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/r4kcache.h>
+#include <asm/system.h>
+#include <asm/mmu_context.h>
+#include <asm/war.h>
+
+static unsigned long icache_size, dcache_size, scache_size;
+
+/*
+ * Dummy cache handling routines for machines without boardcaches
+ */
+static void no_sc_noop(void) {}
+
+static struct bcache_ops no_sc_ops = {
+	.bc_enable = (void *)no_sc_noop,
+	.bc_disable = (void *)no_sc_noop,
+	.bc_wback_inv = (void *)no_sc_noop,
+	.bc_inv = (void *)no_sc_noop
+};
+
+struct bcache_ops *bcops = &no_sc_ops;
+
+#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x2010)
+#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x2020)
+
+#define R4600_HIT_CACHEOP_WAR_IMPL					\
+do {									\
+	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())		\
+		*(volatile unsigned long *)CKSEG1;			\
+	if (R4600_V1_HIT_CACHEOP_WAR)					\
+		__asm__ __volatile__("nop;nop;nop;nop");		\
+} while (0)
+
+static void (*r4k_blast_dcache_page)(unsigned long addr);
+
+static inline void r4k_blast_dcache_page_dc32(unsigned long addr)
+{
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache32_page(addr);
+}
+
+static inline void r4k_blast_dcache_page_setup(void)
+{
+	unsigned long  dc_lsize = cpu_dcache_line_size();
+
+	if (dc_lsize == 16)
+		r4k_blast_dcache_page = blast_dcache16_page;
+	else if (dc_lsize == 32)
+		r4k_blast_dcache_page = r4k_blast_dcache_page_dc32;
+}
+
+static void (* r4k_blast_dcache_page_indexed)(unsigned long addr);
+
+static inline void r4k_blast_dcache_page_indexed_setup(void)
+{
+	unsigned long dc_lsize = cpu_dcache_line_size();
+
+	if (dc_lsize == 16)
+		r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed;
+	else if (dc_lsize == 32)
+		r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed;
+}
+
+static void (* r4k_blast_dcache)(void);
+
+static inline void r4k_blast_dcache_setup(void)
+{
+	unsigned long dc_lsize = cpu_dcache_line_size();
+
+	if (dc_lsize == 16)
+		r4k_blast_dcache = blast_dcache16;
+	else if (dc_lsize == 32)
+		r4k_blast_dcache = blast_dcache32;
+}
+
+/* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */
+#define JUMP_TO_ALIGN(order) \
+	__asm__ __volatile__( \
+		"b\t1f\n\t" \
+		".align\t" #order "\n\t" \
+		"1:\n\t" \
+		)
+#define CACHE32_UNROLL32_ALIGN	JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */
+#define CACHE32_UNROLL32_ALIGN2	JUMP_TO_ALIGN(11)
+
+static inline void blast_r4600_v1_icache32(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	blast_icache32();
+	local_irq_restore(flags);
+}
+
+static inline void tx49_blast_icache32(void)
+{
+	unsigned long start = INDEX_BASE;
+	unsigned long end = start + current_cpu_data.icache.waysize;
+	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;
+	unsigned long ws_end = current_cpu_data.icache.ways <<
+	                       current_cpu_data.icache.waybit;
+	unsigned long ws, addr;
+
+	CACHE32_UNROLL32_ALIGN2;
+	/* I'm in even chunk.  blast odd chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc) 
+		for (addr = start + 0x400; addr < end; addr += 0x400 * 2) 
+			cache32_unroll32(addr|ws,Index_Invalidate_I);
+	CACHE32_UNROLL32_ALIGN;
+	/* I'm in odd chunk.  blast even chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc) 
+		for (addr = start; addr < end; addr += 0x400 * 2) 
+			cache32_unroll32(addr|ws,Index_Invalidate_I);
+}
+
+static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	blast_icache32_page_indexed(page);
+	local_irq_restore(flags);
+}
+
+static inline void tx49_blast_icache32_page_indexed(unsigned long page)
+{
+	unsigned long start = page;
+	unsigned long end = start + PAGE_SIZE;
+	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;
+	unsigned long ws_end = current_cpu_data.icache.ways <<
+	                       current_cpu_data.icache.waybit;
+	unsigned long ws, addr;
+
+	CACHE32_UNROLL32_ALIGN2;
+	/* I'm in even chunk.  blast odd chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc) 
+		for (addr = start + 0x400; addr < end; addr += 0x400 * 2) 
+			cache32_unroll32(addr|ws,Index_Invalidate_I);
+	CACHE32_UNROLL32_ALIGN;
+	/* I'm in odd chunk.  blast even chunks */
+	for (ws = 0; ws < ws_end; ws += ws_inc) 
+		for (addr = start; addr < end; addr += 0x400 * 2) 
+			cache32_unroll32(addr|ws,Index_Invalidate_I);
+}
+
+static void (* r4k_blast_icache_page)(unsigned long addr);
+
+static inline void r4k_blast_icache_page_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 16)
+		r4k_blast_icache_page = blast_icache16_page;
+	else if (ic_lsize == 32)
+		r4k_blast_icache_page = blast_icache32_page;
+	else if (ic_lsize == 64)
+		r4k_blast_icache_page = blast_icache64_page;
+}
+
+
+static void (* r4k_blast_icache_page_indexed)(unsigned long addr);
+
+static inline void r4k_blast_icache_page_indexed_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 16)
+		r4k_blast_icache_page_indexed = blast_icache16_page_indexed;
+	else if (ic_lsize == 32) {
+		if (TX49XX_ICACHE_INDEX_INV_WAR)
+			r4k_blast_icache_page_indexed =
+				tx49_blast_icache32_page_indexed;
+		else if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x())
+			r4k_blast_icache_page_indexed =
+				blast_icache32_r4600_v1_page_indexed;
+		else
+			r4k_blast_icache_page_indexed =
+				blast_icache32_page_indexed;
+	} else if (ic_lsize == 64)
+		r4k_blast_icache_page_indexed = blast_icache64_page_indexed;
+}
+
+static void (* r4k_blast_icache)(void);
+
+static inline void r4k_blast_icache_setup(void)
+{
+	unsigned long ic_lsize = cpu_icache_line_size();
+
+	if (ic_lsize == 16)
+		r4k_blast_icache = blast_icache16;
+	else if (ic_lsize == 32) {
+		if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x())
+			r4k_blast_icache = blast_r4600_v1_icache32;
+		else if (TX49XX_ICACHE_INDEX_INV_WAR)
+			r4k_blast_icache = tx49_blast_icache32;
+		else
+			r4k_blast_icache = blast_icache32;
+	} else if (ic_lsize == 64)
+		r4k_blast_icache = blast_icache64;
+}
+
+static void (* r4k_blast_scache_page)(unsigned long addr);
+
+static inline void r4k_blast_scache_page_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (sc_lsize == 16)
+		r4k_blast_scache_page = blast_scache16_page;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_page = blast_scache32_page;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_page = blast_scache64_page;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_page = blast_scache128_page;
+}
+
+static void (* r4k_blast_scache_page_indexed)(unsigned long addr);
+
+static inline void r4k_blast_scache_page_indexed_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (sc_lsize == 16)
+		r4k_blast_scache_page_indexed = blast_scache16_page_indexed;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_page_indexed = blast_scache32_page_indexed;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_page_indexed = blast_scache64_page_indexed;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_page_indexed = blast_scache128_page_indexed;
+}
+
+static void (* r4k_blast_scache)(void);
+
+static inline void r4k_blast_scache_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (sc_lsize == 16)
+		r4k_blast_scache = blast_scache16;
+	else if (sc_lsize == 32)
+		r4k_blast_scache = blast_scache32;
+	else if (sc_lsize == 64)
+		r4k_blast_scache = blast_scache64;
+	else if (sc_lsize == 128)
+		r4k_blast_scache = blast_scache128;
+}
+
+/*
+ * This is former mm's flush_cache_all() which really should be
+ * flush_cache_vunmap these days ...
+ */
+static inline void local_r4k_flush_cache_all(void * args)
+{
+	r4k_blast_dcache();
+	r4k_blast_icache();
+}
+
+static void r4k_flush_cache_all(void)
+{
+	if (!cpu_has_dc_aliases)
+		return;
+
+	on_each_cpu(local_r4k_flush_cache_all, NULL, 1, 1);
+}
+
+static inline void local_r4k___flush_cache_all(void * args)
+{
+	r4k_blast_dcache();
+	r4k_blast_icache();
+
+	switch (current_cpu_data.cputype) {
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+	case CPU_R10000:
+	case CPU_R12000:
+		r4k_blast_scache();
+	}
+}
+
+static void r4k___flush_cache_all(void)
+{
+	on_each_cpu(local_r4k___flush_cache_all, NULL, 1, 1);
+}
+
+static inline void local_r4k_flush_cache_range(void * args)
+{
+	struct vm_area_struct *vma = args;
+	int exec;
+
+	if (!(cpu_context(smp_processor_id(), vma->vm_mm)))
+		return;
+
+	exec = vma->vm_flags & VM_EXEC;
+	if (cpu_has_dc_aliases || exec)
+		r4k_blast_dcache();
+	if (exec)
+		r4k_blast_icache();
+}
+
+static void r4k_flush_cache_range(struct vm_area_struct *vma,
+	unsigned long start, unsigned long end)
+{
+	on_each_cpu(local_r4k_flush_cache_range, vma, 1, 1);
+}
+
+static inline void local_r4k_flush_cache_mm(void * args)
+{
+	struct mm_struct *mm = args;
+
+	if (!cpu_context(smp_processor_id(), mm))
+		return;
+
+	r4k_blast_dcache();
+	r4k_blast_icache();
+
+	/*
+	 * Kludge alert.  For obscure reasons R4000SC and R4400SC go nuts if we
+	 * only flush the primary caches but R10000 and R12000 behave sane ...
+	 */
+	if (current_cpu_data.cputype == CPU_R4000SC ||
+	    current_cpu_data.cputype == CPU_R4000MC ||
+	    current_cpu_data.cputype == CPU_R4400SC ||
+	    current_cpu_data.cputype == CPU_R4400MC)
+		r4k_blast_scache();
+}
+
+static void r4k_flush_cache_mm(struct mm_struct *mm)
+{
+	if (!cpu_has_dc_aliases)
+		return;
+
+	on_each_cpu(local_r4k_flush_cache_mm, mm, 1, 1);
+}
+
+struct flush_cache_page_args {
+	struct vm_area_struct *vma;
+	unsigned long page;
+};
+
+static inline void local_r4k_flush_cache_page(void *args)
+{
+	struct flush_cache_page_args *fcp_args = args;
+	struct vm_area_struct *vma = fcp_args->vma;
+	unsigned long page = fcp_args->page;
+	int exec = vma->vm_flags & VM_EXEC;
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	page &= PAGE_MASK;
+	pgdp = pgd_offset(mm, page);
+	pmdp = pmd_offset(pgdp, page);
+	ptep = pte_offset(pmdp, page);
+
+	/*
+	 * If the page isn't marked valid, the page cannot possibly be
+	 * in the cache.
+	 */
+	if (!(pte_val(*ptep) & _PAGE_PRESENT))
+		return;
+
+	/*
+	 * Doing flushes for another ASID than the current one is
+	 * too difficult since stupid R4k caches do a TLB translation
+	 * for every cache flush operation.  So we do indexed flushes
+	 * in that case, which doesn't overly flush the cache too much.
+	 */
+	if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) {
+		if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
+			r4k_blast_dcache_page(page);
+			if (exec && !cpu_icache_snoops_remote_store)
+				r4k_blast_scache_page(page);
+		}
+		if (exec)
+			r4k_blast_icache_page(page);
+
+		return;
+	}
+
+	/*
+	 * Do indexed flush, too much work to get the (possible) TLB refills
+	 * to work correctly.
+	 */
+	page = INDEX_BASE + (page & (dcache_size - 1));
+	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
+		r4k_blast_dcache_page_indexed(page);
+		if (exec && !cpu_icache_snoops_remote_store)
+			r4k_blast_scache_page_indexed(page);
+	}
+	if (exec) {
+		if (cpu_has_vtag_icache) {
+			int cpu = smp_processor_id();
+
+			if (cpu_context(cpu, vma->vm_mm) != 0)
+				drop_mmu_context(vma->vm_mm, cpu);
+		} else
+			r4k_blast_icache_page_indexed(page);
+	}
+}
+
+static void r4k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn)
+{
+	struct flush_cache_page_args args;
+
+	/*
+	 * If ownes no valid ASID yet, cannot possibly have gotten
+	 * this page into the cache.
+	 */
+	if (cpu_context(smp_processor_id(), vma->vm_mm) == 0)
+		return;
+
+	args.vma = vma;
+	args.page = page;
+
+	on_each_cpu(local_r4k_flush_cache_page, &args, 1, 1);
+}
+
+static inline void local_r4k_flush_data_cache_page(void * addr)
+{
+	r4k_blast_dcache_page((unsigned long) addr);
+}
+
+static void r4k_flush_data_cache_page(unsigned long addr)
+{
+	on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr, 1, 1);
+}
+
+struct flush_icache_range_args {
+	unsigned long start;
+	unsigned long end;
+};
+
+static inline void local_r4k_flush_icache_range(void *args)
+{
+	struct flush_icache_range_args *fir_args = args;
+	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+	unsigned long ic_lsize = current_cpu_data.icache.linesz;
+	unsigned long sc_lsize = current_cpu_data.scache.linesz;
+	unsigned long start = fir_args->start;
+	unsigned long end = fir_args->end;
+	unsigned long addr, aend;
+
+	if (!cpu_has_ic_fills_f_dc) {
+		if (end - start > dcache_size) {
+			r4k_blast_dcache();
+		} else {
+			addr = start & ~(dc_lsize - 1);
+			aend = (end - 1) & ~(dc_lsize - 1);
+
+			while (1) {
+				/* Hit_Writeback_Inv_D */
+				protected_writeback_dcache_line(addr);
+				if (addr == aend)
+					break;
+				addr += dc_lsize;
+			}
+		}
+
+		if (!cpu_icache_snoops_remote_store) {
+			if (end - start > scache_size) {
+				r4k_blast_scache();
+			} else {
+				addr = start & ~(sc_lsize - 1);
+				aend = (end - 1) & ~(sc_lsize - 1);
+
+				while (1) {
+					/* Hit_Writeback_Inv_D */
+					protected_writeback_scache_line(addr);
+					if (addr == aend)
+						break;
+					addr += sc_lsize;
+				}
+			}
+		}
+	}
+
+	if (end - start > icache_size)
+		r4k_blast_icache();
+	else {
+		addr = start & ~(ic_lsize - 1);
+		aend = (end - 1) & ~(ic_lsize - 1);
+		while (1) {
+			/* Hit_Invalidate_I */
+			protected_flush_icache_line(addr);
+			if (addr == aend)
+				break;
+			addr += ic_lsize;
+		}
+	}
+}
+
+static void r4k_flush_icache_range(unsigned long start, unsigned long end)
+{
+	struct flush_icache_range_args args;
+
+	args.start = start;
+	args.end = end;
+
+	on_each_cpu(local_r4k_flush_icache_range, &args, 1, 1);
+}
+
+/*
+ * Ok, this seriously sucks.  We use them to flush a user page but don't
+ * know the virtual address, so we have to blast away the whole icache
+ * which is significantly more expensive than the real thing.  Otoh we at
+ * least know the kernel address of the page so we can flush it
+ * selectivly.
+ */
+
+struct flush_icache_page_args {
+	struct vm_area_struct *vma;
+	struct page *page;
+};
+
+static inline void local_r4k_flush_icache_page(void *args)
+{
+	struct flush_icache_page_args *fip_args = args;
+	struct vm_area_struct *vma = fip_args->vma;
+	struct page *page = fip_args->page;
+
+	/*
+	 * Tricky ...  Because we don't know the virtual address we've got the
+	 * choice of either invalidating the entire primary and secondary
+	 * caches or invalidating the secondary caches also.  With the subset
+	 * enforcment on R4000SC, R4400SC, R10000 and R12000 invalidating the
+	 * secondary cache will result in any entries in the primary caches
+	 * also getting invalidated which hopefully is a bit more economical.
+	 */
+	if (cpu_has_subset_pcaches) {
+		unsigned long addr = (unsigned long) page_address(page);
+
+		r4k_blast_scache_page(addr);
+		ClearPageDcacheDirty(page);
+
+		return;
+	}
+
+	if (!cpu_has_ic_fills_f_dc) {
+		unsigned long addr = (unsigned long) page_address(page);
+		r4k_blast_dcache_page(addr);
+		if (!cpu_icache_snoops_remote_store)
+			r4k_blast_scache_page(addr);
+		ClearPageDcacheDirty(page);
+	}
+
+	/*
+	 * We're not sure of the virtual address(es) involved here, so
+	 * we have to flush the entire I-cache.
+	 */
+	if (cpu_has_vtag_icache) {
+		int cpu = smp_processor_id();
+
+		if (cpu_context(cpu, vma->vm_mm) != 0)
+			drop_mmu_context(vma->vm_mm, cpu);
+	} else
+		r4k_blast_icache();
+}
+
+static void r4k_flush_icache_page(struct vm_area_struct *vma,
+	struct page *page)
+{
+	struct flush_icache_page_args args;
+
+	/*
+	 * If there's no context yet, or the page isn't executable, no I-cache
+	 * flush is needed.
+	 */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	args.vma = vma;
+	args.page = page;
+
+	on_each_cpu(local_r4k_flush_icache_page, &args, 1, 1);
+}
+
+
+#ifdef CONFIG_DMA_NONCOHERENT
+
+static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	if (cpu_has_subset_pcaches) {
+		unsigned long sc_lsize = current_cpu_data.scache.linesz;
+
+		if (size >= scache_size) {
+			r4k_blast_scache();
+			return;
+		}
+
+		a = addr & ~(sc_lsize - 1);
+		end = (addr + size - 1) & ~(sc_lsize - 1);
+		while (1) {
+			flush_scache_line(a);	/* Hit_Writeback_Inv_SD */
+			if (a == end)
+				break;
+			a += sc_lsize;
+		}
+		return;
+	}
+
+	/*
+	 * Either no secondary cache or the available caches don't have the
+	 * subset property so we have to flush the primary caches
+	 * explicitly
+	 */
+	if (size >= dcache_size) {
+		r4k_blast_dcache();
+	} else {
+		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+
+		R4600_HIT_CACHEOP_WAR_IMPL;
+		a = addr & ~(dc_lsize - 1);
+		end = (addr + size - 1) & ~(dc_lsize - 1);
+		while (1) {
+			flush_dcache_line(a);	/* Hit_Writeback_Inv_D */
+			if (a == end)
+				break;
+			a += dc_lsize;
+		}
+	}
+
+	bc_wback_inv(addr, size);
+}
+
+static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	if (cpu_has_subset_pcaches) {
+		unsigned long sc_lsize = current_cpu_data.scache.linesz;
+
+		if (size >= scache_size) {
+			r4k_blast_scache();
+			return;
+		}
+
+		a = addr & ~(sc_lsize - 1);
+		end = (addr + size - 1) & ~(sc_lsize - 1);
+		while (1) {
+			flush_scache_line(a);	/* Hit_Writeback_Inv_SD */
+			if (a == end)
+				break;
+			a += sc_lsize;
+		}
+		return;
+	}
+
+	if (size >= dcache_size) {
+		r4k_blast_dcache();
+	} else {
+		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+
+		R4600_HIT_CACHEOP_WAR_IMPL;
+		a = addr & ~(dc_lsize - 1);
+		end = (addr + size - 1) & ~(dc_lsize - 1);
+		while (1) {
+			flush_dcache_line(a);	/* Hit_Writeback_Inv_D */
+			if (a == end)
+				break;
+			a += dc_lsize;
+		}
+	}
+
+	bc_inv(addr, size);
+}
+#endif /* CONFIG_DMA_NONCOHERENT */
+
+/*
+ * While we're protected against bad userland addresses we don't care
+ * very much about what happens in that case.  Usually a segmentation
+ * fault will dump the process later on anyway ...
+ */
+static void local_r4k_flush_cache_sigtramp(void * arg)
+{
+	unsigned long ic_lsize = current_cpu_data.icache.linesz;
+	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+	unsigned long sc_lsize = current_cpu_data.scache.linesz;
+	unsigned long addr = (unsigned long) arg;
+
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	protected_writeback_dcache_line(addr & ~(dc_lsize - 1));
+	if (!cpu_icache_snoops_remote_store)
+		protected_writeback_scache_line(addr & ~(sc_lsize - 1));
+	protected_flush_icache_line(addr & ~(ic_lsize - 1));
+	if (MIPS4K_ICACHE_REFILL_WAR) {
+		__asm__ __volatile__ (
+			".set push\n\t"
+			".set noat\n\t"
+			".set mips3\n\t"
+#ifdef CONFIG_MIPS32
+			"la	$at,1f\n\t"
+#endif
+#ifdef CONFIG_MIPS64
+			"dla	$at,1f\n\t"
+#endif
+			"cache	%0,($at)\n\t"
+			"nop; nop; nop\n"
+			"1:\n\t"
+			".set pop"
+			:
+			: "i" (Hit_Invalidate_I));
+	}
+	if (MIPS_CACHE_SYNC_WAR)
+		__asm__ __volatile__ ("sync");
+}
+
+static void r4k_flush_cache_sigtramp(unsigned long addr)
+{
+	on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr, 1, 1);
+}
+
+static void r4k_flush_icache_all(void)
+{
+	if (cpu_has_vtag_icache)
+		r4k_blast_icache();
+}
+
+static inline void rm7k_erratum31(void)
+{
+	const unsigned long ic_lsize = 32;
+	unsigned long addr;
+
+	/* RM7000 erratum #31. The icache is screwed at startup. */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (addr = INDEX_BASE; addr <= INDEX_BASE + 4096; addr += ic_lsize) {
+		__asm__ __volatile__ (
+			".set noreorder\n\t"
+			".set mips3\n\t"
+			"cache\t%1, 0(%0)\n\t"
+			"cache\t%1, 0x1000(%0)\n\t"
+			"cache\t%1, 0x2000(%0)\n\t"
+			"cache\t%1, 0x3000(%0)\n\t"
+			"cache\t%2, 0(%0)\n\t"
+			"cache\t%2, 0x1000(%0)\n\t"
+			"cache\t%2, 0x2000(%0)\n\t"
+			"cache\t%2, 0x3000(%0)\n\t"
+			"cache\t%1, 0(%0)\n\t"
+			"cache\t%1, 0x1000(%0)\n\t"
+			"cache\t%1, 0x2000(%0)\n\t"
+			"cache\t%1, 0x3000(%0)\n\t"
+			".set\tmips0\n\t"
+			".set\treorder\n\t"
+			:
+			: "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill));
+	}
+}
+
+static char *way_string[] __initdata = { NULL, "direct mapped", "2-way",
+	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way"
+};
+
+static void __init probe_pcache(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config = read_c0_config();
+	unsigned int prid = read_c0_prid();
+	unsigned long config1;
+	unsigned int lsize;
+
+	switch (c->cputype) {
+	case CPU_R4600:			/* QED style two way caches? */
+	case CPU_R4700:
+	case CPU_R5000:
+	case CPU_NEVADA:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 2;
+		c->icache.waybit = ffs(icache_size/2) - 1;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 2;
+		c->dcache.waybit= ffs(dcache_size/2) - 1;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_R5432:
+	case CPU_R5500:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 2;
+		c->icache.waybit= 0;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 2;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_TX49XX:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 4;
+		c->icache.waybit= 0;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 4;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+	case CPU_R4300:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 1;
+		c->icache.waybit = 0; 	/* doesn't matter */
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 1;
+		c->dcache.waybit = 0;	/* does not matter */
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+		icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29));
+		c->icache.linesz = 64;
+		c->icache.ways = 2;
+		c->icache.waybit = 0;
+
+		dcache_size = 1 << (12 + ((config & R10K_CONF_DC) >> 26));
+		c->dcache.linesz = 32;
+		c->dcache.ways = 2;
+		c->dcache.waybit = 0;
+
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	case CPU_VR4133:
+		write_c0_config(config & ~CONF_EB);
+	case CPU_VR4131:
+		/* Workaround for cache instruction bug of VR4131 */
+		if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U ||
+		    c->processor_id == 0x0c82U) {
+			config &= ~0x00000030U;
+			config |= 0x00410000U;
+			write_c0_config(config);
+		}
+		icache_size = 1 << (10 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 2;
+		c->icache.waybit = ffs(icache_size/2) - 1;
+
+		dcache_size = 1 << (10 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 2;
+		c->dcache.waybit = ffs(dcache_size/2) - 1;
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_VR41XX:
+	case CPU_VR4111:
+	case CPU_VR4121:
+	case CPU_VR4122:
+	case CPU_VR4181:
+	case CPU_VR4181A:
+		icache_size = 1 << (10 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 1;
+		c->icache.waybit = 0; 	/* doesn't matter */
+
+		dcache_size = 1 << (10 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 1;
+		c->dcache.waybit = 0;	/* does not matter */
+
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+		break;
+
+	case CPU_RM7000:
+		rm7k_erratum31();
+
+	case CPU_RM9000:
+		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));
+		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);
+		c->icache.ways = 4;
+		c->icache.waybit = ffs(icache_size / c->icache.ways) - 1;
+
+		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));
+		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4);
+		c->dcache.ways = 4;
+		c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1;
+
+#if !defined(CONFIG_SMP) || !defined(RM9000_CDEX_SMP_WAR)
+		c->options |= MIPS_CPU_CACHE_CDEX_P;
+#endif
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	default:
+		if (!(config & MIPS_CONF_M))
+			panic("Don't know how to probe P-caches on this cpu.");
+
+		/*
+		 * So we seem to be a MIPS32 or MIPS64 CPU
+		 * So let's probe the I-cache ...
+		 */
+		config1 = read_c0_config1();
+
+		if ((lsize = ((config1 >> 19) & 7)))
+			c->icache.linesz = 2 << lsize;
+		else
+			c->icache.linesz = lsize;
+		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.ways = 1 + ((config1 >> 16) & 7);
+
+		icache_size = c->icache.sets *
+		              c->icache.ways *
+		              c->icache.linesz;
+		c->icache.waybit = ffs(icache_size/c->icache.ways) - 1;
+
+		if (config & 0x8)		/* VI bit */
+			c->icache.flags |= MIPS_CACHE_VTAG;
+
+		/*
+		 * Now probe the MIPS32 / MIPS64 data cache.
+		 */
+		c->dcache.flags = 0;
+
+		if ((lsize = ((config1 >> 10) & 7)))
+			c->dcache.linesz = 2 << lsize;
+		else
+			c->dcache.linesz= lsize;
+		c->dcache.sets = 64 << ((config1 >> 13) & 7);
+		c->dcache.ways = 1 + ((config1 >> 7) & 7);
+
+		dcache_size = c->dcache.sets *
+		              c->dcache.ways *
+		              c->dcache.linesz;
+		c->dcache.waybit = ffs(dcache_size/c->dcache.ways) - 1;
+
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+	}
+
+	/*
+	 * Processor configuration sanity check for the R4000SC erratum
+	 * #5.  With page sizes larger than 32kB there is no possibility
+	 * to get a VCE exception anymore so we don't care about this
+	 * misconfiguration.  The case is rather theoretical anyway;
+	 * presumably no vendor is shipping his hardware in the "bad"
+	 * configuration.
+	 */
+	if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 &&
+	    !(config & CONF_SC) && c->icache.linesz != 16 &&
+	    PAGE_SIZE <= 0x8000)
+		panic("Improper R4000SC processor configuration detected");
+
+	/* compute a couple of other cache variables */
+	c->icache.waysize = icache_size / c->icache.ways;
+	c->dcache.waysize = dcache_size / c->dcache.ways;
+
+	c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways);
+	c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways);
+
+	/*
+	 * R10000 and R12000 P-caches are odd in a positive way.  They're 32kB
+	 * 2-way virtually indexed so normally would suffer from aliases.  So
+	 * normally they'd suffer from aliases but magic in the hardware deals
+	 * with that for us so we don't need to take care ourselves.
+	 */
+	if (c->cputype != CPU_R10000 && c->cputype != CPU_R12000)
+		if (c->dcache.waysize > PAGE_SIZE)
+		        c->dcache.flags |= MIPS_CACHE_ALIASES;
+
+	switch (c->cputype) {
+	case CPU_20KC:
+		/*
+		 * Some older 20Kc chips doesn't have the 'VI' bit in
+		 * the config register.
+		 */
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		break;
+
+	case CPU_AU1500:
+		c->icache.flags |= MIPS_CACHE_IC_F_DC;
+		break;
+	}
+
+	printk("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n",
+	       icache_size >> 10,
+	       cpu_has_vtag_icache ? "virtually tagged" : "physically tagged",
+	       way_string[c->icache.ways], c->icache.linesz);
+
+	printk("Primary data cache %ldkB, %s, linesize %d bytes.\n",
+	       dcache_size >> 10, way_string[c->dcache.ways], c->dcache.linesz);
+}
+
+/*
+ * If you even _breathe_ on this function, look at the gcc output and make sure
+ * it does not pop things on and off the stack for the cache sizing loop that
+ * executes in KSEG1 space or else you will crash and burn badly.  You have
+ * been warned.
+ */
+static int __init probe_scache(void)
+{
+	extern unsigned long stext;
+	unsigned long flags, addr, begin, end, pow2;
+	unsigned int config = read_c0_config();
+	struct cpuinfo_mips *c = &current_cpu_data;
+	int tmp;
+
+	if (config & CONF_SC)
+		return 0;
+
+	begin = (unsigned long) &stext;
+	begin &= ~((4 * 1024 * 1024) - 1);
+	end = begin + (4 * 1024 * 1024);
+
+	/*
+	 * This is such a bitch, you'd think they would make it easy to do
+	 * this.  Away you daemons of stupidity!
+	 */
+	local_irq_save(flags);
+
+	/* Fill each size-multiple cache line with a valid tag. */
+	pow2 = (64 * 1024);
+	for (addr = begin; addr < end; addr = (begin + pow2)) {
+		unsigned long *p = (unsigned long *) addr;
+		__asm__ __volatile__("nop" : : "r" (*p)); /* whee... */
+		pow2 <<= 1;
+	}
+
+	/* Load first line with zero (therefore invalid) tag. */
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+	__asm__ __volatile__("nop; nop; nop; nop;"); /* avoid the hazard */
+	cache_op(Index_Store_Tag_I, begin);
+	cache_op(Index_Store_Tag_D, begin);
+	cache_op(Index_Store_Tag_SD, begin);
+
+	/* Now search for the wrap around point. */
+	pow2 = (128 * 1024);
+	tmp = 0;
+	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {
+		cache_op(Index_Load_Tag_SD, addr);
+		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */
+		if (!read_c0_taglo())
+			break;
+		pow2 <<= 1;
+	}
+	local_irq_restore(flags);
+	addr -= begin;
+
+	scache_size = addr;
+	c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22);
+	c->scache.ways = 1;
+	c->dcache.waybit = 0;		/* does not matter */
+
+	return 1;
+}
+
+typedef int (*probe_func_t)(unsigned long);
+extern int r5k_sc_init(void);
+extern int rm7k_sc_init(void);
+
+static void __init setup_scache(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config = read_c0_config();
+	probe_func_t probe_scache_kseg1;
+	int sc_present = 0;
+
+	/*
+	 * Do the probing thing on R4000SC and R4400SC processors.  Other
+	 * processors don't have a S-cache that would be relevant to the
+	 * Linux memory managment.
+	 */
+	switch (c->cputype) {
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		probe_scache_kseg1 = (probe_func_t) (CKSEG1ADDR(&probe_scache));
+		sc_present = probe_scache_kseg1(config);
+		if (sc_present)
+			c->options |= MIPS_CPU_CACHE_CDEX_S;
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+		scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16);
+		c->scache.linesz = 64 << ((config >> 13) & 1);
+		c->scache.ways = 2;
+		c->scache.waybit= 0;
+		sc_present = 1;
+		break;
+
+	case CPU_R5000:
+	case CPU_NEVADA:
+#ifdef CONFIG_R5000_CPU_SCACHE
+		r5k_sc_init();
+#endif
+                return;
+
+	case CPU_RM7000:
+	case CPU_RM9000:
+#ifdef CONFIG_RM7000_CPU_SCACHE
+		rm7k_sc_init();
+#endif
+		return;
+
+	default:
+		sc_present = 0;
+	}
+
+	if (!sc_present)
+		return;
+
+	if ((c->isa_level == MIPS_CPU_ISA_M32 ||
+	     c->isa_level == MIPS_CPU_ISA_M64) &&
+	    !(c->scache.flags & MIPS_CACHE_NOT_PRESENT))
+		panic("Dunno how to handle MIPS32 / MIPS64 second level cache");
+
+	/* compute a couple of other cache variables */
+	c->scache.waysize = scache_size / c->scache.ways;
+
+	c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways);
+
+	printk("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
+	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
+
+	c->options |= MIPS_CPU_SUBSET_CACHES;
+}
+
+static inline void coherency_setup(void)
+{
+	change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT);
+
+	/*
+	 * c0_status.cu=0 specifies that updates by the sc instruction use
+	 * the coherency mode specified by the TLB; 1 means cachable
+	 * coherent update on write will be used.  Not all processors have
+	 * this bit and; some wire it to zero, others like Toshiba had the
+	 * silly idea of putting something else there ...
+	 */
+	switch (current_cpu_data.cputype) {
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		clear_c0_config(CONF_CU);
+		break;
+	}
+}
+
+void __init ld_mmu_r4xx0(void)
+{
+	extern void build_clear_page(void);
+	extern void build_copy_page(void);
+	extern char except_vec2_generic;
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	/* Default cache error handler for R4000 and R5000 family */
+	memcpy((void *)(CAC_BASE   + 0x100), &except_vec2_generic, 0x80);
+	memcpy((void *)(UNCAC_BASE + 0x100), &except_vec2_generic, 0x80);
+
+	probe_pcache();
+	setup_scache();
+
+	if (c->dcache.sets * c->dcache.ways > PAGE_SIZE)
+		c->dcache.flags |= MIPS_CACHE_ALIASES;
+
+	r4k_blast_dcache_page_setup();
+	r4k_blast_dcache_page_indexed_setup();
+	r4k_blast_dcache_setup();
+	r4k_blast_icache_page_setup();
+	r4k_blast_icache_page_indexed_setup();
+	r4k_blast_icache_setup();
+	r4k_blast_scache_page_setup();
+	r4k_blast_scache_page_indexed_setup();
+	r4k_blast_scache_setup();
+
+	/*
+	 * Some MIPS32 and MIPS64 processors have physically indexed caches.
+	 * This code supports virtually indexed processors and will be
+	 * unnecessarily inefficient on physically indexed processors.
+	 */
+	shm_align_mask = max_t( unsigned long,
+				c->dcache.sets * c->dcache.linesz - 1,
+				PAGE_SIZE - 1);
+
+	flush_cache_all		= r4k_flush_cache_all;
+	__flush_cache_all	= r4k___flush_cache_all;
+	flush_cache_mm		= r4k_flush_cache_mm;
+	flush_cache_page	= r4k_flush_cache_page;
+	flush_icache_page	= r4k_flush_icache_page;
+	flush_cache_range	= r4k_flush_cache_range;
+
+	flush_cache_sigtramp	= r4k_flush_cache_sigtramp;
+	flush_icache_all	= r4k_flush_icache_all;
+	flush_data_cache_page	= r4k_flush_data_cache_page;
+	flush_icache_range	= r4k_flush_icache_range;
+
+#ifdef CONFIG_DMA_NONCOHERENT
+	_dma_cache_wback_inv	= r4k_dma_cache_wback_inv;
+	_dma_cache_wback	= r4k_dma_cache_wback_inv;
+	_dma_cache_inv		= r4k_dma_cache_inv;
+#endif
+
+	__flush_cache_all();
+	coherency_setup();
+
+	build_clear_page();
+	build_copy_page();
+}
diff --git a/arch/mips/mm/c-sb1.c b/arch/mips/mm/c-sb1.c
new file mode 100644
index 00000000000..ab30afd63b3
--- /dev/null
+++ b/arch/mips/mm/c-sb1.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 2000, 2001, 2002, 2003 Broadcom Corporation
+ * Copyright (C) 2004  Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <asm/asm.h>
+#include <asm/bootinfo.h>
+#include <asm/cacheops.h>
+#include <asm/cpu.h>
+#include <asm/mipsregs.h>
+#include <asm/mmu_context.h>
+#include <asm/uaccess.h>
+
+extern void sb1_dma_init(void);
+
+/* These are probed at ld_mmu time */
+static unsigned long icache_size;
+static unsigned long dcache_size;
+
+static unsigned short icache_line_size;
+static unsigned short dcache_line_size;
+
+static unsigned int icache_index_mask;
+static unsigned int dcache_index_mask;
+
+static unsigned short icache_assoc;
+static unsigned short dcache_assoc;
+
+static unsigned short icache_sets;
+static unsigned short dcache_sets;
+
+static unsigned int icache_range_cutoff;
+static unsigned int dcache_range_cutoff;
+
+/*
+ * The dcache is fully coherent to the system, with one
+ * big caveat:  the instruction stream.  In other words,
+ * if we miss in the icache, and have dirty data in the
+ * L1 dcache, then we'll go out to memory (or the L2) and
+ * get the not-as-recent data.
+ *
+ * So the only time we have to flush the dcache is when
+ * we're flushing the icache.  Since the L2 is fully
+ * coherent to everything, including I/O, we never have
+ * to flush it
+ */
+
+#define cache_set_op(op, addr)						\
+	__asm__ __volatile__(						\
+	"	.set	noreorder		\n"			\
+	"	.set	mips64\n\t		\n"			\
+	"	cache	%0, (0<<13)(%1)		\n"			\
+	"	cache	%0, (1<<13)(%1)		\n"			\
+	"	cache	%0, (2<<13)(%1)		\n"			\
+	"	cache	%0, (3<<13)(%1)		\n"			\
+	"	.set	mips0			\n"			\
+	"	.set	reorder"					\
+	:								\
+	: "i" (op), "r" (addr))
+
+#define sync()								\
+	__asm__ __volatile(						\
+	"	.set	mips64\n\t		\n"			\
+	"	sync				\n"			\
+	"	.set	mips0")
+
+#define mispredict()							\
+	__asm__ __volatile__(						\
+	"	bnezl  $0, 1f		\n" /* Force mispredict */	\
+	"1:				\n");
+
+/*
+ * Writeback and invalidate the entire dcache
+ */
+static inline void __sb1_writeback_inv_dcache_all(void)
+{
+	unsigned long addr = 0;
+
+	while (addr < dcache_line_size * dcache_sets) {
+		cache_set_op(Index_Writeback_Inv_D, addr);
+		addr += dcache_line_size;
+	}
+}
+
+/*
+ * Writeback and invalidate a range of the dcache.  The addresses are
+ * virtual, and since we're using index ops and bit 12 is part of both
+ * the virtual frame and physical index, we have to clear both sets
+ * (bit 12 set and cleared).
+ */
+static inline void __sb1_writeback_inv_dcache_range(unsigned long start,
+	unsigned long end)
+{
+	unsigned long index;
+
+	start &= ~(dcache_line_size - 1);
+	end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1);
+
+	while (start != end) {
+		index = start & dcache_index_mask;
+		cache_set_op(Index_Writeback_Inv_D, index);
+		cache_set_op(Index_Writeback_Inv_D, index ^ (1<<12));
+		start += dcache_line_size;
+	}
+	sync();
+}
+
+/*
+ * Writeback and invalidate a range of the dcache.  With physical
+ * addresseses, we don't have to worry about possible bit 12 aliasing.
+ * XXXKW is it worth turning on KX and using hit ops with xkphys?
+ */
+static inline void __sb1_writeback_inv_dcache_phys_range(unsigned long start,
+	unsigned long end)
+{
+	start &= ~(dcache_line_size - 1);
+	end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1);
+
+	while (start != end) {
+		cache_set_op(Index_Writeback_Inv_D, start & dcache_index_mask);
+		start += dcache_line_size;
+	}
+	sync();
+}
+
+
+/*
+ * Invalidate the entire icache
+ */
+static inline void __sb1_flush_icache_all(void)
+{
+	unsigned long addr = 0;
+
+	while (addr < icache_line_size * icache_sets) {
+		cache_set_op(Index_Invalidate_I, addr);
+		addr += icache_line_size;
+	}
+}
+
+/*
+ * Flush the icache for a given physical page.  Need to writeback the
+ * dcache first, then invalidate the icache.  If the page isn't
+ * executable, nothing is required.
+ */
+static void local_sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+{
+	int cpu = smp_processor_id();
+
+#ifndef CONFIG_SMP
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+#endif
+
+	__sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE);
+
+	/*
+	 * Bumping the ASID is probably cheaper than the flush ...
+	 */
+	if (cpu_context(cpu, vma->vm_mm) != 0)
+		drop_mmu_context(vma->vm_mm, cpu);
+}
+
+#ifdef CONFIG_SMP
+struct flush_cache_page_args {
+	struct vm_area_struct *vma;
+	unsigned long addr;
+	unsigned long pfn;
+};
+
+static void sb1_flush_cache_page_ipi(void *info)
+{
+	struct flush_cache_page_args *args = info;
+
+	local_sb1_flush_cache_page(args->vma, args->addr, args->pfn);
+}
+
+/* Dirty dcache could be on another CPU, so do the IPIs */
+static void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+{
+	struct flush_cache_page_args args;
+
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	addr &= PAGE_MASK;
+	args.vma = vma;
+	args.addr = addr;
+	args.pfn = pfn;
+	on_each_cpu(sb1_flush_cache_page_ipi, (void *) &args, 1, 1);
+}
+#else
+void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+	__attribute__((alias("local_sb1_flush_cache_page")));
+#endif
+
+/*
+ * Invalidate a range of the icache.  The addresses are virtual, and
+ * the cache is virtually indexed and tagged.  However, we don't
+ * necessarily have the right ASID context, so use index ops instead
+ * of hit ops.
+ */
+static inline void __sb1_flush_icache_range(unsigned long start,
+	unsigned long end)
+{
+	start &= ~(icache_line_size - 1);
+	end = (end + icache_line_size - 1) & ~(icache_line_size - 1);
+
+	while (start != end) {
+		cache_set_op(Index_Invalidate_I, start & icache_index_mask);
+		start += icache_line_size;
+	}
+	mispredict();
+	sync();
+}
+
+
+/*
+ * Invalidate all caches on this CPU
+ */
+static void local_sb1___flush_cache_all(void)
+{
+	__sb1_writeback_inv_dcache_all();
+	__sb1_flush_icache_all();
+}
+
+#ifdef CONFIG_SMP
+void sb1___flush_cache_all_ipi(void *ignored)
+	__attribute__((alias("local_sb1___flush_cache_all")));
+
+static void sb1___flush_cache_all(void)
+{
+	on_each_cpu(sb1___flush_cache_all_ipi, 0, 1, 1);
+}
+#else
+void sb1___flush_cache_all(void)
+	__attribute__((alias("local_sb1___flush_cache_all")));
+#endif
+
+/*
+ * When flushing a range in the icache, we have to first writeback
+ * the dcache for the same range, so new ifetches will see any
+ * data that was dirty in the dcache.
+ *
+ * The start/end arguments are Kseg addresses (possibly mapped Kseg).
+ */
+
+static void local_sb1_flush_icache_range(unsigned long start,
+	unsigned long end)
+{
+	/* Just wb-inv the whole dcache if the range is big enough */
+	if ((end - start) > dcache_range_cutoff)
+		__sb1_writeback_inv_dcache_all();
+	else
+		__sb1_writeback_inv_dcache_range(start, end);
+	
+	/* Just flush the whole icache if the range is big enough */
+	if ((end - start) > icache_range_cutoff)
+		__sb1_flush_icache_all();
+	else
+		__sb1_flush_icache_range(start, end);
+}
+
+#ifdef CONFIG_SMP
+struct flush_icache_range_args {
+	unsigned long start;
+	unsigned long end;
+};
+
+static void sb1_flush_icache_range_ipi(void *info)
+{
+	struct flush_icache_range_args *args = info;
+
+	local_sb1_flush_icache_range(args->start, args->end);
+}
+
+void sb1_flush_icache_range(unsigned long start, unsigned long end)
+{
+	struct flush_icache_range_args args;
+
+	args.start = start;
+	args.end = end;
+	on_each_cpu(sb1_flush_icache_range_ipi, &args, 1, 1);
+}
+#else
+void sb1_flush_icache_range(unsigned long start, unsigned long end)
+	__attribute__((alias("local_sb1_flush_icache_range")));
+#endif
+
+/*
+ * Flush the icache for a given physical page.  Need to writeback the
+ * dcache first, then invalidate the icache.  If the page isn't
+ * executable, nothing is required.
+ */
+static void local_sb1_flush_icache_page(struct vm_area_struct *vma,
+	struct page *page)
+{
+	unsigned long start;
+	int cpu = smp_processor_id();
+
+#ifndef CONFIG_SMP
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+#endif
+
+	/* Need to writeback any dirty data for that page, we have the PA */
+	start = (unsigned long)(page-mem_map) << PAGE_SHIFT;
+	__sb1_writeback_inv_dcache_phys_range(start, start + PAGE_SIZE);
+	/*
+	 * If there's a context, bump the ASID (cheaper than a flush,
+	 * since we don't know VAs!)
+	 */
+	if (cpu_context(cpu, vma->vm_mm) != 0) {
+		drop_mmu_context(vma->vm_mm, cpu);
+	}
+}
+
+#ifdef CONFIG_SMP
+struct flush_icache_page_args {
+	struct vm_area_struct *vma;
+	struct page *page;
+};
+
+static void sb1_flush_icache_page_ipi(void *info)
+{
+	struct flush_icache_page_args *args = info;
+	local_sb1_flush_icache_page(args->vma, args->page);
+}
+
+/* Dirty dcache could be on another CPU, so do the IPIs */
+static void sb1_flush_icache_page(struct vm_area_struct *vma,
+	struct page *page)
+{
+	struct flush_icache_page_args args;
+
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+	args.vma = vma;
+	args.page = page;
+	on_each_cpu(sb1_flush_icache_page_ipi, (void *) &args, 1, 1);
+}
+#else
+void sb1_flush_icache_page(struct vm_area_struct *vma, struct page *page)
+	__attribute__((alias("local_sb1_flush_icache_page")));
+#endif
+
+/*
+ * A signal trampoline must fit into a single cacheline.
+ */
+static void local_sb1_flush_cache_sigtramp(unsigned long addr)
+{
+	cache_set_op(Index_Writeback_Inv_D, addr & dcache_index_mask);
+	cache_set_op(Index_Writeback_Inv_D, (addr ^ (1<<12)) & dcache_index_mask);
+	cache_set_op(Index_Invalidate_I, addr & icache_index_mask);
+	mispredict();
+}
+
+#ifdef CONFIG_SMP
+static void sb1_flush_cache_sigtramp_ipi(void *info)
+{
+	unsigned long iaddr = (unsigned long) info;
+	local_sb1_flush_cache_sigtramp(iaddr);
+}
+
+static void sb1_flush_cache_sigtramp(unsigned long addr)
+{
+	on_each_cpu(sb1_flush_cache_sigtramp_ipi, (void *) addr, 1, 1);
+}
+#else
+void sb1_flush_cache_sigtramp(unsigned long addr)
+	__attribute__((alias("local_sb1_flush_cache_sigtramp")));
+#endif
+
+
+/*
+ * Anything that just flushes dcache state can be ignored, as we're always
+ * coherent in dcache space.  This is just a dummy function that all the
+ * nop'ed routines point to
+ */
+static void sb1_nop(void)
+{
+}
+
+/*
+ *  Cache set values (from the mips64 spec)
+ * 0 - 64
+ * 1 - 128
+ * 2 - 256
+ * 3 - 512
+ * 4 - 1024
+ * 5 - 2048
+ * 6 - 4096
+ * 7 - Reserved
+ */
+
+static unsigned int decode_cache_sets(unsigned int config_field)
+{
+	if (config_field == 7) {
+		/* JDCXXX - Find a graceful way to abort. */
+		return 0;
+	}
+	return (1<<(config_field + 6));
+}
+
+/*
+ *  Cache line size values (from the mips64 spec)
+ * 0 - No cache present.
+ * 1 - 4 bytes
+ * 2 - 8 bytes
+ * 3 - 16 bytes
+ * 4 - 32 bytes
+ * 5 - 64 bytes
+ * 6 - 128 bytes
+ * 7 - Reserved
+ */
+
+static unsigned int decode_cache_line_size(unsigned int config_field)
+{
+	if (config_field == 0) {
+		return 0;
+	} else if (config_field == 7) {
+		/* JDCXXX - Find a graceful way to abort. */
+		return 0;
+	}
+	return (1<<(config_field + 1));
+}
+
+/*
+ * Relevant bits of the config1 register format (from the MIPS32/MIPS64 specs)
+ *
+ * 24:22 Icache sets per way
+ * 21:19 Icache line size
+ * 18:16 Icache Associativity
+ * 15:13 Dcache sets per way
+ * 12:10 Dcache line size
+ * 9:7   Dcache Associativity
+ */
+
+static char *way_string[] = {
+	"direct mapped", "2-way", "3-way", "4-way",
+	"5-way", "6-way", "7-way", "8-way",
+};
+
+static __init void probe_cache_sizes(void)
+{
+	u32 config1;
+
+	config1 = read_c0_config1();
+	icache_line_size = decode_cache_line_size((config1 >> 19) & 0x7);
+	dcache_line_size = decode_cache_line_size((config1 >> 10) & 0x7);
+	icache_sets = decode_cache_sets((config1 >> 22) & 0x7);
+	dcache_sets = decode_cache_sets((config1 >> 13) & 0x7);
+	icache_assoc = ((config1 >> 16) & 0x7) + 1;
+	dcache_assoc = ((config1 >> 7) & 0x7) + 1;
+	icache_size = icache_line_size * icache_sets * icache_assoc;
+	dcache_size = dcache_line_size * dcache_sets * dcache_assoc;
+	/* Need to remove non-index bits for index ops */
+	icache_index_mask = (icache_sets - 1) * icache_line_size;
+	dcache_index_mask = (dcache_sets - 1) * dcache_line_size;
+	/*
+	 * These are for choosing range (index ops) versus all.
+	 * icache flushes all ways for each set, so drop icache_assoc.
+	 * dcache flushes all ways and each setting of bit 12 for each
+	 * index, so drop dcache_assoc and halve the dcache_sets.
+	 */
+	icache_range_cutoff = icache_sets * icache_line_size;
+	dcache_range_cutoff = (dcache_sets / 2) * icache_line_size;
+
+	printk("Primary instruction cache %ldkB, %s, linesize %d bytes.\n",
+	       icache_size >> 10, way_string[icache_assoc - 1],
+	       icache_line_size);
+	printk("Primary data cache %ldkB, %s, linesize %d bytes.\n",
+	       dcache_size >> 10, way_string[dcache_assoc - 1],
+	       dcache_line_size);
+}
+
+/*
+ * This is called from loadmmu.c.  We have to set up all the
+ * memory management function pointers, as well as initialize
+ * the caches and tlbs
+ */
+void ld_mmu_sb1(void)
+{
+	extern char except_vec2_sb1;
+	extern char handle_vec2_sb1;
+
+	/* Special cache error handler for SB1 */
+	memcpy((void *)(CAC_BASE   + 0x100), &except_vec2_sb1, 0x80);
+	memcpy((void *)(UNCAC_BASE + 0x100), &except_vec2_sb1, 0x80);
+	memcpy((void *)CKSEG1ADDR(&handle_vec2_sb1), &handle_vec2_sb1, 0x80);
+
+	probe_cache_sizes();
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+	sb1_dma_init();
+#endif
+
+	/*
+	 * None of these are needed for the SB1 - the Dcache is
+	 * physically indexed and tagged, so no virtual aliasing can
+	 * occur
+	 */
+	flush_cache_range = (void *) sb1_nop;
+	flush_cache_mm = (void (*)(struct mm_struct *))sb1_nop;
+	flush_cache_all = sb1_nop;
+
+	/* These routines are for Icache coherence with the Dcache */
+	flush_icache_range = sb1_flush_icache_range;
+	flush_icache_page = sb1_flush_icache_page;
+	flush_icache_all = __sb1_flush_icache_all; /* local only */
+
+	/* This implies an Icache flush too, so can't be nop'ed */
+	flush_cache_page = sb1_flush_cache_page;
+
+	flush_cache_sigtramp = sb1_flush_cache_sigtramp;
+	flush_data_cache_page = (void *) sb1_nop;
+
+	/* Full flush */
+	__flush_cache_all = sb1___flush_cache_all;
+
+	change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT);
+
+	/*
+	 * This is the only way to force the update of K0 to complete
+	 * before subsequent instruction fetch.
+	 */
+	__asm__ __volatile__(
+		".set	push			\n"
+	"	.set	noat			\n"
+	"	.set	noreorder		\n"
+	"	.set	mips3			\n"
+	"	" STR(PTR_LA) "	$1, 1f		\n"
+	"	" STR(MTC0) "	$1, $14		\n"
+	"	eret				\n"
+	"1:	.set	pop"
+	:
+	:
+	: "memory");
+
+	flush_cache_all();
+}
diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c
new file mode 100644
index 00000000000..ff5afab64b2
--- /dev/null
+++ b/arch/mips/mm/c-tx39.c
@@ -0,0 +1,493 @@
+/*
+ * r2300.c: R2000 and R3000 specific mmu/cache code.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ *
+ * with a lot of changes to make this thing work for R3000s
+ * Tx39XX R4k style caches added. HK
+ * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
+ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/cacheops.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/isadep.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+/* For R3000 cores with R4000 style caches */
+static unsigned long icache_size, dcache_size;		/* Size in bytes */
+
+#include <asm/r4kcache.h>
+
+extern int r3k_have_wired_reg;	/* in r3k-tlb.c */
+
+/* This sequence is required to ensure icache is disabled immediately */
+#define TX39_STOP_STREAMING() \
+__asm__ __volatile__( \
+	".set    push\n\t" \
+	".set    noreorder\n\t" \
+	"b       1f\n\t" \
+	"nop\n\t" \
+	"1:\n\t" \
+	".set pop" \
+	)
+
+/* TX39H-style cache flush routines. */
+static void tx39h_flush_icache_all(void)
+{
+	unsigned long start = KSEG0;
+	unsigned long end = (start + icache_size);
+	unsigned long flags, config;
+
+	/* disable icache (set ICE#) */
+	local_irq_save(flags);
+	config = read_c0_conf();
+	write_c0_conf(config & ~TX39_CONF_ICE);
+	TX39_STOP_STREAMING();
+
+	/* invalidate icache */
+	while (start < end) {
+		cache16_unroll32(start, Index_Invalidate_I);
+		start += 0x200;
+	}
+
+	write_c0_conf(config);
+	local_irq_restore(flags);
+}
+
+static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	iob();
+	a = addr & ~(dc_lsize - 1);
+	end = (addr + size - 1) & ~(dc_lsize - 1);
+	while (1) {
+		invalidate_dcache_line(a); /* Hit_Invalidate_D */
+		if (a == end) break;
+		a += dc_lsize;
+	}
+}
+
+
+/* TX39H2,TX39H3 */
+static inline void tx39_blast_dcache_page(unsigned long addr)
+{
+	if (current_cpu_data.cputype != CPU_TX3912)
+		blast_dcache16_page(addr);
+}
+
+static inline void tx39_blast_dcache_page_indexed(unsigned long addr)
+{
+	blast_dcache16_page_indexed(addr);
+}
+
+static inline void tx39_blast_dcache(void)
+{
+	blast_dcache16();
+}
+
+static inline void tx39_blast_icache_page(unsigned long addr)
+{
+	unsigned long flags, config;
+	/* disable icache (set ICE#) */
+	local_irq_save(flags);
+	config = read_c0_conf();
+	write_c0_conf(config & ~TX39_CONF_ICE);
+	TX39_STOP_STREAMING();
+	blast_icache16_page(addr);
+	write_c0_conf(config);
+	local_irq_restore(flags);
+}
+
+static inline void tx39_blast_icache_page_indexed(unsigned long addr)
+{
+	unsigned long flags, config;
+	/* disable icache (set ICE#) */
+	local_irq_save(flags);
+	config = read_c0_conf();
+	write_c0_conf(config & ~TX39_CONF_ICE);
+	TX39_STOP_STREAMING();
+	blast_icache16_page_indexed(addr);
+	write_c0_conf(config);
+	local_irq_restore(flags);
+}
+
+static inline void tx39_blast_icache(void)
+{
+	unsigned long flags, config;
+	/* disable icache (set ICE#) */
+	local_irq_save(flags);
+	config = read_c0_conf();
+	write_c0_conf(config & ~TX39_CONF_ICE);
+	TX39_STOP_STREAMING();
+	blast_icache16();
+	write_c0_conf(config);
+	local_irq_restore(flags);
+}
+
+static inline void tx39_flush_cache_all(void)
+{
+	if (!cpu_has_dc_aliases)
+		return;
+
+	tx39_blast_dcache();
+	tx39_blast_icache();
+}
+
+static inline void tx39___flush_cache_all(void)
+{
+	tx39_blast_dcache();
+	tx39_blast_icache();
+}
+
+static void tx39_flush_cache_mm(struct mm_struct *mm)
+{
+	if (!cpu_has_dc_aliases)
+		return;
+
+	if (cpu_context(smp_processor_id(), mm) != 0) {
+		tx39_flush_cache_all();
+	}
+}
+
+static void tx39_flush_cache_range(struct vm_area_struct *vma,
+	unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!cpu_has_dc_aliases)
+		return;
+
+	if (cpu_context(smp_processor_id(), mm) != 0) {
+		tx39_blast_dcache();
+		tx39_blast_icache();
+	}
+}
+
+static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn)
+{
+	int exec = vma->vm_flags & VM_EXEC;
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	/*
+	 * If ownes no valid ASID yet, cannot possibly have gotten
+	 * this page into the cache.
+	 */
+	if (cpu_context(smp_processor_id(), mm) == 0)
+		return;
+
+	page &= PAGE_MASK;
+	pgdp = pgd_offset(mm, page);
+	pmdp = pmd_offset(pgdp, page);
+	ptep = pte_offset(pmdp, page);
+
+	/*
+	 * If the page isn't marked valid, the page cannot possibly be
+	 * in the cache.
+	 */
+	if (!(pte_val(*ptep) & _PAGE_PRESENT))
+		return;
+
+	/*
+	 * Doing flushes for another ASID than the current one is
+	 * too difficult since stupid R4k caches do a TLB translation
+	 * for every cache flush operation.  So we do indexed flushes
+	 * in that case, which doesn't overly flush the cache too much.
+	 */
+	if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) {
+		if (cpu_has_dc_aliases || exec)
+			tx39_blast_dcache_page(page);
+		if (exec)
+			tx39_blast_icache_page(page);
+
+		return;
+	}
+
+	/*
+	 * Do indexed flush, too much work to get the (possible) TLB refills
+	 * to work correctly.
+	 */
+	page = (KSEG0 + (page & (dcache_size - 1)));
+	if (cpu_has_dc_aliases || exec)
+		tx39_blast_dcache_page_indexed(page);
+	if (exec)
+		tx39_blast_icache_page_indexed(page);
+}
+
+static void tx39_flush_data_cache_page(unsigned long addr)
+{
+	tx39_blast_dcache_page(addr);
+}
+
+static void tx39_flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+	unsigned long addr, aend;
+
+	if (end - start > dcache_size)
+		tx39_blast_dcache();
+	else {
+		addr = start & ~(dc_lsize - 1);
+		aend = (end - 1) & ~(dc_lsize - 1);
+
+		while (1) {
+			/* Hit_Writeback_Inv_D */
+			protected_writeback_dcache_line(addr);
+			if (addr == aend)
+				break;
+			addr += dc_lsize;
+		}
+	}
+
+	if (end - start > icache_size)
+		tx39_blast_icache();
+	else {
+		unsigned long flags, config;
+		addr = start & ~(dc_lsize - 1);
+		aend = (end - 1) & ~(dc_lsize - 1);
+		/* disable icache (set ICE#) */
+		local_irq_save(flags);
+		config = read_c0_conf();
+		write_c0_conf(config & ~TX39_CONF_ICE);
+		TX39_STOP_STREAMING();
+		while (1) {
+			/* Hit_Invalidate_I */
+			protected_flush_icache_line(addr);
+			if (addr == aend)
+				break;
+			addr += dc_lsize;
+		}
+		write_c0_conf(config);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * Ok, this seriously sucks.  We use them to flush a user page but don't
+ * know the virtual address, so we have to blast away the whole icache
+ * which is significantly more expensive than the real thing.  Otoh we at
+ * least know the kernel address of the page so we can flush it
+ * selectivly.
+ */
+static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	unsigned long addr;
+	/*
+	 * If there's no context yet, or the page isn't executable, no icache
+	 * flush is needed.
+	 */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	addr = (unsigned long) page_address(page);
+	tx39_blast_dcache_page(addr);
+
+	/*
+	 * We're not sure of the virtual address(es) involved here, so
+	 * we have to flush the entire I-cache.
+	 */
+	tx39_blast_icache();
+}
+
+static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	if (((size | addr) & (PAGE_SIZE - 1)) == 0) {
+		end = addr + size;
+		do {
+			tx39_blast_dcache_page(addr);
+			addr += PAGE_SIZE;
+		} while(addr != end);
+	} else if (size > dcache_size) {
+		tx39_blast_dcache();
+	} else {
+		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+		a = addr & ~(dc_lsize - 1);
+		end = (addr + size - 1) & ~(dc_lsize - 1);
+		while (1) {
+			flush_dcache_line(a); /* Hit_Writeback_Inv_D */
+			if (a == end) break;
+			a += dc_lsize;
+		}
+	}
+}
+
+static void tx39_dma_cache_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	if (((size | addr) & (PAGE_SIZE - 1)) == 0) {
+		end = addr + size;
+		do {
+			tx39_blast_dcache_page(addr);
+			addr += PAGE_SIZE;
+		} while(addr != end);
+	} else if (size > dcache_size) {
+		tx39_blast_dcache();
+	} else {
+		unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+		a = addr & ~(dc_lsize - 1);
+		end = (addr + size - 1) & ~(dc_lsize - 1);
+		while (1) {
+			invalidate_dcache_line(a); /* Hit_Invalidate_D */
+			if (a == end) break;
+			a += dc_lsize;
+		}
+	}
+}
+
+static void tx39_flush_cache_sigtramp(unsigned long addr)
+{
+	unsigned long ic_lsize = current_cpu_data.icache.linesz;
+	unsigned long dc_lsize = current_cpu_data.dcache.linesz;
+	unsigned long config;
+	unsigned long flags;
+
+	protected_writeback_dcache_line(addr & ~(dc_lsize - 1));
+
+	/* disable icache (set ICE#) */
+	local_irq_save(flags);
+	config = read_c0_conf();
+	write_c0_conf(config & ~TX39_CONF_ICE);
+	TX39_STOP_STREAMING();
+	protected_flush_icache_line(addr & ~(ic_lsize - 1));
+	write_c0_conf(config);
+	local_irq_restore(flags);
+}
+
+static __init void tx39_probe_cache(void)
+{
+	unsigned long config;
+
+	config = read_c0_conf();
+
+	icache_size = 1 << (10 + ((config & TX39_CONF_ICS_MASK) >>
+				  TX39_CONF_ICS_SHIFT));
+	dcache_size = 1 << (10 + ((config & TX39_CONF_DCS_MASK) >>
+				  TX39_CONF_DCS_SHIFT));
+
+	current_cpu_data.icache.linesz = 16;
+	switch (current_cpu_data.cputype) {
+	case CPU_TX3912:
+		current_cpu_data.icache.ways = 1;
+		current_cpu_data.dcache.ways = 1;
+		current_cpu_data.dcache.linesz = 4;
+		break;
+
+	case CPU_TX3927:
+		current_cpu_data.icache.ways = 2;
+		current_cpu_data.dcache.ways = 2;
+		current_cpu_data.dcache.linesz = 16;
+		break;
+
+	case CPU_TX3922:
+	default:
+		current_cpu_data.icache.ways = 1;
+		current_cpu_data.dcache.ways = 1;
+		current_cpu_data.dcache.linesz = 16;
+		break;
+	}
+}
+
+void __init ld_mmu_tx39(void)
+{
+	extern void build_clear_page(void);
+	extern void build_copy_page(void);
+	unsigned long config;
+
+	config = read_c0_conf();
+	config &= ~TX39_CONF_WBON;
+	write_c0_conf(config);
+
+	tx39_probe_cache();
+
+	switch (current_cpu_data.cputype) {
+	case CPU_TX3912:
+		/* TX39/H core (writethru direct-map cache) */
+		flush_cache_all	= tx39h_flush_icache_all;
+		__flush_cache_all	= tx39h_flush_icache_all;
+		flush_cache_mm		= (void *) tx39h_flush_icache_all;
+		flush_cache_range	= (void *) tx39h_flush_icache_all;
+		flush_cache_page	= (void *) tx39h_flush_icache_all;
+		flush_icache_page	= (void *) tx39h_flush_icache_all;
+		flush_icache_range	= (void *) tx39h_flush_icache_all;
+
+		flush_cache_sigtramp	= (void *) tx39h_flush_icache_all;
+		flush_data_cache_page	= (void *) tx39h_flush_icache_all;
+
+		_dma_cache_wback_inv	= tx39h_dma_cache_wback_inv;
+
+		shm_align_mask		= PAGE_SIZE - 1;
+
+		break;
+
+	case CPU_TX3922:
+	case CPU_TX3927:
+	default:
+		/* TX39/H2,H3 core (writeback 2way-set-associative cache) */
+		r3k_have_wired_reg = 1;
+		write_c0_wired(0);	/* set 8 on reset... */
+		/* board-dependent init code may set WBON */
+
+		flush_cache_all = tx39_flush_cache_all;
+		__flush_cache_all = tx39___flush_cache_all;
+		flush_cache_mm = tx39_flush_cache_mm;
+		flush_cache_range = tx39_flush_cache_range;
+		flush_cache_page = tx39_flush_cache_page;
+		flush_icache_page = tx39_flush_icache_page;
+		flush_icache_range = tx39_flush_icache_range;
+
+		flush_cache_sigtramp = tx39_flush_cache_sigtramp;
+		flush_data_cache_page = tx39_flush_data_cache_page;
+
+		_dma_cache_wback_inv = tx39_dma_cache_wback_inv;
+		_dma_cache_wback = tx39_dma_cache_wback_inv;
+		_dma_cache_inv = tx39_dma_cache_inv;
+
+		shm_align_mask = max_t(unsigned long,
+		                       (dcache_size / current_cpu_data.dcache.ways) - 1,
+		                       PAGE_SIZE - 1);
+
+		break;
+	}
+
+	current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways;
+	current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways;
+
+	current_cpu_data.icache.sets =
+		current_cpu_data.icache.waysize / current_cpu_data.icache.linesz;
+	current_cpu_data.dcache.sets =
+		current_cpu_data.dcache.waysize / current_cpu_data.dcache.linesz;
+
+	if (current_cpu_data.dcache.waysize > PAGE_SIZE)
+		current_cpu_data.dcache.flags |= MIPS_CACHE_ALIASES;
+
+	current_cpu_data.icache.waybit = 0;
+	current_cpu_data.dcache.waybit = 0;
+
+	printk("Primary instruction cache %ldkB, linesize %d bytes\n",
+		icache_size >> 10, current_cpu_data.icache.linesz);
+	printk("Primary data cache %ldkB, linesize %d bytes\n",
+		dcache_size >> 10, current_cpu_data.dcache.linesz);
+
+	build_clear_page();
+	build_copy_page();
+}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
new file mode 100644
index 00000000000..1d95cdb77be
--- /dev/null
+++ b/arch/mips/mm/cache.c
@@ -0,0 +1,157 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 2003 by Ralf Baechle
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+#include <asm/processor.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+
+/* Cache operations. */
+void (*flush_cache_all)(void);
+void (*__flush_cache_all)(void);
+void (*flush_cache_mm)(struct mm_struct *mm);
+void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end);
+void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
+void (*flush_icache_range)(unsigned long start, unsigned long end);
+void (*flush_icache_page)(struct vm_area_struct *vma, struct page *page);
+
+/* MIPS specific cache operations */
+void (*flush_cache_sigtramp)(unsigned long addr);
+void (*flush_data_cache_page)(unsigned long addr);
+void (*flush_icache_all)(void);
+
+#ifdef CONFIG_DMA_NONCOHERENT
+
+/* DMA cache operations. */
+void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
+void (*_dma_cache_wback)(unsigned long start, unsigned long size);
+void (*_dma_cache_inv)(unsigned long start, unsigned long size);
+
+EXPORT_SYMBOL(_dma_cache_wback_inv);
+EXPORT_SYMBOL(_dma_cache_wback);
+EXPORT_SYMBOL(_dma_cache_inv);
+
+#endif /* CONFIG_DMA_NONCOHERENT */
+
+/*
+ * We could optimize the case where the cache argument is not BCACHE but
+ * that seems very atypical use ...
+ */
+asmlinkage int sys_cacheflush(unsigned long addr, unsigned long int bytes,
+	unsigned int cache)
+{
+	if (!access_ok(VERIFY_WRITE, (void *) addr, bytes))
+		return -EFAULT;
+
+	flush_icache_range(addr, addr + bytes);
+
+	return 0;
+}
+
+void __flush_dcache_page(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	unsigned long addr;
+
+	if (mapping && !mapping_mapped(mapping)) {
+		SetPageDcacheDirty(page);
+		return;
+	}
+
+	/*
+	 * We could delay the flush for the !page_mapping case too.  But that
+	 * case is for exec env/arg pages and those are %99 certainly going to
+	 * get faulted into the tlb (and thus flushed) anyways.
+	 */
+	addr = (unsigned long) page_address(page);
+	flush_data_cache_page(addr);
+}
+
+EXPORT_SYMBOL(__flush_dcache_page);
+
+void __update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t pte)
+{
+	struct page *page;
+	unsigned long pfn, addr;
+
+	pfn = pte_pfn(pte);
+	if (pfn_valid(pfn) && (page = pfn_to_page(pfn), page_mapping(page)) &&
+	    Page_dcache_dirty(page)) {
+		if (pages_do_alias((unsigned long)page_address(page),
+		                   address & PAGE_MASK)) {
+			addr = (unsigned long) page_address(page);
+			flush_data_cache_page(addr);
+		}
+
+		ClearPageDcacheDirty(page);
+	}
+}
+
+extern void ld_mmu_r23000(void);
+extern void ld_mmu_r4xx0(void);
+extern void ld_mmu_tx39(void);
+extern void ld_mmu_r6000(void);
+extern void ld_mmu_tfp(void);
+extern void ld_mmu_andes(void);
+extern void ld_mmu_sb1(void);
+
+void __init cpu_cache_init(void)
+{
+	if (cpu_has_4ktlb) {
+#if defined(CONFIG_CPU_R4X00)  || defined(CONFIG_CPU_VR41XX) || \
+    defined(CONFIG_CPU_R4300)  || defined(CONFIG_CPU_R5000)  || \
+    defined(CONFIG_CPU_NEVADA) || defined(CONFIG_CPU_R5432)  || \
+    defined(CONFIG_CPU_R5500)  || defined(CONFIG_CPU_MIPS32) || \
+    defined(CONFIG_CPU_MIPS64) || defined(CONFIG_CPU_TX49XX) || \
+    defined(CONFIG_CPU_RM7000) || defined(CONFIG_CPU_RM9000)
+		ld_mmu_r4xx0();
+#endif
+	} else switch (current_cpu_data.cputype) {
+#ifdef CONFIG_CPU_R3000
+	case CPU_R2000:
+	case CPU_R3000:
+	case CPU_R3000A:
+	case CPU_R3081E:
+		ld_mmu_r23000();
+		break;
+#endif
+#ifdef CONFIG_CPU_TX39XX
+	case CPU_TX3912:
+	case CPU_TX3922:
+	case CPU_TX3927:
+		ld_mmu_tx39();
+		break;
+#endif
+#ifdef CONFIG_CPU_R10000
+	case CPU_R10000:
+	case CPU_R12000:
+		ld_mmu_r4xx0();
+		break;
+#endif
+#ifdef CONFIG_CPU_SB1
+	case CPU_SB1:
+		ld_mmu_sb1();
+		break;
+#endif
+
+	case CPU_R8000:
+		panic("R8000 is unsupported");
+		break;
+
+	default:
+		panic("Yeee, unsupported cache architecture.");
+	}
+}
diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c
new file mode 100644
index 00000000000..13d96d62764
--- /dev/null
+++ b/arch/mips/mm/cerr-sb1.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 2001,2002,2003 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/mipsregs.h>
+#include <asm/sibyte/sb1250.h>
+
+#ifndef CONFIG_SIBYTE_BUS_WATCHER
+#include <asm/io.h>
+#include <asm/sibyte/sb1250_regs.h>
+#include <asm/sibyte/sb1250_scd.h>
+#endif
+ 
+/* SB1 definitions */
+
+/* XXX should come from config1 XXX */
+#define SB1_CACHE_INDEX_MASK   0x1fe0
+
+#define CP0_ERRCTL_RECOVERABLE (1 << 31)
+#define CP0_ERRCTL_DCACHE      (1 << 30)
+#define CP0_ERRCTL_ICACHE      (1 << 29)
+#define CP0_ERRCTL_MULTIBUS    (1 << 23)
+#define CP0_ERRCTL_MC_TLB      (1 << 15)
+#define CP0_ERRCTL_MC_TIMEOUT  (1 << 14)
+
+#define CP0_CERRI_TAG_PARITY   (1 << 29)
+#define CP0_CERRI_DATA_PARITY  (1 << 28)
+#define CP0_CERRI_EXTERNAL     (1 << 26)
+
+#define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL))
+#define CP0_CERRI_DATA         (CP0_CERRI_DATA_PARITY)
+
+#define CP0_CERRD_MULTIPLE     (1 << 31)
+#define CP0_CERRD_TAG_STATE    (1 << 30)
+#define CP0_CERRD_TAG_ADDRESS  (1 << 29)
+#define CP0_CERRD_DATA_SBE     (1 << 28)
+#define CP0_CERRD_DATA_DBE     (1 << 27)
+#define CP0_CERRD_EXTERNAL     (1 << 26)
+#define CP0_CERRD_LOAD         (1 << 25)
+#define CP0_CERRD_STORE        (1 << 24)
+#define CP0_CERRD_FILLWB       (1 << 23)
+#define CP0_CERRD_COHERENCY    (1 << 22)
+#define CP0_CERRD_DUPTAG       (1 << 21)
+
+#define CP0_CERRD_DPA_VALID(c) (!((c) & CP0_CERRD_EXTERNAL))
+#define CP0_CERRD_IDX_VALID(c) \
+   (((c) & (CP0_CERRD_LOAD | CP0_CERRD_STORE)) ? (!((c) & CP0_CERRD_EXTERNAL)) : 0)
+#define CP0_CERRD_CAUSES \
+   (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG)
+#define CP0_CERRD_TYPES \
+   (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL)
+#define CP0_CERRD_DATA         (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE)
+
+static uint32_t	extract_ic(unsigned short addr, int data);
+static uint32_t	extract_dc(unsigned short addr, int data);
+
+static inline void breakout_errctl(unsigned int val)
+{
+	if (val & CP0_ERRCTL_RECOVERABLE)
+		prom_printf(" recoverable");
+	if (val & CP0_ERRCTL_DCACHE)
+		prom_printf(" dcache");
+	if (val & CP0_ERRCTL_ICACHE)
+		prom_printf(" icache");
+	if (val & CP0_ERRCTL_MULTIBUS)
+		prom_printf(" multiple-buserr");
+	prom_printf("\n");
+}
+
+static inline void breakout_cerri(unsigned int val)
+{
+	if (val & CP0_CERRI_TAG_PARITY)
+		prom_printf(" tag-parity");
+	if (val & CP0_CERRI_DATA_PARITY)
+		prom_printf(" data-parity");
+	if (val & CP0_CERRI_EXTERNAL)
+		prom_printf(" external");
+	prom_printf("\n");
+}
+
+static inline void breakout_cerrd(unsigned int val)
+{
+	switch (val & CP0_CERRD_CAUSES) {
+	case CP0_CERRD_LOAD:
+		prom_printf(" load,");
+		break;
+	case CP0_CERRD_STORE:
+		prom_printf(" store,");
+		break;
+	case CP0_CERRD_FILLWB:
+		prom_printf(" fill/wb,");
+		break;
+	case CP0_CERRD_COHERENCY:
+		prom_printf(" coherency,");
+		break;
+	case CP0_CERRD_DUPTAG:
+		prom_printf(" duptags,");
+		break;
+	default:
+		prom_printf(" NO CAUSE,");
+		break;
+	}
+	if (!(val & CP0_CERRD_TYPES))
+		prom_printf(" NO TYPE");
+	else {
+		if (val & CP0_CERRD_MULTIPLE)
+			prom_printf(" multi-err");
+		if (val & CP0_CERRD_TAG_STATE)
+			prom_printf(" tag-state");
+		if (val & CP0_CERRD_TAG_ADDRESS)
+			prom_printf(" tag-address");
+		if (val & CP0_CERRD_DATA_SBE)
+			prom_printf(" data-SBE");
+		if (val & CP0_CERRD_DATA_DBE)
+			prom_printf(" data-DBE");
+		if (val & CP0_CERRD_EXTERNAL)
+			prom_printf(" external");
+	}
+	prom_printf("\n");
+}
+
+#ifndef CONFIG_SIBYTE_BUS_WATCHER
+
+static void check_bus_watcher(void)              
+{                               
+	uint32_t status, l2_err, memio_err;
+
+	/* Destructive read, clears register and interrupt */
+	status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
+	/* Bit 31 is always on, but there's no #define for that */
+	if (status & ~(1UL << 31)) {  
+		l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
+		memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
+		prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
+		prom_printf("\nLast recorded signature:\n");
+		prom_printf("Request %02x from %d, answered by %d with Dcode %d\n",
+		       (unsigned int)(G_SCD_BERR_TID(status) & 0x3f),
+		       (int)(G_SCD_BERR_TID(status) >> 6),
+		       (int)G_SCD_BERR_RID(status),
+		       (int)G_SCD_BERR_DCODE(status));
+	} else {		
+		prom_printf("Bus watcher indicates no error\n"); 
+	}			
+}                                       
+#else                                                    
+extern void check_bus_watcher(void);    
+#endif                                          
+                                
+asmlinkage void sb1_cache_error(void)
+{
+	uint64_t cerr_dpa;
+	uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
+
+	prom_printf("Cache error exception on CPU %x:\n",
+		    (read_c0_prid() >> 25) & 0x7);
+
+	__asm__ __volatile__ (
+	"	.set	push\n\t"
+	"	.set	mips64\n\t"
+	"	.set	noat\n\t"
+	"	mfc0	%0, $26\n\t"
+	"	mfc0	%1, $27\n\t"
+	"	mfc0	%2, $27, 1\n\t"
+	"	dmfc0	$1, $27, 3\n\t"
+	"	dsrl32	%3, $1, 0 \n\t"
+	"	sll	%4, $1, 0 \n\t"
+	"	mfc0	%5, $30\n\t"
+	"	.set	pop"
+	: "=r" (errctl), "=r" (cerr_i), "=r" (cerr_d),
+	  "=r" (dpahi), "=r" (dpalo), "=r" (eepc));
+
+	cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo;
+	prom_printf(" c0_errorepc ==   %08x\n", eepc);
+	prom_printf(" c0_errctl   ==   %08x", errctl);
+	breakout_errctl(errctl);
+	if (errctl & CP0_ERRCTL_ICACHE) {
+		prom_printf(" c0_cerr_i   ==   %08x", cerr_i);
+		breakout_cerri(cerr_i);
+		if (CP0_CERRI_IDX_VALID(cerr_i)) {
+			/* Check index of EPC, allowing for delay slot */
+			if (((eepc & SB1_CACHE_INDEX_MASK) != (cerr_i & SB1_CACHE_INDEX_MASK)) &&
+			    ((eepc & SB1_CACHE_INDEX_MASK) != ((cerr_i & SB1_CACHE_INDEX_MASK) - 4)))
+				prom_printf(" cerr_i idx doesn't match eepc\n");
+			else {
+				res = extract_ic(cerr_i & SB1_CACHE_INDEX_MASK,
+						 (cerr_i & CP0_CERRI_DATA) != 0);
+				if (!(res & cerr_i))
+					prom_printf("...didn't see indicated icache problem\n");
+			}
+		}
+	}
+	if (errctl & CP0_ERRCTL_DCACHE) {
+		prom_printf(" c0_cerr_d   ==   %08x", cerr_d);
+		breakout_cerrd(cerr_d);
+		if (CP0_CERRD_DPA_VALID(cerr_d)) {
+			prom_printf(" c0_cerr_dpa == %010llx\n", cerr_dpa);
+			if (!CP0_CERRD_IDX_VALID(cerr_d)) {
+				res = extract_dc(cerr_dpa & SB1_CACHE_INDEX_MASK,
+						 (cerr_d & CP0_CERRD_DATA) != 0);
+				if (!(res & cerr_d))
+					prom_printf("...didn't see indicated dcache problem\n");
+			} else {
+				if ((cerr_dpa & SB1_CACHE_INDEX_MASK) != (cerr_d & SB1_CACHE_INDEX_MASK))
+					prom_printf(" cerr_d idx doesn't match cerr_dpa\n");
+				else {
+					res = extract_dc(cerr_d & SB1_CACHE_INDEX_MASK,
+							 (cerr_d & CP0_CERRD_DATA) != 0);
+					if (!(res & cerr_d))
+						prom_printf("...didn't see indicated problem\n");
+				}
+			}
+		}
+	}
+
+	check_bus_watcher();
+
+	while (1);
+	/*
+	 * This tends to make things get really ugly; let's just stall instead.
+	 *    panic("Can't handle the cache error!");
+	 */
+}
+
+
+/* Parity lookup table. */
+static const uint8_t parity[256] = {
+	0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
+	1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
+	1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
+	0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
+	1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
+	0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
+	0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
+	1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
+};
+
+/* Masks to select bits for Hamming parity, mask_72_64[i] for bit[i] */
+static const uint64_t mask_72_64[8] = {
+	0x0738C808099264FFULL,
+	0x38C808099264FF07ULL,
+	0xC808099264FF0738ULL,
+	0x08099264FF0738C8ULL,
+	0x099264FF0738C808ULL,
+	0x9264FF0738C80809ULL,
+	0x64FF0738C8080992ULL,
+	0xFF0738C808099264ULL
+};
+
+/* Calculate the parity on a range of bits */
+static char range_parity(uint64_t dword, int max, int min)
+{
+	char parity = 0;
+	int i;
+	dword >>= min;
+	for (i=max-min; i>=0; i--) {
+		if (dword & 0x1)
+			parity = !parity;
+		dword >>= 1;
+	}
+	return parity;
+}
+
+/* Calculate the 4-bit even byte-parity for an instruction */
+static unsigned char inst_parity(uint32_t word)
+{
+	int i, j;
+	char parity = 0;
+	for (j=0; j<4; j++) {
+		char byte_parity = 0;
+		for (i=0; i<8; i++) {
+			if (word & 0x80000000)
+				byte_parity = !byte_parity;
+			word <<= 1;
+		}
+		parity <<= 1;
+		parity |= byte_parity;
+	}
+	return parity;
+}
+
+static uint32_t extract_ic(unsigned short addr, int data)
+{
+	unsigned short way;
+	int valid;
+	uint64_t taglo, va, tlo_tmp;
+	uint32_t taghi, taglolo, taglohi;
+	uint8_t lru;
+	int res = 0;
+
+	prom_printf("Icache index 0x%04x  ", addr);
+	for (way = 0; way < 4; way++) {
+		/* Index-load-tag-I */
+		__asm__ __volatile__ (
+		"	.set	push		\n\t"
+		"	.set	noreorder	\n\t"
+		"	.set	mips64		\n\t"
+		"	.set	noat		\n\t"
+		"	cache	4, 0(%3)	\n\t"
+		"	mfc0	%0, $29		\n\t"
+		"	dmfc0	$1, $28		\n\t"
+		"	dsrl32	%1, $1, 0	\n\t"
+		"	sll	%2, $1, 0	\n\t"
+		"	.set	pop"
+		: "=r" (taghi), "=r" (taglohi), "=r" (taglolo)
+		: "r" ((way << 13) | addr));
+
+		taglo = ((unsigned long long)taglohi << 32) | taglolo;
+		if (way == 0) {
+			lru = (taghi >> 14) & 0xff;
+			prom_printf("[Bank %d Set 0x%02x]  LRU > %d %d %d %d > MRU\n",
+				    ((addr >> 5) & 0x3), /* bank */
+				    ((addr >> 7) & 0x3f), /* index */
+				    (lru & 0x3),
+				    ((lru >> 2) & 0x3),
+				    ((lru >> 4) & 0x3),
+				    ((lru >> 6) & 0x3));
+		}
+		va = (taglo & 0xC0000FFFFFFFE000ULL) | addr;
+		if ((taglo & (1 << 31)) && (((taglo >> 62) & 0x3) == 3))
+			va |= 0x3FFFF00000000000ULL;
+		valid = ((taghi >> 29) & 1);
+		if (valid) {
+			tlo_tmp = taglo & 0xfff3ff;
+			if (((taglo >> 10) & 1) ^ range_parity(tlo_tmp, 23, 0)) {
+				prom_printf("   ** bad parity in VTag0/G/ASID\n");
+				res |= CP0_CERRI_TAG_PARITY;
+			}
+			if (((taglo >> 11) & 1) ^ range_parity(taglo, 63, 24)) {
+				prom_printf("   ** bad parity in R/VTag1\n");
+				res |= CP0_CERRI_TAG_PARITY;
+			}
+		}
+		if (valid ^ ((taghi >> 27) & 1)) {
+			prom_printf("   ** bad parity for valid bit\n");
+			res |= CP0_CERRI_TAG_PARITY;
+		}
+		prom_printf(" %d  [VA %016llx]  [Vld? %d]  raw tags: %08X-%016llX\n",
+			    way, va, valid, taghi, taglo);
+
+		if (data) {
+			uint32_t datahi, insta, instb;
+			uint8_t predecode;
+			int offset;
+
+			/* (hit all banks and ways) */
+			for (offset = 0; offset < 4; offset++) {
+				/* Index-load-data-I */
+				__asm__ __volatile__ (
+				"	.set	push\n\t"
+				"	.set	noreorder\n\t"
+				"	.set	mips64\n\t"
+				"	.set	noat\n\t"
+				"	cache	6, 0(%3)  \n\t"
+				"	mfc0	%0, $29, 1\n\t"
+				"	dmfc0  $1, $28, 1\n\t"
+				"	dsrl32 %1, $1, 0 \n\t"
+				"	sll    %2, $1, 0 \n\t"
+				"	.set	pop         \n"
+				: "=r" (datahi), "=r" (insta), "=r" (instb)
+				: "r" ((way << 13) | addr | (offset << 3)));
+				predecode = (datahi >> 8) & 0xff;
+				if (((datahi >> 16) & 1) != (uint32_t)range_parity(predecode, 7, 0)) {
+					prom_printf("   ** bad parity in predecode\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				/* XXXKW should/could check predecode bits themselves */
+				if (((datahi >> 4) & 0xf) ^ inst_parity(insta)) {
+					prom_printf("   ** bad parity in instruction a\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				if ((datahi & 0xf) ^ inst_parity(instb)) {
+					prom_printf("   ** bad parity in instruction b\n");
+					res |= CP0_CERRI_DATA_PARITY;
+				}
+				prom_printf("  %05X-%08X%08X", datahi, insta, instb);
+			}
+			prom_printf("\n");
+		}
+	}
+	return res;
+}
+
+/* Compute the ECC for a data doubleword */
+static uint8_t dc_ecc(uint64_t dword)
+{
+	uint64_t t;
+	uint32_t w;
+	uint8_t  p;
+	int      i;
+
+	p = 0;
+	for (i = 7; i >= 0; i--)
+	{
+		p <<= 1;
+		t = dword & mask_72_64[i];
+		w = (uint32_t)(t >> 32);
+		p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF]
+		      ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]);
+		w = (uint32_t)(t & 0xFFFFFFFF);
+		p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF]
+		      ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]);
+	}
+	return p;
+}
+
+struct dc_state {
+	unsigned char val;
+	char *name;
+};
+
+static struct dc_state dc_states[] = {
+	{ 0x00, "INVALID" },
+	{ 0x0f, "COH-SHD" },
+	{ 0x13, "NCO-E-C" },
+	{ 0x19, "NCO-E-D" },
+	{ 0x16, "COH-E-C" },
+	{ 0x1c, "COH-E-D" },
+	{ 0xff, "*ERROR*" }
+};
+
+#define DC_TAG_VALID(state) \
+    (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c))
+
+static char *dc_state_str(unsigned char state)
+{
+	struct dc_state *dsc = dc_states;
+	while (dsc->val != 0xff) {
+		if (dsc->val == state)
+			break;
+		dsc++;
+	}
+	return dsc->name;
+}
+
+static uint32_t extract_dc(unsigned short addr, int data)
+{
+	int valid, way;
+	unsigned char state;
+	uint64_t taglo, pa;
+	uint32_t taghi, taglolo, taglohi;
+	uint8_t ecc, lru;
+	int res = 0;
+
+	prom_printf("Dcache index 0x%04x  ", addr);
+	for (way = 0; way < 4; way++) {
+		__asm__ __volatile__ (
+		"	.set	push\n\t"
+		"	.set	noreorder\n\t"
+		"	.set	mips64\n\t"
+		"	.set	noat\n\t"
+		"	cache	5, 0(%3)\n\t"	/* Index-load-tag-D */
+		"	mfc0	%0, $29, 2\n\t"
+		"	dmfc0	$1, $28, 2\n\t"
+		"	dsrl32	%1, $1, 0\n\t"
+		"	sll	%2, $1, 0\n\t"
+		"	.set	pop"
+		: "=r" (taghi), "=r" (taglohi), "=r" (taglolo)
+		: "r" ((way << 13) | addr));
+
+		taglo = ((unsigned long long)taglohi << 32) | taglolo;
+		pa = (taglo & 0xFFFFFFE000ULL) | addr;
+		if (way == 0) {
+			lru = (taghi >> 14) & 0xff;
+			prom_printf("[Bank %d Set 0x%02x]  LRU > %d %d %d %d > MRU\n",
+				    ((addr >> 11) & 0x2) | ((addr >> 5) & 1), /* bank */
+				    ((addr >> 6) & 0x3f), /* index */
+				    (lru & 0x3),
+				    ((lru >> 2) & 0x3),
+				    ((lru >> 4) & 0x3),
+				    ((lru >> 6) & 0x3));
+		}
+		state = (taghi >> 25) & 0x1f;
+		valid = DC_TAG_VALID(state);
+		prom_printf(" %d  [PA %010llx]  [state %s (%02x)]  raw tags: %08X-%016llX\n",
+			    way, pa, dc_state_str(state), state, taghi, taglo);
+		if (valid) {
+			if (((taglo >> 11) & 1) ^ range_parity(taglo, 39, 26)) {
+				prom_printf("   ** bad parity in PTag1\n");
+				res |= CP0_CERRD_TAG_ADDRESS;
+			}
+			if (((taglo >> 10) & 1) ^ range_parity(taglo, 25, 13)) {
+				prom_printf("   ** bad parity in PTag0\n");
+				res |= CP0_CERRD_TAG_ADDRESS;
+			}
+		} else {
+			res |= CP0_CERRD_TAG_STATE;
+		}
+
+		if (data) {
+			uint64_t datalo;
+			uint32_t datalohi, datalolo, datahi;
+			int offset;
+
+			for (offset = 0; offset < 4; offset++) {
+				/* Index-load-data-D */
+				__asm__ __volatile__ (
+				"	.set	push\n\t"
+				"	.set	noreorder\n\t"
+				"	.set	mips64\n\t"
+				"	.set	noat\n\t"
+				"	cache	7, 0(%3)\n\t" /* Index-load-data-D */
+				"	mfc0	%0, $29, 3\n\t"
+				"	dmfc0	$1, $28, 3\n\t"
+				"	dsrl32	%1, $1, 0 \n\t"
+				"	sll	%2, $1, 0 \n\t"
+				"	.set	pop"
+				: "=r" (datahi), "=r" (datalohi), "=r" (datalolo)
+				: "r" ((way << 13) | addr | (offset << 3)));
+				datalo = ((unsigned long long)datalohi << 32) | datalolo;
+				ecc = dc_ecc(datalo);
+				if (ecc != datahi) {
+					int bits = 0;
+					prom_printf("  ** bad ECC (%02x %02x) ->",
+						    datahi, ecc);
+					ecc ^= datahi;
+					while (ecc) {
+						if (ecc & 1) bits++;
+						ecc >>= 1;
+					}
+					res |= (bits == 1) ? CP0_CERRD_DATA_SBE : CP0_CERRD_DATA_DBE;
+				}
+				prom_printf("  %02X-%016llX", datahi, datalo);
+			}
+			prom_printf("\n");
+		}
+	}
+	return res;
+}
diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S
new file mode 100644
index 00000000000..e743622fd24
--- /dev/null
+++ b/arch/mips/mm/cex-gen.S
@@ -0,0 +1,42 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995 - 1999 Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ *
+ * Cache error handler
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+
+/*
+ * Game over.  Go to the button.  Press gently.  Swear where allowed by
+ * legislation.
+ */
+	LEAF(except_vec2_generic)
+	.set	noreorder
+	.set	noat
+	.set    mips0
+	/*
+	 * This is a very bad place to be.  Our cache error
+	 * detection has triggered.  If we have write-back data
+	 * in the cache, we may not be able to recover.  As a
+	 * first-order desperate measure, turn off KSEG0 cacheing.
+	 */
+	mfc0	k0,CP0_CONFIG
+	li	k1,~CONF_CM_CMASK
+	and	k0,k0,k1
+	ori	k0,k0,CONF_CM_UNCACHED
+	mtc0	k0,CP0_CONFIG
+	/* Give it a few cycles to sink in... */
+	nop
+	nop
+	nop
+
+	j	cache_parity_error
+	nop
+	END(except_vec2_generic)
diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S
new file mode 100644
index 00000000000..2c3a23aa88c
--- /dev/null
+++ b/arch/mips/mm/cex-sb1.S
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2001,2002,2003 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/init.h>
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+#include <asm/cacheops.h>
+#include <asm/sibyte/board.h>
+
+#define C0_ERRCTL     $26             /* CP0: Error info */
+#define C0_CERR_I     $27             /* CP0: Icache error */
+#define C0_CERR_D     $27,1           /* CP0: Dcache error */
+
+	/*
+	 * Based on SiByte sample software cache-err/cerr.S
+	 * CVS revision 1.8.  Only the 'unrecoverable' case
+	 * is changed.
+	 */
+
+        __INIT
+
+	.set	mips64
+	.set	noreorder
+	.set	noat
+
+	/*
+	 * sb1_cerr_vec: code to be copied to the Cache Error
+	 * Exception vector.  The code must be pushed out to memory
+	 * (either by copying to Kseg0 and Kseg1 both, or by flushing
+	 * the L1 and L2) since it is fetched as 0xa0000100.
+	 *
+	 * NOTE: Be sure this handler is at most 28 instructions long
+	 * since the final 16 bytes of the exception vector memory
+	 * (0x170-0x17f) are used to preserve k0, k1, and ra.
+	 */
+
+LEAF(except_vec2_sb1)
+	/*
+	 * If this error is recoverable, we need to exit the handler
+	 * without having dirtied any registers.  To do this,
+	 * save/restore k0 and k1 from low memory (Useg is direct
+	 * mapped while ERL=1). Note that we can't save to a
+	 * CPU-specific location without ruining a register in the
+	 * process.  This means we are vulnerable to data corruption
+	 * whenever the handler is reentered by a second CPU.
+	 */
+	sd	k0,0x170($0)
+	sd	k1,0x178($0)
+
+	/*
+	 * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
+	 * if we can fast-path out of here for a h/w-recovered error.
+	 */
+	mfc0	k1,C0_ERRCTL
+	bgtz	k1,attempt_recovery
+	 sll	k0,k1,1
+
+recovered_dcache:
+	/*
+	 * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA).
+	 * Ought to log the occurence of this recovered dcache error.
+	 */
+	b	recovered
+	 mtc0	$0,C0_CERR_D
+
+attempt_recovery:
+	/*
+	 * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31.  Any
+	 * Dcache errors we can recover from will take more extensive
+	 * processing.  For now, they are considered "unrecoverable".
+	 * Note that 'DC' becoming set (outside of ERL mode) will
+	 * cause 'IC' to clear; so if there's an Icache error, we'll
+	 * only find out about it if we recover from this error and
+	 * continue executing.
+	 */
+	bltz	k0,unrecoverable
+	 sll	k0,1
+
+	/*
+	 * k0 has C0_ERRCTL << 2, which puts 'IC' at bit 31.  If an
+	 * Icache error isn't indicated, I'm not sure why we got here.
+	 * Consider that case "unrecoverable" for now.
+	 */
+	bgez	k0,unrecoverable
+
+attempt_icache_recovery:
+	/*
+	 * External icache errors are due to uncorrectable ECC errors
+	 * in the L2 cache or Memory Controller and cannot be
+	 * recovered here.
+	 */
+	 mfc0	k0,C0_CERR_I		/* delay slot */
+	li	k1,1 << 26		/* ICACHE_EXTERNAL */
+	and	k1,k0
+	bnez	k1,unrecoverable
+	 andi	k0,0x1fe0
+
+	/*
+	 * Since the error is internal, the 'IDX' field from
+	 * CacheErr-I is valid and we can just invalidate all blocks
+	 * in that set.
+	 */
+	cache	Index_Invalidate_I,(0<<13)(k0)
+	cache	Index_Invalidate_I,(1<<13)(k0)
+	cache	Index_Invalidate_I,(2<<13)(k0)
+	cache	Index_Invalidate_I,(3<<13)(k0)
+
+	/* Ought to log this recovered icache error */
+
+recovered:
+	/* Restore the saved registers */
+	ld	k0,0x170($0)
+	ld	k1,0x178($0)
+	eret
+
+unrecoverable:
+	/* Unrecoverable Icache or Dcache error; log it and/or fail */
+	j	handle_vec2_sb1
+	 nop
+
+END(except_vec2_sb1)
+
+	__FINIT
+
+	LEAF(handle_vec2_sb1)
+	mfc0	k0,CP0_CONFIG
+	li	k1,~CONF_CM_CMASK
+	and	k0,k0,k1
+	ori	k0,k0,CONF_CM_UNCACHED
+	mtc0	k0,CP0_CONFIG
+
+	SSNOP
+	SSNOP
+	SSNOP
+	SSNOP
+	bnezl	$0, 1f
+1:
+	mfc0	k0, CP0_STATUS
+	sll	k0, k0, 3			# check CU0 (kernel?)
+	bltz	k0, 2f
+	 nop
+
+	/* Get a valid Kseg0 stack pointer.  Any task's stack pointer
+	 * will do, although if we ever want to resume execution we
+	 * better not have corrupted any state. */
+	get_saved_sp
+	move	sp, k1
+
+2:
+	j	sb1_cache_error
+	 nop
+
+	END(handle_vec2_sb1)
diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c
new file mode 100644
index 00000000000..97a50d38c98
--- /dev/null
+++ b/arch/mips/mm/dma-coherent.c
@@ -0,0 +1,255 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+	ret = (void *) __get_free_pages(gfp, get_order(size));
+
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_phys(ret);
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+	__attribute__((alias("dma_alloc_noncoherent")));
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	unsigned long addr = (unsigned long) vaddr;
+
+	free_pages(addr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent")));
+
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+	enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	return __pa(ptr);
+}
+
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_single);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	for (i = 0; i < nents; i++, sg++) {
+		sg->dma_address = (dma_addr_t)page_to_phys(sg->page) + sg->offset;
+	}
+
+	return nents;
+}
+
+EXPORT_SYMBOL(dma_map_sg);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	return page_to_phys(page) + offset;
+}
+
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_page);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_sg);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+EXPORT_SYMBOL(dma_mapping_error);
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * we fall back to GFP_DMA when the mask isn't all 1s,
+	 * so we can't guarantee allocations that must be
+	 * within a tighter range than GFP_DMA..
+	 */
+	if (mask < 0x00ffffff)
+		return 0;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_supported);
+
+int dma_is_consistent(dma_addr_t dma_addr)
+{
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_is_consistent);
+
+void dma_cache_sync(void *vaddr, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_cache_sync);
+
+/* The DAC routines are a PCIism.. */
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+
+dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev,
+	struct page *page, unsigned long offset, int direction)
+{
+	return (dma64_addr_t)page_to_phys(page) + offset;
+}
+
+EXPORT_SYMBOL(pci_dac_page_to_dma);
+
+struct page *pci_dac_dma_to_page(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	return mem_map + (dma_addr >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_page);
+
+unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	return dma_addr & ~PAGE_MASK;
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_offset);
+
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
+
+#endif /* CONFIG_PCI */
diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c
new file mode 100644
index 00000000000..aa7c94b5d78
--- /dev/null
+++ b/arch/mips/mm/dma-ip27.c
@@ -0,0 +1,257 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+
+#include <asm/cache.h>
+#include <asm/pci/bridge.h>
+
+#define pdev_to_baddr(pdev, addr) \
+	(BRIDGE_CONTROLLER(pdev->bus)->baddr + (addr))
+#define dev_to_baddr(dev, addr) \
+	pdev_to_baddr(to_pci_dev(dev), (addr))
+
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+	ret = (void *) __get_free_pages(gfp, get_order(size));
+
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = dev_to_baddr(dev, virt_to_phys(ret));
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+	__attribute__((alias("dma_alloc_noncoherent")));
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	unsigned long addr = (unsigned long) vaddr;
+
+	free_pages(addr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent")));
+
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+	enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	return dev_to_baddr(dev, __pa(ptr));
+}
+
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_single);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	for (i = 0; i < nents; i++, sg++) {
+		sg->dma_address = (dma_addr_t) dev_to_baddr(dev,
+			page_to_phys(sg->page) + sg->offset);
+	}
+
+	return nents;
+}
+
+EXPORT_SYMBOL(dma_map_sg);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	return dev_to_baddr(dev, page_to_phys(page) + offset);
+}
+
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_page);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	     enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_unmap_sg);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+EXPORT_SYMBOL(dma_mapping_error);
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * we fall back to GFP_DMA when the mask isn't all 1s,
+	 * so we can't guarantee allocations that must be
+	 * within a tighter range than GFP_DMA..
+	 */
+	if (mask < 0x00ffffff)
+		return 0;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_supported);
+
+int dma_is_consistent(dma_addr_t dma_addr)
+{
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_is_consistent);
+
+void dma_cache_sync(void *vaddr, size_t size,
+	       enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_cache_sync);
+
+dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev,
+	struct page *page, unsigned long offset, int direction)
+{
+	dma64_addr_t addr = page_to_phys(page) + offset;
+
+	return (dma64_addr_t) pdev_to_baddr(pdev, addr);
+}
+
+EXPORT_SYMBOL(pci_dac_page_to_dma);
+
+struct page *pci_dac_dma_to_page(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus);
+
+	return pfn_to_page((dma_addr - bc->baddr) >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_page);
+
+unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	return dma_addr & ~PAGE_MASK;
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_offset);
+
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c
new file mode 100644
index 00000000000..2cbe196c35f
--- /dev/null
+++ b/arch/mips/mm/dma-ip32.c
@@ -0,0 +1,382 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ * IP32 changes by Ilya.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/ip32/crime.h>
+
+/*
+ * Warning on the terminology - Linux calls an uncached area coherent;
+ * MIPS terminology calls memory areas with hardware maintained coherency
+ * coherent.
+ */
+
+/*
+ * Few notes.
+ * 1. CPU sees memory as two chunks: 0-256M@0x0, and the rest @0x40000000+256M
+ * 2. PCI sees memory as one big chunk @0x0 (or we could use 0x40000000 for native-endian)
+ * 3. All other devices see memory as one big chunk at 0x40000000
+ * 4. Non-PCI devices will pass NULL as struct device*
+ * Thus we translate differently, depending on device.
+ */
+
+#define RAM_OFFSET_MASK	0x3fffffff
+
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+	ret = (void *) __get_free_pages(gfp, get_order(size));
+
+	if (ret != NULL) {
+		unsigned long addr = virt_to_phys(ret)&RAM_OFFSET_MASK;
+		memset(ret, 0, size);
+		if(dev==NULL)
+		    addr+= CRIME_HI_MEM_BASE;
+		*dma_handle = addr;
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+
+	ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+	if (ret) {
+		dma_cache_wback_inv((unsigned long) ret, size);
+		ret = UNCAC_ADDR(ret);
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	unsigned long addr = (unsigned long) vaddr;
+
+	addr = CAC_ADDR(addr);
+	free_pages(addr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_coherent);
+
+static inline void __dma_sync(unsigned long addr, size_t size,
+	enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		dma_cache_wback(addr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		dma_cache_inv(addr, size);
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		dma_cache_wback_inv(addr, size);
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+	enum dma_data_direction direction)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		dma_cache_wback(addr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		dma_cache_inv(addr, size);
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		dma_cache_wback_inv(addr, size);
+		break;
+
+	default:
+		BUG();
+	}
+
+	addr = virt_to_phys(ptr)&RAM_OFFSET_MASK;;
+	if(dev == NULL)
+	    addr+=CRIME_HI_MEM_BASE;
+	return (dma_addr_t)addr;
+}
+
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+	enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		break;
+
+	case DMA_FROM_DEVICE:
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_single);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	for (i = 0; i < nents; i++, sg++) {
+		unsigned long addr;
+
+		addr = (unsigned long) page_address(sg->page)+sg->offset;
+		if (addr)
+			__dma_sync(addr, sg->length, direction);
+		addr = __pa(addr)&RAM_OFFSET_MASK;;
+		if(dev == NULL)
+			addr +=  CRIME_HI_MEM_BASE;
+		sg->dma_address = (dma_addr_t)addr;
+	}
+
+	return nents;
+}
+
+EXPORT_SYMBOL(dma_map_sg);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = (unsigned long) page_address(page) + offset;
+	dma_cache_wback_inv(addr, size);
+	addr = __pa(addr)&RAM_OFFSET_MASK;;
+	if(dev == NULL)
+		addr +=  CRIME_HI_MEM_BASE;
+
+	return (dma_addr_t)addr;
+}
+
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (direction != DMA_TO_DEVICE) {
+		unsigned long addr;
+
+		dma_address&=RAM_OFFSET_MASK;
+		addr = dma_address + PAGE_OFFSET;
+		if(dma_address>=256*1024*1024)
+			addr+=CRIME_HI_MEM_BASE;
+		dma_cache_wback_inv(addr, size);
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_page);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	enum dma_data_direction direction)
+{
+	unsigned long addr;
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (direction == DMA_TO_DEVICE)
+		return;
+
+	for (i = 0; i < nhwentries; i++, sg++) {
+		addr = (unsigned long) page_address(sg->page);
+		if (!addr)
+			continue;
+		dma_cache_wback_inv(addr + sg->offset, sg->length);
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_sg);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	dma_handle&=RAM_OFFSET_MASK;
+	addr = dma_handle + PAGE_OFFSET;
+	if(dma_handle>=256*1024*1024)
+	    addr+=CRIME_HI_MEM_BASE;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	dma_handle&=RAM_OFFSET_MASK;
+	addr = dma_handle + PAGE_OFFSET;
+	if(dma_handle>=256*1024*1024)
+	    addr+=CRIME_HI_MEM_BASE;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	dma_handle&=RAM_OFFSET_MASK;
+	addr = dma_handle + offset + PAGE_OFFSET;
+	if(dma_handle>=256*1024*1024)
+	    addr+=CRIME_HI_MEM_BASE;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	dma_handle&=RAM_OFFSET_MASK;
+	addr = dma_handle + offset + PAGE_OFFSET;
+	if(dma_handle>=256*1024*1024)
+	    addr+=CRIME_HI_MEM_BASE;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	/* Make sure that gcc doesn't leave the empty loop body.  */
+	for (i = 0; i < nelems; i++, sg++)
+		__dma_sync((unsigned long)page_address(sg->page),
+		           sg->length, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	/* Make sure that gcc doesn't leave the empty loop body.  */
+	for (i = 0; i < nelems; i++, sg++)
+		__dma_sync((unsigned long)page_address(sg->page),
+		           sg->length, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+EXPORT_SYMBOL(dma_mapping_error);
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * we fall back to GFP_DMA when the mask isn't all 1s,
+	 * so we can't guarantee allocations that must be
+	 * within a tighter range than GFP_DMA..
+	 */
+	if (mask < 0x00ffffff)
+		return 0;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_supported);
+
+int dma_is_consistent(dma_addr_t dma_addr)
+{
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_is_consistent);
+
+void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction)
+{
+	if (direction == DMA_NONE)
+		return;
+
+	dma_cache_wback_inv((unsigned long)vaddr, size);
+}
+
+EXPORT_SYMBOL(dma_cache_sync);
+
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
new file mode 100644
index 00000000000..9895e32b0fc
--- /dev/null
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -0,0 +1,400 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+
+/*
+ * Warning on the terminology - Linux calls an uncached area coherent;
+ * MIPS terminology calls memory areas with hardware maintained coherency
+ * coherent.
+ */
+
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+		gfp |= GFP_DMA;
+	ret = (void *) __get_free_pages(gfp, get_order(size));
+
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_phys(ret);
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t * dma_handle, int gfp)
+{
+	void *ret;
+
+	ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+	if (ret) {
+		dma_cache_wback_inv((unsigned long) ret, size);
+		ret = UNCAC_ADDR(ret);
+	}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle)
+{
+	unsigned long addr = (unsigned long) vaddr;
+
+	addr = CAC_ADDR(addr);
+	free_pages(addr, get_order(size));
+}
+
+EXPORT_SYMBOL(dma_free_coherent);
+
+static inline void __dma_sync(unsigned long addr, size_t size,
+	enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		dma_cache_wback(addr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		dma_cache_inv(addr, size);
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		dma_cache_wback_inv(addr, size);
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
+	enum dma_data_direction direction)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		dma_cache_wback(addr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		dma_cache_inv(addr, size);
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		dma_cache_wback_inv(addr, size);
+		break;
+
+	default:
+		BUG();
+	}
+
+	return virt_to_phys(ptr);
+}
+
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+	enum dma_data_direction direction)
+{
+	unsigned long addr;
+	addr = dma_addr + PAGE_OFFSET;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		//dma_cache_wback(addr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		//dma_cache_inv(addr, size);
+		break;
+
+	case DMA_BIDIRECTIONAL:
+		//dma_cache_wback_inv(addr, size);
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_single);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	for (i = 0; i < nents; i++, sg++) {
+		unsigned long addr;
+ 
+		addr = (unsigned long) page_address(sg->page);
+		if (addr)
+			__dma_sync(addr + sg->offset, sg->length, direction);
+		sg->dma_address = (dma_addr_t)
+			(page_to_phys(sg->page) + sg->offset);
+	}
+
+	return nents;
+}
+
+EXPORT_SYMBOL(dma_map_sg);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = (unsigned long) page_address(page) + offset;
+	dma_cache_wback_inv(addr, size);
+
+	return page_to_phys(page) + offset;
+}
+
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+	enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (direction != DMA_TO_DEVICE) {
+		unsigned long addr;
+
+		addr = dma_address + PAGE_OFFSET;
+		dma_cache_wback_inv(addr, size);
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_page);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+	enum dma_data_direction direction)
+{
+	unsigned long addr;
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (direction == DMA_TO_DEVICE)
+		return;
+
+	for (i = 0; i < nhwentries; i++, sg++) {
+		addr = (unsigned long) page_address(sg->page);
+		if (!addr)
+			continue;
+		dma_cache_wback_inv(addr + sg->offset, sg->length);
+	}
+}
+
+EXPORT_SYMBOL(dma_unmap_sg);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+ 
+	BUG_ON(direction == DMA_NONE);
+ 
+	addr = dma_handle + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = dma_handle + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = dma_handle + offset + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = dma_handle + offset + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction)
+{
+	int i;
+ 
+	BUG_ON(direction == DMA_NONE);
+ 
+	/* Make sure that gcc doesn't leave the empty loop body.  */
+	for (i = 0; i < nelems; i++, sg++)
+		__dma_sync((unsigned long)page_address(sg->page),
+		           sg->length, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	/* Make sure that gcc doesn't leave the empty loop body.  */
+	for (i = 0; i < nelems; i++, sg++)
+		__dma_sync((unsigned long)page_address(sg->page),
+		           sg->length, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+int dma_mapping_error(dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+EXPORT_SYMBOL(dma_mapping_error);
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * we fall back to GFP_DMA when the mask isn't all 1s,
+	 * so we can't guarantee allocations that must be
+	 * within a tighter range than GFP_DMA..
+	 */
+	if (mask < 0x00ffffff)
+		return 0;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_supported);
+
+int dma_is_consistent(dma_addr_t dma_addr)
+{
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_is_consistent);
+
+void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction)
+{
+	if (direction == DMA_NONE)
+		return;
+
+	dma_cache_wback_inv((unsigned long)vaddr, size);
+}
+
+EXPORT_SYMBOL(dma_cache_sync);
+
+/* The DAC routines are a PCIism.. */
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+
+dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev,
+	struct page *page, unsigned long offset, int direction)
+{
+	return (dma64_addr_t)page_to_phys(page) + offset;
+}
+
+EXPORT_SYMBOL(pci_dac_page_to_dma);
+
+struct page *pci_dac_dma_to_page(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	return mem_map + (dma_addr >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_page);
+
+unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev,
+	dma64_addr_t dma_addr)
+{
+	return dma_addr & ~PAGE_MASK;
+}
+
+EXPORT_SYMBOL(pci_dac_dma_to_offset);
+
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+	dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+	dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
+
+#endif /* CONFIG_PCI */
diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c
new file mode 100644
index 00000000000..297fb9f390d
--- /dev/null
+++ b/arch/mips/mm/extable.c
@@ -0,0 +1,21 @@
+/*
+ * linux/arch/mips/mm/extable.c
+ */
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/branch.h>
+#include <asm/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(exception_epc(regs));
+	if (fixup) {
+		regs->cp0_epc = fixup->nextinsn;
+
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
new file mode 100644
index 00000000000..ec8077c74e9
--- /dev/null
+++ b/arch/mips/mm/fault.c
@@ -0,0 +1,236 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995 - 2000 by Ralf Baechle
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vt_kern.h>		/* For unblank_screen() */
+#include <linux/module.h>
+
+#include <asm/branch.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
+			      unsigned long address)
+{
+	struct vm_area_struct * vma = NULL;
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	const int field = sizeof(unsigned long) * 2;
+	siginfo_t info;
+
+#if 0
+	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(),
+	       current->comm, current->pid, field, address, write,
+	       field, regs->cp0_epc);
+#endif
+
+	info.si_code = SEGV_MAPERR;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+	if (unlikely(address >= VMALLOC_START))
+		goto vmalloc_fault;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_atomic() || !mm)
+		goto bad_area_nosemaphore;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	info.si_code = SEGV_ACCERR;
+
+	if (write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+survive:
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	switch (handle_mm_fault(mm, vma, address, write)) {
+	case VM_FAULT_MINOR:
+		tsk->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		tsk->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	case VM_FAULT_OOM:
+		goto out_of_memory;
+	default:
+		BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		tsk->thread.cp0_badvaddr = address;
+		tsk->thread.error_code = write;
+#if 0
+		printk("do_page_fault() #2: sending SIGSEGV to %s for "
+		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
+		       tsk->comm,
+		       write ? "write access to" : "read access from",
+		       field, address,
+		       field, (unsigned long) regs->cp0_epc,
+		       field, (unsigned long) regs->regs[31]);
+#endif
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		/* info.si_code has been set above */
+		info.si_addr = (void *) address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs)) {
+		current->thread.cp0_baduaddr = address;
+		return;
+	}
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+
+	bust_spinlocks(1);
+
+	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
+	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
+	       smp_processor_id(), field, address, field, regs->cp0_epc,
+	       field,  regs->regs[31]);
+	die("Oops", regs);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (tsk->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.cp0_badvaddr = address;
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void *) address;
+	force_sig_info(SIGBUS, &info, tsk);
+
+	return;
+
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int offset = __pgd_offset(address);
+		pgd_t *pgd, *pgd_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset;
+		pgd_k = init_mm.pgd + offset;
+
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+		set_pgd(pgd, *pgd_k);
+
+		pmd = pmd_offset(pgd, address);
+		pmd_k = pmd_offset(pgd_k, address);
+		if (!pmd_present(*pmd_k))
+			goto no_context;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
+		return;
+	}
+}
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
new file mode 100644
index 00000000000..dd5e2e31885
--- /dev/null
+++ b/arch/mips/mm/highmem.c
@@ -0,0 +1,103 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+
+void *__kmap(struct page *page)
+{
+	void *addr;
+
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	addr = kmap_high(page);
+	flush_tlb_one((unsigned long)addr);
+
+	return addr;
+}
+
+void __kunmap(struct page *page)
+{
+	if (in_interrupt())
+		BUG();
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
+ * no global lock is needed and because the kmap code must perform a global TLB
+ * invalidation when the kmap pool wraps.
+ *
+ * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+
+void *__kmap_atomic(struct page *page, enum km_type type)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	inc_preempt_count();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	if (!pte_none(*(kmap_pte-idx)))
+		BUG();
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+	local_flush_tlb_one((unsigned long)vaddr);
+
+	return (void*) vaddr;
+}
+
+void __kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#ifdef CONFIG_DEBUG_HIGHMEM
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	if (vaddr < FIXADDR_START) { // FIXME
+		dec_preempt_count();
+		preempt_check_resched();
+		return;
+	}
+
+	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+		BUG();
+
+	/*
+	 * force other mappings to Oops if they'll try to access
+	 * this pte without first remap it
+	 */
+	pte_clear(&init_mm, vaddr, kmap_pte-idx);
+	local_flush_tlb_one(vaddr);
+#endif
+
+	dec_preempt_count();
+	preempt_check_resched();
+}
+
+struct page *__kmap_atomic_to_page(void *ptr)
+{
+	unsigned long idx, vaddr = (unsigned long)ptr;
+	pte_t *pte;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	idx = virt_to_fix(vaddr);
+	pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
+	return pte_page(*pte);
+}
+
+EXPORT_SYMBOL(__kmap);
+EXPORT_SYMBOL(__kunmap);
+EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(__kunmap_atomic);
+EXPORT_SYMBOL(__kmap_atomic_to_page);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
new file mode 100644
index 00000000000..b027ce7efbc
--- /dev/null
+++ b/arch/mips/mm/init.c
@@ -0,0 +1,304 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 - 2000 Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cachectl.h>
+#include <asm/cpu.h>
+#include <asm/dma.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+unsigned long highstart_pfn, highend_pfn;
+
+/*
+ * We have up to 8 empty zeroed pages so we can map one of the right colour
+ * when needed.  This is necessary only on R4000 / R4400 SC and MC versions
+ * where we have to avoid VCED / VECI exceptions for good performance at
+ * any price.  Since page is never written to after the initialization we
+ * don't have to care about aliases on other CPUs.
+ */
+unsigned long empty_zero_page, zero_page_mask;
+
+/*
+ * Not static inline because used by IP27 special magic initialization code
+ */
+unsigned long setup_zero_pages(void)
+{
+	unsigned long order, size;
+	struct page *page;
+
+	if (cpu_has_vce)
+		order = 3;
+	else
+		order = 0;
+
+	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!empty_zero_page)
+		panic("Oh boy, that early out of memory?");
+
+	page = virt_to_page(empty_zero_page);
+	while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) {
+		set_bit(PG_reserved, &page->flags);
+		set_page_count(page, 0);
+		page++;
+	}
+
+	size = PAGE_SIZE << order;
+	zero_page_mask = (size - 1) & PAGE_MASK;
+
+	return 1UL << order;
+}
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr)					\
+	pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+static void __init kmap_init(void)
+{
+	unsigned long kmap_vstart;
+
+	/* cache the first kmap pte */
+	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+	kmap_prot = PAGE_KERNEL;
+}
+
+#ifdef CONFIG_MIPS64
+static void __init fixrange_init(unsigned long start, unsigned long end,
+	pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int i, j;
+	unsigned long vaddr;
+
+	vaddr = start;
+	i = __pgd_offset(vaddr);
+	j = __pmd_offset(vaddr);
+	pgd = pgd_base + i;
+
+	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+		pmd = (pmd_t *)pgd;
+		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+			if (pmd_none(*pmd)) {
+				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+				set_pmd(pmd, __pmd(pte));
+				if (pte != pte_offset_kernel(pmd, 0))
+					BUG();
+			}
+			vaddr += PMD_SIZE;
+		}
+		j = 0;
+	}
+}
+#endif /* CONFIG_MIPS64 */
+#endif /* CONFIG_HIGHMEM */
+
+#ifndef CONFIG_DISCONTIGMEM
+extern void pagetable_init(void);
+
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+	unsigned long max_dma, high, low;
+
+	pagetable_init();
+
+#ifdef CONFIG_HIGHMEM
+	kmap_init();
+#endif
+
+	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	low = max_low_pfn;
+	high = highend_pfn;
+
+#ifdef CONFIG_ISA
+	if (low < max_dma)
+		zones_size[ZONE_DMA] = low;
+	else {
+		zones_size[ZONE_DMA] = max_dma;
+		zones_size[ZONE_NORMAL] = low - max_dma;
+	}
+#else
+	zones_size[ZONE_DMA] = low;
+#endif
+#ifdef CONFIG_HIGHMEM
+	if (cpu_has_dc_aliases) {
+		printk(KERN_WARNING "This processor doesn't support highmem.");
+		if (high - low)
+			printk(" %ldk highmem ignored", high - low);
+		printk("\n");
+	} else
+		zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+
+	free_area_init(zones_size);
+}
+
+#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+
+static inline int page_is_ram(unsigned long pagenr)
+{
+	int i;
+
+	for (i = 0; i < boot_mem_map.nr_map; i++) {
+		unsigned long addr, end;
+
+		if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
+			/* not usable memory */
+			continue;
+
+		addr = PFN_UP(boot_mem_map.map[i].addr);
+		end = PFN_DOWN(boot_mem_map.map[i].addr +
+			       boot_mem_map.map[i].size);
+
+		if (pagenr >= addr && pagenr < end)
+			return 1;
+	}
+
+	return 0;
+}
+
+void __init mem_init(void)
+{
+	unsigned long codesize, reservedpages, datasize, initsize;
+	unsigned long tmp, ram;
+
+#ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_DISCONTIGMEM
+#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
+#endif
+	max_mapnr = num_physpages = highend_pfn;
+#else
+	max_mapnr = num_physpages = max_low_pfn;
+#endif
+	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
+
+	totalram_pages += free_all_bootmem();
+	totalram_pages -= setup_zero_pages();	/* Setup zeroed pages.  */
+
+	reservedpages = ram = 0;
+	for (tmp = 0; tmp < max_low_pfn; tmp++)
+		if (page_is_ram(tmp)) {
+			ram++;
+			if (PageReserved(mem_map+tmp))
+				reservedpages++;
+		}
+
+#ifdef CONFIG_HIGHMEM
+	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+		struct page *page = mem_map + tmp;
+
+		if (!page_is_ram(tmp)) {
+			SetPageReserved(page);
+			continue;
+		}
+		ClearPageReserved(page);
+#ifdef CONFIG_LIMITED_DMA
+		set_page_address(page, lowmem_page_address(page));
+#endif
+		set_bit(PG_highmem, &page->flags);
+		set_page_count(page, 1);
+		__free_page(page);
+		totalhigh_pages++;
+	}
+	totalram_pages += totalhigh_pages;
+#endif
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
+	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
+	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       ram << (PAGE_SHIFT-10),
+	       codesize >> 10,
+	       reservedpages << (PAGE_SHIFT-10),
+	       datasize >> 10,
+	       initsize >> 10,
+	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+}
+#endif /* !CONFIG_DISCONTIGMEM */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_MIPS64
+	/* Switch from KSEG0 to XKPHYS addresses */
+	start = (unsigned long)phys_to_virt(CPHYSADDR(start));
+	end = (unsigned long)phys_to_virt(CPHYSADDR(end));
+#endif
+	if (start < end)
+		printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
+		       (end - start) >> 10);
+
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+extern unsigned long prom_free_prom_memory(void);
+
+void free_initmem(void)
+{
+	unsigned long addr, page, freed;
+
+	freed = prom_free_prom_memory();
+
+	addr = (unsigned long) &__init_begin;
+	while (addr < (unsigned long) &__init_end) {
+#ifdef CONFIG_MIPS64
+		page = PAGE_OFFSET | CPHYSADDR(addr);
+#else
+		page = addr;
+#endif
+		ClearPageReserved(virt_to_page(page));
+		set_page_count(virt_to_page(page), 1);
+		free_page(page);
+		totalram_pages++;
+		freed += PAGE_SIZE;
+		addr += PAGE_SIZE;
+	}
+	printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n",
+	       freed >> 10);
+}
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
new file mode 100644
index 00000000000..adf352273f6
--- /dev/null
+++ b/arch/mips/mm/ioremap.c
@@ -0,0 +1,202 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ * (C) Copyright 2001, 2002 Ralf Baechle
+ */
+#include <linux/module.h>
+#include <asm/addrspace.h>
+#include <asm/byteorder.h>
+
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+#include <asm/tlbflush.h>
+
+static inline void remap_area_pte(pte_t * pte, unsigned long address,
+	phys_t size, phys_t phys_addr, unsigned long flags)
+{
+	phys_t end;
+	unsigned long pfn;
+	pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE
+	                           | __WRITEABLE | flags);
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	if (address >= end)
+		BUG();
+	pfn = phys_addr >> PAGE_SHIFT;
+	do {
+		if (!pte_none(*pte)) {
+			printk("remap_area_pte: page already exists\n");
+			BUG();
+		}
+		set_pte(pte, pfn_pte(pfn, pgprot));
+		address += PAGE_SIZE;
+		pfn++;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
+	phys_t size, phys_t phys_addr, unsigned long flags)
+{
+	phys_t end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	phys_addr -= address;
+	if (address >= end)
+		BUG();
+	do {
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+static int remap_area_pages(unsigned long address, phys_t phys_addr,
+	phys_t size, unsigned long flags)
+{
+	int error;
+	pgd_t * dir;
+	unsigned long end = address + size;
+
+	phys_addr -= address;
+	dir = pgd_offset(&init_mm, address);
+	flush_cache_all();
+	if (address >= end)
+		BUG();
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		pmd_t *pmd;
+		pmd = pmd_alloc(&init_mm, dir, address);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		if (remap_area_pmd(pmd, address, end - address,
+					 phys_addr + address, flags))
+			break;
+		error = 0;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	spin_unlock(&init_mm.page_table_lock);
+	flush_tlb_all();
+	return error;
+}
+
+/*
+ * Allow physical addresses to be fixed up to help 36 bit peripherals.
+ */
+phys_t __attribute__ ((weak))
+fixup_bigphys_addr(phys_t phys_addr, phys_t size)
+{
+	return phys_addr;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+
+#define IS_LOW512(addr) (!((phys_t)(addr) & (phys_t) ~0x1fffffffULL))
+
+void * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags)
+{
+	struct vm_struct * area;
+	unsigned long offset;
+	phys_t last_addr;
+	void * addr;
+
+	phys_addr = fixup_bigphys_addr(phys_addr, size);
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Map uncached objects in the low 512mb of address space using KSEG1,
+	 * otherwise map using page tables.
+	 */
+	if (IS_LOW512(phys_addr) && IS_LOW512(last_addr) &&
+	    flags == _CACHE_UNCACHED)
+		return (void *) KSEG1ADDR(phys_addr);
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using..
+	 */
+	if (phys_addr < virt_to_phys(high_memory)) {
+		char *t_addr, *t_end;
+		struct page *page;
+
+		t_addr = __va(phys_addr);
+		t_end = t_addr + (size - 1);
+
+		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+			if(!PageReserved(page))
+				return NULL;
+	}
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	addr = area->addr;
+	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+		vunmap(addr);
+		return NULL;
+	}
+
+	return (void *) (offset + (char *)addr);
+}
+
+#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == KSEG1)
+
+void __iounmap(volatile void __iomem *addr)
+{
+	struct vm_struct *p;
+
+	if (IS_KSEG1(addr))
+		return;
+
+	p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+	if (!p) {
+		printk(KERN_ERR "iounmap: bad address %p\n", addr);
+		return;
+	}
+
+        kfree(p);
+}
+
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c
new file mode 100644
index 00000000000..9f8b1654157
--- /dev/null
+++ b/arch/mips/mm/pg-r4k.c
@@ -0,0 +1,489 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+
+#include <asm/cacheops.h>
+#include <asm/inst.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/prefetch.h>
+#include <asm/system.h>
+#include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu.h>
+#include <asm/war.h>
+
+#define half_scache_line_size()		(cpu_scache_line_size() >> 1)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x58 bytes
+ * R4600 v1.7:				0x5c bytes
+ * R4600 v2.0:				0x60 bytes
+ * With prefetching, 16 byte strides	0xa0 bytes
+ */
+
+static unsigned int clear_page_array[0x130 / 4];
+
+void clear_page(void * page) __attribute__((alias("clear_page_array")));
+
+EXPORT_SYMBOL(clear_page);
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 byte strides	0x0b8 bytes
+ */
+static unsigned int copy_page_array[0x148 / 4];
+
+void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
+
+EXPORT_SYMBOL(copy_page);
+
+/*
+ * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
+ * with 64-bit kernels.  The prefetch offsets have been experimentally tuned
+ * an Origin 200.
+ */
+static int pref_offset_clear __initdata = 512;
+static int pref_offset_copy  __initdata = 256;
+
+static unsigned int pref_src_mode __initdata;
+static unsigned int pref_dst_mode __initdata;
+
+static int load_offset __initdata;
+static int store_offset __initdata;
+
+static unsigned int __initdata *dest, *epc;
+
+static unsigned int instruction_pending;
+static union mips_instruction delayed_mi;
+
+static void __init emit_instruction(union mips_instruction mi)
+{
+	if (instruction_pending)
+		*epc++ = delayed_mi.word;
+
+	instruction_pending = 1;
+	delayed_mi = mi;
+}
+
+static inline void flush_delay_slot_or_nop(void)
+{
+	if (instruction_pending) {
+		*epc++ = delayed_mi.word;
+		instruction_pending = 0;
+		return;
+	}
+
+	*epc++ = 0;
+}
+
+static inline unsigned int *label(void)
+{
+	if (instruction_pending) {
+		*epc++ = delayed_mi.word;
+		instruction_pending = 0;
+	}
+
+	return epc;
+}
+
+static inline void build_insn_word(unsigned int word)
+{
+	union mips_instruction mi;
+
+	mi.word		 = word;
+
+	emit_instruction(mi);
+}
+
+static inline void build_nop(void)
+{
+	build_insn_word(0);			/* nop */
+}
+
+static inline void build_src_pref(int advance)
+{
+	if (!(load_offset & (cpu_dcache_line_size() - 1))) {
+		union mips_instruction mi;
+
+		mi.i_format.opcode     = pref_op;
+		mi.i_format.rs         = 5;		/* $a1 */
+		mi.i_format.rt         = pref_src_mode;
+		mi.i_format.simmediate = load_offset + advance;
+
+		emit_instruction(mi);
+	}
+}
+
+static inline void __build_load_reg(int reg)
+{
+	union mips_instruction mi;
+	unsigned int width;
+
+	if (cpu_has_64bit_gp_regs) {
+		mi.i_format.opcode     = ld_op;
+		width = 8;
+	} else {
+		mi.i_format.opcode     = lw_op;
+		width = 4;
+	}
+	mi.i_format.rs         = 5;		/* $a1 */
+	mi.i_format.rt         = reg;		/* $reg */
+	mi.i_format.simmediate = load_offset;
+
+	load_offset += width;
+	emit_instruction(mi);
+}
+
+static inline void build_load_reg(int reg)
+{
+	if (cpu_has_prefetch)
+		build_src_pref(pref_offset_copy);
+
+	__build_load_reg(reg);
+}
+
+static inline void build_dst_pref(int advance)
+{
+	if (!(store_offset & (cpu_dcache_line_size() - 1))) {
+		union mips_instruction mi;
+
+		mi.i_format.opcode     = pref_op;
+		mi.i_format.rs         = 4;		/* $a0 */
+		mi.i_format.rt         = pref_dst_mode;
+		mi.i_format.simmediate = store_offset + advance;
+
+		emit_instruction(mi);
+	}
+}
+
+static inline void build_cdex_s(void)
+{
+	union mips_instruction mi;
+
+	if ((store_offset & (cpu_scache_line_size() - 1)))
+		return;
+
+	mi.c_format.opcode     = cache_op;
+	mi.c_format.rs         = 4;		/* $a0 */
+	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
+	mi.c_format.cache      = 3;		/* Secondary Data Cache */
+	mi.c_format.simmediate = store_offset;
+
+	emit_instruction(mi);
+}
+
+static inline void build_cdex_p(void)
+{
+	union mips_instruction mi;
+
+	if (store_offset & (cpu_dcache_line_size() - 1))
+		return;
+
+	if (R4600_V1_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2010)) {
+		build_nop();
+		build_nop();
+		build_nop();
+		build_nop();
+	}
+
+	if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020))
+		build_insn_word(0x8c200000);	/* lw      $zero, ($at) */
+
+	mi.c_format.opcode     = cache_op;
+	mi.c_format.rs         = 4;		/* $a0 */
+	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
+	mi.c_format.cache      = 1;		/* Data Cache */
+	mi.c_format.simmediate = store_offset;
+
+	emit_instruction(mi);
+}
+
+static void __init __build_store_reg(int reg)
+{
+	union mips_instruction mi;
+	unsigned int width;
+
+	if (cpu_has_64bit_gp_regs ||
+	    (cpu_has_64bit_zero_reg && reg == 0)) {
+		mi.i_format.opcode     = sd_op;
+		width = 8;
+	} else {
+		mi.i_format.opcode     = sw_op;
+		width = 4;
+	}
+	mi.i_format.rs         = 4;		/* $a0 */
+	mi.i_format.rt         = reg;		/* $reg */
+	mi.i_format.simmediate = store_offset;
+
+	store_offset += width;
+	emit_instruction(mi);
+}
+
+static inline void build_store_reg(int reg)
+{
+	if (cpu_has_prefetch)
+		if (reg)
+			build_dst_pref(pref_offset_copy);
+		else
+			build_dst_pref(pref_offset_clear);
+	else if (cpu_has_cache_cdex_s)
+		build_cdex_s();
+	else if (cpu_has_cache_cdex_p)
+		build_cdex_p();
+
+	__build_store_reg(reg);
+}
+
+static inline void build_addiu_a2_a0(unsigned long offset)
+{
+	union mips_instruction mi;
+
+	BUG_ON(offset > 0x7fff);
+
+	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
+	mi.i_format.rs         = 4;		/* $a0 */
+	mi.i_format.rt         = 6;		/* $a2 */
+	mi.i_format.simmediate = offset;
+
+	emit_instruction(mi);
+}
+
+static inline void build_addiu_a1(unsigned long offset)
+{
+	union mips_instruction mi;
+
+	BUG_ON(offset > 0x7fff);
+
+	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
+	mi.i_format.rs         = 5;		/* $a1 */
+	mi.i_format.rt         = 5;		/* $a1 */
+	mi.i_format.simmediate = offset;
+
+	load_offset -= offset;
+
+	emit_instruction(mi);
+}
+
+static inline void build_addiu_a0(unsigned long offset)
+{
+	union mips_instruction mi;
+
+	BUG_ON(offset > 0x7fff);
+
+	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
+	mi.i_format.rs         = 4;		/* $a0 */
+	mi.i_format.rt         = 4;		/* $a0 */
+	mi.i_format.simmediate = offset;
+
+	store_offset -= offset;
+
+	emit_instruction(mi);
+}
+
+static inline void build_bne(unsigned int *dest)
+{
+	union mips_instruction mi;
+
+	mi.i_format.opcode = bne_op;
+	mi.i_format.rs     = 6;			/* $a2 */
+	mi.i_format.rt     = 4;			/* $a0 */
+	mi.i_format.simmediate = dest - epc - 1;
+
+	*epc++ = mi.word;
+	flush_delay_slot_or_nop();
+}
+
+static inline void build_jr_ra(void)
+{
+	union mips_instruction mi;
+
+	mi.r_format.opcode = spec_op;
+	mi.r_format.rs     = 31;
+	mi.r_format.rt     = 0;
+	mi.r_format.rd     = 0;
+	mi.r_format.re     = 0;
+	mi.r_format.func   = jr_op;
+
+	*epc++ = mi.word;
+	flush_delay_slot_or_nop();
+}
+
+void __init build_clear_page(void)
+{
+	unsigned int loop_start;
+
+	epc = (unsigned int *) &clear_page_array;
+	instruction_pending = 0;
+	store_offset = 0;
+
+	if (cpu_has_prefetch) {
+		switch (current_cpu_data.cputype) {
+		case CPU_RM9000:
+			/*
+			 * As a workaround for erratum G105 which make the
+			 * PrepareForStore hint unusable we fall back to
+			 * StoreRetained on the RM9000.  Once it is known which
+			 * versions of the RM9000 we'll be able to condition-
+			 * alize this.
+			 */
+
+		case CPU_R10000:
+		case CPU_R12000:
+			pref_src_mode = Pref_LoadStreamed;
+			pref_dst_mode = Pref_StoreStreamed;
+			break;
+
+		default:
+			pref_src_mode = Pref_LoadStreamed;
+			pref_dst_mode = Pref_PrepareForStore;
+			break;
+		}
+	}
+
+	build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0));
+
+	if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020))
+		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */
+
+dest = label();
+	do {
+		build_store_reg(0);
+		build_store_reg(0);
+		build_store_reg(0);
+		build_store_reg(0);
+	} while (store_offset < half_scache_line_size());
+	build_addiu_a0(2 * store_offset);
+	loop_start = store_offset;
+	do {
+		build_store_reg(0);
+		build_store_reg(0);
+		build_store_reg(0);
+		build_store_reg(0);
+	} while ((store_offset - loop_start) < half_scache_line_size());
+	build_bne(dest);
+
+	if (cpu_has_prefetch && pref_offset_clear) {
+		build_addiu_a2_a0(pref_offset_clear);
+	dest = label();
+		loop_start = store_offset;
+		do {
+			__build_store_reg(0);
+			__build_store_reg(0);
+			__build_store_reg(0);
+			__build_store_reg(0);
+		} while ((store_offset - loop_start) < half_scache_line_size());
+		build_addiu_a0(2 * store_offset);
+		loop_start = store_offset;
+		do {
+			__build_store_reg(0);
+			__build_store_reg(0);
+			__build_store_reg(0);
+			__build_store_reg(0);
+		} while ((store_offset - loop_start) < half_scache_line_size());
+		build_bne(dest);
+	}
+
+	build_jr_ra();
+
+	flush_icache_range((unsigned long)&clear_page_array,
+	                   (unsigned long) epc);
+
+	BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
+}
+
+void __init build_copy_page(void)
+{
+	unsigned int loop_start;
+
+	epc = (unsigned int *) &copy_page_array;
+	store_offset = load_offset = 0;
+	instruction_pending = 0;
+
+	build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0));
+
+	if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020))
+		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */
+
+dest = label();
+	loop_start = store_offset;
+	do {
+		build_load_reg( 8);
+		build_load_reg( 9);
+		build_load_reg(10);
+		build_load_reg(11);
+		build_store_reg( 8);
+		build_store_reg( 9);
+		build_store_reg(10);
+		build_store_reg(11);
+	} while ((store_offset - loop_start) < half_scache_line_size());
+	build_addiu_a0(2 * store_offset);
+	build_addiu_a1(2 * load_offset);
+	loop_start = store_offset;
+	do {
+		build_load_reg( 8);
+		build_load_reg( 9);
+		build_load_reg(10);
+		build_load_reg(11);
+		build_store_reg( 8);
+		build_store_reg( 9);
+		build_store_reg(10);
+		build_store_reg(11);
+	} while ((store_offset - loop_start) < half_scache_line_size());
+	build_bne(dest);
+
+	if (cpu_has_prefetch && pref_offset_copy) {
+		build_addiu_a2_a0(pref_offset_copy);
+	dest = label();
+		loop_start = store_offset;
+		do {
+			__build_load_reg( 8);
+			__build_load_reg( 9);
+			__build_load_reg(10);
+			__build_load_reg(11);
+			__build_store_reg( 8);
+			__build_store_reg( 9);
+			__build_store_reg(10);
+			__build_store_reg(11);
+		} while ((store_offset - loop_start) < half_scache_line_size());
+		build_addiu_a0(2 * store_offset);
+		build_addiu_a1(2 * load_offset);
+		loop_start = store_offset;
+		do {
+			__build_load_reg( 8);
+			__build_load_reg( 9);
+			__build_load_reg(10);
+			__build_load_reg(11);
+			__build_store_reg( 8);
+			__build_store_reg( 9);
+			__build_store_reg(10);
+			__build_store_reg(11);
+		} while ((store_offset - loop_start) < half_scache_line_size());
+		build_bne(dest);
+	}
+
+	build_jr_ra();
+
+	flush_icache_range((unsigned long)&copy_page_array,
+	                   (unsigned long) epc);
+
+	BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
+}
diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c
new file mode 100644
index 00000000000..59d131b5e53
--- /dev/null
+++ b/arch/mips/mm/pg-sb1.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 2000 SiByte, Inc.
+ * Copyright (C) 2005 Thiemo Seufer
+ *
+ * Written by Justin Carlson of SiByte, Inc.
+ *         and Kip Walker of Broadcom Corp.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/io.h>
+#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
+#include <asm/sibyte/sb1250_dma.h>
+
+#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
+#define SB1_PREF_LOAD_STREAMED_HINT "0"
+#define SB1_PREF_STORE_STREAMED_HINT "1"
+#else
+#define SB1_PREF_LOAD_STREAMED_HINT "4"
+#define SB1_PREF_STORE_STREAMED_HINT "5"
+#endif
+
+static inline void clear_page_cpu(void *page)
+{
+	unsigned char *addr = (unsigned char *) page;
+	unsigned char *end = addr + PAGE_SIZE;
+
+	/*
+	 * JDCXXX - This should be bottlenecked by the write buffer, but these
+	 * things tend to be mildly unpredictable...should check this on the
+	 * performance model
+	 *
+	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
+	 * since we know we're on an SB1, we force the assembler to take
+	 * 64-bit operands to speed things up
+	 */
+	__asm__ __volatile__(
+	"	.set	push		\n"
+	"	.set	mips4		\n"
+	"	.set	noreorder	\n"
+#ifdef CONFIG_CPU_HAS_PREFETCH
+	"	daddiu	%0, %0, 128	\n"
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%0)  \n"  /* Prefetch the first 4 lines */
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%0)  \n"
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%0)  \n"
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
+	"1:	sd	$0, -128(%0)	\n"  /* Throw out a cacheline of 0's */
+	"	sd	$0, -120(%0)	\n"
+	"	sd	$0, -112(%0)	\n"
+	"	sd	$0, -104(%0)	\n"
+	"	daddiu	%0, %0, 32	\n"
+	"	bnel	%0, %1, 1b	\n"
+	"	 pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
+	"	daddiu	%0, %0, -128	\n"
+#endif
+	"	sd	$0, 0(%0)	\n"  /* Throw out a cacheline of 0's */
+	"1:	sd	$0, 8(%0)	\n"
+	"	sd	$0, 16(%0)	\n"
+	"	sd	$0, 24(%0)	\n"
+	"	daddiu	%0, %0, 32	\n"
+	"	bnel	%0, %1, 1b	\n"
+	"	 sd	$0, 0(%0)	\n"
+	"	.set	pop		\n"
+	: "+r" (addr)
+	: "r" (end)
+	: "memory");
+}
+
+static inline void copy_page_cpu(void *to, void *from)
+{
+	unsigned char *src = (unsigned char *)from;
+	unsigned char *dst = (unsigned char *)to;
+	unsigned char *end = src + PAGE_SIZE;
+
+	/*
+	 * The pref's used here are using "streaming" hints, which cause the
+	 * copied data to be kicked out of the cache sooner.  A page copy often
+	 * ends up copying a lot more data than is commonly used, so this seems
+	 * to make sense in terms of reducing cache pollution, but I've no real
+	 * performance data to back this up
+	 */
+	__asm__ __volatile__(
+	"	.set	push		\n"
+	"	.set	mips4		\n"
+	"	.set	noreorder	\n"
+#ifdef CONFIG_CPU_HAS_PREFETCH
+	"	daddiu	%0, %0, 128	\n"
+	"	daddiu	%1, %1, 128	\n"
+	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n"  /* Prefetch the first 4 lines */
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
+	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -96(%0)\n"
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%1)\n"
+	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -64(%0)\n"
+	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%1)\n"
+	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
+	"1:	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%1)\n"
+# ifdef CONFIG_MIPS64
+	"	ld	$8, -128(%0)	\n"  /* Block copy a cacheline */
+	"	ld	$9, -120(%0)	\n"
+	"	ld	$10, -112(%0)	\n"
+	"	ld	$11, -104(%0)	\n"
+	"	sd	$8, -128(%1)	\n"
+	"	sd	$9, -120(%1)	\n"
+	"	sd	$10, -112(%1)	\n"
+	"	sd	$11, -104(%1)	\n"
+# else
+	"	lw	$2, -128(%0)	\n"  /* Block copy a cacheline */
+	"	lw	$3, -124(%0)	\n"
+	"	lw	$6, -120(%0)	\n"
+	"	lw	$7, -116(%0)	\n"
+	"	lw	$8, -112(%0)	\n"
+	"	lw	$9, -108(%0)	\n"
+	"	lw	$10, -104(%0)	\n"
+	"	lw	$11, -100(%0)	\n"
+	"	sw	$2, -128(%1)	\n"
+	"	sw	$3, -124(%1)	\n"
+	"	sw	$6, -120(%1)	\n"
+	"	sw	$7, -116(%1)	\n"
+	"	sw	$8, -112(%1)	\n"
+	"	sw	$9, -108(%1)	\n"
+	"	sw	$10, -104(%1)	\n"
+	"	sw	$11, -100(%1)	\n"
+# endif
+	"	daddiu	%0, %0, 32	\n"
+	"	daddiu	%1, %1, 32	\n"
+	"	bnel	%0, %2, 1b	\n"
+	"	 pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
+	"	daddiu	%0, %0, -128	\n"
+	"	daddiu	%1, %1, -128	\n"
+#endif
+#ifdef CONFIG_MIPS64
+	"	ld	$8, 0(%0)	\n"  /* Block copy a cacheline */
+	"1:	ld	$9, 8(%0)	\n"
+	"	ld	$10, 16(%0)	\n"
+	"	ld	$11, 24(%0)	\n"
+	"	sd	$8, 0(%1)	\n"
+	"	sd	$9, 8(%1)	\n"
+	"	sd	$10, 16(%1)	\n"
+	"	sd	$11, 24(%1)	\n"
+#else
+	"	lw	$2, 0(%0)	\n"  /* Block copy a cacheline */
+	"1:	lw	$3, 4(%0)	\n"
+	"	lw	$6, 8(%0)	\n"
+	"	lw	$7, 12(%0)	\n"
+	"	lw	$8, 16(%0)	\n"
+	"	lw	$9, 20(%0)	\n"
+	"	lw	$10, 24(%0)	\n"
+	"	lw	$11, 28(%0)	\n"
+	"	sw	$2, 0(%1)	\n"
+	"	sw	$3, 4(%1)	\n"
+	"	sw	$6, 8(%1)	\n"
+	"	sw	$7, 12(%1)	\n"
+	"	sw	$8, 16(%1)	\n"
+	"	sw	$9, 20(%1)	\n"
+	"	sw	$10, 24(%1)	\n"
+	"	sw	$11, 28(%1)	\n"
+#endif
+	"	daddiu	%0, %0, 32	\n"
+	"	daddiu	%1, %1, 32	\n"
+	"	bnel	%0, %2, 1b	\n"
+#ifdef CONFIG_MIPS64
+	"	 ld	$8, 0(%0)	\n"
+#else
+	"	 lw	$2, 0(%0)	\n"
+#endif
+	"	.set	pop		\n"
+	: "+r" (src), "+r" (dst)
+	: "r" (end)
+#ifdef CONFIG_MIPS64
+	: "$8","$9","$10","$11","memory");
+#else
+	: "$2","$3","$6","$7","$8","$9","$10","$11","memory");
+#endif
+}
+
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+
+/*
+ * Pad descriptors to cacheline, since each is exclusively owned by a
+ * particular CPU. 
+ */
+typedef struct dmadscr_s {
+	u64 dscr_a;
+	u64 dscr_b;
+	u64 pad_a;
+	u64 pad_b;
+} dmadscr_t;
+
+static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES)));
+
+void sb1_dma_init(void)
+{
+	int cpu = smp_processor_id();
+	u64 base_val = CPHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1);
+
+	bus_writeq(base_val,
+		   (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+	bus_writeq(base_val | M_DM_DSCR_BASE_RESET,
+		   (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+	bus_writeq(base_val | M_DM_DSCR_BASE_ENABL,
+		   (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+void clear_page(void *page)
+{
+	int cpu = smp_processor_id();
+
+	/* if the page is above Kseg0, use old way */
+	if ((long)KSEGX(page) != (long)CKSEG0)
+		return clear_page_cpu(page);
+
+	page_descr[cpu].dscr_a = CPHYSADDR(page) | M_DM_DSCRA_ZERO_MEM | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) &
+			   M_DM_DSCR_BASE_INTERRUPT))))
+		;
+	bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+void copy_page(void *to, void *from)
+{
+	unsigned long from_phys = CPHYSADDR(from);
+	unsigned long to_phys = CPHYSADDR(to);
+	int cpu = smp_processor_id();
+
+	/* if either page is above Kseg0, use old way */
+	if ((long)KSEGX(to) != (long)CKSEG0
+	    || (long)KSEGX(from) != (long)CKSEG0)
+		return copy_page_cpu(to, from);
+
+	page_descr[cpu].dscr_a = CPHYSADDR(to_phys) | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = CPHYSADDR(from_phys) | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) &
+				    M_DM_DSCR_BASE_INTERRUPT))))
+		;
+	bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
+
+void clear_page(void *page)
+{
+	return clear_page_cpu(page);
+}
+
+void copy_page(void *to, void *from)
+{
+	return copy_page_cpu(to, from);
+}
+
+#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
+
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
new file mode 100644
index 00000000000..4f07f81e850
--- /dev/null
+++ b/arch/mips/mm/pgtable-32.c
@@ -0,0 +1,97 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003 by Ralf Baechle
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+void pgd_init(unsigned long page)
+{
+	unsigned long *p = (unsigned long *) page;
+	int i;
+
+	for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
+		p[i + 0] = (unsigned long) invalid_pte_table;
+		p[i + 1] = (unsigned long) invalid_pte_table;
+		p[i + 2] = (unsigned long) invalid_pte_table;
+		p[i + 3] = (unsigned long) invalid_pte_table;
+		p[i + 4] = (unsigned long) invalid_pte_table;
+		p[i + 5] = (unsigned long) invalid_pte_table;
+		p[i + 6] = (unsigned long) invalid_pte_table;
+		p[i + 7] = (unsigned long) invalid_pte_table;
+	}
+}
+
+#ifdef CONFIG_HIGHMEM
+static void __init fixrange_init (unsigned long start, unsigned long end,
+	pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	int i, j;
+	unsigned long vaddr;
+
+	vaddr = start;
+	i = __pgd_offset(vaddr);
+	j = __pmd_offset(vaddr);
+	pgd = pgd_base + i;
+
+	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+		pmd = (pmd_t *)pgd;
+		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+			if (pmd_none(*pmd)) {
+				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+				set_pmd(pmd, __pmd((unsigned long)pte));
+				if (pte != pte_offset_kernel(pmd, 0))
+					BUG();
+			}
+			vaddr += PMD_SIZE;
+		}
+		j = 0;
+	}
+}
+#endif
+
+void __init pagetable_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long vaddr;
+	pgd_t *pgd, *pgd_base;
+	pmd_t *pmd;
+	pte_t *pte;
+#endif
+
+	/* Initialize the entire pgd.  */
+	pgd_init((unsigned long)swapper_pg_dir);
+	pgd_init((unsigned long)swapper_pg_dir
+		 + sizeof(pgd_t) * USER_PTRS_PER_PGD);
+
+#ifdef CONFIG_HIGHMEM
+	pgd_base = swapper_pg_dir;
+
+	/*
+	 * Fixed mappings:
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	fixrange_init(vaddr, 0, pgd_base);
+
+	/*
+	 * Permanent kmaps:
+	 */
+	vaddr = PKMAP_BASE;
+	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+	pgd = swapper_pg_dir + __pgd_offset(vaddr);
+	pmd = pmd_offset(pgd, vaddr);
+	pte = pte_offset_kernel(pmd, vaddr);
+	pkmap_page_table = pte;
+#endif
+}
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
new file mode 100644
index 00000000000..44b5e97fff6
--- /dev/null
+++ b/arch/mips/mm/pgtable-64.c
@@ -0,0 +1,58 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999, 2000 by Silicon Graphics
+ * Copyright (C) 2003 by Ralf Baechle
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+void pgd_init(unsigned long page)
+{
+	unsigned long *p, *end;
+
+ 	p = (unsigned long *) page;
+	end = p + PTRS_PER_PGD;
+
+	while (p < end) {
+		p[0] = (unsigned long) invalid_pmd_table;
+		p[1] = (unsigned long) invalid_pmd_table;
+		p[2] = (unsigned long) invalid_pmd_table;
+		p[3] = (unsigned long) invalid_pmd_table;
+		p[4] = (unsigned long) invalid_pmd_table;
+		p[5] = (unsigned long) invalid_pmd_table;
+		p[6] = (unsigned long) invalid_pmd_table;
+		p[7] = (unsigned long) invalid_pmd_table;
+		p += 8;
+	}
+}
+
+void pmd_init(unsigned long addr, unsigned long pagetable)
+{
+	unsigned long *p, *end;
+
+ 	p = (unsigned long *) addr;
+	end = p + PTRS_PER_PMD;
+
+	while (p < end) {
+		p[0] = (unsigned long)pagetable;
+		p[1] = (unsigned long)pagetable;
+		p[2] = (unsigned long)pagetable;
+		p[3] = (unsigned long)pagetable;
+		p[4] = (unsigned long)pagetable;
+		p[5] = (unsigned long)pagetable;
+		p[6] = (unsigned long)pagetable;
+		p[7] = (unsigned long)pagetable;
+		p += 8;
+	}
+}
+
+void __init pagetable_init(void)
+{
+	/* Initialize the entire pgd.  */
+	pgd_init((unsigned long)swapper_pg_dir);
+	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+}
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
new file mode 100644
index 00000000000..3b88fdeef32
--- /dev/null
+++ b/arch/mips/mm/pgtable.c
@@ -0,0 +1,36 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+void show_mem(void)
+{
+#ifndef CONFIG_DISCONTIGMEM  /* XXX(hch): later.. */
+	int pfn, total = 0, reserved = 0;
+	int shared = 0, cached = 0;
+	int highmem = 0;
+	struct page *page;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	pfn = max_mapnr;
+	while (pfn-- > 0) {
+		page = pfn_to_page(pfn);
+		total++;
+		if (PageHighMem(page))
+			highmem++;
+		if (PageReserved(page))
+			reserved++;
+		else if (PageSwapCache(page))
+			cached++;
+		else if (page_count(page))
+			shared += page_count(page) - 1;
+	}
+	printk("%d pages of RAM\n", total);
+	printk("%d pages of HIGHMEM\n",highmem);
+	printk("%d reserved pages\n",reserved);
+	printk("%d pages shared\n",shared);
+	printk("%d pages swap cached\n",cached);
+#endif
+}
diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c
new file mode 100644
index 00000000000..d236cf8b737
--- /dev/null
+++ b/arch/mips/mm/sc-ip22.c
@@ -0,0 +1,177 @@
+/*
+ * sc-ip22.c: Indy cache management functions.
+ *
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org),
+ * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com).
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/bcache.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/bootinfo.h>
+#include <asm/sgi/ip22.h>
+#include <asm/sgi/mc.h>
+
+/* Secondary cache size in bytes, if present.  */
+static unsigned long scache_size;
+
+#undef DEBUG_CACHE
+
+#define SC_SIZE 0x00080000
+#define SC_LINE 32
+#define CI_MASK (SC_SIZE - SC_LINE)
+#define SC_INDEX(n) ((n) & CI_MASK)
+
+static inline void indy_sc_wipe(unsigned long first, unsigned long last)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+	".set\tpush\t\t\t# indy_sc_wipe\n\t"
+	".set\tnoreorder\n\t"
+	".set\tmips3\n\t"
+	".set\tnoat\n\t"
+	"mfc0\t%2, $12\n\t"
+	"li\t$1, 0x80\t\t\t# Go 64 bit\n\t"
+	"mtc0\t$1, $12\n\t"
+
+	"dli\t$1, 0x9000000080000000\n\t"
+	"or\t%0, $1\t\t\t# first line to flush\n\t"
+	"or\t%1, $1\t\t\t# last line to flush\n\t"
+	".set\tat\n\t"
+
+	"1:\tsw\t$0, 0(%0)\n\t"
+	"bne\t%0, %1, 1b\n\t"
+	" daddu\t%0, 32\n\t"
+
+	"mtc0\t%2, $12\t\t\t# Back to 32 bit\n\t"
+	"nop; nop; nop; nop;\n\t"
+	".set\tpop"
+	: "=r" (first), "=r" (last), "=&r" (tmp)
+	: "0" (first), "1" (last));
+}
+
+static void indy_sc_wback_invalidate(unsigned long addr, unsigned long size)
+{
+	unsigned long first_line, last_line;
+	unsigned long flags;
+
+#ifdef DEBUG_CACHE
+	printk("indy_sc_wback_invalidate[%08lx,%08lx]", addr, size);
+#endif
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	/* Which lines to flush?  */
+	first_line = SC_INDEX(addr);
+	last_line = SC_INDEX(addr + size - 1);
+
+	local_irq_save(flags);
+	if (first_line <= last_line) {
+		indy_sc_wipe(first_line, last_line);
+		goto out;
+	}
+
+	indy_sc_wipe(first_line, SC_SIZE - SC_LINE);
+	indy_sc_wipe(0, last_line);
+out:
+	local_irq_restore(flags);
+}
+
+static void indy_sc_enable(void)
+{
+	unsigned long addr, tmp1, tmp2;
+
+	/* This is really cool... */
+#ifdef DEBUG_CACHE
+	printk("Enabling R4600 SCACHE\n");
+#endif
+	__asm__ __volatile__(
+	".set\tpush\n\t"
+	".set\tnoreorder\n\t"
+	".set\tmips3\n\t"
+	"mfc0\t%2, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"li\t%1, 0x80\n\t"
+	"mtc0\t%1, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"li\t%0, 0x1\n\t"
+	"dsll\t%0, 31\n\t"
+	"lui\t%1, 0x9000\n\t"
+	"dsll32\t%1, 0\n\t"
+	"or\t%0, %1, %0\n\t"
+	"sb\t$0, 0(%0)\n\t"
+	"mtc0\t$0, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	"mtc0\t%2, $12\n\t"
+	"nop; nop; nop; nop;\n\t"
+	".set\tpop"
+	: "=r" (tmp1), "=r" (tmp2), "=r" (addr));
+}
+
+static void indy_sc_disable(void)
+{
+	unsigned long tmp1, tmp2, tmp3;
+
+#ifdef DEBUG_CACHE
+	printk("Disabling R4600 SCACHE\n");
+#endif
+	__asm__ __volatile__(
+	".set\tpush\n\t"
+	".set\tnoreorder\n\t"
+	".set\tmips3\n\t"
+	"li\t%0, 0x1\n\t"
+	"dsll\t%0, 31\n\t"
+	"lui\t%1, 0x9000\n\t"
+	"dsll32\t%1, 0\n\t"
+	"or\t%0, %1, %0\n\t"
+	"mfc0\t%2, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"li\t%1, 0x80\n\t"
+	"mtc0\t%1, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"sh\t$0, 0(%0)\n\t"
+	"mtc0\t$0, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	"mtc0\t%2, $12\n\t"
+	"nop; nop; nop; nop\n\t"
+	".set\tpop"
+	: "=r" (tmp1), "=r" (tmp2), "=r" (tmp3));
+}
+
+static inline int __init indy_sc_probe(void)
+{
+	unsigned int size = ip22_eeprom_read(&sgimc->eeprom, 17);
+	if (size == 0)
+		return 0;
+
+	size <<= PAGE_SHIFT;
+	printk(KERN_INFO "R4600/R5000 SCACHE size %dK, linesize 32 bytes.\n",
+	       size >> 10);
+	scache_size = size;
+
+	return 1;
+}
+
+/* XXX Check with wje if the Indy caches can differenciate between
+   writeback + invalidate and just invalidate.  */
+struct bcache_ops indy_sc_ops = {
+	.bc_enable = indy_sc_enable,
+	.bc_disable = indy_sc_disable,
+	.bc_wback_inv = indy_sc_wback_invalidate,
+	.bc_inv = indy_sc_wback_invalidate
+};
+
+void __init indy_sc_init(void)
+{
+	if (indy_sc_probe()) {
+		indy_sc_enable();
+		bcops = &indy_sc_ops;
+	}
+}
diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c
new file mode 100644
index 00000000000..d35b6c1103a
--- /dev/null
+++ b/arch/mips/mm/sc-r5k.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org),
+ * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com).
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/mipsregs.h>
+#include <asm/bcache.h>
+#include <asm/cacheops.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/mmu_context.h>
+#include <asm/r4kcache.h>
+
+/* Secondary cache size in bytes, if present.  */
+static unsigned long scache_size;
+
+#define SC_LINE 32
+#define SC_PAGE (128*SC_LINE)
+
+static inline void blast_r5000_scache(void)
+{
+	unsigned long start = INDEX_BASE;
+	unsigned long end = start + scache_size;
+
+	while(start < end) {
+		cache_op(R5K_Page_Invalidate_S, start);
+		start += SC_PAGE;
+	}
+}
+
+static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	if (size >= scache_size) {
+		blast_r5000_scache();
+		return;
+	}
+
+	/* On the R5000 secondary cache we cannot
+	 * invalidate less than a page at a time.
+	 * The secondary cache is physically indexed, write-through.
+	 */
+	a = addr & ~(SC_PAGE - 1);
+	end = (addr + size - 1) & ~(SC_PAGE - 1);
+	while (a <= end) {
+		cache_op(R5K_Page_Invalidate_S, a);
+		a += SC_PAGE;
+	}
+}
+
+static void r5k_sc_enable(void)
+{
+        unsigned long flags;
+
+	local_irq_save(flags);
+	set_c0_config(R5K_CONF_SE);
+	blast_r5000_scache();
+	local_irq_restore(flags);
+}
+
+static void r5k_sc_disable(void)
+{
+        unsigned long flags;
+
+	local_irq_save(flags);
+	blast_r5000_scache();
+	clear_c0_config(R5K_CONF_SE);
+	local_irq_restore(flags);
+}
+
+static inline int __init r5k_sc_probe(void)
+{
+	unsigned long config = read_c0_config();
+
+	if (config & CONF_SC)
+		return(0);
+
+	scache_size = (512 * 1024) << ((config & R5K_CONF_SS) >> 20);
+
+	printk("R5000 SCACHE size %ldkB, linesize 32 bytes.\n",
+			scache_size >> 10);
+
+	return 1;
+}
+
+static struct bcache_ops r5k_sc_ops = {
+	.bc_enable = r5k_sc_enable,
+	.bc_disable = r5k_sc_disable,
+	.bc_wback_inv = r5k_dma_cache_inv_sc,
+	.bc_inv = r5k_dma_cache_inv_sc
+};
+
+void __init r5k_sc_init(void)
+{
+	if (r5k_sc_probe()) {
+		r5k_sc_enable();
+		bcops = &r5k_sc_ops;
+	}
+}
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
new file mode 100644
index 00000000000..4e92f931aab
--- /dev/null
+++ b/arch/mips/mm/sc-rm7k.c
@@ -0,0 +1,193 @@
+/*
+ * sc-rm7k.c: RM7000 cache management functions.
+ *
+ * Copyright (C) 1997, 2001, 2003, 2004 Ralf Baechle (ralf@linux-mips.org)
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/addrspace.h>
+#include <asm/bcache.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+#include <asm/processor.h>
+
+/* Primary cache parameters. */
+#define sc_lsize	32
+#define tc_pagesize	(32*128)
+
+/* Secondary cache parameters. */
+#define scache_size	(256*1024)	/* Fixed to 256KiB on RM7000 */
+
+extern unsigned long icache_way_size, dcache_way_size;
+
+#include <asm/r4kcache.h>
+
+int rm7k_tcache_enabled;
+
+/*
+ * Writeback and invalidate the primary cache dcache before DMA.
+ * (XXX These need to be fixed ...)
+ */
+static void rm7k_sc_wback_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	pr_debug("rm7k_sc_wback_inv[%08lx,%08lx]", addr, size);
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	a = addr & ~(sc_lsize - 1);
+	end = (addr + size - 1) & ~(sc_lsize - 1);
+	while (1) {
+		flush_scache_line(a);	/* Hit_Writeback_Inv_SD */
+		if (a == end)
+			break;
+		a += sc_lsize;
+	}
+
+	if (!rm7k_tcache_enabled)
+		return;
+
+	a = addr & ~(tc_pagesize - 1);
+	end = (addr + size - 1) & ~(tc_pagesize - 1);
+	while(1) {
+		invalidate_tcache_page(a);	/* Page_Invalidate_T */
+		if (a == end)
+			break;
+		a += tc_pagesize;
+	}
+}
+
+static void rm7k_sc_inv(unsigned long addr, unsigned long size)
+{
+	unsigned long end, a;
+
+	pr_debug("rm7k_sc_inv[%08lx,%08lx]", addr, size);
+
+	/* Catch bad driver code */
+	BUG_ON(size == 0);
+
+	a = addr & ~(sc_lsize - 1);
+	end = (addr + size - 1) & ~(sc_lsize - 1);
+	while (1) {
+		invalidate_scache_line(a);	/* Hit_Invalidate_SD */
+		if (a == end)
+			break;
+		a += sc_lsize;
+	}
+
+	if (!rm7k_tcache_enabled)
+		return;
+
+	a = addr & ~(tc_pagesize - 1);
+	end = (addr + size - 1) & ~(tc_pagesize - 1);
+	while(1) {
+		invalidate_tcache_page(a);	/* Page_Invalidate_T */
+		if (a == end)
+			break;
+		a += tc_pagesize;
+	}
+}
+
+/*
+ * This function is executed in the uncached segment CKSEG1.
+ * It must not touch the stack, because the stack pointer still points
+ * into CKSEG0.
+ *
+ * Three options:
+ *	- Write it in assembly and guarantee that we don't use the stack.
+ *	- Disable caching for CKSEG0 before calling it.
+ *	- Pray that GCC doesn't randomly start using the stack.
+ *
+ * This being Linux, we obviously take the least sane of those options -
+ * following DaveM's lead in c-r4k.c
+ *
+ * It seems we get our kicks from relying on unguaranteed behaviour in GCC
+ */
+static __init void __rm7k_sc_enable(void)
+{
+	int i;
+
+	set_c0_config(1 << 3);				/* CONF_SE */
+
+	write_c0_taglo(0);
+	write_c0_taghi(0);
+
+	for (i = 0; i < scache_size; i += sc_lsize) {
+		__asm__ __volatile__ (
+		      ".set noreorder\n\t"
+		      ".set mips3\n\t"
+		      "cache %1, (%0)\n\t"
+		      ".set mips0\n\t"
+		      ".set reorder"
+		      :
+		      : "r" (KSEG0ADDR(i)), "i" (Index_Store_Tag_SD));
+	}
+}
+
+static __init void rm7k_sc_enable(void)
+{
+	void (*func)(void) = (void *) KSEG1ADDR(&__rm7k_sc_enable);
+
+	if (read_c0_config() & 0x08)			/* CONF_SE */
+		return;
+
+	printk(KERN_INFO "Enabling secondary cache...");
+	func();
+}
+
+static void rm7k_sc_disable(void)
+{
+	clear_c0_config(1<<3);				/* CONF_SE */
+}
+
+struct bcache_ops rm7k_sc_ops = {
+	.bc_enable = rm7k_sc_enable,
+	.bc_disable = rm7k_sc_disable,
+	.bc_wback_inv = rm7k_sc_wback_inv,
+	.bc_inv = rm7k_sc_inv
+};
+
+void __init rm7k_sc_init(void)
+{
+	unsigned int config = read_c0_config();
+
+	if ((config >> 31) & 1)		/* Bit 31 set -> no S-Cache */
+		return;
+
+	printk(KERN_INFO "Secondary cache size %dK, linesize %d bytes.\n",
+	       (scache_size >> 10), sc_lsize);
+
+	if (!((config >> 3) & 1))	/* CONF_SE */
+		rm7k_sc_enable();
+
+	/*
+	 * While we're at it let's deal with the tertiary cache.
+	 */
+	if (!((config >> 17) & 1)) {
+
+		/*
+		 * We can't enable the L3 cache yet. There may be board-specific
+		 * magic necessary to turn it on, and blindly asking the CPU to
+		 * start using it would may give cache errors.
+		 *
+		 * Also, board-specific knowledge may allow us to use the
+		 * CACHE Flash_Invalidate_T instruction if the tag RAM supports
+		 * it, and may specify the size of the L3 cache so we don't have
+		 * to probe it.
+		 */
+		printk(KERN_INFO "Tertiary cache present, %s enabled\n",
+		       config&(1<<12) ? "already" : "not (yet)");
+
+		if ((config >> 12) & 1)
+			rm7k_tcache_enabled = 1;
+	}
+
+	bcops = &rm7k_sc_ops;
+}
diff --git a/arch/mips/mm/tlb-andes.c b/arch/mips/mm/tlb-andes.c
new file mode 100644
index 00000000000..167e08e9661
--- /dev/null
+++ b/arch/mips/mm/tlb-andes.c
@@ -0,0 +1,257 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1997, 1998, 1999 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2000 Kanoj Sarcar (kanoj@sgi.com)
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/mmu_context.h>
+
+extern void build_tlb_refill_handler(void);
+
+#define NTLB_ENTRIES       64
+#define NTLB_ENTRIES_HALF  32
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	unsigned long entry;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi() & ASID_MASK;
+	write_c0_entryhi(CKSEG0);
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+
+	entry = read_c0_wired();
+
+	/* Blast 'em all away. */
+	while (entry < NTLB_ENTRIES) {
+		write_c0_index(entry);
+		tlb_write_indexed();
+		entry++;
+	}
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	int cpu = smp_processor_id();
+	if (cpu_context(cpu, mm) != 0) {
+		drop_mmu_context(mm,cpu);
+	}
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+                           unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		unsigned long flags;
+		int size;
+
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		size = (size + 1) >> 1;
+		if (size <= NTLB_ENTRIES_HALF) {
+			int oldpid = (read_c0_entryhi() & ASID_MASK);
+			int newpid = (cpu_context(smp_processor_id(), mm)
+				      & ASID_MASK);
+
+			start &= (PAGE_MASK << 1);
+			end += ((PAGE_SIZE << 1) - 1);
+			end &= (PAGE_MASK << 1);
+			while(start < end) {
+				int idx;
+
+				write_c0_entryhi(start | newpid);
+				start += (PAGE_SIZE << 1);
+				tlb_probe();
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entrylo1(0);
+				write_c0_entryhi(CKSEG0);
+				if(idx < 0)
+					continue;
+				tlb_write_indexed();
+			}
+			write_c0_entryhi(oldpid);
+		} else {
+			drop_mmu_context(mm, cpu);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+
+	local_irq_save(flags);
+	if (size <= NTLB_ENTRIES_HALF) {
+		int pid = read_c0_entryhi();
+
+		start &= (PAGE_MASK << 1);
+		end += ((PAGE_SIZE << 1) - 1);
+		end &= (PAGE_MASK << 1);
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += (PAGE_SIZE << 1);
+			tlb_probe();
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entrylo1(0);
+			write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT+1)));
+			if (idx < 0)
+				continue;
+			tlb_write_indexed();
+		}
+		write_c0_entryhi(pid);
+	} else {
+		local_flush_tlb_all();
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	if (cpu_context(smp_processor_id(), vma->vm_mm) != 0) {
+		unsigned long flags;
+		int oldpid, newpid, idx;
+
+		newpid = (cpu_context(smp_processor_id(), vma->vm_mm) &
+			  ASID_MASK);
+		page &= (PAGE_MASK << 1);
+		local_irq_save(flags);
+		oldpid = (read_c0_entryhi() & ASID_MASK);
+		write_c0_entryhi(page | newpid);
+		tlb_probe();
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entrylo1(0);
+		write_c0_entryhi(CKSEG0);
+		if (idx < 0)
+			goto finish;
+		tlb_write_indexed();
+
+	finish:
+		write_c0_entryhi(oldpid);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * This one is only used for pages with the global bit set so we don't care
+ * much about the ASID.
+ */
+void local_flush_tlb_one(unsigned long page)
+{
+	unsigned long flags;
+	int oldpid, idx;
+
+	local_irq_save(flags);
+	page &= (PAGE_MASK << 1);
+	oldpid = read_c0_entryhi() & 0xff;
+	write_c0_entryhi(page);
+	tlb_probe();
+	idx = read_c0_index();
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+	if (idx >= 0) {
+		/* Make sure all entries differ. */
+		write_c0_entryhi(CKSEG0+(idx<<(PAGE_SHIFT+1)));
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(oldpid);
+
+	local_irq_restore(flags);
+}
+
+/* XXX Simplify this.  On the R10000 writing a TLB entry for an virtual
+   address that already exists will overwrite the old entry and not result
+   in TLB malfunction or TLB shutdown.  */
+void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	pid = read_c0_entryhi() & ASID_MASK;
+
+	if ((pid != (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK))
+	    || (cpu_context(smp_processor_id(), vma->vm_mm) == 0)) {
+		printk(KERN_WARNING
+		       "%s: Wheee, bogus tlbpid mmpid=%d tlbpid=%d\n",
+		       __FUNCTION__, (int) (cpu_context(smp_processor_id(),
+		       vma->vm_mm) & ASID_MASK), pid);
+	}
+
+	local_irq_save(flags);
+	address &= (PAGE_MASK << 1);
+	write_c0_entryhi(address | (pid));
+	pgdp = pgd_offset(vma->vm_mm, address);
+	tlb_probe();
+	pmdp = pmd_offset(pgdp, address);
+	idx = read_c0_index();
+	ptep = pte_offset_map(pmdp, address);
+	write_c0_entrylo0(pte_val(*ptep++) >> 6);
+	write_c0_entrylo1(pte_val(*ptep) >> 6);
+	write_c0_entryhi(address | pid);
+	if (idx < 0) {
+		tlb_write_random();
+	} else {
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(pid);
+	local_irq_restore(flags);
+}
+
+void __init tlb_init(void)
+{
+	/*
+	 * You should never change this register:
+	 *   - On R4600 1.7 the tlbp never hits for pages smaller than
+	 *     the value in the c0_pagemask register.
+	 *   - The entire mm handling assumes the c0_pagemask register to
+	 *     be set for 4kb pages.
+	 */
+	write_c0_pagemask(PM_4K);
+	write_c0_wired(0);
+	write_c0_framemask(0);
+
+        /* From this point on the ARC firmware is dead.  */
+	local_flush_tlb_all();
+
+	/* Did I tell you that ARC SUCKS?  */
+
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
new file mode 100644
index 00000000000..7948e9a5e37
--- /dev/null
+++ b/arch/mips/mm/tlb-r3k.c
@@ -0,0 +1,289 @@
+/*
+ * r2300.c: R2000 and R3000 specific mmu/cache code.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ *
+ * with a lot of changes to make this thing work for R3000s
+ * Tx39XX R4k style caches added. HK
+ * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
+ * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
+ * Copyright (C) 2002  Ralf Baechle
+ * Copyright (C) 2002  Maciej W. Rozycki
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/isadep.h>
+#include <asm/io.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+#undef DEBUG_TLB
+
+extern void build_tlb_refill_handler(void);
+
+/* CP0 hazard avoidance. */
+#define BARRIER				\
+	__asm__ __volatile__(		\
+		".set	push\n\t"	\
+		".set	noreorder\n\t"	\
+		"nop\n\t"		\
+		".set	pop\n\t")
+
+int r3k_have_wired_reg;		/* should be in cpu_data? */
+
+/* TLB operations. */
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	int entry;
+
+#ifdef DEBUG_TLB
+	printk("[tlball]");
+#endif
+
+	local_irq_save(flags);
+	old_ctx = read_c0_entryhi() & ASID_MASK;
+	write_c0_entrylo0(0);
+	entry = r3k_have_wired_reg ? read_c0_wired() : 8;
+	for (; entry < current_cpu_data.tlbsize; entry++) {
+		write_c0_index(entry << 8);
+		write_c0_entryhi((entry | 0x80000) << 12);
+		BARRIER;
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+#ifdef DEBUG_TLB
+		printk("[tlbmm<%lu>]", (unsigned long)cpu_context(cpu, mm));
+#endif
+		drop_mmu_context(mm, cpu);
+	}
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		unsigned long flags;
+		int size;
+
+#ifdef DEBUG_TLB
+		printk("[tlbrange<%lu,0x%08lx,0x%08lx>]",
+			cpu_context(cpu, mm) & ASID_MASK, start, end);
+#endif
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		if (size <= current_cpu_data.tlbsize) {
+			int oldpid = read_c0_entryhi() & ASID_MASK;
+			int newpid = cpu_context(cpu, mm) & ASID_MASK;
+
+			start &= PAGE_MASK;
+			end += PAGE_SIZE - 1;
+			end &= PAGE_MASK;
+			while (start < end) {
+				int idx;
+
+				write_c0_entryhi(start | newpid);
+				start += PAGE_SIZE;	/* BARRIER */
+				tlb_probe();
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entryhi(KSEG0);
+				if (idx < 0)		/* BARRIER */
+					continue;
+				tlb_write_indexed();
+			}
+			write_c0_entryhi(oldpid);
+		} else {
+			drop_mmu_context(mm, cpu);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+#ifdef DEBUG_TLB
+	printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end);
+#endif
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (size <= current_cpu_data.tlbsize) {
+		int pid = read_c0_entryhi();
+
+		start &= PAGE_MASK;
+		end += PAGE_SIZE - 1;
+		end &= PAGE_MASK;
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += PAGE_SIZE;		/* BARRIER */
+			tlb_probe();
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entryhi(KSEG0);
+			if (idx < 0)			/* BARRIER */
+				continue;
+			tlb_write_indexed();
+		}
+		write_c0_entryhi(pid);
+	} else {
+		local_flush_tlb_all();
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	int cpu = smp_processor_id();
+
+	if (!vma || cpu_context(cpu, vma->vm_mm) != 0) {
+		unsigned long flags;
+		int oldpid, newpid, idx;
+
+#ifdef DEBUG_TLB
+		printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page);
+#endif
+		newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK;
+		page &= PAGE_MASK;
+		local_irq_save(flags);
+		oldpid = read_c0_entryhi() & ASID_MASK;
+		write_c0_entryhi(page | newpid);
+		BARRIER;
+		tlb_probe();
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entryhi(KSEG0);
+		if (idx < 0)				/* BARRIER */
+			goto finish;
+		tlb_write_indexed();
+
+finish:
+		write_c0_entryhi(oldpid);
+		local_irq_restore(flags);
+	}
+}
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	pid = read_c0_entryhi() & ASID_MASK;
+
+#ifdef DEBUG_TLB
+	if ((pid != (cpu_context(cpu, vma->vm_mm) & ASID_MASK)) || (cpu_context(cpu, vma->vm_mm) == 0)) {
+		printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n",
+		       (cpu_context(cpu, vma->vm_mm)), pid);
+	}
+#endif
+
+	local_irq_save(flags);
+	address &= PAGE_MASK;
+	write_c0_entryhi(address | pid);
+	BARRIER;
+	tlb_probe();
+	idx = read_c0_index();
+	write_c0_entrylo0(pte_val(pte));
+	write_c0_entryhi(address | pid);
+	if (idx < 0) {					/* BARRIER */
+		tlb_write_random();
+	} else {
+		tlb_write_indexed();
+	}
+	write_c0_entryhi(pid);
+	local_irq_restore(flags);
+}
+
+void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+			    unsigned long entryhi, unsigned long pagemask)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	static unsigned long wired = 0;
+
+	if (r3k_have_wired_reg) {			/* TX39XX */
+		unsigned long old_pagemask;
+		unsigned long w;
+
+#ifdef DEBUG_TLB
+		printk("[tlbwired<entry lo0 %8x, hi %8x\n, pagemask %8x>]\n",
+		       entrylo0, entryhi, pagemask);
+#endif
+
+		local_irq_save(flags);
+		/* Save old context and create impossible VPN2 value */
+		old_ctx = read_c0_entryhi() & ASID_MASK;
+		old_pagemask = read_c0_pagemask();
+		w = read_c0_wired();
+		write_c0_wired(w + 1);
+		if (read_c0_wired() != w + 1) {
+			printk("[tlbwired] No WIRED reg?\n");
+			return;
+		}
+		write_c0_index(w << 8);
+		write_c0_pagemask(pagemask);
+		write_c0_entryhi(entryhi);
+		write_c0_entrylo0(entrylo0);
+		BARRIER;
+		tlb_write_indexed();
+
+		write_c0_entryhi(old_ctx);
+		write_c0_pagemask(old_pagemask);
+		local_flush_tlb_all();
+		local_irq_restore(flags);
+
+	} else if (wired < 8) {
+#ifdef DEBUG_TLB
+		printk("[tlbwired<entry lo0 %8x, hi %8x\n>]\n",
+		       entrylo0, entryhi);
+#endif
+
+		local_irq_save(flags);
+		old_ctx = read_c0_entryhi() & ASID_MASK;
+		write_c0_entrylo0(entrylo0);
+		write_c0_entryhi(entryhi);
+		write_c0_index(wired);
+		wired++;				/* BARRIER */
+		tlb_write_indexed();
+		write_c0_entryhi(old_ctx);
+		local_flush_tlb_all();
+		local_irq_restore(flags);
+	}
+}
+
+void __init tlb_init(void)
+{
+	local_flush_tlb_all();
+
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
new file mode 100644
index 00000000000..59d38bc05b6
--- /dev/null
+++ b/arch/mips/mm/tlb-r4k.c
@@ -0,0 +1,419 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org
+ * Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/cpu.h>
+#include <asm/bootinfo.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+extern void build_tlb_refill_handler(void);
+
+/* CP0 hazard avoidance. */
+#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \
+				     "nop; nop; nop; nop; nop; nop;\n\t" \
+				     ".set reorder\n\t")
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	int entry;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+
+	entry = read_c0_wired();
+
+	/* Blast 'em all away. */
+	while (entry < current_cpu_data.tlbsize) {
+		/*
+		 * Make sure all entries differ.  If they're not different
+		 * MIPS32 will take revenge ...
+		 */
+		write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1)));
+		write_c0_index(entry);
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+		entry++;
+	}
+	tlbw_use_hazard();
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0)
+		drop_mmu_context(mm,cpu);
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		unsigned long flags;
+		int size;
+
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		size = (size + 1) >> 1;
+		if (size <= current_cpu_data.tlbsize/2) {
+			int oldpid = read_c0_entryhi();
+			int newpid = cpu_asid(cpu, mm);
+
+			start &= (PAGE_MASK << 1);
+			end += ((PAGE_SIZE << 1) - 1);
+			end &= (PAGE_MASK << 1);
+			while (start < end) {
+				int idx;
+
+				write_c0_entryhi(start | newpid);
+				start += (PAGE_SIZE << 1);
+				mtc0_tlbw_hazard();
+				tlb_probe();
+				BARRIER;
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entrylo1(0);
+				if (idx < 0)
+					continue;
+				/* Make sure all entries differ. */
+				write_c0_entryhi(CKSEG0 +
+				                 (idx << (PAGE_SHIFT + 1)));
+				mtc0_tlbw_hazard();
+				tlb_write_indexed();
+			}
+			tlbw_use_hazard();
+			write_c0_entryhi(oldpid);
+		} else {
+			drop_mmu_context(mm, cpu);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+	if (size <= current_cpu_data.tlbsize / 2) {
+		int pid = read_c0_entryhi();
+
+		start &= (PAGE_MASK << 1);
+		end += ((PAGE_SIZE << 1) - 1);
+		end &= (PAGE_MASK << 1);
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += (PAGE_SIZE << 1);
+			mtc0_tlbw_hazard();
+			tlb_probe();
+			BARRIER;
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entrylo1(0);
+			if (idx < 0)
+				continue;
+			/* Make sure all entries differ. */
+			write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+			mtc0_tlbw_hazard();
+			tlb_write_indexed();
+		}
+		tlbw_use_hazard();
+		write_c0_entryhi(pid);
+	} else {
+		local_flush_tlb_all();
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, vma->vm_mm) != 0) {
+		unsigned long flags;
+		int oldpid, newpid, idx;
+
+		newpid = cpu_asid(cpu, vma->vm_mm);
+		page &= (PAGE_MASK << 1);
+		local_irq_save(flags);
+		oldpid = read_c0_entryhi();
+		write_c0_entryhi(page | newpid);
+		mtc0_tlbw_hazard();
+		tlb_probe();
+		BARRIER;
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entrylo1(0);
+		if (idx < 0)
+			goto finish;
+		/* Make sure all entries differ. */
+		write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+		tlbw_use_hazard();
+
+	finish:
+		write_c0_entryhi(oldpid);
+		local_irq_restore(flags);
+	}
+}
+
+/*
+ * This one is only used for pages with the global bit set so we don't care
+ * much about the ASID.
+ */
+void local_flush_tlb_one(unsigned long page)
+{
+	unsigned long flags;
+	int oldpid, idx;
+
+	local_irq_save(flags);
+	page &= (PAGE_MASK << 1);
+	oldpid = read_c0_entryhi();
+	write_c0_entryhi(page);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	BARRIER;
+	idx = read_c0_index();
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+	if (idx >= 0) {
+		/* Make sure all entries differ. */
+		write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+		mtc0_tlbw_hazard();
+		tlb_write_indexed();
+		tlbw_use_hazard();
+	}
+	write_c0_entryhi(oldpid);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * We will need multiple versions of update_mmu_cache(), one that just
+ * updates the TLB with the new pte(s), and another which also checks
+ * for the R4k "end of page" hardware bug and does the needy.
+ */
+void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	pid = read_c0_entryhi() & ASID_MASK;
+
+	local_irq_save(flags);
+	address &= (PAGE_MASK << 1);
+	write_c0_entryhi(address | pid);
+	pgdp = pgd_offset(vma->vm_mm, address);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	BARRIER;
+	pmdp = pmd_offset(pgdp, address);
+	idx = read_c0_index();
+	ptep = pte_offset_map(pmdp, address);
+
+ #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
+ 	write_c0_entrylo0(ptep->pte_high);
+ 	ptep++;
+ 	write_c0_entrylo1(ptep->pte_high);
+#else
+  	write_c0_entrylo0(pte_val(*ptep++) >> 6);
+  	write_c0_entrylo1(pte_val(*ptep) >> 6);
+#endif
+	write_c0_entryhi(address | pid);
+	mtc0_tlbw_hazard();
+	if (idx < 0)
+		tlb_write_random();
+	else
+		tlb_write_indexed();
+	tlbw_use_hazard();
+	write_c0_entryhi(pid);
+	local_irq_restore(flags);
+}
+
+#if 0
+static void r4k_update_mmu_cache_hwbug(struct vm_area_struct * vma,
+				       unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	unsigned int asid;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int idx;
+
+	local_irq_save(flags);
+	address &= (PAGE_MASK << 1);
+	asid = read_c0_entryhi() & ASID_MASK;
+	write_c0_entryhi(address | asid);
+	pgdp = pgd_offset(vma->vm_mm, address);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	BARRIER;
+	pmdp = pmd_offset(pgdp, address);
+	idx = read_c0_index();
+	ptep = pte_offset_map(pmdp, address);
+	write_c0_entrylo0(pte_val(*ptep++) >> 6);
+	write_c0_entrylo1(pte_val(*ptep) >> 6);
+	mtc0_tlbw_hazard();
+	if (idx < 0)
+		tlb_write_random();
+	else
+		tlb_write_indexed();
+	tlbw_use_hazard();
+	local_irq_restore(flags);
+}
+#endif
+
+void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+	unsigned long entryhi, unsigned long pagemask)
+{
+	unsigned long flags;
+	unsigned long wired;
+	unsigned long old_pagemask;
+	unsigned long old_ctx;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	old_pagemask = read_c0_pagemask();
+	wired = read_c0_wired();
+	write_c0_wired(wired + 1);
+	write_c0_index(wired);
+	BARRIER;
+	write_c0_pagemask(pagemask);
+	write_c0_entryhi(entryhi);
+	write_c0_entrylo0(entrylo0);
+	write_c0_entrylo1(entrylo1);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+
+	write_c0_entryhi(old_ctx);
+	BARRIER;
+	write_c0_pagemask(old_pagemask);
+	local_flush_tlb_all();
+	local_irq_restore(flags);
+}
+
+/*
+ * Used for loading TLB entries before trap_init() has started, when we
+ * don't actually want to add a wired entry which remains throughout the
+ * lifetime of the system
+ */
+
+static int temp_tlb_entry __initdata;
+
+__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
+			       unsigned long entryhi, unsigned long pagemask)
+{
+	int ret = 0;
+	unsigned long flags;
+	unsigned long wired;
+	unsigned long old_pagemask;
+	unsigned long old_ctx;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	old_pagemask = read_c0_pagemask();
+	wired = read_c0_wired();
+	if (--temp_tlb_entry < wired) {
+		printk(KERN_WARNING "No TLB space left for add_temporary_entry\n");
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	write_c0_index(temp_tlb_entry);
+	write_c0_pagemask(pagemask);
+	write_c0_entryhi(entryhi);
+	write_c0_entrylo0(entrylo0);
+	write_c0_entrylo1(entrylo1);
+	mtc0_tlbw_hazard();
+	tlb_write_indexed();
+	tlbw_use_hazard();
+
+	write_c0_entryhi(old_ctx);
+	write_c0_pagemask(old_pagemask);
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+
+static void __init probe_tlb(unsigned long config)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int reg;
+
+	/*
+	 * If this isn't a MIPS32 / MIPS64 compliant CPU.  Config 1 register
+	 * is not supported, we assume R4k style.  Cpu probing already figured
+	 * out the number of tlb entries.
+	 */
+	if ((c->processor_id  & 0xff0000) == PRID_COMP_LEGACY)
+		return;
+
+	reg = read_c0_config1();
+	if (!((config >> 7) & 3))
+		panic("No TLB present");
+
+	c->tlbsize = ((reg >> 25) & 0x3f) + 1;
+}
+
+void __init tlb_init(void)
+{
+	unsigned int config = read_c0_config();
+
+	/*
+	 * You should never change this register:
+	 *   - On R4600 1.7 the tlbp never hits for pages smaller than
+	 *     the value in the c0_pagemask register.
+	 *   - The entire mm handling assumes the c0_pagemask register to
+	 *     be set for 4kb pages.
+	 */
+	probe_tlb(config);
+	write_c0_pagemask(PM_DEFAULT_MASK);
+	write_c0_wired(0);
+	temp_tlb_entry = current_cpu_data.tlbsize - 1;
+	local_flush_tlb_all();
+
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
new file mode 100644
index 00000000000..1bfb09198ce
--- /dev/null
+++ b/arch/mips/mm/tlb-r8k.c
@@ -0,0 +1,250 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org
+ * Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/cpu.h>
+#include <asm/bootinfo.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+extern void build_tlb_refill_handler(void);
+
+#define TFP_TLB_SIZE		384
+#define TFP_TLB_SET_SHIFT	7
+
+/* CP0 hazard avoidance. */
+#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \
+				     "nop; nop; nop; nop; nop; nop;\n\t" \
+				     ".set reorder\n\t")
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	int entry;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi();
+	write_c0_entrylo(0);
+
+	for (entry = 0; entry < TFP_TLB_SIZE; entry++) {
+		write_c0_tlbset(entry >> TFP_TLB_SET_SHIFT);
+		write_c0_vaddr(entry << PAGE_SHIFT);
+		write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1)));
+		mtc0_tlbw_hazard();
+		tlb_write();
+	}
+	tlbw_use_hazard();
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	int cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0)
+		drop_mmu_context(mm,cpu);
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int cpu = smp_processor_id();
+	unsigned long flags;
+	int oldpid, newpid, size;
+
+	if (!cpu_context(cpu, mm))
+		return;
+
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+
+	local_irq_save(flags);
+
+	if (size > TFP_TLB_SIZE / 2) {
+		drop_mmu_context(mm, cpu);
+		goto out_restore;
+	}
+
+	oldpid = read_c0_entryhi();
+	newpid = cpu_asid(cpu, mm);
+
+	write_c0_entrylo(0);
+
+	start &= PAGE_MASK;
+	end += (PAGE_SIZE - 1);
+	end &= PAGE_MASK;
+	while (start < end) {
+		signed long idx;
+
+		write_c0_vaddr(start);
+		write_c0_entryhi(start);
+		start += PAGE_SIZE;
+		tlb_probe();
+		idx = read_c0_tlbset();
+		if (idx < 0)
+			continue;
+
+		write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+		tlb_write();
+	}
+	write_c0_entryhi(oldpid);
+
+out_restore:
+	local_irq_restore(flags);
+}
+
+/* Usable for KV1 addresses only! */
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+
+	if (size > TFP_TLB_SIZE / 2) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	local_irq_save(flags);
+
+	write_c0_entrylo(0);
+
+	start &= PAGE_MASK;
+	end += (PAGE_SIZE - 1);
+	end &= PAGE_MASK;
+	while (start < end) {
+		signed long idx;
+
+		write_c0_vaddr(start);
+		write_c0_entryhi(start);
+		start += PAGE_SIZE;
+		tlb_probe();
+		idx = read_c0_tlbset();
+		if (idx < 0)
+			continue;
+
+		write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+		tlb_write();
+	}
+
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	int cpu = smp_processor_id();
+	unsigned long flags;
+	int oldpid, newpid;
+	signed long idx;
+
+	if (!cpu_context(cpu, vma->vm_mm))
+		return;
+
+	newpid = cpu_asid(cpu, vma->vm_mm);
+	page &= PAGE_MASK;
+	local_irq_save(flags);
+	oldpid = read_c0_entryhi();
+	write_c0_vaddr(page);
+	write_c0_entryhi(newpid);
+	tlb_probe();
+	idx = read_c0_tlbset();
+	if (idx < 0)
+		goto finish;
+
+	write_c0_entrylo(0);
+	write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1)));
+	tlb_write();
+
+finish:
+	write_c0_entryhi(oldpid);
+	local_irq_restore(flags);
+}
+
+/*
+ * We will need multiple versions of update_mmu_cache(), one that just
+ * updates the TLB with the new pte(s), and another which also checks
+ * for the R4k "end of page" hardware bug and does the needy.
+ */
+void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	pid = read_c0_entryhi() & ASID_MASK;
+
+	local_irq_save(flags);
+	address &= PAGE_MASK;
+	write_c0_vaddr(address);
+	write_c0_entryhi(pid);
+	pgdp = pgd_offset(vma->vm_mm, address);
+	pmdp = pmd_offset(pgdp, address);
+	ptep = pte_offset_map(pmdp, address);
+	tlb_probe();
+
+	write_c0_entrylo(pte_val(*ptep++) >> 6);
+	tlb_write();
+
+	write_c0_entryhi(pid);
+	local_irq_restore(flags);
+}
+
+static void __init probe_tlb(unsigned long config)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	c->tlbsize = 3 * 128;		/* 3 sets each 128 entries */
+}
+
+void __init tlb_init(void)
+{
+	unsigned int config = read_c0_config();
+	unsigned long status;
+
+	probe_tlb(config);
+
+	status = read_c0_status();
+	status &= ~(ST0_UPS | ST0_KPS);
+#ifdef CONFIG_PAGE_SIZE_4KB
+	status |= (TFP_PAGESIZE_4K << 32) | (TFP_PAGESIZE_4K << 36);
+#elif defined(CONFIG_PAGE_SIZE_8KB)
+	status |= (TFP_PAGESIZE_8K << 32) | (TFP_PAGESIZE_8K << 36);
+#elif defined(CONFIG_PAGE_SIZE_16KB)
+	status |= (TFP_PAGESIZE_16K << 32) | (TFP_PAGESIZE_16K << 36);
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+	status |= (TFP_PAGESIZE_64K << 32) | (TFP_PAGESIZE_64K << 36);
+#endif
+	write_c0_status(status);
+
+	write_c0_wired(0);
+
+	local_flush_tlb_all();
+
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlb-sb1.c b/arch/mips/mm/tlb-sb1.c
new file mode 100644
index 00000000000..6256cafcf3a
--- /dev/null
+++ b/arch/mips/mm/tlb-sb1.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 2000, 2001, 2002, 2003 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+#include <linux/init.h>
+#include <asm/mmu_context.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+
+extern void build_tlb_refill_handler(void);
+
+#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))
+
+/* Dump the current entry* and pagemask registers */
+static inline void dump_cur_tlb_regs(void)
+{
+	unsigned int entryhihi, entryhilo, entrylo0hi, entrylo0lo, entrylo1hi;
+	unsigned int entrylo1lo, pagemask;
+
+	__asm__ __volatile__ (
+		".set push             \n"
+		".set noreorder        \n"
+		".set mips64           \n"
+		".set noat             \n"
+		"     tlbr             \n"
+		"     dmfc0  $1, $10   \n"
+		"     dsrl32 %0, $1, 0 \n"
+		"     sll    %1, $1, 0 \n"
+		"     dmfc0  $1, $2    \n"
+		"     dsrl32 %2, $1, 0 \n"
+		"     sll    %3, $1, 0 \n"
+		"     dmfc0  $1, $3    \n"
+		"     dsrl32 %4, $1, 0 \n"
+		"     sll    %5, $1, 0 \n"
+		"     mfc0   %6, $5    \n"
+		".set pop              \n"
+		: "=r" (entryhihi), "=r" (entryhilo),
+		  "=r" (entrylo0hi), "=r" (entrylo0lo),
+		  "=r" (entrylo1hi), "=r" (entrylo1lo),
+		  "=r" (pagemask));
+
+	printk("%08X%08X %08X%08X %08X%08X %08X",
+	       entryhihi, entryhilo,
+	       entrylo0hi, entrylo0lo,
+	       entrylo1hi, entrylo1lo,
+	       pagemask);
+}
+
+void sb1_dump_tlb(void)
+{
+	unsigned long old_ctx;
+	unsigned long flags;
+	int entry;
+	local_irq_save(flags);
+	old_ctx = read_c0_entryhi();
+	printk("Current TLB registers state:\n"
+	       "      EntryHi       EntryLo0          EntryLo1     PageMask  Index\n"
+	       "--------------------------------------------------------------------\n");
+	dump_cur_tlb_regs();
+	printk(" %08X\n", read_c0_index());
+	printk("\n\nFull TLB Dump:\n"
+	       "Idx      EntryHi       EntryLo0          EntryLo1     PageMask\n"
+	       "--------------------------------------------------------------\n");
+	for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
+		write_c0_index(entry);
+		printk("\n%02i ", entry);
+		dump_cur_tlb_regs();
+	}
+	printk("\n");
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned long old_ctx;
+	int entry;
+
+	local_irq_save(flags);
+	/* Save old context and create impossible VPN2 value */
+	old_ctx = read_c0_entryhi() & ASID_MASK;
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+
+	entry = read_c0_wired();
+	while (entry < current_cpu_data.tlbsize) {
+		write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+		write_c0_index(entry);
+		tlb_write_indexed();
+		entry++;
+	}
+	write_c0_entryhi(old_ctx);
+	local_irq_restore(flags);
+}
+
+
+/*
+ * Use a bogus region of memory (starting at 0) to sanitize the TLB's.
+ * Use increments of the maximum page size (16MB), and check for duplicate
+ * entries before doing a given write.  Then, when we're safe from collisions
+ * with the firmware, go back and give all the entries invalid addresses with
+ * the normal flush routine.  Wired entries will be killed as well!
+ */
+static void __init sb1_sanitize_tlb(void)
+{
+	int entry;
+	long addr = 0;
+
+	long inc = 1<<24;  /* 16MB */
+	/* Save old context and create impossible VPN2 value */
+	write_c0_entrylo0(0);
+	write_c0_entrylo1(0);
+	for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
+		do {
+			addr += inc;
+			write_c0_entryhi(addr);
+			tlb_probe();
+		} while ((int)(read_c0_index()) >= 0);
+		write_c0_index(entry);
+		tlb_write_indexed();
+	}
+	/* Now that we know we're safe from collisions, we can safely flush
+	   the TLB with the "normal" routine. */
+	local_flush_tlb_all();
+}
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (cpu_context(cpu, mm) != 0) {
+		int size;
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		size = (size + 1) >> 1;
+		if (size <= (current_cpu_data.tlbsize/2)) {
+			int oldpid = read_c0_entryhi() & ASID_MASK;
+			int newpid = cpu_asid(cpu, mm);
+
+			start &= (PAGE_MASK << 1);
+			end += ((PAGE_SIZE << 1) - 1);
+			end &= (PAGE_MASK << 1);
+			while (start < end) {
+				int idx;
+
+				write_c0_entryhi(start | newpid);
+				start += (PAGE_SIZE << 1);
+				tlb_probe();
+				idx = read_c0_index();
+				write_c0_entrylo0(0);
+				write_c0_entrylo1(0);
+				write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+				if (idx < 0)
+					continue;
+				tlb_write_indexed();
+			}
+			write_c0_entryhi(oldpid);
+		} else {
+			drop_mmu_context(mm, cpu);
+		}
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	size = (size + 1) >> 1;
+
+	local_irq_save(flags);
+	if (size <= (current_cpu_data.tlbsize/2)) {
+		int pid = read_c0_entryhi();
+
+		start &= (PAGE_MASK << 1);
+		end += ((PAGE_SIZE << 1) - 1);
+		end &= (PAGE_MASK << 1);
+
+		while (start < end) {
+			int idx;
+
+			write_c0_entryhi(start);
+			start += (PAGE_SIZE << 1);
+			tlb_probe();
+			idx = read_c0_index();
+			write_c0_entrylo0(0);
+			write_c0_entrylo1(0);
+			write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+			if (idx < 0)
+				continue;
+			tlb_write_indexed();
+		}
+		write_c0_entryhi(pid);
+	} else {
+		local_flush_tlb_all();
+	}
+	local_irq_restore(flags);
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long flags;
+	int cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	if (cpu_context(cpu, vma->vm_mm) != 0) {
+		int oldpid, newpid, idx;
+		newpid = cpu_asid(cpu, vma->vm_mm);
+		page &= (PAGE_MASK << 1);
+		oldpid = read_c0_entryhi() & ASID_MASK;
+		write_c0_entryhi(page | newpid);
+		tlb_probe();
+		idx = read_c0_index();
+		write_c0_entrylo0(0);
+		write_c0_entrylo1(0);
+		if (idx < 0)
+			goto finish;
+		/* Make sure all entries differ. */
+		write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+		tlb_write_indexed();
+	finish:
+		write_c0_entryhi(oldpid);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Remove one kernel space TLB entry.  This entry is assumed to be marked
+ * global so we don't do the ASID thing.
+ */
+void local_flush_tlb_one(unsigned long page)
+{
+	unsigned long flags;
+	int oldpid, idx;
+
+	page &= (PAGE_MASK << 1);
+	oldpid = read_c0_entryhi() & ASID_MASK;
+
+	local_irq_save(flags);
+	write_c0_entryhi(page);
+	tlb_probe();
+	idx = read_c0_index();
+	if (idx >= 0) {
+		/* Make sure all entries differ. */
+		write_c0_entryhi(UNIQUE_ENTRYHI(idx));
+		write_c0_entrylo0(0);
+		write_c0_entrylo1(0);
+		tlb_write_indexed();
+	}
+
+	write_c0_entryhi(oldpid);
+	local_irq_restore(flags);
+}
+
+/* All entries common to a mm share an asid.  To effectively flush
+   these entries, we just bump the asid. */
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	int cpu;
+
+	preempt_disable();
+
+	cpu = smp_processor_id();
+
+	if (cpu_context(cpu, mm) != 0) {
+		drop_mmu_context(mm, cpu);
+	}
+
+	preempt_enable();
+}
+
+/* Stolen from mips32 routines */
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int idx, pid;
+
+	/*
+	 * Handle debugger faulting in for debugee.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	local_irq_save(flags);
+
+	pid = read_c0_entryhi() & ASID_MASK;
+	address &= (PAGE_MASK << 1);
+	write_c0_entryhi(address | (pid));
+	pgdp = pgd_offset(vma->vm_mm, address);
+	tlb_probe();
+	pmdp = pmd_offset(pgdp, address);
+	idx = read_c0_index();
+	ptep = pte_offset_map(pmdp, address);
+	write_c0_entrylo0(pte_val(*ptep++) >> 6);
+	write_c0_entrylo1(pte_val(*ptep) >> 6);
+	if (idx < 0) {
+		tlb_write_random();
+	} else {
+		tlb_write_indexed();
+	}
+	local_irq_restore(flags);
+}
+
+void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
+	unsigned long entryhi, unsigned long pagemask)
+{
+	unsigned long flags;
+	unsigned long wired;
+	unsigned long old_pagemask;
+	unsigned long old_ctx;
+
+	local_irq_save(flags);
+	old_ctx = read_c0_entryhi() & 0xff;
+	old_pagemask = read_c0_pagemask();
+	wired = read_c0_wired();
+	write_c0_wired(wired + 1);
+	write_c0_index(wired);
+
+	write_c0_pagemask(pagemask);
+	write_c0_entryhi(entryhi);
+	write_c0_entrylo0(entrylo0);
+	write_c0_entrylo1(entrylo1);
+	tlb_write_indexed();
+
+	write_c0_entryhi(old_ctx);
+	write_c0_pagemask(old_pagemask);
+
+	local_flush_tlb_all();
+	local_irq_restore(flags);
+}
+
+/*
+ * This is called from loadmmu.c.  We have to set up all the
+ * memory management function pointers, as well as initialize
+ * the caches and tlbs
+ */
+void tlb_init(void)
+{
+	write_c0_pagemask(PM_DEFAULT_MASK);
+	write_c0_wired(0);
+
+	/*
+	 * We don't know what state the firmware left the TLB's in, so this is
+	 * the ultra-conservative way to flush the TLB's and avoid machine
+	 * check exceptions due to duplicate TLB entries
+	 */
+	sb1_sanitize_tlb();
+
+	build_tlb_refill_handler();
+}
diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S
new file mode 100644
index 00000000000..9e7f4175b49
--- /dev/null
+++ b/arch/mips/mm/tlbex-fault.S
@@ -0,0 +1,28 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999 Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ */
+#include <asm/mipsregs.h>
+#include <asm/page.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+
+	.macro tlb_do_page_fault, write
+	NESTED(tlb_do_page_fault_\write, PT_SIZE, sp)
+	SAVE_ALL
+	MFC0	a2, CP0_BADVADDR
+	KMODE
+	move	a0, sp
+	REG_S	a2, PT_BVADDR(sp)
+	li	a1, \write
+	jal	do_page_fault
+	j	ret_from_exception
+	END(tlb_do_page_fault_\write)
+	.endm
+
+	tlb_do_page_fault 0
+	tlb_do_page_fault 1
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
new file mode 100644
index 00000000000..87e229f4d3d
--- /dev/null
+++ b/arch/mips/mm/tlbex.c
@@ -0,0 +1,1815 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Synthesize TLB refill handlers at runtime.
+ *
+ * Copyright (C) 2004,2005 by Thiemo Seufer
+ */
+
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/inst.h>
+#include <asm/elf.h>
+#include <asm/smp.h>
+#include <asm/war.h>
+
+/* #define DEBUG_TLB */
+
+static __init int __attribute__((unused)) r45k_bvahwbug(void)
+{
+	/* XXX: We should probe for the presence of this bug, but we don't. */
+	return 0;
+}
+
+static __init int __attribute__((unused)) r4k_250MHZhwbug(void)
+{
+	/* XXX: We should probe for the presence of this bug, but we don't. */
+	return 0;
+}
+
+static __init int __attribute__((unused)) bcm1250_m3_war(void)
+{
+	return BCM1250_M3_WAR;
+}
+
+static __init int __attribute__((unused)) r10000_llsc_war(void)
+{
+	return R10000_LLSC_WAR;
+}
+
+/*
+ * A little micro-assembler, intended for TLB refill handler
+ * synthesizing. It is intentionally kept simple, does only support
+ * a subset of instructions, and does not try to hide pipeline effects
+ * like branch delay slots.
+ */
+
+enum fields
+{
+	RS = 0x001,
+	RT = 0x002,
+	RD = 0x004,
+	RE = 0x008,
+	SIMM = 0x010,
+	UIMM = 0x020,
+	BIMM = 0x040,
+	JIMM = 0x080,
+	FUNC = 0x100,
+};
+
+#define OP_MASK		0x2f
+#define OP_SH		26
+#define RS_MASK		0x1f
+#define RS_SH		21
+#define RT_MASK		0x1f
+#define RT_SH		16
+#define RD_MASK		0x1f
+#define RD_SH		11
+#define RE_MASK		0x1f
+#define RE_SH		6
+#define IMM_MASK	0xffff
+#define IMM_SH		0
+#define JIMM_MASK	0x3ffffff
+#define JIMM_SH		0
+#define FUNC_MASK	0x2f
+#define FUNC_SH		0
+
+enum opcode {
+	insn_invalid,
+	insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
+	insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
+	insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0,
+	insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32,
+	insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld,
+	insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0,
+	insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll,
+	insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi,
+	insn_tlbwr, insn_xor, insn_xori
+};
+
+struct insn {
+	enum opcode opcode;
+	u32 match;
+	enum fields fields;
+};
+
+/* This macro sets the non-variable bits of an instruction. */
+#define M(a, b, c, d, e, f)					\
+	((a) << OP_SH						\
+	 | (b) << RS_SH						\
+	 | (c) << RT_SH						\
+	 | (d) << RD_SH						\
+	 | (e) << RE_SH						\
+	 | (f) << FUNC_SH)
+
+static __initdata struct insn insn_table[] = {
+	{ insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD },
+	{ insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD },
+	{ insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM },
+	{ insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM },
+	{ insn_beql, M(beql_op,0,0,0,0,0), RS | RT | BIMM },
+	{ insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM },
+	{ insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM },
+	{ insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM },
+	{ insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM },
+	{ insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM },
+	{ insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD },
+	{ insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD },
+	{ insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD },
+	{ insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE },
+	{ insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE },
+	{ insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE },
+	{ insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE },
+	{ insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE },
+	{ insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD },
+	{ insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 },
+	{ insn_j, M(j_op,0,0,0,0,0), JIMM },
+	{ insn_jal, M(jal_op,0,0,0,0,0), JIMM },
+	{ insn_jr, M(spec_op,0,0,0,0,jr_op), RS },
+	{ insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_ll, M(ll_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_lld, M(lld_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM },
+	{ insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD },
+	{ insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD },
+	{ insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM },
+	{ insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 },
+	{ insn_sc, M(sc_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_scd, M(scd_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE },
+	{ insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE },
+	{ insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE },
+	{ insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD },
+	{ insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM },
+	{ insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 },
+	{ insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 },
+	{ insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 },
+	{ insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD },
+	{ insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM },
+	{ insn_invalid, 0, 0 }
+};
+
+#undef M
+
+static __init u32 build_rs(u32 arg)
+{
+	if (arg & ~RS_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return (arg & RS_MASK) << RS_SH;
+}
+
+static __init u32 build_rt(u32 arg)
+{
+	if (arg & ~RT_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return (arg & RT_MASK) << RT_SH;
+}
+
+static __init u32 build_rd(u32 arg)
+{
+	if (arg & ~RD_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return (arg & RD_MASK) << RD_SH;
+}
+
+static __init u32 build_re(u32 arg)
+{
+	if (arg & ~RE_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return (arg & RE_MASK) << RE_SH;
+}
+
+static __init u32 build_simm(s32 arg)
+{
+	if (arg > 0x7fff || arg < -0x8000)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return arg & 0xffff;
+}
+
+static __init u32 build_uimm(u32 arg)
+{
+	if (arg & ~IMM_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return arg & IMM_MASK;
+}
+
+static __init u32 build_bimm(s32 arg)
+{
+	if (arg > 0x1ffff || arg < -0x20000)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	if (arg & 0x3)
+		printk(KERN_WARNING "Invalid TLB synthesizer branch target\n");
+
+	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff);
+}
+
+static __init u32 build_jimm(u32 arg)
+{
+	if (arg & ~((JIMM_MASK) << 2))
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return (arg >> 2) & JIMM_MASK;
+}
+
+static __init u32 build_func(u32 arg)
+{
+	if (arg & ~FUNC_MASK)
+		printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+	return arg & FUNC_MASK;
+}
+
+/*
+ * The order of opcode arguments is implicitly left to right,
+ * starting with RS and ending with FUNC or IMM.
+ */
+static void __init build_insn(u32 **buf, enum opcode opc, ...)
+{
+	struct insn *ip = NULL;
+	unsigned int i;
+	va_list ap;
+	u32 op;
+
+	for (i = 0; insn_table[i].opcode != insn_invalid; i++)
+		if (insn_table[i].opcode == opc) {
+			ip = &insn_table[i];
+			break;
+		}
+
+	if (!ip)
+		panic("Unsupported TLB synthesizer instruction %d", opc);
+
+	op = ip->match;
+	va_start(ap, opc);
+	if (ip->fields & RS) op |= build_rs(va_arg(ap, u32));
+	if (ip->fields & RT) op |= build_rt(va_arg(ap, u32));
+	if (ip->fields & RD) op |= build_rd(va_arg(ap, u32));
+	if (ip->fields & RE) op |= build_re(va_arg(ap, u32));
+	if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32));
+	if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32));
+	if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32));
+	if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32));
+	if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32));
+	va_end(ap);
+
+	**buf = op;
+	(*buf)++;
+}
+
+#define I_u1u2u3(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b, unsigned int c)			\
+	{							\
+		build_insn(buf, insn##op, a, b, c);		\
+	}
+
+#define I_u2u1u3(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b, unsigned int c)			\
+	{							\
+		build_insn(buf, insn##op, b, a, c);		\
+	}
+
+#define I_u3u1u2(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b, unsigned int c)			\
+	{							\
+		build_insn(buf, insn##op, b, c, a);		\
+	}
+
+#define I_u1u2s3(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b, signed int c)			\
+	{							\
+		build_insn(buf, insn##op, a, b, c);		\
+	}
+
+#define I_u2s3u1(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	signed int b, unsigned int c)			\
+	{							\
+		build_insn(buf, insn##op, c, a, b);		\
+	}
+
+#define I_u2u1s3(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b, signed int c)			\
+	{							\
+		build_insn(buf, insn##op, b, a, c);		\
+	}
+
+#define I_u1u2(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	unsigned int b)					\
+	{							\
+		build_insn(buf, insn##op, a, b);		\
+	}
+
+#define I_u1s2(op)						\
+	static inline void i##op(u32 **buf, unsigned int a,	\
+	 	signed int b)					\
+	{							\
+		build_insn(buf, insn##op, a, b);		\
+	}
+
+#define I_u1(op)						\
+	static inline void i##op(u32 **buf, unsigned int a)	\
+	{							\
+		build_insn(buf, insn##op, a);			\
+	}
+
+#define I_0(op)							\
+	static inline void i##op(u32 **buf)			\
+	{							\
+		build_insn(buf, insn##op);			\
+	}
+
+I_u2u1s3(_addiu);
+I_u3u1u2(_addu);
+I_u2u1u3(_andi);
+I_u3u1u2(_and);
+I_u1u2s3(_beq);
+I_u1u2s3(_beql);
+I_u1s2(_bgez);
+I_u1s2(_bgezl);
+I_u1s2(_bltz);
+I_u1s2(_bltzl);
+I_u1u2s3(_bne);
+I_u1u2(_dmfc0);
+I_u1u2(_dmtc0);
+I_u2u1s3(_daddiu);
+I_u3u1u2(_daddu);
+I_u2u1u3(_dsll);
+I_u2u1u3(_dsll32);
+I_u2u1u3(_dsra);
+I_u2u1u3(_dsrl);
+I_u2u1u3(_dsrl32);
+I_u3u1u2(_dsubu);
+I_0(_eret);
+I_u1(_j);
+I_u1(_jal);
+I_u1(_jr);
+I_u2s3u1(_ld);
+I_u2s3u1(_ll);
+I_u2s3u1(_lld);
+I_u1s2(_lui);
+I_u2s3u1(_lw);
+I_u1u2(_mfc0);
+I_u1u2(_mtc0);
+I_u2u1u3(_ori);
+I_0(_rfe);
+I_u2s3u1(_sc);
+I_u2s3u1(_scd);
+I_u2s3u1(_sd);
+I_u2u1u3(_sll);
+I_u2u1u3(_sra);
+I_u2u1u3(_srl);
+I_u3u1u2(_subu);
+I_u2s3u1(_sw);
+I_0(_tlbp);
+I_0(_tlbwi);
+I_0(_tlbwr);
+I_u3u1u2(_xor)
+I_u2u1u3(_xori);
+
+/*
+ * handling labels
+ */
+
+enum label_id {
+	label_invalid,
+	label_second_part,
+	label_leave,
+	label_vmalloc,
+	label_vmalloc_done,
+	label_tlbw_hazard,
+	label_split,
+	label_nopage_tlbl,
+	label_nopage_tlbs,
+	label_nopage_tlbm,
+	label_smp_pgtable_change,
+	label_r3000_write_probe_fail,
+	label_r3000_write_probe_ok
+};
+
+struct label {
+	u32 *addr;
+	enum label_id lab;
+};
+
+static __init void build_label(struct label **lab, u32 *addr,
+			       enum label_id l)
+{
+	(*lab)->addr = addr;
+	(*lab)->lab = l;
+	(*lab)++;
+}
+
+#define L_LA(lb)						\
+	static inline void l##lb(struct label **lab, u32 *addr) \
+	{							\
+		build_label(lab, addr, label##lb);		\
+	}
+
+L_LA(_second_part)
+L_LA(_leave)
+L_LA(_vmalloc)
+L_LA(_vmalloc_done)
+L_LA(_tlbw_hazard)
+L_LA(_split)
+L_LA(_nopage_tlbl)
+L_LA(_nopage_tlbs)
+L_LA(_nopage_tlbm)
+L_LA(_smp_pgtable_change)
+L_LA(_r3000_write_probe_fail)
+L_LA(_r3000_write_probe_ok)
+
+/* convenience macros for instructions */
+#ifdef CONFIG_MIPS64
+# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd)
+# define i_LL(buf, rs, rt, off) i_lld(buf, rs, rt, off)
+# define i_SC(buf, rs, rt, off) i_scd(buf, rs, rt, off)
+#else
+# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd)
+# define i_LL(buf, rs, rt, off) i_ll(buf, rs, rt, off)
+# define i_SC(buf, rs, rt, off) i_sc(buf, rs, rt, off)
+#endif
+
+#define i_b(buf, off) i_beq(buf, 0, 0, off)
+#define i_beqz(buf, rs, off) i_beq(buf, rs, 0, off)
+#define i_beqzl(buf, rs, off) i_beql(buf, rs, 0, off)
+#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
+#define i_bnezl(buf, rs, off) i_bnel(buf, rs, 0, off)
+#define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
+#define i_nop(buf) i_sll(buf, 0, 0, 0)
+#define i_ssnop(buf) i_sll(buf, 0, 0, 1)
+#define i_ehb(buf) i_sll(buf, 0, 0, 3)
+
+#ifdef CONFIG_MIPS64
+static __init int __attribute__((unused)) in_compat_space_p(long addr)
+{
+	/* Is this address in 32bit compat space? */
+	return (((addr) & 0xffffffff00000000) == 0xffffffff00000000);
+}
+
+static __init int __attribute__((unused)) rel_highest(long val)
+{
+	return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int __attribute__((unused)) rel_higher(long val)
+{
+	return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
+}
+#endif
+
+static __init int rel_hi(long val)
+{
+	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int rel_lo(long val)
+{
+	return ((val & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
+{
+#if CONFIG_MIPS64
+	if (!in_compat_space_p(addr)) {
+		i_lui(buf, rs, rel_highest(addr));
+		if (rel_higher(addr))
+			i_daddiu(buf, rs, rs, rel_higher(addr));
+		if (rel_hi(addr)) {
+			i_dsll(buf, rs, rs, 16);
+			i_daddiu(buf, rs, rs, rel_hi(addr));
+			i_dsll(buf, rs, rs, 16);
+		} else
+			i_dsll32(buf, rs, rs, 0);
+	} else
+#endif
+		i_lui(buf, rs, rel_hi(addr));
+}
+
+static __init void __attribute__((unused)) i_LA(u32 **buf, unsigned int rs,
+						long addr)
+{
+	i_LA_mostly(buf, rs, addr);
+	if (rel_lo(addr))
+		i_ADDIU(buf, rs, rs, rel_lo(addr));
+}
+
+/*
+ * handle relocations
+ */
+
+struct reloc {
+	u32 *addr;
+	unsigned int type;
+	enum label_id lab;
+};
+
+static __init void r_mips_pc16(struct reloc **rel, u32 *addr,
+			       enum label_id l)
+{
+	(*rel)->addr = addr;
+	(*rel)->type = R_MIPS_PC16;
+	(*rel)->lab = l;
+	(*rel)++;
+}
+
+static inline void __resolve_relocs(struct reloc *rel, struct label *lab)
+{
+	long laddr = (long)lab->addr;
+	long raddr = (long)rel->addr;
+
+	switch (rel->type) {
+	case R_MIPS_PC16:
+		*rel->addr |= build_bimm(laddr - (raddr + 4));
+		break;
+
+	default:
+		panic("Unsupported TLB synthesizer relocation %d",
+		      rel->type);
+	}
+}
+
+static __init void resolve_relocs(struct reloc *rel, struct label *lab)
+{
+	struct label *l;
+
+	for (; rel->lab != label_invalid; rel++)
+		for (l = lab; l->lab != label_invalid; l++)
+			if (rel->lab == l->lab)
+				__resolve_relocs(rel, l);
+}
+
+static __init void move_relocs(struct reloc *rel, u32 *first, u32 *end,
+			       long off)
+{
+	for (; rel->lab != label_invalid; rel++)
+		if (rel->addr >= first && rel->addr < end)
+			rel->addr += off;
+}
+
+static __init void move_labels(struct label *lab, u32 *first, u32 *end,
+			       long off)
+{
+	for (; lab->lab != label_invalid; lab++)
+		if (lab->addr >= first && lab->addr < end)
+			lab->addr += off;
+}
+
+static __init void copy_handler(struct reloc *rel, struct label *lab,
+				u32 *first, u32 *end, u32 *target)
+{
+	long off = (long)(target - first);
+
+	memcpy(target, first, (end - first) * sizeof(u32));
+
+	move_relocs(rel, first, end, off);
+	move_labels(lab, first, end, off);
+}
+
+static __init int __attribute__((unused)) insn_has_bdelay(struct reloc *rel,
+							  u32 *addr)
+{
+	for (; rel->lab != label_invalid; rel++) {
+		if (rel->addr == addr
+		    && (rel->type == R_MIPS_PC16
+			|| rel->type == R_MIPS_26))
+			return 1;
+	}
+
+	return 0;
+}
+
+/* convenience functions for labeled branches */
+static void __attribute__((unused)) il_bltz(u32 **p, struct reloc **r,
+					    unsigned int reg, enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_bltz(p, reg, 0);
+}
+
+static void __attribute__((unused)) il_b(u32 **p, struct reloc **r,
+					 enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_b(p, 0);
+}
+
+static void il_beqz(u32 **p, struct reloc **r, unsigned int reg,
+		    enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_beqz(p, reg, 0);
+}
+
+static void __attribute__((unused))
+il_beqzl(u32 **p, struct reloc **r, unsigned int reg, enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_beqzl(p, reg, 0);
+}
+
+static void il_bnez(u32 **p, struct reloc **r, unsigned int reg,
+		    enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_bnez(p, reg, 0);
+}
+
+static void il_bgezl(u32 **p, struct reloc **r, unsigned int reg,
+		     enum label_id l)
+{
+	r_mips_pc16(r, *p, l);
+	i_bgezl(p, reg, 0);
+}
+
+/* The only general purpose registers allowed in TLB handlers. */
+#define K0		26
+#define K1		27
+
+/* Some CP0 registers */
+#define C0_INDEX	0
+#define C0_ENTRYLO0	2
+#define C0_ENTRYLO1	3
+#define C0_CONTEXT	4
+#define C0_BADVADDR	8
+#define C0_ENTRYHI	10
+#define C0_EPC		14
+#define C0_XCONTEXT	20
+
+#ifdef CONFIG_MIPS64
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT)
+#else
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT)
+#endif
+
+/* The worst case length of the handler is around 18 instructions for
+ * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
+ * Maximum space available is 32 instructions for R3000 and 64
+ * instructions for R4000.
+ *
+ * We deliberately chose a buffer size of 128, so we won't scribble
+ * over anything important on overflow before we panic.
+ */
+static __initdata u32 tlb_handler[128];
+
+/* simply assume worst case size for labels and relocs */
+static __initdata struct label labels[128];
+static __initdata struct reloc relocs[128];
+
+/*
+ * The R3000 TLB handler is simple.
+ */
+static void __init build_r3000_tlb_refill_handler(void)
+{
+	long pgdc = (long)pgd_current;
+	u32 *p;
+
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+	p = tlb_handler;
+
+	i_mfc0(&p, K0, C0_BADVADDR);
+	i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */
+	i_lw(&p, K1, rel_lo(pgdc), K1);
+	i_srl(&p, K0, K0, 22); /* load delay */
+	i_sll(&p, K0, K0, 2);
+	i_addu(&p, K1, K1, K0);
+	i_mfc0(&p, K0, C0_CONTEXT);
+	i_lw(&p, K1, 0, K1); /* cp0 delay */
+	i_andi(&p, K0, K0, 0xffc); /* load delay */
+	i_addu(&p, K1, K1, K0);
+	i_lw(&p, K0, 0, K1);
+	i_nop(&p); /* load delay */
+	i_mtc0(&p, K0, C0_ENTRYLO0);
+	i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
+	i_tlbwr(&p); /* cp0 delay */
+	i_jr(&p, K1);
+	i_rfe(&p); /* branch delay */
+
+	if (p > tlb_handler + 32)
+		panic("TLB refill handler space exceeded");
+
+	printk("Synthesized TLB handler (%u instructions).\n",
+	       (unsigned int)(p - tlb_handler));
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < (p - tlb_handler); i++)
+			printk("%08x\n", tlb_handler[i]);
+	}
+#endif
+
+	memcpy((void *)CAC_BASE, tlb_handler, 0x80);
+	flush_icache_range(CAC_BASE, CAC_BASE + 0x80);
+}
+
+/*
+ * The R4000 TLB handler is much more complicated. We have two
+ * consecutive handler areas with 32 instructions space each.
+ * Since they aren't used at the same time, we can overflow in the
+ * other one.To keep things simple, we first assume linear space,
+ * then we relocate it to the final handler layout as needed.
+ */
+static __initdata u32 final_handler[64];
+
+/*
+ * Hazards
+ *
+ * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:
+ * 2. A timing hazard exists for the TLBP instruction.
+ *
+ *      stalling_instruction
+ *      TLBP
+ *
+ * The JTLB is being read for the TLBP throughout the stall generated by the
+ * previous instruction. This is not really correct as the stalling instruction
+ * can modify the address used to access the JTLB.  The failure symptom is that
+ * the TLBP instruction will use an address created for the stalling instruction
+ * and not the address held in C0_ENHI and thus report the wrong results.
+ *
+ * The software work-around is to not allow the instruction preceding the TLBP
+ * to stall - make it an NOP or some other instruction guaranteed not to stall.
+ *
+ * Errata 2 will not be fixed.  This errata is also on the R5000.
+ *
+ * As if we MIPS hackers wouldn't know how to nop pipelines happy ...
+ */
+static __init void __attribute__((unused)) build_tlb_probe_entry(u32 **p)
+{
+	switch (current_cpu_data.cputype) {
+	case CPU_R5000:
+	case CPU_R5000A:
+	case CPU_NEVADA:
+		i_nop(p);
+		i_tlbp(p);
+		break;
+
+	default:
+		i_tlbp(p);
+		break;
+	}
+}
+
+/*
+ * Write random or indexed TLB entry, and care about the hazards from
+ * the preceeding mtc0 and for the following eret.
+ */
+enum tlb_write_entry { tlb_random, tlb_indexed };
+
+static __init void build_tlb_write_entry(u32 **p, struct label **l,
+					 struct reloc **r,
+					 enum tlb_write_entry wmode)
+{
+	void(*tlbw)(u32 **) = NULL;
+
+	switch (wmode) {
+	case tlb_random: tlbw = i_tlbwr; break;
+	case tlb_indexed: tlbw = i_tlbwi; break;
+	}
+
+	switch (current_cpu_data.cputype) {
+	case CPU_R4000PC:
+	case CPU_R4000SC:
+	case CPU_R4000MC:
+	case CPU_R4400PC:
+	case CPU_R4400SC:
+	case CPU_R4400MC:
+		/*
+		 * This branch uses up a mtc0 hazard nop slot and saves
+		 * two nops after the tlbw instruction.
+		 */
+		il_bgezl(p, r, 0, label_tlbw_hazard);
+		tlbw(p);
+		l_tlbw_hazard(l, *p);
+		i_nop(p);
+		break;
+
+	case CPU_R4600:
+	case CPU_R4700:
+	case CPU_R5000:
+	case CPU_R5000A:
+	case CPU_5KC:
+	case CPU_TX49XX:
+	case CPU_AU1000:
+	case CPU_AU1100:
+	case CPU_AU1500:
+	case CPU_AU1550:
+		i_nop(p);
+		tlbw(p);
+		break;
+
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_4KC:
+	case CPU_SB1:
+	case CPU_4KSC:
+	case CPU_20KC:
+	case CPU_25KF:
+		tlbw(p);
+		break;
+
+	case CPU_NEVADA:
+		i_nop(p); /* QED specifies 2 nops hazard */
+		/*
+		 * This branch uses up a mtc0 hazard nop slot and saves
+		 * a nop after the tlbw instruction.
+		 */
+		il_bgezl(p, r, 0, label_tlbw_hazard);
+		tlbw(p);
+		l_tlbw_hazard(l, *p);
+		break;
+
+	case CPU_RM7000:
+		i_nop(p);
+		i_nop(p);
+		i_nop(p);
+		i_nop(p);
+		tlbw(p);
+		break;
+
+	case CPU_4KEC:
+	case CPU_24K:
+		i_ehb(p);
+		tlbw(p);
+		break;
+
+	case CPU_RM9000:
+		/*
+		 * When the JTLB is updated by tlbwi or tlbwr, a subsequent
+		 * use of the JTLB for instructions should not occur for 4
+		 * cpu cycles and use for data translations should not occur
+		 * for 3 cpu cycles.
+		 */
+		i_ssnop(p);
+		i_ssnop(p);
+		i_ssnop(p);
+		i_ssnop(p);
+		tlbw(p);
+		i_ssnop(p);
+		i_ssnop(p);
+		i_ssnop(p);
+		i_ssnop(p);
+		break;
+
+	case CPU_VR4111:
+	case CPU_VR4121:
+	case CPU_VR4122:
+	case CPU_VR4181:
+	case CPU_VR4181A:
+		i_nop(p);
+		i_nop(p);
+		tlbw(p);
+		i_nop(p);
+		i_nop(p);
+		break;
+
+	case CPU_VR4131:
+	case CPU_VR4133:
+		i_nop(p);
+		i_nop(p);
+		tlbw(p);
+		break;
+
+	default:
+		panic("No TLB refill handler yet (CPU type: %d)",
+		      current_cpu_data.cputype);
+		break;
+	}
+}
+
+#ifdef CONFIG_MIPS64
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pmd entry.
+ */
+static __init void
+build_get_pmde64(u32 **p, struct label **l, struct reloc **r,
+		 unsigned int tmp, unsigned int ptr)
+{
+	long pgdc = (long)pgd_current;
+
+	/*
+	 * The vmalloc handling is not in the hotpath.
+	 */
+	i_dmfc0(p, tmp, C0_BADVADDR);
+	il_bltz(p, r, tmp, label_vmalloc);
+	/* No i_nop needed here, since the next insn doesn't touch TMP. */
+
+#ifdef CONFIG_SMP
+	/*
+	 * 64 bit SMP has the lower part of &pgd_current[smp_processor_id()]
+	 * stored in CONTEXT.
+	 */
+	if (in_compat_space_p(pgdc)) {
+		i_dmfc0(p, ptr, C0_CONTEXT);
+		i_dsra(p, ptr, ptr, 23);
+		i_ld(p, ptr, 0, ptr);
+	} else {
+#ifdef CONFIG_BUILD_ELF64
+		i_dmfc0(p, ptr, C0_CONTEXT);
+		i_dsrl(p, ptr, ptr, 23);
+		i_dsll(p, ptr, ptr, 3);
+		i_LA_mostly(p, tmp, pgdc);
+		i_daddu(p, ptr, ptr, tmp);
+		i_dmfc0(p, tmp, C0_BADVADDR);
+		i_ld(p, ptr, rel_lo(pgdc), ptr);
+#else
+		i_dmfc0(p, ptr, C0_CONTEXT);
+		i_lui(p, tmp, rel_highest(pgdc));
+		i_dsll(p, ptr, ptr, 9);
+		i_daddiu(p, tmp, tmp, rel_higher(pgdc));
+		i_dsrl32(p, ptr, ptr, 0);
+		i_and(p, ptr, ptr, tmp);
+		i_dmfc0(p, tmp, C0_BADVADDR);
+		i_ld(p, ptr, 0, ptr);
+#endif
+	}
+#else
+	i_LA_mostly(p, ptr, pgdc);
+	i_ld(p, ptr, rel_lo(pgdc), ptr);
+#endif
+
+	l_vmalloc_done(l, *p);
+	i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */
+	i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
+	i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+	i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	i_ld(p, ptr, 0, ptr); /* get pmd pointer */
+	i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
+	i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
+	i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
+}
+
+/*
+ * BVADDR is the faulting address, PTR is scratch.
+ * PTR will hold the pgd for vmalloc.
+ */
+static __init void
+build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
+			unsigned int bvaddr, unsigned int ptr)
+{
+	long swpd = (long)swapper_pg_dir;
+
+	l_vmalloc(l, *p);
+	i_LA(p, ptr, VMALLOC_START);
+	i_dsubu(p, bvaddr, bvaddr, ptr);
+
+	if (in_compat_space_p(swpd) && !rel_lo(swpd)) {
+		il_b(p, r, label_vmalloc_done);
+		i_lui(p, ptr, rel_hi(swpd));
+	} else {
+		i_LA_mostly(p, ptr, swpd);
+		il_b(p, r, label_vmalloc_done);
+		i_daddiu(p, ptr, ptr, rel_lo(swpd));
+	}
+}
+
+#else /* !CONFIG_MIPS64 */
+
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pgd entry.
+ */
+static __init void __attribute__((unused))
+build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+	long pgdc = (long)pgd_current;
+
+	/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
+#ifdef CONFIG_SMP
+	i_mfc0(p, ptr, C0_CONTEXT);
+	i_LA_mostly(p, tmp, pgdc);
+	i_srl(p, ptr, ptr, 23);
+	i_sll(p, ptr, ptr, 2);
+	i_addu(p, ptr, tmp, ptr);
+#else
+	i_LA_mostly(p, ptr, pgdc);
+#endif
+	i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+	i_lw(p, ptr, rel_lo(pgdc), ptr);
+	i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
+	i_sll(p, tmp, tmp, PGD_T_LOG2);
+	i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
+}
+
+#endif /* !CONFIG_MIPS64 */
+
+static __init void build_adjust_context(u32 **p, unsigned int ctx)
+{
+	unsigned int shift = 4 - (PTE_T_LOG2 + 1);
+	unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1);
+
+	switch (current_cpu_data.cputype) {
+	case CPU_VR41XX:
+	case CPU_VR4111:
+	case CPU_VR4121:
+	case CPU_VR4122:
+	case CPU_VR4131:
+	case CPU_VR4181:
+	case CPU_VR4181A:
+	case CPU_VR4133:
+		shift += 2;
+		break;
+
+	default:
+		break;
+	}
+
+	if (shift)
+		i_SRL(p, ctx, ctx, shift);
+	i_andi(p, ctx, ctx, mask);
+}
+
+static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+	/*
+	 * Bug workaround for the Nevada. It seems as if under certain
+	 * circumstances the move from cp0_context might produce a
+	 * bogus result when the mfc0 instruction and its consumer are
+	 * in a different cacheline or a load instruction, probably any
+	 * memory reference, is between them.
+	 */
+	switch (current_cpu_data.cputype) {
+	case CPU_NEVADA:
+		i_LW(p, ptr, 0, ptr);
+		GET_CONTEXT(p, tmp); /* get context reg */
+		break;
+
+	default:
+		GET_CONTEXT(p, tmp); /* get context reg */
+		i_LW(p, ptr, 0, ptr);
+		break;
+	}
+
+	build_adjust_context(p, tmp);
+	i_ADDU(p, ptr, ptr, tmp); /* add in offset */
+}
+
+static __init void build_update_entries(u32 **p, unsigned int tmp,
+					unsigned int ptep)
+{
+	/*
+	 * 64bit address support (36bit on a 32bit CPU) in a 32bit
+	 * Kernel is a special case. Only a few CPUs use it.
+	 */
+#ifdef CONFIG_64BIT_PHYS_ADDR
+	if (cpu_has_64bits) {
+		i_ld(p, tmp, 0, ptep); /* get even pte */
+		i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+		i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */
+		i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+		i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */
+		i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+	} else {
+		int pte_off_even = sizeof(pte_t) / 2;
+		int pte_off_odd = pte_off_even + sizeof(pte_t);
+
+		/* The pte entries are pre-shifted */
+		i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
+		i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+		i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
+		i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+	}
+#else
+	i_LW(p, tmp, 0, ptep); /* get even pte */
+	i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+	if (r45k_bvahwbug())
+		build_tlb_probe_entry(p);
+	i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */
+	if (r4k_250MHZhwbug())
+		i_mtc0(p, 0, C0_ENTRYLO0);
+	i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+	i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */
+	if (r45k_bvahwbug())
+		i_mfc0(p, tmp, C0_INDEX);
+	if (r4k_250MHZhwbug())
+		i_mtc0(p, 0, C0_ENTRYLO1);
+	i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+#endif
+}
+
+static void __init build_r4000_tlb_refill_handler(void)
+{
+	u32 *p = tlb_handler;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+	u32 *f;
+	unsigned int final_len;
+
+	memset(tlb_handler, 0, sizeof(tlb_handler));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+	memset(final_handler, 0, sizeof(final_handler));
+
+	/*
+	 * create the plain linear handler
+	 */
+	if (bcm1250_m3_war()) {
+		i_MFC0(&p, K0, C0_BADVADDR);
+		i_MFC0(&p, K1, C0_ENTRYHI);
+		i_xor(&p, K0, K0, K1);
+		i_SRL(&p, K0, K0, PAGE_SHIFT + 1);
+		il_bnez(&p, &r, K0, label_leave);
+		/* No need for i_nop */
+	}
+
+#ifdef CONFIG_MIPS64
+	build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
+#else
+	build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
+#endif
+
+	build_get_ptep(&p, K0, K1);
+	build_update_entries(&p, K0, K1);
+	build_tlb_write_entry(&p, &l, &r, tlb_random);
+	l_leave(&l, p);
+	i_eret(&p); /* return from trap */
+
+#ifdef CONFIG_MIPS64
+	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1);
+#endif
+
+	/*
+	 * Overflow check: For the 64bit handler, we need at least one
+	 * free instruction slot for the wrap-around branch. In worst
+	 * case, if the intended insertion point is a delay slot, we
+	 * need three, with the the second nop'ed and the third being
+	 * unused.
+	 */
+#ifdef CONFIG_MIPS32
+	if ((p - tlb_handler) > 64)
+		panic("TLB refill handler space exceeded");
+#else
+	if (((p - tlb_handler) > 63)
+	    || (((p - tlb_handler) > 61)
+		&& insn_has_bdelay(relocs, tlb_handler + 29)))
+		panic("TLB refill handler space exceeded");
+#endif
+
+	/*
+	 * Now fold the handler in the TLB refill handler space.
+	 */
+#ifdef CONFIG_MIPS32
+	f = final_handler;
+	/* Simplest case, just copy the handler. */
+	copy_handler(relocs, labels, tlb_handler, p, f);
+	final_len = p - tlb_handler;
+#else /* CONFIG_MIPS64 */
+	f = final_handler + 32;
+	if ((p - tlb_handler) <= 32) {
+		/* Just copy the handler. */
+		copy_handler(relocs, labels, tlb_handler, p, f);
+		final_len = p - tlb_handler;
+	} else {
+		u32 *split = tlb_handler + 30;
+
+		/*
+		 * Find the split point.
+		 */
+		if (insn_has_bdelay(relocs, split - 1))
+			split--;
+
+		/* Copy first part of the handler. */
+		copy_handler(relocs, labels, tlb_handler, split, f);
+		f += split - tlb_handler;
+
+		/* Insert branch. */
+		l_split(&l, final_handler);
+		il_b(&f, &r, label_split);
+		if (insn_has_bdelay(relocs, split))
+			i_nop(&f);
+		else {
+			copy_handler(relocs, labels, split, split + 1, f);
+			move_labels(labels, f, f + 1, -1);
+			f++;
+			split++;
+		}
+
+		/* Copy the rest of the handler. */
+		copy_handler(relocs, labels, split, p, final_handler);
+		final_len = (f - (final_handler + 32)) + (p - split);
+	}
+#endif /* CONFIG_MIPS64 */
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB refill handler (%u instructions).\n",
+	       final_len);
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < 64; i++)
+			printk("%08x\n", final_handler[i]);
+	}
+#endif
+
+	memcpy((void *)CAC_BASE, final_handler, 0x100);
+	flush_icache_range(CAC_BASE, CAC_BASE + 0x100);
+}
+
+/*
+ * TLB load/store/modify handlers.
+ *
+ * Only the fastpath gets synthesized at runtime, the slowpath for
+ * do_page_fault remains normal asm.
+ */
+extern void tlb_do_page_fault_0(void);
+extern void tlb_do_page_fault_1(void);
+
+#define __tlb_handler_align \
+	__attribute__((__aligned__(1 << CONFIG_MIPS_L1_CACHE_SHIFT)))
+
+/*
+ * 128 instructions for the fastpath handler is generous and should
+ * never be exceeded.
+ */
+#define FASTPATH_SIZE 128
+
+u32 __tlb_handler_align handle_tlbl[FASTPATH_SIZE];
+u32 __tlb_handler_align handle_tlbs[FASTPATH_SIZE];
+u32 __tlb_handler_align handle_tlbm[FASTPATH_SIZE];
+
+static void __init
+iPTE_LW(u32 **p, struct label **l, unsigned int pte, int offset,
+	unsigned int ptr)
+{
+#ifdef CONFIG_SMP
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (cpu_has_64bits)
+		i_lld(p, pte, offset, ptr);
+	else
+# endif
+		i_LL(p, pte, offset, ptr);
+#else
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (cpu_has_64bits)
+		i_ld(p, pte, offset, ptr);
+	else
+# endif
+		i_LW(p, pte, offset, ptr);
+#endif
+}
+
+static void __init
+iPTE_SW(u32 **p, struct reloc **r, unsigned int pte, int offset,
+	unsigned int ptr)
+{
+#ifdef CONFIG_SMP
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (cpu_has_64bits)
+		i_scd(p, pte, offset, ptr);
+	else
+# endif
+		i_SC(p, pte, offset, ptr);
+
+	if (r10000_llsc_war())
+		il_beqzl(p, r, pte, label_smp_pgtable_change);
+	else
+		il_beqz(p, r, pte, label_smp_pgtable_change);
+
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (!cpu_has_64bits) {
+		/* no i_nop needed */
+		i_ll(p, pte, sizeof(pte_t) / 2, ptr);
+		i_ori(p, pte, pte, _PAGE_VALID);
+		i_sc(p, pte, sizeof(pte_t) / 2, ptr);
+		il_beqz(p, r, pte, label_smp_pgtable_change);
+		/* no i_nop needed */
+		i_lw(p, pte, 0, ptr);
+	} else
+		i_nop(p);
+# else
+	i_nop(p);
+# endif
+#else
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (cpu_has_64bits)
+		i_sd(p, pte, offset, ptr);
+	else
+# endif
+		i_SW(p, pte, offset, ptr);
+
+# ifdef CONFIG_64BIT_PHYS_ADDR
+	if (!cpu_has_64bits) {
+		i_lw(p, pte, sizeof(pte_t) / 2, ptr);
+		i_ori(p, pte, pte, _PAGE_VALID);
+		i_sw(p, pte, sizeof(pte_t) / 2, ptr);
+		i_lw(p, pte, 0, ptr);
+	}
+# endif
+#endif
+}
+
+/*
+ * Check if PTE is present, if not then jump to LABEL. PTR points to
+ * the page table where this PTE is located, PTE will be re-loaded
+ * with it's original value.
+ */
+static void __init
+build_pte_present(u32 **p, struct label **l, struct reloc **r,
+		  unsigned int pte, unsigned int ptr, enum label_id lid)
+{
+	i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
+	i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
+	il_bnez(p, r, pte, lid);
+	iPTE_LW(p, l, pte, 0, ptr);
+}
+
+/* Make PTE valid, store result in PTR. */
+static void __init
+build_make_valid(u32 **p, struct reloc **r, unsigned int pte,
+		 unsigned int ptr)
+{
+	i_ori(p, pte, pte, _PAGE_VALID | _PAGE_ACCESSED);
+	iPTE_SW(p, r, pte, 0, ptr);
+}
+
+/*
+ * Check if PTE can be written to, if not branch to LABEL. Regardless
+ * restore PTE with value from PTR when done.
+ */
+static void __init
+build_pte_writable(u32 **p, struct label **l, struct reloc **r,
+		   unsigned int pte, unsigned int ptr, enum label_id lid)
+{
+	i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
+	i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
+	il_bnez(p, r, pte, lid);
+	iPTE_LW(p, l, pte, 0, ptr);
+}
+
+/* Make PTE writable, update software status bits as well, then store
+ * at PTR.
+ */
+static void __init
+build_make_write(u32 **p, struct reloc **r, unsigned int pte,
+		 unsigned int ptr)
+{
+	i_ori(p, pte, pte,
+	      _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
+	iPTE_SW(p, r, pte, 0, ptr);
+}
+
+/*
+ * Check if PTE can be modified, if not branch to LABEL. Regardless
+ * restore PTE with value from PTR when done.
+ */
+static void __init
+build_pte_modifiable(u32 **p, struct label **l, struct reloc **r,
+		     unsigned int pte, unsigned int ptr, enum label_id lid)
+{
+	i_andi(p, pte, pte, _PAGE_WRITE);
+	il_beqz(p, r, pte, lid);
+	iPTE_LW(p, l, pte, 0, ptr);
+}
+
+/*
+ * R3000 style TLB load/store/modify handlers.
+ */
+
+/* This places the pte in the page table at PTR into ENTRYLO0. */
+static void __init
+build_r3000_pte_reload(u32 **p, unsigned int ptr)
+{
+	i_lw(p, ptr, 0, ptr);
+	i_nop(p); /* load delay */
+	i_mtc0(p, ptr, C0_ENTRYLO0);
+	i_nop(p); /* cp0 delay */
+}
+
+/*
+ * The index register may have the probe fail bit set,
+ * because we would trap on access kseg2, i.e. without refill.
+ */
+static void __init
+build_r3000_tlb_write(u32 **p, struct label **l, struct reloc **r,
+		      unsigned int tmp)
+{
+	i_mfc0(p, tmp, C0_INDEX);
+	i_nop(p); /* cp0 delay */
+	il_bltz(p, r, tmp, label_r3000_write_probe_fail);
+	i_nop(p); /* branch delay */
+	i_tlbwi(p);
+	il_b(p, r, label_r3000_write_probe_ok);
+	i_nop(p); /* branch delay */
+	l_r3000_write_probe_fail(l, *p);
+	i_tlbwr(p);
+	l_r3000_write_probe_ok(l, *p);
+}
+
+static void __init
+build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,
+				   unsigned int ptr)
+{
+	long pgdc = (long)pgd_current;
+
+	i_mfc0(p, pte, C0_BADVADDR);
+	i_lui(p, ptr, rel_hi(pgdc)); /* cp0 delay */
+	i_lw(p, ptr, rel_lo(pgdc), ptr);
+	i_srl(p, pte, pte, 22); /* load delay */
+	i_sll(p, pte, pte, 2);
+	i_addu(p, ptr, ptr, pte);
+	i_mfc0(p, pte, C0_CONTEXT);
+	i_lw(p, ptr, 0, ptr); /* cp0 delay */
+	i_andi(p, pte, pte, 0xffc); /* load delay */
+	i_addu(p, ptr, ptr, pte);
+	i_lw(p, pte, 0, ptr);
+	i_nop(p); /* load delay */
+	i_tlbp(p);
+}
+
+static void __init
+build_r3000_tlbchange_handler_tail(u32 **p, unsigned int tmp)
+{
+	i_mfc0(p, tmp, C0_EPC);
+	i_nop(p); /* cp0 delay */
+	i_jr(p, tmp);
+	i_rfe(p); /* branch delay */
+}
+
+static void __init build_r3000_tlb_load_handler(void)
+{
+	u32 *p = handle_tlbl;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbl, 0, sizeof(handle_tlbl));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl);
+	build_make_valid(&p, &r, K0, K1);
+	build_r3000_pte_reload(&p, K1);
+	build_r3000_tlb_write(&p, &l, &r, K0);
+	build_r3000_tlbchange_handler_tail(&p, K0);
+
+	l_nopage_tlbl(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbl) > FASTPATH_SIZE)
+		panic("TLB load handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB load handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbl));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbl[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbl,
+			   (unsigned long)handle_tlbl + FASTPATH_SIZE * sizeof(u32));
+}
+
+static void __init build_r3000_tlb_store_handler(void)
+{
+	u32 *p = handle_tlbs;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbs, 0, sizeof(handle_tlbs));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs);
+	build_make_write(&p, &r, K0, K1);
+	build_r3000_pte_reload(&p, K1);
+	build_r3000_tlb_write(&p, &l, &r, K0);
+	build_r3000_tlbchange_handler_tail(&p, K0);
+
+	l_nopage_tlbs(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbs) > FASTPATH_SIZE)
+		panic("TLB store handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB store handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbs));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbs[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbs,
+			   (unsigned long)handle_tlbs + FASTPATH_SIZE * sizeof(u32));
+}
+
+static void __init build_r3000_tlb_modify_handler(void)
+{
+	u32 *p = handle_tlbm;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbm, 0, sizeof(handle_tlbm));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r3000_tlbchange_handler_head(&p, K0, K1);
+	build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm);
+	build_make_write(&p, &r, K0, K1);
+	build_r3000_pte_reload(&p, K1);
+	i_tlbwi(&p);
+	build_r3000_tlbchange_handler_tail(&p, K0);
+
+	l_nopage_tlbm(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbm) > FASTPATH_SIZE)
+		panic("TLB modify handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB modify handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbm));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbm[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbm,
+			   (unsigned long)handle_tlbm + FASTPATH_SIZE * sizeof(u32));
+}
+
+/*
+ * R4000 style TLB load/store/modify handlers.
+ */
+static void __init
+build_r4000_tlbchange_handler_head(u32 **p, struct label **l,
+				   struct reloc **r, unsigned int pte,
+				   unsigned int ptr)
+{
+#ifdef CONFIG_MIPS64
+	build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */
+#else
+	build_get_pgde32(p, pte, ptr); /* get pgd in ptr */
+#endif
+
+	i_MFC0(p, pte, C0_BADVADDR);
+	i_LW(p, ptr, 0, ptr);
+	i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2);
+	i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2);
+	i_ADDU(p, ptr, ptr, pte);
+
+#ifdef CONFIG_SMP
+	l_smp_pgtable_change(l, *p);
+# endif
+	iPTE_LW(p, l, pte, 0, ptr); /* get even pte */
+	build_tlb_probe_entry(p);
+}
+
+static void __init
+build_r4000_tlbchange_handler_tail(u32 **p, struct label **l,
+				   struct reloc **r, unsigned int tmp,
+				   unsigned int ptr)
+{
+	i_ori(p, ptr, ptr, sizeof(pte_t));
+	i_xori(p, ptr, ptr, sizeof(pte_t));
+	build_update_entries(p, tmp, ptr);
+	build_tlb_write_entry(p, l, r, tlb_indexed);
+	l_leave(l, *p);
+	i_eret(p); /* return from trap */
+
+#ifdef CONFIG_MIPS64
+	build_get_pgd_vmalloc64(p, l, r, tmp, ptr);
+#endif
+}
+
+static void __init build_r4000_tlb_load_handler(void)
+{
+	u32 *p = handle_tlbl;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbl, 0, sizeof(handle_tlbl));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	if (bcm1250_m3_war()) {
+		i_MFC0(&p, K0, C0_BADVADDR);
+		i_MFC0(&p, K1, C0_ENTRYHI);
+		i_xor(&p, K0, K0, K1);
+		i_SRL(&p, K0, K0, PAGE_SHIFT + 1);
+		il_bnez(&p, &r, K0, label_leave);
+		/* No need for i_nop */
+	}
+
+	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
+	build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl);
+	build_make_valid(&p, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+
+	l_nopage_tlbl(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbl) > FASTPATH_SIZE)
+		panic("TLB load handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB load handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbl));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbl[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbl,
+			   (unsigned long)handle_tlbl + FASTPATH_SIZE * sizeof(u32));
+}
+
+static void __init build_r4000_tlb_store_handler(void)
+{
+	u32 *p = handle_tlbs;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbs, 0, sizeof(handle_tlbs));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
+	build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs);
+	build_make_write(&p, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+
+	l_nopage_tlbs(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbs) > FASTPATH_SIZE)
+		panic("TLB store handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB store handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbs));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbs[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbs,
+			   (unsigned long)handle_tlbs + FASTPATH_SIZE * sizeof(u32));
+}
+
+static void __init build_r4000_tlb_modify_handler(void)
+{
+	u32 *p = handle_tlbm;
+	struct label *l = labels;
+	struct reloc *r = relocs;
+
+	memset(handle_tlbm, 0, sizeof(handle_tlbm));
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1);
+	build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm);
+	/* Present and writable bits set, set accessed and dirty bits. */
+	build_make_write(&p, &r, K0, K1);
+	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
+
+	l_nopage_tlbm(&l, p);
+	i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
+	i_nop(&p);
+
+	if ((p - handle_tlbm) > FASTPATH_SIZE)
+		panic("TLB modify handler fastpath space exceeded");
+
+	resolve_relocs(relocs, labels);
+	printk("Synthesized TLB modify handler fastpath (%u instructions).\n",
+	       (unsigned int)(p - handle_tlbm));
+
+#ifdef DEBUG_TLB
+	{
+		int i;
+
+		for (i = 0; i < FASTPATH_SIZE; i++)
+			printk("%08x\n", handle_tlbm[i]);
+	}
+#endif
+
+	flush_icache_range((unsigned long)handle_tlbm,
+			   (unsigned long)handle_tlbm + FASTPATH_SIZE * sizeof(u32));
+}
+
+void __init build_tlb_refill_handler(void)
+{
+	/*
+	 * The refill handler is generated per-CPU, multi-node systems
+	 * may have local storage for it. The other handlers are only
+	 * needed once.
+	 */
+	static int run_once = 0;
+
+	switch (current_cpu_data.cputype) {
+	case CPU_R2000:
+	case CPU_R3000:
+	case CPU_R3000A:
+	case CPU_R3081E:
+	case CPU_TX3912:
+	case CPU_TX3922:
+	case CPU_TX3927:
+		build_r3000_tlb_refill_handler();
+		if (!run_once) {
+			build_r3000_tlb_load_handler();
+			build_r3000_tlb_store_handler();
+			build_r3000_tlb_modify_handler();
+			run_once++;
+		}
+		break;
+
+	case CPU_R6000:
+	case CPU_R6000A:
+		panic("No R6000 TLB refill handler yet");
+		break;
+
+	case CPU_R8000:
+		panic("No R8000 TLB refill handler yet");
+		break;
+
+	default:
+		build_r4000_tlb_refill_handler();
+		if (!run_once) {
+			build_r4000_tlb_load_handler();
+			build_r4000_tlb_store_handler();
+			build_r4000_tlb_modify_handler();
+			run_once++;
+		}
+	}
+}
-- 
cgit v1.2.3