From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2005 15:20:36 -0700 Subject: Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! --- arch/mips/mm/Makefile | 44 + arch/mips/mm/c-r3k.c | 349 ++++++++ arch/mips/mm/c-r4k.c | 1260 ++++++++++++++++++++++++++++ arch/mips/mm/c-sb1.c | 558 ++++++++++++ arch/mips/mm/c-tx39.c | 493 +++++++++++ arch/mips/mm/cache.c | 157 ++++ arch/mips/mm/cerr-sb1.c | 543 ++++++++++++ arch/mips/mm/cex-gen.S | 42 + arch/mips/mm/cex-sb1.S | 170 ++++ arch/mips/mm/dma-coherent.c | 255 ++++++ arch/mips/mm/dma-ip27.c | 257 ++++++ arch/mips/mm/dma-ip32.c | 382 +++++++++ arch/mips/mm/dma-noncoherent.c | 400 +++++++++ arch/mips/mm/extable.c | 21 + arch/mips/mm/fault.c | 236 ++++++ arch/mips/mm/highmem.c | 103 +++ arch/mips/mm/init.c | 304 +++++++ arch/mips/mm/ioremap.c | 202 +++++ arch/mips/mm/pg-r4k.c | 489 +++++++++++ arch/mips/mm/pg-sb1.c | 287 +++++++ arch/mips/mm/pgtable-32.c | 97 +++ arch/mips/mm/pgtable-64.c | 58 ++ arch/mips/mm/pgtable.c | 36 + arch/mips/mm/sc-ip22.c | 177 ++++ arch/mips/mm/sc-r5k.c | 108 +++ arch/mips/mm/sc-rm7k.c | 193 +++++ arch/mips/mm/tlb-andes.c | 257 ++++++ arch/mips/mm/tlb-r3k.c | 289 +++++++ arch/mips/mm/tlb-r4k.c | 419 ++++++++++ arch/mips/mm/tlb-r8k.c | 250 ++++++ arch/mips/mm/tlb-sb1.c | 376 +++++++++ arch/mips/mm/tlbex-fault.S | 28 + arch/mips/mm/tlbex.c | 1815 ++++++++++++++++++++++++++++++++++++++++ 33 files changed, 10655 insertions(+) create mode 100644 arch/mips/mm/Makefile create mode 100644 arch/mips/mm/c-r3k.c create mode 100644 arch/mips/mm/c-r4k.c create mode 100644 arch/mips/mm/c-sb1.c create mode 100644 arch/mips/mm/c-tx39.c create mode 100644 arch/mips/mm/cache.c create mode 100644 arch/mips/mm/cerr-sb1.c create mode 100644 arch/mips/mm/cex-gen.S create mode 100644 arch/mips/mm/cex-sb1.S create mode 100644 arch/mips/mm/dma-coherent.c create mode 100644 arch/mips/mm/dma-ip27.c create mode 100644 arch/mips/mm/dma-ip32.c create mode 100644 arch/mips/mm/dma-noncoherent.c create mode 100644 arch/mips/mm/extable.c create mode 100644 arch/mips/mm/fault.c create mode 100644 arch/mips/mm/highmem.c create mode 100644 arch/mips/mm/init.c create mode 100644 arch/mips/mm/ioremap.c create mode 100644 arch/mips/mm/pg-r4k.c create mode 100644 arch/mips/mm/pg-sb1.c create mode 100644 arch/mips/mm/pgtable-32.c create mode 100644 arch/mips/mm/pgtable-64.c create mode 100644 arch/mips/mm/pgtable.c create mode 100644 arch/mips/mm/sc-ip22.c create mode 100644 arch/mips/mm/sc-r5k.c create mode 100644 arch/mips/mm/sc-rm7k.c create mode 100644 arch/mips/mm/tlb-andes.c create mode 100644 arch/mips/mm/tlb-r3k.c create mode 100644 arch/mips/mm/tlb-r4k.c create mode 100644 arch/mips/mm/tlb-r8k.c create mode 100644 arch/mips/mm/tlb-sb1.c create mode 100644 arch/mips/mm/tlbex-fault.S create mode 100644 arch/mips/mm/tlbex.c (limited to 'arch/mips/mm') diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile new file mode 100644 index 00000000000..f61e038b444 --- /dev/null +++ b/arch/mips/mm/Makefile @@ -0,0 +1,44 @@ +# +# Makefile for the Linux/MIPS-specific parts of the memory manager. +# + +obj-y += cache.o extable.o fault.o init.o pgtable.o \ + tlbex.o tlbex-fault.o + +obj-$(CONFIG_MIPS32) += ioremap.o pgtable-32.o +obj-$(CONFIG_MIPS64) += pgtable-64.o +obj-$(CONFIG_HIGHMEM) += highmem.o + +obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o pg-r4k.o tlb-andes.o +obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o pg-r4k.o +obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o +obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_SB1) += c-sb1.o cerr-sb1.o cex-sb1.o pg-sb1.o \ + tlb-sb1.o +obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r4k.o tlb-r3k.o +obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o + +obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o +obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o +obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o + +# +# Choose one DMA coherency model +# +ifndef CONFIG_OWN_DMA +obj-$(CONFIG_DMA_COHERENT) += dma-coherent.o +obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o +endif +obj-$(CONFIG_DMA_IP27) += dma-ip27.o +obj-$(CONFIG_DMA_IP32) += dma-ip32.o + +EXTRA_AFLAGS := $(CFLAGS) diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c new file mode 100644 index 00000000000..c659f99eb39 --- /dev/null +++ b/arch/mips/mm/c-r3k.c @@ -0,0 +1,349 @@ +/* + * r2300.c: R2000 and R3000 specific mmu/cache code. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * + * with a lot of changes to make this thing work for R3000s + * Tx39XX R4k style caches added. HK + * Copyright (C) 1998, 1999, 2000 Harald Koerfgen + * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov + * Copyright (C) 2001, 2004 Maciej W. Rozycki + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long icache_size, dcache_size; /* Size in bytes */ +static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */ + +#undef DEBUG_CACHE + +unsigned long __init r3k_cache_size(unsigned long ca_flags) +{ + unsigned long flags, status, dummy, size; + volatile unsigned long *p; + + p = (volatile unsigned long *) KSEG0; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ca_flags|flags)&~ST0_IEC); + + *p = 0xa5a55a5a; + dummy = *p; + status = read_c0_status(); + + if (dummy != 0xa5a55a5a || (status & ST0_CM)) { + size = 0; + } else { + for (size = 128; size <= 0x40000; size <<= 1) + *(p + size) = 0; + *p = -1; + for (size = 128; + (size <= 0x40000) && (*(p + size) == 0); + size <<= 1) + ; + if (size > 0x40000) + size = 0; + } + + write_c0_status(flags); + + return size * sizeof(*p); +} + +unsigned long __init r3k_cache_lsize(unsigned long ca_flags) +{ + unsigned long flags, status, lsize, i; + volatile unsigned long *p; + + p = (volatile unsigned long *) KSEG0; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ca_flags|flags)&~ST0_IEC); + + for (i = 0; i < 128; i++) + *(p + i) = 0; + *(volatile unsigned char *)p = 0; + for (lsize = 1; lsize < 128; lsize <<= 1) { + *(p + lsize); + status = read_c0_status(); + if (!(status & ST0_CM)) + break; + } + for (i = 0; i < 128; i += lsize) + *(volatile unsigned char *)(p + i) = 0; + + write_c0_status(flags); + + return lsize * sizeof(*p); +} + +static void __init r3k_probe_cache(void) +{ + dcache_size = r3k_cache_size(ST0_ISC); + if (dcache_size) + dcache_lsize = r3k_cache_lsize(ST0_ISC); + + icache_size = r3k_cache_size(ST0_ISC|ST0_SWC); + if (icache_size) + icache_lsize = r3k_cache_lsize(ST0_ISC|ST0_SWC); +} + +static void r3k_flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long size, i, flags; + volatile unsigned char *p; + + size = end - start; + if (size > icache_size || KSEGX(start) != KSEG0) { + start = KSEG0; + size = icache_size; + } + p = (char *)start; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); + + for (i = 0; i < size; i += 0x080) { + asm ( "sb\t$0, 0x000(%0)\n\t" + "sb\t$0, 0x004(%0)\n\t" + "sb\t$0, 0x008(%0)\n\t" + "sb\t$0, 0x00c(%0)\n\t" + "sb\t$0, 0x010(%0)\n\t" + "sb\t$0, 0x014(%0)\n\t" + "sb\t$0, 0x018(%0)\n\t" + "sb\t$0, 0x01c(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x024(%0)\n\t" + "sb\t$0, 0x028(%0)\n\t" + "sb\t$0, 0x02c(%0)\n\t" + "sb\t$0, 0x030(%0)\n\t" + "sb\t$0, 0x034(%0)\n\t" + "sb\t$0, 0x038(%0)\n\t" + "sb\t$0, 0x03c(%0)\n\t" + "sb\t$0, 0x040(%0)\n\t" + "sb\t$0, 0x044(%0)\n\t" + "sb\t$0, 0x048(%0)\n\t" + "sb\t$0, 0x04c(%0)\n\t" + "sb\t$0, 0x050(%0)\n\t" + "sb\t$0, 0x054(%0)\n\t" + "sb\t$0, 0x058(%0)\n\t" + "sb\t$0, 0x05c(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x064(%0)\n\t" + "sb\t$0, 0x068(%0)\n\t" + "sb\t$0, 0x06c(%0)\n\t" + "sb\t$0, 0x070(%0)\n\t" + "sb\t$0, 0x074(%0)\n\t" + "sb\t$0, 0x078(%0)\n\t" + "sb\t$0, 0x07c(%0)\n\t" + : : "r" (p) ); + p += 0x080; + } + + write_c0_status(flags); +} + +static void r3k_flush_dcache_range(unsigned long start, unsigned long end) +{ + unsigned long size, i, flags; + volatile unsigned char *p; + + size = end - start; + if (size > dcache_size || KSEGX(start) != KSEG0) { + start = KSEG0; + size = dcache_size; + } + p = (char *)start; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ST0_ISC|flags)&~ST0_IEC); + + for (i = 0; i < size; i += 0x080) { + asm ( "sb\t$0, 0x000(%0)\n\t" + "sb\t$0, 0x004(%0)\n\t" + "sb\t$0, 0x008(%0)\n\t" + "sb\t$0, 0x00c(%0)\n\t" + "sb\t$0, 0x010(%0)\n\t" + "sb\t$0, 0x014(%0)\n\t" + "sb\t$0, 0x018(%0)\n\t" + "sb\t$0, 0x01c(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x024(%0)\n\t" + "sb\t$0, 0x028(%0)\n\t" + "sb\t$0, 0x02c(%0)\n\t" + "sb\t$0, 0x030(%0)\n\t" + "sb\t$0, 0x034(%0)\n\t" + "sb\t$0, 0x038(%0)\n\t" + "sb\t$0, 0x03c(%0)\n\t" + "sb\t$0, 0x040(%0)\n\t" + "sb\t$0, 0x044(%0)\n\t" + "sb\t$0, 0x048(%0)\n\t" + "sb\t$0, 0x04c(%0)\n\t" + "sb\t$0, 0x050(%0)\n\t" + "sb\t$0, 0x054(%0)\n\t" + "sb\t$0, 0x058(%0)\n\t" + "sb\t$0, 0x05c(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x064(%0)\n\t" + "sb\t$0, 0x068(%0)\n\t" + "sb\t$0, 0x06c(%0)\n\t" + "sb\t$0, 0x070(%0)\n\t" + "sb\t$0, 0x074(%0)\n\t" + "sb\t$0, 0x078(%0)\n\t" + "sb\t$0, 0x07c(%0)\n\t" + : : "r" (p) ); + p += 0x080; + } + + write_c0_status(flags); +} + +static inline unsigned long get_phys_page (unsigned long addr, + struct mm_struct *mm) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long physpage; + + pgd = pgd_offset(mm, addr); + pmd = pmd_offset(pgd, addr); + pte = pte_offset(pmd, addr); + + if ((physpage = pte_val(*pte)) & _PAGE_VALID) + return KSEG0ADDR(physpage & PAGE_MASK); + + return 0; +} + +static inline void r3k_flush_cache_all(void) +{ +} + +static inline void r3k___flush_cache_all(void) +{ + r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size); + r3k_flush_icache_range(KSEG0, KSEG0 + icache_size); +} + +static void r3k_flush_cache_mm(struct mm_struct *mm) +{ +} + +static void r3k_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +static void r3k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) +{ +} + +static void r3k_flush_data_cache_page(unsigned long addr) +{ +} + +static void r3k_flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long physpage; + + if (cpu_context(smp_processor_id(), mm) == 0) + return; + + if (!(vma->vm_flags & VM_EXEC)) + return; + +#ifdef DEBUG_CACHE + printk("cpage[%d,%08lx]", cpu_context(smp_processor_id(), mm), page); +#endif + + physpage = (unsigned long) page_address(page); + if (physpage) + r3k_flush_icache_range(physpage, physpage + PAGE_SIZE); +} + +static void r3k_flush_cache_sigtramp(unsigned long addr) +{ + unsigned long flags; + +#ifdef DEBUG_CACHE + printk("csigtramp[%08lx]", addr); +#endif + + flags = read_c0_status(); + + write_c0_status(flags&~ST0_IEC); + + /* Fill the TLB to avoid an exception with caches isolated. */ + asm ( "lw\t$0, 0x000(%0)\n\t" + "lw\t$0, 0x004(%0)\n\t" + : : "r" (addr) ); + + write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); + + asm ( "sb\t$0, 0x000(%0)\n\t" + "sb\t$0, 0x004(%0)\n\t" + : : "r" (addr) ); + + write_c0_status(flags); +} + +static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size) +{ + /* Catch bad driver code */ + BUG_ON(size == 0); + + iob(); + r3k_flush_dcache_range(start, start + size); +} + +void __init ld_mmu_r23000(void) +{ + extern void build_clear_page(void); + extern void build_copy_page(void); + + r3k_probe_cache(); + + flush_cache_all = r3k_flush_cache_all; + __flush_cache_all = r3k___flush_cache_all; + flush_cache_mm = r3k_flush_cache_mm; + flush_cache_range = r3k_flush_cache_range; + flush_cache_page = r3k_flush_cache_page; + flush_icache_page = r3k_flush_icache_page; + flush_icache_range = r3k_flush_icache_range; + + flush_cache_sigtramp = r3k_flush_cache_sigtramp; + flush_data_cache_page = r3k_flush_data_cache_page; + + _dma_cache_wback_inv = r3k_dma_cache_wback_inv; + _dma_cache_wback = r3k_dma_cache_wback_inv; + _dma_cache_inv = r3k_dma_cache_wback_inv; + + printk("Primary instruction cache %ldkB, linesize %ld bytes.\n", + icache_size >> 10, icache_lsize); + printk("Primary data cache %ldkB, linesize %ld bytes.\n", + dcache_size >> 10, dcache_lsize); + + build_clear_page(); + build_copy_page(); +} diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c new file mode 100644 index 00000000000..a03ebb2cba6 --- /dev/null +++ b/arch/mips/mm/c-r4k.c @@ -0,0 +1,1260 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long icache_size, dcache_size, scache_size; + +/* + * Dummy cache handling routines for machines without boardcaches + */ +static void no_sc_noop(void) {} + +static struct bcache_ops no_sc_ops = { + .bc_enable = (void *)no_sc_noop, + .bc_disable = (void *)no_sc_noop, + .bc_wback_inv = (void *)no_sc_noop, + .bc_inv = (void *)no_sc_noop +}; + +struct bcache_ops *bcops = &no_sc_ops; + +#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x2010) +#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x2020) + +#define R4600_HIT_CACHEOP_WAR_IMPL \ +do { \ + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) \ + *(volatile unsigned long *)CKSEG1; \ + if (R4600_V1_HIT_CACHEOP_WAR) \ + __asm__ __volatile__("nop;nop;nop;nop"); \ +} while (0) + +static void (*r4k_blast_dcache_page)(unsigned long addr); + +static inline void r4k_blast_dcache_page_dc32(unsigned long addr) +{ + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache32_page(addr); +} + +static inline void r4k_blast_dcache_page_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 16) + r4k_blast_dcache_page = blast_dcache16_page; + else if (dc_lsize == 32) + r4k_blast_dcache_page = r4k_blast_dcache_page_dc32; +} + +static void (* r4k_blast_dcache_page_indexed)(unsigned long addr); + +static inline void r4k_blast_dcache_page_indexed_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 16) + r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed; + else if (dc_lsize == 32) + r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed; +} + +static void (* r4k_blast_dcache)(void); + +static inline void r4k_blast_dcache_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 16) + r4k_blast_dcache = blast_dcache16; + else if (dc_lsize == 32) + r4k_blast_dcache = blast_dcache32; +} + +/* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */ +#define JUMP_TO_ALIGN(order) \ + __asm__ __volatile__( \ + "b\t1f\n\t" \ + ".align\t" #order "\n\t" \ + "1:\n\t" \ + ) +#define CACHE32_UNROLL32_ALIGN JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */ +#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11) + +static inline void blast_r4600_v1_icache32(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_icache32(); + local_irq_restore(flags); +} + +static inline void tx49_blast_icache32(void) +{ + unsigned long start = INDEX_BASE; + unsigned long end = start + current_cpu_data.icache.waysize; + unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; + unsigned long ws_end = current_cpu_data.icache.ways << + current_cpu_data.icache.waybit; + unsigned long ws, addr; + + CACHE32_UNROLL32_ALIGN2; + /* I'm in even chunk. blast odd chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start + 0x400; addr < end; addr += 0x400 * 2) + cache32_unroll32(addr|ws,Index_Invalidate_I); + CACHE32_UNROLL32_ALIGN; + /* I'm in odd chunk. blast even chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start; addr < end; addr += 0x400 * 2) + cache32_unroll32(addr|ws,Index_Invalidate_I); +} + +static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) +{ + unsigned long flags; + + local_irq_save(flags); + blast_icache32_page_indexed(page); + local_irq_restore(flags); +} + +static inline void tx49_blast_icache32_page_indexed(unsigned long page) +{ + unsigned long start = page; + unsigned long end = start + PAGE_SIZE; + unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; + unsigned long ws_end = current_cpu_data.icache.ways << + current_cpu_data.icache.waybit; + unsigned long ws, addr; + + CACHE32_UNROLL32_ALIGN2; + /* I'm in even chunk. blast odd chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start + 0x400; addr < end; addr += 0x400 * 2) + cache32_unroll32(addr|ws,Index_Invalidate_I); + CACHE32_UNROLL32_ALIGN; + /* I'm in odd chunk. blast even chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start; addr < end; addr += 0x400 * 2) + cache32_unroll32(addr|ws,Index_Invalidate_I); +} + +static void (* r4k_blast_icache_page)(unsigned long addr); + +static inline void r4k_blast_icache_page_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 16) + r4k_blast_icache_page = blast_icache16_page; + else if (ic_lsize == 32) + r4k_blast_icache_page = blast_icache32_page; + else if (ic_lsize == 64) + r4k_blast_icache_page = blast_icache64_page; +} + + +static void (* r4k_blast_icache_page_indexed)(unsigned long addr); + +static inline void r4k_blast_icache_page_indexed_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 16) + r4k_blast_icache_page_indexed = blast_icache16_page_indexed; + else if (ic_lsize == 32) { + if (TX49XX_ICACHE_INDEX_INV_WAR) + r4k_blast_icache_page_indexed = + tx49_blast_icache32_page_indexed; + else if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) + r4k_blast_icache_page_indexed = + blast_icache32_r4600_v1_page_indexed; + else + r4k_blast_icache_page_indexed = + blast_icache32_page_indexed; + } else if (ic_lsize == 64) + r4k_blast_icache_page_indexed = blast_icache64_page_indexed; +} + +static void (* r4k_blast_icache)(void); + +static inline void r4k_blast_icache_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 16) + r4k_blast_icache = blast_icache16; + else if (ic_lsize == 32) { + if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) + r4k_blast_icache = blast_r4600_v1_icache32; + else if (TX49XX_ICACHE_INDEX_INV_WAR) + r4k_blast_icache = tx49_blast_icache32; + else + r4k_blast_icache = blast_icache32; + } else if (ic_lsize == 64) + r4k_blast_icache = blast_icache64; +} + +static void (* r4k_blast_scache_page)(unsigned long addr); + +static inline void r4k_blast_scache_page_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (sc_lsize == 16) + r4k_blast_scache_page = blast_scache16_page; + else if (sc_lsize == 32) + r4k_blast_scache_page = blast_scache32_page; + else if (sc_lsize == 64) + r4k_blast_scache_page = blast_scache64_page; + else if (sc_lsize == 128) + r4k_blast_scache_page = blast_scache128_page; +} + +static void (* r4k_blast_scache_page_indexed)(unsigned long addr); + +static inline void r4k_blast_scache_page_indexed_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (sc_lsize == 16) + r4k_blast_scache_page_indexed = blast_scache16_page_indexed; + else if (sc_lsize == 32) + r4k_blast_scache_page_indexed = blast_scache32_page_indexed; + else if (sc_lsize == 64) + r4k_blast_scache_page_indexed = blast_scache64_page_indexed; + else if (sc_lsize == 128) + r4k_blast_scache_page_indexed = blast_scache128_page_indexed; +} + +static void (* r4k_blast_scache)(void); + +static inline void r4k_blast_scache_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (sc_lsize == 16) + r4k_blast_scache = blast_scache16; + else if (sc_lsize == 32) + r4k_blast_scache = blast_scache32; + else if (sc_lsize == 64) + r4k_blast_scache = blast_scache64; + else if (sc_lsize == 128) + r4k_blast_scache = blast_scache128; +} + +/* + * This is former mm's flush_cache_all() which really should be + * flush_cache_vunmap these days ... + */ +static inline void local_r4k_flush_cache_all(void * args) +{ + r4k_blast_dcache(); + r4k_blast_icache(); +} + +static void r4k_flush_cache_all(void) +{ + if (!cpu_has_dc_aliases) + return; + + on_each_cpu(local_r4k_flush_cache_all, NULL, 1, 1); +} + +static inline void local_r4k___flush_cache_all(void * args) +{ + r4k_blast_dcache(); + r4k_blast_icache(); + + switch (current_cpu_data.cputype) { + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R10000: + case CPU_R12000: + r4k_blast_scache(); + } +} + +static void r4k___flush_cache_all(void) +{ + on_each_cpu(local_r4k___flush_cache_all, NULL, 1, 1); +} + +static inline void local_r4k_flush_cache_range(void * args) +{ + struct vm_area_struct *vma = args; + int exec; + + if (!(cpu_context(smp_processor_id(), vma->vm_mm))) + return; + + exec = vma->vm_flags & VM_EXEC; + if (cpu_has_dc_aliases || exec) + r4k_blast_dcache(); + if (exec) + r4k_blast_icache(); +} + +static void r4k_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + on_each_cpu(local_r4k_flush_cache_range, vma, 1, 1); +} + +static inline void local_r4k_flush_cache_mm(void * args) +{ + struct mm_struct *mm = args; + + if (!cpu_context(smp_processor_id(), mm)) + return; + + r4k_blast_dcache(); + r4k_blast_icache(); + + /* + * Kludge alert. For obscure reasons R4000SC and R4400SC go nuts if we + * only flush the primary caches but R10000 and R12000 behave sane ... + */ + if (current_cpu_data.cputype == CPU_R4000SC || + current_cpu_data.cputype == CPU_R4000MC || + current_cpu_data.cputype == CPU_R4400SC || + current_cpu_data.cputype == CPU_R4400MC) + r4k_blast_scache(); +} + +static void r4k_flush_cache_mm(struct mm_struct *mm) +{ + if (!cpu_has_dc_aliases) + return; + + on_each_cpu(local_r4k_flush_cache_mm, mm, 1, 1); +} + +struct flush_cache_page_args { + struct vm_area_struct *vma; + unsigned long page; +}; + +static inline void local_r4k_flush_cache_page(void *args) +{ + struct flush_cache_page_args *fcp_args = args; + struct vm_area_struct *vma = fcp_args->vma; + unsigned long page = fcp_args->page; + int exec = vma->vm_flags & VM_EXEC; + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + + page &= PAGE_MASK; + pgdp = pgd_offset(mm, page); + pmdp = pmd_offset(pgdp, page); + ptep = pte_offset(pmdp, page); + + /* + * If the page isn't marked valid, the page cannot possibly be + * in the cache. + */ + if (!(pte_val(*ptep) & _PAGE_PRESENT)) + return; + + /* + * Doing flushes for another ASID than the current one is + * too difficult since stupid R4k caches do a TLB translation + * for every cache flush operation. So we do indexed flushes + * in that case, which doesn't overly flush the cache too much. + */ + if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { + r4k_blast_dcache_page(page); + if (exec && !cpu_icache_snoops_remote_store) + r4k_blast_scache_page(page); + } + if (exec) + r4k_blast_icache_page(page); + + return; + } + + /* + * Do indexed flush, too much work to get the (possible) TLB refills + * to work correctly. + */ + page = INDEX_BASE + (page & (dcache_size - 1)); + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { + r4k_blast_dcache_page_indexed(page); + if (exec && !cpu_icache_snoops_remote_store) + r4k_blast_scache_page_indexed(page); + } + if (exec) { + if (cpu_has_vtag_icache) { + int cpu = smp_processor_id(); + + if (cpu_context(cpu, vma->vm_mm) != 0) + drop_mmu_context(vma->vm_mm, cpu); + } else + r4k_blast_icache_page_indexed(page); + } +} + +static void r4k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) +{ + struct flush_cache_page_args args; + + /* + * If ownes no valid ASID yet, cannot possibly have gotten + * this page into the cache. + */ + if (cpu_context(smp_processor_id(), vma->vm_mm) == 0) + return; + + args.vma = vma; + args.page = page; + + on_each_cpu(local_r4k_flush_cache_page, &args, 1, 1); +} + +static inline void local_r4k_flush_data_cache_page(void * addr) +{ + r4k_blast_dcache_page((unsigned long) addr); +} + +static void r4k_flush_data_cache_page(unsigned long addr) +{ + on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr, 1, 1); +} + +struct flush_icache_range_args { + unsigned long start; + unsigned long end; +}; + +static inline void local_r4k_flush_icache_range(void *args) +{ + struct flush_icache_range_args *fir_args = args; + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + unsigned long ic_lsize = current_cpu_data.icache.linesz; + unsigned long sc_lsize = current_cpu_data.scache.linesz; + unsigned long start = fir_args->start; + unsigned long end = fir_args->end; + unsigned long addr, aend; + + if (!cpu_has_ic_fills_f_dc) { + if (end - start > dcache_size) { + r4k_blast_dcache(); + } else { + addr = start & ~(dc_lsize - 1); + aend = (end - 1) & ~(dc_lsize - 1); + + while (1) { + /* Hit_Writeback_Inv_D */ + protected_writeback_dcache_line(addr); + if (addr == aend) + break; + addr += dc_lsize; + } + } + + if (!cpu_icache_snoops_remote_store) { + if (end - start > scache_size) { + r4k_blast_scache(); + } else { + addr = start & ~(sc_lsize - 1); + aend = (end - 1) & ~(sc_lsize - 1); + + while (1) { + /* Hit_Writeback_Inv_D */ + protected_writeback_scache_line(addr); + if (addr == aend) + break; + addr += sc_lsize; + } + } + } + } + + if (end - start > icache_size) + r4k_blast_icache(); + else { + addr = start & ~(ic_lsize - 1); + aend = (end - 1) & ~(ic_lsize - 1); + while (1) { + /* Hit_Invalidate_I */ + protected_flush_icache_line(addr); + if (addr == aend) + break; + addr += ic_lsize; + } + } +} + +static void r4k_flush_icache_range(unsigned long start, unsigned long end) +{ + struct flush_icache_range_args args; + + args.start = start; + args.end = end; + + on_each_cpu(local_r4k_flush_icache_range, &args, 1, 1); +} + +/* + * Ok, this seriously sucks. We use them to flush a user page but don't + * know the virtual address, so we have to blast away the whole icache + * which is significantly more expensive than the real thing. Otoh we at + * least know the kernel address of the page so we can flush it + * selectivly. + */ + +struct flush_icache_page_args { + struct vm_area_struct *vma; + struct page *page; +}; + +static inline void local_r4k_flush_icache_page(void *args) +{ + struct flush_icache_page_args *fip_args = args; + struct vm_area_struct *vma = fip_args->vma; + struct page *page = fip_args->page; + + /* + * Tricky ... Because we don't know the virtual address we've got the + * choice of either invalidating the entire primary and secondary + * caches or invalidating the secondary caches also. With the subset + * enforcment on R4000SC, R4400SC, R10000 and R12000 invalidating the + * secondary cache will result in any entries in the primary caches + * also getting invalidated which hopefully is a bit more economical. + */ + if (cpu_has_subset_pcaches) { + unsigned long addr = (unsigned long) page_address(page); + + r4k_blast_scache_page(addr); + ClearPageDcacheDirty(page); + + return; + } + + if (!cpu_has_ic_fills_f_dc) { + unsigned long addr = (unsigned long) page_address(page); + r4k_blast_dcache_page(addr); + if (!cpu_icache_snoops_remote_store) + r4k_blast_scache_page(addr); + ClearPageDcacheDirty(page); + } + + /* + * We're not sure of the virtual address(es) involved here, so + * we have to flush the entire I-cache. + */ + if (cpu_has_vtag_icache) { + int cpu = smp_processor_id(); + + if (cpu_context(cpu, vma->vm_mm) != 0) + drop_mmu_context(vma->vm_mm, cpu); + } else + r4k_blast_icache(); +} + +static void r4k_flush_icache_page(struct vm_area_struct *vma, + struct page *page) +{ + struct flush_icache_page_args args; + + /* + * If there's no context yet, or the page isn't executable, no I-cache + * flush is needed. + */ + if (!(vma->vm_flags & VM_EXEC)) + return; + + args.vma = vma; + args.page = page; + + on_each_cpu(local_r4k_flush_icache_page, &args, 1, 1); +} + + +#ifdef CONFIG_DMA_NONCOHERENT + +static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + /* Catch bad driver code */ + BUG_ON(size == 0); + + if (cpu_has_subset_pcaches) { + unsigned long sc_lsize = current_cpu_data.scache.linesz; + + if (size >= scache_size) { + r4k_blast_scache(); + return; + } + + a = addr & ~(sc_lsize - 1); + end = (addr + size - 1) & ~(sc_lsize - 1); + while (1) { + flush_scache_line(a); /* Hit_Writeback_Inv_SD */ + if (a == end) + break; + a += sc_lsize; + } + return; + } + + /* + * Either no secondary cache or the available caches don't have the + * subset property so we have to flush the primary caches + * explicitly + */ + if (size >= dcache_size) { + r4k_blast_dcache(); + } else { + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + + R4600_HIT_CACHEOP_WAR_IMPL; + a = addr & ~(dc_lsize - 1); + end = (addr + size - 1) & ~(dc_lsize - 1); + while (1) { + flush_dcache_line(a); /* Hit_Writeback_Inv_D */ + if (a == end) + break; + a += dc_lsize; + } + } + + bc_wback_inv(addr, size); +} + +static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + /* Catch bad driver code */ + BUG_ON(size == 0); + + if (cpu_has_subset_pcaches) { + unsigned long sc_lsize = current_cpu_data.scache.linesz; + + if (size >= scache_size) { + r4k_blast_scache(); + return; + } + + a = addr & ~(sc_lsize - 1); + end = (addr + size - 1) & ~(sc_lsize - 1); + while (1) { + flush_scache_line(a); /* Hit_Writeback_Inv_SD */ + if (a == end) + break; + a += sc_lsize; + } + return; + } + + if (size >= dcache_size) { + r4k_blast_dcache(); + } else { + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + + R4600_HIT_CACHEOP_WAR_IMPL; + a = addr & ~(dc_lsize - 1); + end = (addr + size - 1) & ~(dc_lsize - 1); + while (1) { + flush_dcache_line(a); /* Hit_Writeback_Inv_D */ + if (a == end) + break; + a += dc_lsize; + } + } + + bc_inv(addr, size); +} +#endif /* CONFIG_DMA_NONCOHERENT */ + +/* + * While we're protected against bad userland addresses we don't care + * very much about what happens in that case. Usually a segmentation + * fault will dump the process later on anyway ... + */ +static void local_r4k_flush_cache_sigtramp(void * arg) +{ + unsigned long ic_lsize = current_cpu_data.icache.linesz; + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + unsigned long sc_lsize = current_cpu_data.scache.linesz; + unsigned long addr = (unsigned long) arg; + + R4600_HIT_CACHEOP_WAR_IMPL; + protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); + if (!cpu_icache_snoops_remote_store) + protected_writeback_scache_line(addr & ~(sc_lsize - 1)); + protected_flush_icache_line(addr & ~(ic_lsize - 1)); + if (MIPS4K_ICACHE_REFILL_WAR) { + __asm__ __volatile__ ( + ".set push\n\t" + ".set noat\n\t" + ".set mips3\n\t" +#ifdef CONFIG_MIPS32 + "la $at,1f\n\t" +#endif +#ifdef CONFIG_MIPS64 + "dla $at,1f\n\t" +#endif + "cache %0,($at)\n\t" + "nop; nop; nop\n" + "1:\n\t" + ".set pop" + : + : "i" (Hit_Invalidate_I)); + } + if (MIPS_CACHE_SYNC_WAR) + __asm__ __volatile__ ("sync"); +} + +static void r4k_flush_cache_sigtramp(unsigned long addr) +{ + on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr, 1, 1); +} + +static void r4k_flush_icache_all(void) +{ + if (cpu_has_vtag_icache) + r4k_blast_icache(); +} + +static inline void rm7k_erratum31(void) +{ + const unsigned long ic_lsize = 32; + unsigned long addr; + + /* RM7000 erratum #31. The icache is screwed at startup. */ + write_c0_taglo(0); + write_c0_taghi(0); + + for (addr = INDEX_BASE; addr <= INDEX_BASE + 4096; addr += ic_lsize) { + __asm__ __volatile__ ( + ".set noreorder\n\t" + ".set mips3\n\t" + "cache\t%1, 0(%0)\n\t" + "cache\t%1, 0x1000(%0)\n\t" + "cache\t%1, 0x2000(%0)\n\t" + "cache\t%1, 0x3000(%0)\n\t" + "cache\t%2, 0(%0)\n\t" + "cache\t%2, 0x1000(%0)\n\t" + "cache\t%2, 0x2000(%0)\n\t" + "cache\t%2, 0x3000(%0)\n\t" + "cache\t%1, 0(%0)\n\t" + "cache\t%1, 0x1000(%0)\n\t" + "cache\t%1, 0x2000(%0)\n\t" + "cache\t%1, 0x3000(%0)\n\t" + ".set\tmips0\n\t" + ".set\treorder\n\t" + : + : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill)); + } +} + +static char *way_string[] __initdata = { NULL, "direct mapped", "2-way", + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" +}; + +static void __init probe_pcache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config = read_c0_config(); + unsigned int prid = read_c0_prid(); + unsigned long config1; + unsigned int lsize; + + switch (c->cputype) { + case CPU_R4600: /* QED style two way caches? */ + case CPU_R4700: + case CPU_R5000: + case CPU_NEVADA: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 2; + c->icache.waybit = ffs(icache_size/2) - 1; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 2; + c->dcache.waybit= ffs(dcache_size/2) - 1; + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_R5432: + case CPU_R5500: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 2; + c->icache.waybit= 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 2; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_TX49XX: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 4; + c->icache.waybit= 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 4; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4300: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 1; + c->icache.waybit = 0; /* doesn't matter */ + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 1; + c->dcache.waybit = 0; /* does not matter */ + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_R10000: + case CPU_R12000: + icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29)); + c->icache.linesz = 64; + c->icache.ways = 2; + c->icache.waybit = 0; + + dcache_size = 1 << (12 + ((config & R10K_CONF_DC) >> 26)); + c->dcache.linesz = 32; + c->dcache.ways = 2; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_VR4133: + write_c0_config(config & ~CONF_EB); + case CPU_VR4131: + /* Workaround for cache instruction bug of VR4131 */ + if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U || + c->processor_id == 0x0c82U) { + config &= ~0x00000030U; + config |= 0x00410000U; + write_c0_config(config); + } + icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 2; + c->icache.waybit = ffs(icache_size/2) - 1; + + dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 2; + c->dcache.waybit = ffs(dcache_size/2) - 1; + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4181: + case CPU_VR4181A: + icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 1; + c->icache.waybit = 0; /* doesn't matter */ + + dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 1; + c->dcache.waybit = 0; /* does not matter */ + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_RM7000: + rm7k_erratum31(); + + case CPU_RM9000: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 4; + c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 4; + c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1; + +#if !defined(CONFIG_SMP) || !defined(RM9000_CDEX_SMP_WAR) + c->options |= MIPS_CPU_CACHE_CDEX_P; +#endif + c->options |= MIPS_CPU_PREFETCH; + break; + + default: + if (!(config & MIPS_CONF_M)) + panic("Don't know how to probe P-caches on this cpu."); + + /* + * So we seem to be a MIPS32 or MIPS64 CPU + * So let's probe the I-cache ... + */ + config1 = read_c0_config1(); + + if ((lsize = ((config1 >> 19) & 7))) + c->icache.linesz = 2 << lsize; + else + c->icache.linesz = lsize; + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + + icache_size = c->icache.sets * + c->icache.ways * + c->icache.linesz; + c->icache.waybit = ffs(icache_size/c->icache.ways) - 1; + + if (config & 0x8) /* VI bit */ + c->icache.flags |= MIPS_CACHE_VTAG; + + /* + * Now probe the MIPS32 / MIPS64 data cache. + */ + c->dcache.flags = 0; + + if ((lsize = ((config1 >> 10) & 7))) + c->dcache.linesz = 2 << lsize; + else + c->dcache.linesz= lsize; + c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.ways = 1 + ((config1 >> 7) & 7); + + dcache_size = c->dcache.sets * + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = ffs(dcache_size/c->dcache.ways) - 1; + + c->options |= MIPS_CPU_PREFETCH; + break; + } + + /* + * Processor configuration sanity check for the R4000SC erratum + * #5. With page sizes larger than 32kB there is no possibility + * to get a VCE exception anymore so we don't care about this + * misconfiguration. The case is rather theoretical anyway; + * presumably no vendor is shipping his hardware in the "bad" + * configuration. + */ + if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + !(config & CONF_SC) && c->icache.linesz != 16 && + PAGE_SIZE <= 0x8000) + panic("Improper R4000SC processor configuration detected"); + + /* compute a couple of other cache variables */ + c->icache.waysize = icache_size / c->icache.ways; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways); + c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways); + + /* + * R10000 and R12000 P-caches are odd in a positive way. They're 32kB + * 2-way virtually indexed so normally would suffer from aliases. So + * normally they'd suffer from aliases but magic in the hardware deals + * with that for us so we don't need to take care ourselves. + */ + if (c->cputype != CPU_R10000 && c->cputype != CPU_R12000) + if (c->dcache.waysize > PAGE_SIZE) + c->dcache.flags |= MIPS_CACHE_ALIASES; + + switch (c->cputype) { + case CPU_20KC: + /* + * Some older 20Kc chips doesn't have the 'VI' bit in + * the config register. + */ + c->icache.flags |= MIPS_CACHE_VTAG; + break; + + case CPU_AU1500: + c->icache.flags |= MIPS_CACHE_IC_F_DC; + break; + } + + printk("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", + icache_size >> 10, + cpu_has_vtag_icache ? "virtually tagged" : "physically tagged", + way_string[c->icache.ways], c->icache.linesz); + + printk("Primary data cache %ldkB, %s, linesize %d bytes.\n", + dcache_size >> 10, way_string[c->dcache.ways], c->dcache.linesz); +} + +/* + * If you even _breathe_ on this function, look at the gcc output and make sure + * it does not pop things on and off the stack for the cache sizing loop that + * executes in KSEG1 space or else you will crash and burn badly. You have + * been warned. + */ +static int __init probe_scache(void) +{ + extern unsigned long stext; + unsigned long flags, addr, begin, end, pow2; + unsigned int config = read_c0_config(); + struct cpuinfo_mips *c = ¤t_cpu_data; + int tmp; + + if (config & CONF_SC) + return 0; + + begin = (unsigned long) &stext; + begin &= ~((4 * 1024 * 1024) - 1); + end = begin + (4 * 1024 * 1024); + + /* + * This is such a bitch, you'd think they would make it easy to do + * this. Away you daemons of stupidity! + */ + local_irq_save(flags); + + /* Fill each size-multiple cache line with a valid tag. */ + pow2 = (64 * 1024); + for (addr = begin; addr < end; addr = (begin + pow2)) { + unsigned long *p = (unsigned long *) addr; + __asm__ __volatile__("nop" : : "r" (*p)); /* whee... */ + pow2 <<= 1; + } + + /* Load first line with zero (therefore invalid) tag. */ + write_c0_taglo(0); + write_c0_taghi(0); + __asm__ __volatile__("nop; nop; nop; nop;"); /* avoid the hazard */ + cache_op(Index_Store_Tag_I, begin); + cache_op(Index_Store_Tag_D, begin); + cache_op(Index_Store_Tag_SD, begin); + + /* Now search for the wrap around point. */ + pow2 = (128 * 1024); + tmp = 0; + for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { + cache_op(Index_Load_Tag_SD, addr); + __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ + if (!read_c0_taglo()) + break; + pow2 <<= 1; + } + local_irq_restore(flags); + addr -= begin; + + scache_size = addr; + c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22); + c->scache.ways = 1; + c->dcache.waybit = 0; /* does not matter */ + + return 1; +} + +typedef int (*probe_func_t)(unsigned long); +extern int r5k_sc_init(void); +extern int rm7k_sc_init(void); + +static void __init setup_scache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config = read_c0_config(); + probe_func_t probe_scache_kseg1; + int sc_present = 0; + + /* + * Do the probing thing on R4000SC and R4400SC processors. Other + * processors don't have a S-cache that would be relevant to the + * Linux memory managment. + */ + switch (c->cputype) { + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400SC: + case CPU_R4400MC: + probe_scache_kseg1 = (probe_func_t) (CKSEG1ADDR(&probe_scache)); + sc_present = probe_scache_kseg1(config); + if (sc_present) + c->options |= MIPS_CPU_CACHE_CDEX_S; + break; + + case CPU_R10000: + case CPU_R12000: + scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16); + c->scache.linesz = 64 << ((config >> 13) & 1); + c->scache.ways = 2; + c->scache.waybit= 0; + sc_present = 1; + break; + + case CPU_R5000: + case CPU_NEVADA: +#ifdef CONFIG_R5000_CPU_SCACHE + r5k_sc_init(); +#endif + return; + + case CPU_RM7000: + case CPU_RM9000: +#ifdef CONFIG_RM7000_CPU_SCACHE + rm7k_sc_init(); +#endif + return; + + default: + sc_present = 0; + } + + if (!sc_present) + return; + + if ((c->isa_level == MIPS_CPU_ISA_M32 || + c->isa_level == MIPS_CPU_ISA_M64) && + !(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) + panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); + + /* compute a couple of other cache variables */ + c->scache.waysize = scache_size / c->scache.ways; + + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); + + printk("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + c->options |= MIPS_CPU_SUBSET_CACHES; +} + +static inline void coherency_setup(void) +{ + change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT); + + /* + * c0_status.cu=0 specifies that updates by the sc instruction use + * the coherency mode specified by the TLB; 1 means cachable + * coherent update on write will be used. Not all processors have + * this bit and; some wire it to zero, others like Toshiba had the + * silly idea of putting something else there ... + */ + switch (current_cpu_data.cputype) { + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + clear_c0_config(CONF_CU); + break; + } +} + +void __init ld_mmu_r4xx0(void) +{ + extern void build_clear_page(void); + extern void build_copy_page(void); + extern char except_vec2_generic; + struct cpuinfo_mips *c = ¤t_cpu_data; + + /* Default cache error handler for R4000 and R5000 family */ + memcpy((void *)(CAC_BASE + 0x100), &except_vec2_generic, 0x80); + memcpy((void *)(UNCAC_BASE + 0x100), &except_vec2_generic, 0x80); + + probe_pcache(); + setup_scache(); + + if (c->dcache.sets * c->dcache.ways > PAGE_SIZE) + c->dcache.flags |= MIPS_CACHE_ALIASES; + + r4k_blast_dcache_page_setup(); + r4k_blast_dcache_page_indexed_setup(); + r4k_blast_dcache_setup(); + r4k_blast_icache_page_setup(); + r4k_blast_icache_page_indexed_setup(); + r4k_blast_icache_setup(); + r4k_blast_scache_page_setup(); + r4k_blast_scache_page_indexed_setup(); + r4k_blast_scache_setup(); + + /* + * Some MIPS32 and MIPS64 processors have physically indexed caches. + * This code supports virtually indexed processors and will be + * unnecessarily inefficient on physically indexed processors. + */ + shm_align_mask = max_t( unsigned long, + c->dcache.sets * c->dcache.linesz - 1, + PAGE_SIZE - 1); + + flush_cache_all = r4k_flush_cache_all; + __flush_cache_all = r4k___flush_cache_all; + flush_cache_mm = r4k_flush_cache_mm; + flush_cache_page = r4k_flush_cache_page; + flush_icache_page = r4k_flush_icache_page; + flush_cache_range = r4k_flush_cache_range; + + flush_cache_sigtramp = r4k_flush_cache_sigtramp; + flush_icache_all = r4k_flush_icache_all; + flush_data_cache_page = r4k_flush_data_cache_page; + flush_icache_range = r4k_flush_icache_range; + +#ifdef CONFIG_DMA_NONCOHERENT + _dma_cache_wback_inv = r4k_dma_cache_wback_inv; + _dma_cache_wback = r4k_dma_cache_wback_inv; + _dma_cache_inv = r4k_dma_cache_inv; +#endif + + __flush_cache_all(); + coherency_setup(); + + build_clear_page(); + build_copy_page(); +} diff --git a/arch/mips/mm/c-sb1.c b/arch/mips/mm/c-sb1.c new file mode 100644 index 00000000000..ab30afd63b3 --- /dev/null +++ b/arch/mips/mm/c-sb1.c @@ -0,0 +1,558 @@ +/* + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 2000, 2001, 2002, 2003 Broadcom Corporation + * Copyright (C) 2004 Maciej W. Rozycki + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +extern void sb1_dma_init(void); + +/* These are probed at ld_mmu time */ +static unsigned long icache_size; +static unsigned long dcache_size; + +static unsigned short icache_line_size; +static unsigned short dcache_line_size; + +static unsigned int icache_index_mask; +static unsigned int dcache_index_mask; + +static unsigned short icache_assoc; +static unsigned short dcache_assoc; + +static unsigned short icache_sets; +static unsigned short dcache_sets; + +static unsigned int icache_range_cutoff; +static unsigned int dcache_range_cutoff; + +/* + * The dcache is fully coherent to the system, with one + * big caveat: the instruction stream. In other words, + * if we miss in the icache, and have dirty data in the + * L1 dcache, then we'll go out to memory (or the L2) and + * get the not-as-recent data. + * + * So the only time we have to flush the dcache is when + * we're flushing the icache. Since the L2 is fully + * coherent to everything, including I/O, we never have + * to flush it + */ + +#define cache_set_op(op, addr) \ + __asm__ __volatile__( \ + " .set noreorder \n" \ + " .set mips64\n\t \n" \ + " cache %0, (0<<13)(%1) \n" \ + " cache %0, (1<<13)(%1) \n" \ + " cache %0, (2<<13)(%1) \n" \ + " cache %0, (3<<13)(%1) \n" \ + " .set mips0 \n" \ + " .set reorder" \ + : \ + : "i" (op), "r" (addr)) + +#define sync() \ + __asm__ __volatile( \ + " .set mips64\n\t \n" \ + " sync \n" \ + " .set mips0") + +#define mispredict() \ + __asm__ __volatile__( \ + " bnezl $0, 1f \n" /* Force mispredict */ \ + "1: \n"); + +/* + * Writeback and invalidate the entire dcache + */ +static inline void __sb1_writeback_inv_dcache_all(void) +{ + unsigned long addr = 0; + + while (addr < dcache_line_size * dcache_sets) { + cache_set_op(Index_Writeback_Inv_D, addr); + addr += dcache_line_size; + } +} + +/* + * Writeback and invalidate a range of the dcache. The addresses are + * virtual, and since we're using index ops and bit 12 is part of both + * the virtual frame and physical index, we have to clear both sets + * (bit 12 set and cleared). + */ +static inline void __sb1_writeback_inv_dcache_range(unsigned long start, + unsigned long end) +{ + unsigned long index; + + start &= ~(dcache_line_size - 1); + end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1); + + while (start != end) { + index = start & dcache_index_mask; + cache_set_op(Index_Writeback_Inv_D, index); + cache_set_op(Index_Writeback_Inv_D, index ^ (1<<12)); + start += dcache_line_size; + } + sync(); +} + +/* + * Writeback and invalidate a range of the dcache. With physical + * addresseses, we don't have to worry about possible bit 12 aliasing. + * XXXKW is it worth turning on KX and using hit ops with xkphys? + */ +static inline void __sb1_writeback_inv_dcache_phys_range(unsigned long start, + unsigned long end) +{ + start &= ~(dcache_line_size - 1); + end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1); + + while (start != end) { + cache_set_op(Index_Writeback_Inv_D, start & dcache_index_mask); + start += dcache_line_size; + } + sync(); +} + + +/* + * Invalidate the entire icache + */ +static inline void __sb1_flush_icache_all(void) +{ + unsigned long addr = 0; + + while (addr < icache_line_size * icache_sets) { + cache_set_op(Index_Invalidate_I, addr); + addr += icache_line_size; + } +} + +/* + * Flush the icache for a given physical page. Need to writeback the + * dcache first, then invalidate the icache. If the page isn't + * executable, nothing is required. + */ +static void local_sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) +{ + int cpu = smp_processor_id(); + +#ifndef CONFIG_SMP + if (!(vma->vm_flags & VM_EXEC)) + return; +#endif + + __sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE); + + /* + * Bumping the ASID is probably cheaper than the flush ... + */ + if (cpu_context(cpu, vma->vm_mm) != 0) + drop_mmu_context(vma->vm_mm, cpu); +} + +#ifdef CONFIG_SMP +struct flush_cache_page_args { + struct vm_area_struct *vma; + unsigned long addr; + unsigned long pfn; +}; + +static void sb1_flush_cache_page_ipi(void *info) +{ + struct flush_cache_page_args *args = info; + + local_sb1_flush_cache_page(args->vma, args->addr, args->pfn); +} + +/* Dirty dcache could be on another CPU, so do the IPIs */ +static void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) +{ + struct flush_cache_page_args args; + + if (!(vma->vm_flags & VM_EXEC)) + return; + + addr &= PAGE_MASK; + args.vma = vma; + args.addr = addr; + args.pfn = pfn; + on_each_cpu(sb1_flush_cache_page_ipi, (void *) &args, 1, 1); +} +#else +void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) + __attribute__((alias("local_sb1_flush_cache_page"))); +#endif + +/* + * Invalidate a range of the icache. The addresses are virtual, and + * the cache is virtually indexed and tagged. However, we don't + * necessarily have the right ASID context, so use index ops instead + * of hit ops. + */ +static inline void __sb1_flush_icache_range(unsigned long start, + unsigned long end) +{ + start &= ~(icache_line_size - 1); + end = (end + icache_line_size - 1) & ~(icache_line_size - 1); + + while (start != end) { + cache_set_op(Index_Invalidate_I, start & icache_index_mask); + start += icache_line_size; + } + mispredict(); + sync(); +} + + +/* + * Invalidate all caches on this CPU + */ +static void local_sb1___flush_cache_all(void) +{ + __sb1_writeback_inv_dcache_all(); + __sb1_flush_icache_all(); +} + +#ifdef CONFIG_SMP +void sb1___flush_cache_all_ipi(void *ignored) + __attribute__((alias("local_sb1___flush_cache_all"))); + +static void sb1___flush_cache_all(void) +{ + on_each_cpu(sb1___flush_cache_all_ipi, 0, 1, 1); +} +#else +void sb1___flush_cache_all(void) + __attribute__((alias("local_sb1___flush_cache_all"))); +#endif + +/* + * When flushing a range in the icache, we have to first writeback + * the dcache for the same range, so new ifetches will see any + * data that was dirty in the dcache. + * + * The start/end arguments are Kseg addresses (possibly mapped Kseg). + */ + +static void local_sb1_flush_icache_range(unsigned long start, + unsigned long end) +{ + /* Just wb-inv the whole dcache if the range is big enough */ + if ((end - start) > dcache_range_cutoff) + __sb1_writeback_inv_dcache_all(); + else + __sb1_writeback_inv_dcache_range(start, end); + + /* Just flush the whole icache if the range is big enough */ + if ((end - start) > icache_range_cutoff) + __sb1_flush_icache_all(); + else + __sb1_flush_icache_range(start, end); +} + +#ifdef CONFIG_SMP +struct flush_icache_range_args { + unsigned long start; + unsigned long end; +}; + +static void sb1_flush_icache_range_ipi(void *info) +{ + struct flush_icache_range_args *args = info; + + local_sb1_flush_icache_range(args->start, args->end); +} + +void sb1_flush_icache_range(unsigned long start, unsigned long end) +{ + struct flush_icache_range_args args; + + args.start = start; + args.end = end; + on_each_cpu(sb1_flush_icache_range_ipi, &args, 1, 1); +} +#else +void sb1_flush_icache_range(unsigned long start, unsigned long end) + __attribute__((alias("local_sb1_flush_icache_range"))); +#endif + +/* + * Flush the icache for a given physical page. Need to writeback the + * dcache first, then invalidate the icache. If the page isn't + * executable, nothing is required. + */ +static void local_sb1_flush_icache_page(struct vm_area_struct *vma, + struct page *page) +{ + unsigned long start; + int cpu = smp_processor_id(); + +#ifndef CONFIG_SMP + if (!(vma->vm_flags & VM_EXEC)) + return; +#endif + + /* Need to writeback any dirty data for that page, we have the PA */ + start = (unsigned long)(page-mem_map) << PAGE_SHIFT; + __sb1_writeback_inv_dcache_phys_range(start, start + PAGE_SIZE); + /* + * If there's a context, bump the ASID (cheaper than a flush, + * since we don't know VAs!) + */ + if (cpu_context(cpu, vma->vm_mm) != 0) { + drop_mmu_context(vma->vm_mm, cpu); + } +} + +#ifdef CONFIG_SMP +struct flush_icache_page_args { + struct vm_area_struct *vma; + struct page *page; +}; + +static void sb1_flush_icache_page_ipi(void *info) +{ + struct flush_icache_page_args *args = info; + local_sb1_flush_icache_page(args->vma, args->page); +} + +/* Dirty dcache could be on another CPU, so do the IPIs */ +static void sb1_flush_icache_page(struct vm_area_struct *vma, + struct page *page) +{ + struct flush_icache_page_args args; + + if (!(vma->vm_flags & VM_EXEC)) + return; + args.vma = vma; + args.page = page; + on_each_cpu(sb1_flush_icache_page_ipi, (void *) &args, 1, 1); +} +#else +void sb1_flush_icache_page(struct vm_area_struct *vma, struct page *page) + __attribute__((alias("local_sb1_flush_icache_page"))); +#endif + +/* + * A signal trampoline must fit into a single cacheline. + */ +static void local_sb1_flush_cache_sigtramp(unsigned long addr) +{ + cache_set_op(Index_Writeback_Inv_D, addr & dcache_index_mask); + cache_set_op(Index_Writeback_Inv_D, (addr ^ (1<<12)) & dcache_index_mask); + cache_set_op(Index_Invalidate_I, addr & icache_index_mask); + mispredict(); +} + +#ifdef CONFIG_SMP +static void sb1_flush_cache_sigtramp_ipi(void *info) +{ + unsigned long iaddr = (unsigned long) info; + local_sb1_flush_cache_sigtramp(iaddr); +} + +static void sb1_flush_cache_sigtramp(unsigned long addr) +{ + on_each_cpu(sb1_flush_cache_sigtramp_ipi, (void *) addr, 1, 1); +} +#else +void sb1_flush_cache_sigtramp(unsigned long addr) + __attribute__((alias("local_sb1_flush_cache_sigtramp"))); +#endif + + +/* + * Anything that just flushes dcache state can be ignored, as we're always + * coherent in dcache space. This is just a dummy function that all the + * nop'ed routines point to + */ +static void sb1_nop(void) +{ +} + +/* + * Cache set values (from the mips64 spec) + * 0 - 64 + * 1 - 128 + * 2 - 256 + * 3 - 512 + * 4 - 1024 + * 5 - 2048 + * 6 - 4096 + * 7 - Reserved + */ + +static unsigned int decode_cache_sets(unsigned int config_field) +{ + if (config_field == 7) { + /* JDCXXX - Find a graceful way to abort. */ + return 0; + } + return (1<<(config_field + 6)); +} + +/* + * Cache line size values (from the mips64 spec) + * 0 - No cache present. + * 1 - 4 bytes + * 2 - 8 bytes + * 3 - 16 bytes + * 4 - 32 bytes + * 5 - 64 bytes + * 6 - 128 bytes + * 7 - Reserved + */ + +static unsigned int decode_cache_line_size(unsigned int config_field) +{ + if (config_field == 0) { + return 0; + } else if (config_field == 7) { + /* JDCXXX - Find a graceful way to abort. */ + return 0; + } + return (1<<(config_field + 1)); +} + +/* + * Relevant bits of the config1 register format (from the MIPS32/MIPS64 specs) + * + * 24:22 Icache sets per way + * 21:19 Icache line size + * 18:16 Icache Associativity + * 15:13 Dcache sets per way + * 12:10 Dcache line size + * 9:7 Dcache Associativity + */ + +static char *way_string[] = { + "direct mapped", "2-way", "3-way", "4-way", + "5-way", "6-way", "7-way", "8-way", +}; + +static __init void probe_cache_sizes(void) +{ + u32 config1; + + config1 = read_c0_config1(); + icache_line_size = decode_cache_line_size((config1 >> 19) & 0x7); + dcache_line_size = decode_cache_line_size((config1 >> 10) & 0x7); + icache_sets = decode_cache_sets((config1 >> 22) & 0x7); + dcache_sets = decode_cache_sets((config1 >> 13) & 0x7); + icache_assoc = ((config1 >> 16) & 0x7) + 1; + dcache_assoc = ((config1 >> 7) & 0x7) + 1; + icache_size = icache_line_size * icache_sets * icache_assoc; + dcache_size = dcache_line_size * dcache_sets * dcache_assoc; + /* Need to remove non-index bits for index ops */ + icache_index_mask = (icache_sets - 1) * icache_line_size; + dcache_index_mask = (dcache_sets - 1) * dcache_line_size; + /* + * These are for choosing range (index ops) versus all. + * icache flushes all ways for each set, so drop icache_assoc. + * dcache flushes all ways and each setting of bit 12 for each + * index, so drop dcache_assoc and halve the dcache_sets. + */ + icache_range_cutoff = icache_sets * icache_line_size; + dcache_range_cutoff = (dcache_sets / 2) * icache_line_size; + + printk("Primary instruction cache %ldkB, %s, linesize %d bytes.\n", + icache_size >> 10, way_string[icache_assoc - 1], + icache_line_size); + printk("Primary data cache %ldkB, %s, linesize %d bytes.\n", + dcache_size >> 10, way_string[dcache_assoc - 1], + dcache_line_size); +} + +/* + * This is called from loadmmu.c. We have to set up all the + * memory management function pointers, as well as initialize + * the caches and tlbs + */ +void ld_mmu_sb1(void) +{ + extern char except_vec2_sb1; + extern char handle_vec2_sb1; + + /* Special cache error handler for SB1 */ + memcpy((void *)(CAC_BASE + 0x100), &except_vec2_sb1, 0x80); + memcpy((void *)(UNCAC_BASE + 0x100), &except_vec2_sb1, 0x80); + memcpy((void *)CKSEG1ADDR(&handle_vec2_sb1), &handle_vec2_sb1, 0x80); + + probe_cache_sizes(); + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS + sb1_dma_init(); +#endif + + /* + * None of these are needed for the SB1 - the Dcache is + * physically indexed and tagged, so no virtual aliasing can + * occur + */ + flush_cache_range = (void *) sb1_nop; + flush_cache_mm = (void (*)(struct mm_struct *))sb1_nop; + flush_cache_all = sb1_nop; + + /* These routines are for Icache coherence with the Dcache */ + flush_icache_range = sb1_flush_icache_range; + flush_icache_page = sb1_flush_icache_page; + flush_icache_all = __sb1_flush_icache_all; /* local only */ + + /* This implies an Icache flush too, so can't be nop'ed */ + flush_cache_page = sb1_flush_cache_page; + + flush_cache_sigtramp = sb1_flush_cache_sigtramp; + flush_data_cache_page = (void *) sb1_nop; + + /* Full flush */ + __flush_cache_all = sb1___flush_cache_all; + + change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT); + + /* + * This is the only way to force the update of K0 to complete + * before subsequent instruction fetch. + */ + __asm__ __volatile__( + ".set push \n" + " .set noat \n" + " .set noreorder \n" + " .set mips3 \n" + " " STR(PTR_LA) " $1, 1f \n" + " " STR(MTC0) " $1, $14 \n" + " eret \n" + "1: .set pop" + : + : + : "memory"); + + flush_cache_all(); +} diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c new file mode 100644 index 00000000000..ff5afab64b2 --- /dev/null +++ b/arch/mips/mm/c-tx39.c @@ -0,0 +1,493 @@ +/* + * r2300.c: R2000 and R3000 specific mmu/cache code. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * + * with a lot of changes to make this thing work for R3000s + * Tx39XX R4k style caches added. HK + * Copyright (C) 1998, 1999, 2000 Harald Koerfgen + * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* For R3000 cores with R4000 style caches */ +static unsigned long icache_size, dcache_size; /* Size in bytes */ + +#include + +extern int r3k_have_wired_reg; /* in r3k-tlb.c */ + +/* This sequence is required to ensure icache is disabled immediately */ +#define TX39_STOP_STREAMING() \ +__asm__ __volatile__( \ + ".set push\n\t" \ + ".set noreorder\n\t" \ + "b 1f\n\t" \ + "nop\n\t" \ + "1:\n\t" \ + ".set pop" \ + ) + +/* TX39H-style cache flush routines. */ +static void tx39h_flush_icache_all(void) +{ + unsigned long start = KSEG0; + unsigned long end = (start + icache_size); + unsigned long flags, config; + + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + + /* invalidate icache */ + while (start < end) { + cache16_unroll32(start, Index_Invalidate_I); + start += 0x200; + } + + write_c0_conf(config); + local_irq_restore(flags); +} + +static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + + /* Catch bad driver code */ + BUG_ON(size == 0); + + iob(); + a = addr & ~(dc_lsize - 1); + end = (addr + size - 1) & ~(dc_lsize - 1); + while (1) { + invalidate_dcache_line(a); /* Hit_Invalidate_D */ + if (a == end) break; + a += dc_lsize; + } +} + + +/* TX39H2,TX39H3 */ +static inline void tx39_blast_dcache_page(unsigned long addr) +{ + if (current_cpu_data.cputype != CPU_TX3912) + blast_dcache16_page(addr); +} + +static inline void tx39_blast_dcache_page_indexed(unsigned long addr) +{ + blast_dcache16_page_indexed(addr); +} + +static inline void tx39_blast_dcache(void) +{ + blast_dcache16(); +} + +static inline void tx39_blast_icache_page(unsigned long addr) +{ + unsigned long flags, config; + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + blast_icache16_page(addr); + write_c0_conf(config); + local_irq_restore(flags); +} + +static inline void tx39_blast_icache_page_indexed(unsigned long addr) +{ + unsigned long flags, config; + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + blast_icache16_page_indexed(addr); + write_c0_conf(config); + local_irq_restore(flags); +} + +static inline void tx39_blast_icache(void) +{ + unsigned long flags, config; + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + blast_icache16(); + write_c0_conf(config); + local_irq_restore(flags); +} + +static inline void tx39_flush_cache_all(void) +{ + if (!cpu_has_dc_aliases) + return; + + tx39_blast_dcache(); + tx39_blast_icache(); +} + +static inline void tx39___flush_cache_all(void) +{ + tx39_blast_dcache(); + tx39_blast_icache(); +} + +static void tx39_flush_cache_mm(struct mm_struct *mm) +{ + if (!cpu_has_dc_aliases) + return; + + if (cpu_context(smp_processor_id(), mm) != 0) { + tx39_flush_cache_all(); + } +} + +static void tx39_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (!cpu_has_dc_aliases) + return; + + if (cpu_context(smp_processor_id(), mm) != 0) { + tx39_blast_dcache(); + tx39_blast_icache(); + } +} + +static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) +{ + int exec = vma->vm_flags & VM_EXEC; + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + + /* + * If ownes no valid ASID yet, cannot possibly have gotten + * this page into the cache. + */ + if (cpu_context(smp_processor_id(), mm) == 0) + return; + + page &= PAGE_MASK; + pgdp = pgd_offset(mm, page); + pmdp = pmd_offset(pgdp, page); + ptep = pte_offset(pmdp, page); + + /* + * If the page isn't marked valid, the page cannot possibly be + * in the cache. + */ + if (!(pte_val(*ptep) & _PAGE_PRESENT)) + return; + + /* + * Doing flushes for another ASID than the current one is + * too difficult since stupid R4k caches do a TLB translation + * for every cache flush operation. So we do indexed flushes + * in that case, which doesn't overly flush the cache too much. + */ + if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { + if (cpu_has_dc_aliases || exec) + tx39_blast_dcache_page(page); + if (exec) + tx39_blast_icache_page(page); + + return; + } + + /* + * Do indexed flush, too much work to get the (possible) TLB refills + * to work correctly. + */ + page = (KSEG0 + (page & (dcache_size - 1))); + if (cpu_has_dc_aliases || exec) + tx39_blast_dcache_page_indexed(page); + if (exec) + tx39_blast_icache_page_indexed(page); +} + +static void tx39_flush_data_cache_page(unsigned long addr) +{ + tx39_blast_dcache_page(addr); +} + +static void tx39_flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + unsigned long addr, aend; + + if (end - start > dcache_size) + tx39_blast_dcache(); + else { + addr = start & ~(dc_lsize - 1); + aend = (end - 1) & ~(dc_lsize - 1); + + while (1) { + /* Hit_Writeback_Inv_D */ + protected_writeback_dcache_line(addr); + if (addr == aend) + break; + addr += dc_lsize; + } + } + + if (end - start > icache_size) + tx39_blast_icache(); + else { + unsigned long flags, config; + addr = start & ~(dc_lsize - 1); + aend = (end - 1) & ~(dc_lsize - 1); + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + while (1) { + /* Hit_Invalidate_I */ + protected_flush_icache_line(addr); + if (addr == aend) + break; + addr += dc_lsize; + } + write_c0_conf(config); + local_irq_restore(flags); + } +} + +/* + * Ok, this seriously sucks. We use them to flush a user page but don't + * know the virtual address, so we have to blast away the whole icache + * which is significantly more expensive than the real thing. Otoh we at + * least know the kernel address of the page so we can flush it + * selectivly. + */ +static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + unsigned long addr; + /* + * If there's no context yet, or the page isn't executable, no icache + * flush is needed. + */ + if (!(vma->vm_flags & VM_EXEC)) + return; + + addr = (unsigned long) page_address(page); + tx39_blast_dcache_page(addr); + + /* + * We're not sure of the virtual address(es) involved here, so + * we have to flush the entire I-cache. + */ + tx39_blast_icache(); +} + +static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + if (((size | addr) & (PAGE_SIZE - 1)) == 0) { + end = addr + size; + do { + tx39_blast_dcache_page(addr); + addr += PAGE_SIZE; + } while(addr != end); + } else if (size > dcache_size) { + tx39_blast_dcache(); + } else { + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + a = addr & ~(dc_lsize - 1); + end = (addr + size - 1) & ~(dc_lsize - 1); + while (1) { + flush_dcache_line(a); /* Hit_Writeback_Inv_D */ + if (a == end) break; + a += dc_lsize; + } + } +} + +static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + if (((size | addr) & (PAGE_SIZE - 1)) == 0) { + end = addr + size; + do { + tx39_blast_dcache_page(addr); + addr += PAGE_SIZE; + } while(addr != end); + } else if (size > dcache_size) { + tx39_blast_dcache(); + } else { + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + a = addr & ~(dc_lsize - 1); + end = (addr + size - 1) & ~(dc_lsize - 1); + while (1) { + invalidate_dcache_line(a); /* Hit_Invalidate_D */ + if (a == end) break; + a += dc_lsize; + } + } +} + +static void tx39_flush_cache_sigtramp(unsigned long addr) +{ + unsigned long ic_lsize = current_cpu_data.icache.linesz; + unsigned long dc_lsize = current_cpu_data.dcache.linesz; + unsigned long config; + unsigned long flags; + + protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); + + /* disable icache (set ICE#) */ + local_irq_save(flags); + config = read_c0_conf(); + write_c0_conf(config & ~TX39_CONF_ICE); + TX39_STOP_STREAMING(); + protected_flush_icache_line(addr & ~(ic_lsize - 1)); + write_c0_conf(config); + local_irq_restore(flags); +} + +static __init void tx39_probe_cache(void) +{ + unsigned long config; + + config = read_c0_conf(); + + icache_size = 1 << (10 + ((config & TX39_CONF_ICS_MASK) >> + TX39_CONF_ICS_SHIFT)); + dcache_size = 1 << (10 + ((config & TX39_CONF_DCS_MASK) >> + TX39_CONF_DCS_SHIFT)); + + current_cpu_data.icache.linesz = 16; + switch (current_cpu_data.cputype) { + case CPU_TX3912: + current_cpu_data.icache.ways = 1; + current_cpu_data.dcache.ways = 1; + current_cpu_data.dcache.linesz = 4; + break; + + case CPU_TX3927: + current_cpu_data.icache.ways = 2; + current_cpu_data.dcache.ways = 2; + current_cpu_data.dcache.linesz = 16; + break; + + case CPU_TX3922: + default: + current_cpu_data.icache.ways = 1; + current_cpu_data.dcache.ways = 1; + current_cpu_data.dcache.linesz = 16; + break; + } +} + +void __init ld_mmu_tx39(void) +{ + extern void build_clear_page(void); + extern void build_copy_page(void); + unsigned long config; + + config = read_c0_conf(); + config &= ~TX39_CONF_WBON; + write_c0_conf(config); + + tx39_probe_cache(); + + switch (current_cpu_data.cputype) { + case CPU_TX3912: + /* TX39/H core (writethru direct-map cache) */ + flush_cache_all = tx39h_flush_icache_all; + __flush_cache_all = tx39h_flush_icache_all; + flush_cache_mm = (void *) tx39h_flush_icache_all; + flush_cache_range = (void *) tx39h_flush_icache_all; + flush_cache_page = (void *) tx39h_flush_icache_all; + flush_icache_page = (void *) tx39h_flush_icache_all; + flush_icache_range = (void *) tx39h_flush_icache_all; + + flush_cache_sigtramp = (void *) tx39h_flush_icache_all; + flush_data_cache_page = (void *) tx39h_flush_icache_all; + + _dma_cache_wback_inv = tx39h_dma_cache_wback_inv; + + shm_align_mask = PAGE_SIZE - 1; + + break; + + case CPU_TX3922: + case CPU_TX3927: + default: + /* TX39/H2,H3 core (writeback 2way-set-associative cache) */ + r3k_have_wired_reg = 1; + write_c0_wired(0); /* set 8 on reset... */ + /* board-dependent init code may set WBON */ + + flush_cache_all = tx39_flush_cache_all; + __flush_cache_all = tx39___flush_cache_all; + flush_cache_mm = tx39_flush_cache_mm; + flush_cache_range = tx39_flush_cache_range; + flush_cache_page = tx39_flush_cache_page; + flush_icache_page = tx39_flush_icache_page; + flush_icache_range = tx39_flush_icache_range; + + flush_cache_sigtramp = tx39_flush_cache_sigtramp; + flush_data_cache_page = tx39_flush_data_cache_page; + + _dma_cache_wback_inv = tx39_dma_cache_wback_inv; + _dma_cache_wback = tx39_dma_cache_wback_inv; + _dma_cache_inv = tx39_dma_cache_inv; + + shm_align_mask = max_t(unsigned long, + (dcache_size / current_cpu_data.dcache.ways) - 1, + PAGE_SIZE - 1); + + break; + } + + current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways; + current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways; + + current_cpu_data.icache.sets = + current_cpu_data.icache.waysize / current_cpu_data.icache.linesz; + current_cpu_data.dcache.sets = + current_cpu_data.dcache.waysize / current_cpu_data.dcache.linesz; + + if (current_cpu_data.dcache.waysize > PAGE_SIZE) + current_cpu_data.dcache.flags |= MIPS_CACHE_ALIASES; + + current_cpu_data.icache.waybit = 0; + current_cpu_data.dcache.waybit = 0; + + printk("Primary instruction cache %ldkB, linesize %d bytes\n", + icache_size >> 10, current_cpu_data.icache.linesz); + printk("Primary data cache %ldkB, linesize %d bytes\n", + dcache_size >> 10, current_cpu_data.dcache.linesz); + + build_clear_page(); + build_copy_page(); +} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c new file mode 100644 index 00000000000..1d95cdb77be --- /dev/null +++ b/arch/mips/mm/cache.c @@ -0,0 +1,157 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 - 2003 by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Cache operations. */ +void (*flush_cache_all)(void); +void (*__flush_cache_all)(void); +void (*flush_cache_mm)(struct mm_struct *mm); +void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); +void (*flush_icache_range)(unsigned long start, unsigned long end); +void (*flush_icache_page)(struct vm_area_struct *vma, struct page *page); + +/* MIPS specific cache operations */ +void (*flush_cache_sigtramp)(unsigned long addr); +void (*flush_data_cache_page)(unsigned long addr); +void (*flush_icache_all)(void); + +#ifdef CONFIG_DMA_NONCOHERENT + +/* DMA cache operations. */ +void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); +void (*_dma_cache_wback)(unsigned long start, unsigned long size); +void (*_dma_cache_inv)(unsigned long start, unsigned long size); + +EXPORT_SYMBOL(_dma_cache_wback_inv); +EXPORT_SYMBOL(_dma_cache_wback); +EXPORT_SYMBOL(_dma_cache_inv); + +#endif /* CONFIG_DMA_NONCOHERENT */ + +/* + * We could optimize the case where the cache argument is not BCACHE but + * that seems very atypical use ... + */ +asmlinkage int sys_cacheflush(unsigned long addr, unsigned long int bytes, + unsigned int cache) +{ + if (!access_ok(VERIFY_WRITE, (void *) addr, bytes)) + return -EFAULT; + + flush_icache_range(addr, addr + bytes); + + return 0; +} + +void __flush_dcache_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + unsigned long addr; + + if (mapping && !mapping_mapped(mapping)) { + SetPageDcacheDirty(page); + return; + } + + /* + * We could delay the flush for the !page_mapping case too. But that + * case is for exec env/arg pages and those are %99 certainly going to + * get faulted into the tlb (and thus flushed) anyways. + */ + addr = (unsigned long) page_address(page); + flush_data_cache_page(addr); +} + +EXPORT_SYMBOL(__flush_dcache_page); + +void __update_cache(struct vm_area_struct *vma, unsigned long address, + pte_t pte) +{ + struct page *page; + unsigned long pfn, addr; + + pfn = pte_pfn(pte); + if (pfn_valid(pfn) && (page = pfn_to_page(pfn), page_mapping(page)) && + Page_dcache_dirty(page)) { + if (pages_do_alias((unsigned long)page_address(page), + address & PAGE_MASK)) { + addr = (unsigned long) page_address(page); + flush_data_cache_page(addr); + } + + ClearPageDcacheDirty(page); + } +} + +extern void ld_mmu_r23000(void); +extern void ld_mmu_r4xx0(void); +extern void ld_mmu_tx39(void); +extern void ld_mmu_r6000(void); +extern void ld_mmu_tfp(void); +extern void ld_mmu_andes(void); +extern void ld_mmu_sb1(void); + +void __init cpu_cache_init(void) +{ + if (cpu_has_4ktlb) { +#if defined(CONFIG_CPU_R4X00) || defined(CONFIG_CPU_VR41XX) || \ + defined(CONFIG_CPU_R4300) || defined(CONFIG_CPU_R5000) || \ + defined(CONFIG_CPU_NEVADA) || defined(CONFIG_CPU_R5432) || \ + defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_MIPS32) || \ + defined(CONFIG_CPU_MIPS64) || defined(CONFIG_CPU_TX49XX) || \ + defined(CONFIG_CPU_RM7000) || defined(CONFIG_CPU_RM9000) + ld_mmu_r4xx0(); +#endif + } else switch (current_cpu_data.cputype) { +#ifdef CONFIG_CPU_R3000 + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3081E: + ld_mmu_r23000(); + break; +#endif +#ifdef CONFIG_CPU_TX39XX + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: + ld_mmu_tx39(); + break; +#endif +#ifdef CONFIG_CPU_R10000 + case CPU_R10000: + case CPU_R12000: + ld_mmu_r4xx0(); + break; +#endif +#ifdef CONFIG_CPU_SB1 + case CPU_SB1: + ld_mmu_sb1(); + break; +#endif + + case CPU_R8000: + panic("R8000 is unsupported"); + break; + + default: + panic("Yeee, unsupported cache architecture."); + } +} diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c new file mode 100644 index 00000000000..13d96d62764 --- /dev/null +++ b/arch/mips/mm/cerr-sb1.c @@ -0,0 +1,543 @@ +/* + * Copyright (C) 2001,2002,2003 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include +#include +#include + +#ifndef CONFIG_SIBYTE_BUS_WATCHER +#include +#include +#include +#endif + +/* SB1 definitions */ + +/* XXX should come from config1 XXX */ +#define SB1_CACHE_INDEX_MASK 0x1fe0 + +#define CP0_ERRCTL_RECOVERABLE (1 << 31) +#define CP0_ERRCTL_DCACHE (1 << 30) +#define CP0_ERRCTL_ICACHE (1 << 29) +#define CP0_ERRCTL_MULTIBUS (1 << 23) +#define CP0_ERRCTL_MC_TLB (1 << 15) +#define CP0_ERRCTL_MC_TIMEOUT (1 << 14) + +#define CP0_CERRI_TAG_PARITY (1 << 29) +#define CP0_CERRI_DATA_PARITY (1 << 28) +#define CP0_CERRI_EXTERNAL (1 << 26) + +#define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL)) +#define CP0_CERRI_DATA (CP0_CERRI_DATA_PARITY) + +#define CP0_CERRD_MULTIPLE (1 << 31) +#define CP0_CERRD_TAG_STATE (1 << 30) +#define CP0_CERRD_TAG_ADDRESS (1 << 29) +#define CP0_CERRD_DATA_SBE (1 << 28) +#define CP0_CERRD_DATA_DBE (1 << 27) +#define CP0_CERRD_EXTERNAL (1 << 26) +#define CP0_CERRD_LOAD (1 << 25) +#define CP0_CERRD_STORE (1 << 24) +#define CP0_CERRD_FILLWB (1 << 23) +#define CP0_CERRD_COHERENCY (1 << 22) +#define CP0_CERRD_DUPTAG (1 << 21) + +#define CP0_CERRD_DPA_VALID(c) (!((c) & CP0_CERRD_EXTERNAL)) +#define CP0_CERRD_IDX_VALID(c) \ + (((c) & (CP0_CERRD_LOAD | CP0_CERRD_STORE)) ? (!((c) & CP0_CERRD_EXTERNAL)) : 0) +#define CP0_CERRD_CAUSES \ + (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG) +#define CP0_CERRD_TYPES \ + (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL) +#define CP0_CERRD_DATA (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) + +static uint32_t extract_ic(unsigned short addr, int data); +static uint32_t extract_dc(unsigned short addr, int data); + +static inline void breakout_errctl(unsigned int val) +{ + if (val & CP0_ERRCTL_RECOVERABLE) + prom_printf(" recoverable"); + if (val & CP0_ERRCTL_DCACHE) + prom_printf(" dcache"); + if (val & CP0_ERRCTL_ICACHE) + prom_printf(" icache"); + if (val & CP0_ERRCTL_MULTIBUS) + prom_printf(" multiple-buserr"); + prom_printf("\n"); +} + +static inline void breakout_cerri(unsigned int val) +{ + if (val & CP0_CERRI_TAG_PARITY) + prom_printf(" tag-parity"); + if (val & CP0_CERRI_DATA_PARITY) + prom_printf(" data-parity"); + if (val & CP0_CERRI_EXTERNAL) + prom_printf(" external"); + prom_printf("\n"); +} + +static inline void breakout_cerrd(unsigned int val) +{ + switch (val & CP0_CERRD_CAUSES) { + case CP0_CERRD_LOAD: + prom_printf(" load,"); + break; + case CP0_CERRD_STORE: + prom_printf(" store,"); + break; + case CP0_CERRD_FILLWB: + prom_printf(" fill/wb,"); + break; + case CP0_CERRD_COHERENCY: + prom_printf(" coherency,"); + break; + case CP0_CERRD_DUPTAG: + prom_printf(" duptags,"); + break; + default: + prom_printf(" NO CAUSE,"); + break; + } + if (!(val & CP0_CERRD_TYPES)) + prom_printf(" NO TYPE"); + else { + if (val & CP0_CERRD_MULTIPLE) + prom_printf(" multi-err"); + if (val & CP0_CERRD_TAG_STATE) + prom_printf(" tag-state"); + if (val & CP0_CERRD_TAG_ADDRESS) + prom_printf(" tag-address"); + if (val & CP0_CERRD_DATA_SBE) + prom_printf(" data-SBE"); + if (val & CP0_CERRD_DATA_DBE) + prom_printf(" data-DBE"); + if (val & CP0_CERRD_EXTERNAL) + prom_printf(" external"); + } + prom_printf("\n"); +} + +#ifndef CONFIG_SIBYTE_BUS_WATCHER + +static void check_bus_watcher(void) +{ + uint32_t status, l2_err, memio_err; + + /* Destructive read, clears register and interrupt */ + status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); + /* Bit 31 is always on, but there's no #define for that */ + if (status & ~(1UL << 31)) { + l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); + memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); + prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); + prom_printf("\nLast recorded signature:\n"); + prom_printf("Request %02x from %d, answered by %d with Dcode %d\n", + (unsigned int)(G_SCD_BERR_TID(status) & 0x3f), + (int)(G_SCD_BERR_TID(status) >> 6), + (int)G_SCD_BERR_RID(status), + (int)G_SCD_BERR_DCODE(status)); + } else { + prom_printf("Bus watcher indicates no error\n"); + } +} +#else +extern void check_bus_watcher(void); +#endif + +asmlinkage void sb1_cache_error(void) +{ + uint64_t cerr_dpa; + uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; + + prom_printf("Cache error exception on CPU %x:\n", + (read_c0_prid() >> 25) & 0x7); + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " mfc0 %0, $26\n\t" + " mfc0 %1, $27\n\t" + " mfc0 %2, $27, 1\n\t" + " dmfc0 $1, $27, 3\n\t" + " dsrl32 %3, $1, 0 \n\t" + " sll %4, $1, 0 \n\t" + " mfc0 %5, $30\n\t" + " .set pop" + : "=r" (errctl), "=r" (cerr_i), "=r" (cerr_d), + "=r" (dpahi), "=r" (dpalo), "=r" (eepc)); + + cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo; + prom_printf(" c0_errorepc == %08x\n", eepc); + prom_printf(" c0_errctl == %08x", errctl); + breakout_errctl(errctl); + if (errctl & CP0_ERRCTL_ICACHE) { + prom_printf(" c0_cerr_i == %08x", cerr_i); + breakout_cerri(cerr_i); + if (CP0_CERRI_IDX_VALID(cerr_i)) { + /* Check index of EPC, allowing for delay slot */ + if (((eepc & SB1_CACHE_INDEX_MASK) != (cerr_i & SB1_CACHE_INDEX_MASK)) && + ((eepc & SB1_CACHE_INDEX_MASK) != ((cerr_i & SB1_CACHE_INDEX_MASK) - 4))) + prom_printf(" cerr_i idx doesn't match eepc\n"); + else { + res = extract_ic(cerr_i & SB1_CACHE_INDEX_MASK, + (cerr_i & CP0_CERRI_DATA) != 0); + if (!(res & cerr_i)) + prom_printf("...didn't see indicated icache problem\n"); + } + } + } + if (errctl & CP0_ERRCTL_DCACHE) { + prom_printf(" c0_cerr_d == %08x", cerr_d); + breakout_cerrd(cerr_d); + if (CP0_CERRD_DPA_VALID(cerr_d)) { + prom_printf(" c0_cerr_dpa == %010llx\n", cerr_dpa); + if (!CP0_CERRD_IDX_VALID(cerr_d)) { + res = extract_dc(cerr_dpa & SB1_CACHE_INDEX_MASK, + (cerr_d & CP0_CERRD_DATA) != 0); + if (!(res & cerr_d)) + prom_printf("...didn't see indicated dcache problem\n"); + } else { + if ((cerr_dpa & SB1_CACHE_INDEX_MASK) != (cerr_d & SB1_CACHE_INDEX_MASK)) + prom_printf(" cerr_d idx doesn't match cerr_dpa\n"); + else { + res = extract_dc(cerr_d & SB1_CACHE_INDEX_MASK, + (cerr_d & CP0_CERRD_DATA) != 0); + if (!(res & cerr_d)) + prom_printf("...didn't see indicated problem\n"); + } + } + } + } + + check_bus_watcher(); + + while (1); + /* + * This tends to make things get really ugly; let's just stall instead. + * panic("Can't handle the cache error!"); + */ +} + + +/* Parity lookup table. */ +static const uint8_t parity[256] = { + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, + 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0 +}; + +/* Masks to select bits for Hamming parity, mask_72_64[i] for bit[i] */ +static const uint64_t mask_72_64[8] = { + 0x0738C808099264FFULL, + 0x38C808099264FF07ULL, + 0xC808099264FF0738ULL, + 0x08099264FF0738C8ULL, + 0x099264FF0738C808ULL, + 0x9264FF0738C80809ULL, + 0x64FF0738C8080992ULL, + 0xFF0738C808099264ULL +}; + +/* Calculate the parity on a range of bits */ +static char range_parity(uint64_t dword, int max, int min) +{ + char parity = 0; + int i; + dword >>= min; + for (i=max-min; i>=0; i--) { + if (dword & 0x1) + parity = !parity; + dword >>= 1; + } + return parity; +} + +/* Calculate the 4-bit even byte-parity for an instruction */ +static unsigned char inst_parity(uint32_t word) +{ + int i, j; + char parity = 0; + for (j=0; j<4; j++) { + char byte_parity = 0; + for (i=0; i<8; i++) { + if (word & 0x80000000) + byte_parity = !byte_parity; + word <<= 1; + } + parity <<= 1; + parity |= byte_parity; + } + return parity; +} + +static uint32_t extract_ic(unsigned short addr, int data) +{ + unsigned short way; + int valid; + uint64_t taglo, va, tlo_tmp; + uint32_t taghi, taglolo, taglohi; + uint8_t lru; + int res = 0; + + prom_printf("Icache index 0x%04x ", addr); + for (way = 0; way < 4; way++) { + /* Index-load-tag-I */ + __asm__ __volatile__ ( + " .set push \n\t" + " .set noreorder \n\t" + " .set mips64 \n\t" + " .set noat \n\t" + " cache 4, 0(%3) \n\t" + " mfc0 %0, $29 \n\t" + " dmfc0 $1, $28 \n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop" + : "=r" (taghi), "=r" (taglohi), "=r" (taglolo) + : "r" ((way << 13) | addr)); + + taglo = ((unsigned long long)taglohi << 32) | taglolo; + if (way == 0) { + lru = (taghi >> 14) & 0xff; + prom_printf("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + ((addr >> 5) & 0x3), /* bank */ + ((addr >> 7) & 0x3f), /* index */ + (lru & 0x3), + ((lru >> 2) & 0x3), + ((lru >> 4) & 0x3), + ((lru >> 6) & 0x3)); + } + va = (taglo & 0xC0000FFFFFFFE000ULL) | addr; + if ((taglo & (1 << 31)) && (((taglo >> 62) & 0x3) == 3)) + va |= 0x3FFFF00000000000ULL; + valid = ((taghi >> 29) & 1); + if (valid) { + tlo_tmp = taglo & 0xfff3ff; + if (((taglo >> 10) & 1) ^ range_parity(tlo_tmp, 23, 0)) { + prom_printf(" ** bad parity in VTag0/G/ASID\n"); + res |= CP0_CERRI_TAG_PARITY; + } + if (((taglo >> 11) & 1) ^ range_parity(taglo, 63, 24)) { + prom_printf(" ** bad parity in R/VTag1\n"); + res |= CP0_CERRI_TAG_PARITY; + } + } + if (valid ^ ((taghi >> 27) & 1)) { + prom_printf(" ** bad parity for valid bit\n"); + res |= CP0_CERRI_TAG_PARITY; + } + prom_printf(" %d [VA %016llx] [Vld? %d] raw tags: %08X-%016llX\n", + way, va, valid, taghi, taglo); + + if (data) { + uint32_t datahi, insta, instb; + uint8_t predecode; + int offset; + + /* (hit all banks and ways) */ + for (offset = 0; offset < 4; offset++) { + /* Index-load-data-I */ + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 6, 0(%3) \n\t" + " mfc0 %0, $29, 1\n\t" + " dmfc0 $1, $28, 1\n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop \n" + : "=r" (datahi), "=r" (insta), "=r" (instb) + : "r" ((way << 13) | addr | (offset << 3))); + predecode = (datahi >> 8) & 0xff; + if (((datahi >> 16) & 1) != (uint32_t)range_parity(predecode, 7, 0)) { + prom_printf(" ** bad parity in predecode\n"); + res |= CP0_CERRI_DATA_PARITY; + } + /* XXXKW should/could check predecode bits themselves */ + if (((datahi >> 4) & 0xf) ^ inst_parity(insta)) { + prom_printf(" ** bad parity in instruction a\n"); + res |= CP0_CERRI_DATA_PARITY; + } + if ((datahi & 0xf) ^ inst_parity(instb)) { + prom_printf(" ** bad parity in instruction b\n"); + res |= CP0_CERRI_DATA_PARITY; + } + prom_printf(" %05X-%08X%08X", datahi, insta, instb); + } + prom_printf("\n"); + } + } + return res; +} + +/* Compute the ECC for a data doubleword */ +static uint8_t dc_ecc(uint64_t dword) +{ + uint64_t t; + uint32_t w; + uint8_t p; + int i; + + p = 0; + for (i = 7; i >= 0; i--) + { + p <<= 1; + t = dword & mask_72_64[i]; + w = (uint32_t)(t >> 32); + p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF] + ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]); + w = (uint32_t)(t & 0xFFFFFFFF); + p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF] + ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]); + } + return p; +} + +struct dc_state { + unsigned char val; + char *name; +}; + +static struct dc_state dc_states[] = { + { 0x00, "INVALID" }, + { 0x0f, "COH-SHD" }, + { 0x13, "NCO-E-C" }, + { 0x19, "NCO-E-D" }, + { 0x16, "COH-E-C" }, + { 0x1c, "COH-E-D" }, + { 0xff, "*ERROR*" } +}; + +#define DC_TAG_VALID(state) \ + (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c)) + +static char *dc_state_str(unsigned char state) +{ + struct dc_state *dsc = dc_states; + while (dsc->val != 0xff) { + if (dsc->val == state) + break; + dsc++; + } + return dsc->name; +} + +static uint32_t extract_dc(unsigned short addr, int data) +{ + int valid, way; + unsigned char state; + uint64_t taglo, pa; + uint32_t taghi, taglolo, taglohi; + uint8_t ecc, lru; + int res = 0; + + prom_printf("Dcache index 0x%04x ", addr); + for (way = 0; way < 4; way++) { + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 5, 0(%3)\n\t" /* Index-load-tag-D */ + " mfc0 %0, $29, 2\n\t" + " dmfc0 $1, $28, 2\n\t" + " dsrl32 %1, $1, 0\n\t" + " sll %2, $1, 0\n\t" + " .set pop" + : "=r" (taghi), "=r" (taglohi), "=r" (taglolo) + : "r" ((way << 13) | addr)); + + taglo = ((unsigned long long)taglohi << 32) | taglolo; + pa = (taglo & 0xFFFFFFE000ULL) | addr; + if (way == 0) { + lru = (taghi >> 14) & 0xff; + prom_printf("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + ((addr >> 11) & 0x2) | ((addr >> 5) & 1), /* bank */ + ((addr >> 6) & 0x3f), /* index */ + (lru & 0x3), + ((lru >> 2) & 0x3), + ((lru >> 4) & 0x3), + ((lru >> 6) & 0x3)); + } + state = (taghi >> 25) & 0x1f; + valid = DC_TAG_VALID(state); + prom_printf(" %d [PA %010llx] [state %s (%02x)] raw tags: %08X-%016llX\n", + way, pa, dc_state_str(state), state, taghi, taglo); + if (valid) { + if (((taglo >> 11) & 1) ^ range_parity(taglo, 39, 26)) { + prom_printf(" ** bad parity in PTag1\n"); + res |= CP0_CERRD_TAG_ADDRESS; + } + if (((taglo >> 10) & 1) ^ range_parity(taglo, 25, 13)) { + prom_printf(" ** bad parity in PTag0\n"); + res |= CP0_CERRD_TAG_ADDRESS; + } + } else { + res |= CP0_CERRD_TAG_STATE; + } + + if (data) { + uint64_t datalo; + uint32_t datalohi, datalolo, datahi; + int offset; + + for (offset = 0; offset < 4; offset++) { + /* Index-load-data-D */ + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 7, 0(%3)\n\t" /* Index-load-data-D */ + " mfc0 %0, $29, 3\n\t" + " dmfc0 $1, $28, 3\n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop" + : "=r" (datahi), "=r" (datalohi), "=r" (datalolo) + : "r" ((way << 13) | addr | (offset << 3))); + datalo = ((unsigned long long)datalohi << 32) | datalolo; + ecc = dc_ecc(datalo); + if (ecc != datahi) { + int bits = 0; + prom_printf(" ** bad ECC (%02x %02x) ->", + datahi, ecc); + ecc ^= datahi; + while (ecc) { + if (ecc & 1) bits++; + ecc >>= 1; + } + res |= (bits == 1) ? CP0_CERRD_DATA_SBE : CP0_CERRD_DATA_DBE; + } + prom_printf(" %02X-%016llX", datahi, datalo); + } + prom_printf("\n"); + } + } + return res; +} diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S new file mode 100644 index 00000000000..e743622fd24 --- /dev/null +++ b/arch/mips/mm/cex-gen.S @@ -0,0 +1,42 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 - 1999 Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * + * Cache error handler + */ +#include +#include +#include +#include + +/* + * Game over. Go to the button. Press gently. Swear where allowed by + * legislation. + */ + LEAF(except_vec2_generic) + .set noreorder + .set noat + .set mips0 + /* + * This is a very bad place to be. Our cache error + * detection has triggered. If we have write-back data + * in the cache, we may not be able to recover. As a + * first-order desperate measure, turn off KSEG0 cacheing. + */ + mfc0 k0,CP0_CONFIG + li k1,~CONF_CM_CMASK + and k0,k0,k1 + ori k0,k0,CONF_CM_UNCACHED + mtc0 k0,CP0_CONFIG + /* Give it a few cycles to sink in... */ + nop + nop + nop + + j cache_parity_error + nop + END(except_vec2_generic) diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S new file mode 100644 index 00000000000..2c3a23aa88c --- /dev/null +++ b/arch/mips/mm/cex-sb1.S @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2001,2002,2003 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include + +#include +#include +#include +#include +#include +#include + +#define C0_ERRCTL $26 /* CP0: Error info */ +#define C0_CERR_I $27 /* CP0: Icache error */ +#define C0_CERR_D $27,1 /* CP0: Dcache error */ + + /* + * Based on SiByte sample software cache-err/cerr.S + * CVS revision 1.8. Only the 'unrecoverable' case + * is changed. + */ + + __INIT + + .set mips64 + .set noreorder + .set noat + + /* + * sb1_cerr_vec: code to be copied to the Cache Error + * Exception vector. The code must be pushed out to memory + * (either by copying to Kseg0 and Kseg1 both, or by flushing + * the L1 and L2) since it is fetched as 0xa0000100. + * + * NOTE: Be sure this handler is at most 28 instructions long + * since the final 16 bytes of the exception vector memory + * (0x170-0x17f) are used to preserve k0, k1, and ra. + */ + +LEAF(except_vec2_sb1) + /* + * If this error is recoverable, we need to exit the handler + * without having dirtied any registers. To do this, + * save/restore k0 and k1 from low memory (Useg is direct + * mapped while ERL=1). Note that we can't save to a + * CPU-specific location without ruining a register in the + * process. This means we are vulnerable to data corruption + * whenever the handler is reentered by a second CPU. + */ + sd k0,0x170($0) + sd k1,0x178($0) + + /* + * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell + * if we can fast-path out of here for a h/w-recovered error. + */ + mfc0 k1,C0_ERRCTL + bgtz k1,attempt_recovery + sll k0,k1,1 + +recovered_dcache: + /* + * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA). + * Ought to log the occurence of this recovered dcache error. + */ + b recovered + mtc0 $0,C0_CERR_D + +attempt_recovery: + /* + * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31. Any + * Dcache errors we can recover from will take more extensive + * processing. For now, they are considered "unrecoverable". + * Note that 'DC' becoming set (outside of ERL mode) will + * cause 'IC' to clear; so if there's an Icache error, we'll + * only find out about it if we recover from this error and + * continue executing. + */ + bltz k0,unrecoverable + sll k0,1 + + /* + * k0 has C0_ERRCTL << 2, which puts 'IC' at bit 31. If an + * Icache error isn't indicated, I'm not sure why we got here. + * Consider that case "unrecoverable" for now. + */ + bgez k0,unrecoverable + +attempt_icache_recovery: + /* + * External icache errors are due to uncorrectable ECC errors + * in the L2 cache or Memory Controller and cannot be + * recovered here. + */ + mfc0 k0,C0_CERR_I /* delay slot */ + li k1,1 << 26 /* ICACHE_EXTERNAL */ + and k1,k0 + bnez k1,unrecoverable + andi k0,0x1fe0 + + /* + * Since the error is internal, the 'IDX' field from + * CacheErr-I is valid and we can just invalidate all blocks + * in that set. + */ + cache Index_Invalidate_I,(0<<13)(k0) + cache Index_Invalidate_I,(1<<13)(k0) + cache Index_Invalidate_I,(2<<13)(k0) + cache Index_Invalidate_I,(3<<13)(k0) + + /* Ought to log this recovered icache error */ + +recovered: + /* Restore the saved registers */ + ld k0,0x170($0) + ld k1,0x178($0) + eret + +unrecoverable: + /* Unrecoverable Icache or Dcache error; log it and/or fail */ + j handle_vec2_sb1 + nop + +END(except_vec2_sb1) + + __FINIT + + LEAF(handle_vec2_sb1) + mfc0 k0,CP0_CONFIG + li k1,~CONF_CM_CMASK + and k0,k0,k1 + ori k0,k0,CONF_CM_UNCACHED + mtc0 k0,CP0_CONFIG + + SSNOP + SSNOP + SSNOP + SSNOP + bnezl $0, 1f +1: + mfc0 k0, CP0_STATUS + sll k0, k0, 3 # check CU0 (kernel?) + bltz k0, 2f + nop + + /* Get a valid Kseg0 stack pointer. Any task's stack pointer + * will do, although if we ever want to resume execution we + * better not have corrupted any state. */ + get_saved_sp + move sp, k1 + +2: + j sb1_cache_error + nop + + END(handle_vec2_sb1) diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c new file mode 100644 index 00000000000..97a50d38c98 --- /dev/null +++ b/arch/mips/mm/dma-coherent.c @@ -0,0 +1,255 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi + * Copyright (C) 2000, 2001 Ralf Baechle + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_phys(ret); + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_noncoherent); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) + __attribute__((alias("dma_alloc_noncoherent"))); + +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + unsigned long addr = (unsigned long) vaddr; + + free_pages(addr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_noncoherent); + +void dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent"))); + +EXPORT_SYMBOL(dma_free_coherent); + +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + return __pa(ptr); +} + +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = (dma_addr_t)page_to_phys(sg->page) + sg->offset; + } + + return nents; +} + +EXPORT_SYMBOL(dma_map_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + return page_to_phys(page) + offset; +} + +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_page); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_sg); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_sg_for_device); + +int dma_mapping_error(dma_addr_t dma_addr) +{ + return 0; +} + +EXPORT_SYMBOL(dma_mapping_error); + +int dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + return 1; +} + +EXPORT_SYMBOL(dma_supported); + +int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +EXPORT_SYMBOL(dma_is_consistent); + +void dma_cache_sync(void *vaddr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_cache_sync); + +/* The DAC routines are a PCIism.. */ + +#ifdef CONFIG_PCI + +#include + +dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, + struct page *page, unsigned long offset, int direction) +{ + return (dma64_addr_t)page_to_phys(page) + offset; +} + +EXPORT_SYMBOL(pci_dac_page_to_dma); + +struct page *pci_dac_dma_to_page(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + return mem_map + (dma_addr >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(pci_dac_dma_to_page); + +unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + return dma_addr & ~PAGE_MASK; +} + +EXPORT_SYMBOL(pci_dac_dma_to_offset); + +void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); + +void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); + +#endif /* CONFIG_PCI */ diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c new file mode 100644 index 00000000000..aa7c94b5d78 --- /dev/null +++ b/arch/mips/mm/dma-ip27.c @@ -0,0 +1,257 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi + * Copyright (C) 2000, 2001 Ralf Baechle + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ +#include +#include +#include +#include +#include + +#include +#include + +#define pdev_to_baddr(pdev, addr) \ + (BRIDGE_CONTROLLER(pdev->bus)->baddr + (addr)) +#define dev_to_baddr(dev, addr) \ + pdev_to_baddr(to_pci_dev(dev), (addr)) + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = dev_to_baddr(dev, virt_to_phys(ret)); + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_noncoherent); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) + __attribute__((alias("dma_alloc_noncoherent"))); + +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + unsigned long addr = (unsigned long) vaddr; + + free_pages(addr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_noncoherent); + +void dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent"))); + +EXPORT_SYMBOL(dma_free_coherent); + +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + return dev_to_baddr(dev, __pa(ptr)); +} + +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = (dma_addr_t) dev_to_baddr(dev, + page_to_phys(sg->page) + sg->offset); + } + + return nents; +} + +EXPORT_SYMBOL(dma_map_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + return dev_to_baddr(dev, page_to_phys(page) + offset); +} + +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_page); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_unmap_sg); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_sync_sg_for_device); + +int dma_mapping_error(dma_addr_t dma_addr) +{ + return 0; +} + +EXPORT_SYMBOL(dma_mapping_error); + +int dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + return 1; +} + +EXPORT_SYMBOL(dma_supported); + +int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +EXPORT_SYMBOL(dma_is_consistent); + +void dma_cache_sync(void *vaddr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); +} + +EXPORT_SYMBOL(dma_cache_sync); + +dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, + struct page *page, unsigned long offset, int direction) +{ + dma64_addr_t addr = page_to_phys(page) + offset; + + return (dma64_addr_t) pdev_to_baddr(pdev, addr); +} + +EXPORT_SYMBOL(pci_dac_page_to_dma); + +struct page *pci_dac_dma_to_page(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); + + return pfn_to_page((dma_addr - bc->baddr) >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(pci_dac_dma_to_page); + +unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + return dma_addr & ~PAGE_MASK; +} + +EXPORT_SYMBOL(pci_dac_dma_to_offset); + +void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); + +void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c new file mode 100644 index 00000000000..2cbe196c35f --- /dev/null +++ b/arch/mips/mm/dma-ip32.c @@ -0,0 +1,382 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi + * Copyright (C) 2000, 2001 Ralf Baechle + * Copyright (C) 2005 Ilya A. Volynets-Evenbakh + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + * IP32 changes by Ilya. + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Warning on the terminology - Linux calls an uncached area coherent; + * MIPS terminology calls memory areas with hardware maintained coherency + * coherent. + */ + +/* + * Few notes. + * 1. CPU sees memory as two chunks: 0-256M@0x0, and the rest @0x40000000+256M + * 2. PCI sees memory as one big chunk @0x0 (or we could use 0x40000000 for native-endian) + * 3. All other devices see memory as one big chunk at 0x40000000 + * 4. Non-PCI devices will pass NULL as struct device* + * Thus we translate differently, depending on device. + */ + +#define RAM_OFFSET_MASK 0x3fffffff + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + unsigned long addr = virt_to_phys(ret)&RAM_OFFSET_MASK; + memset(ret, 0, size); + if(dev==NULL) + addr+= CRIME_HI_MEM_BASE; + *dma_handle = addr; + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_noncoherent); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + + ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp); + if (ret) { + dma_cache_wback_inv((unsigned long) ret, size); + ret = UNCAC_ADDR(ret); + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_noncoherent); + +void dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + unsigned long addr = (unsigned long) vaddr; + + addr = CAC_ADDR(addr); + free_pages(addr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_coherent); + +static inline void __dma_sync(unsigned long addr, size_t size, + enum dma_data_direction direction) +{ + switch (direction) { + case DMA_TO_DEVICE: + dma_cache_wback(addr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(addr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(addr, size); + break; + + default: + BUG(); + } +} + +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + unsigned long addr = (unsigned long) ptr; + + switch (direction) { + case DMA_TO_DEVICE: + dma_cache_wback(addr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(addr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(addr, size); + break; + + default: + BUG(); + } + + addr = virt_to_phys(ptr)&RAM_OFFSET_MASK;; + if(dev == NULL) + addr+=CRIME_HI_MEM_BASE; + return (dma_addr_t)addr; +} + +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + switch (direction) { + case DMA_TO_DEVICE: + break; + + case DMA_FROM_DEVICE: + break; + + case DMA_BIDIRECTIONAL: + break; + + default: + BUG(); + } +} + +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) { + unsigned long addr; + + addr = (unsigned long) page_address(sg->page)+sg->offset; + if (addr) + __dma_sync(addr, sg->length, direction); + addr = __pa(addr)&RAM_OFFSET_MASK;; + if(dev == NULL) + addr += CRIME_HI_MEM_BASE; + sg->dma_address = (dma_addr_t)addr; + } + + return nents; +} + +EXPORT_SYMBOL(dma_map_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = (unsigned long) page_address(page) + offset; + dma_cache_wback_inv(addr, size); + addr = __pa(addr)&RAM_OFFSET_MASK;; + if(dev == NULL) + addr += CRIME_HI_MEM_BASE; + + return (dma_addr_t)addr; +} + +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + if (direction != DMA_TO_DEVICE) { + unsigned long addr; + + dma_address&=RAM_OFFSET_MASK; + addr = dma_address + PAGE_OFFSET; + if(dma_address>=256*1024*1024) + addr+=CRIME_HI_MEM_BASE; + dma_cache_wback_inv(addr, size); + } +} + +EXPORT_SYMBOL(dma_unmap_page); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + unsigned long addr; + int i; + + BUG_ON(direction == DMA_NONE); + + if (direction == DMA_TO_DEVICE) + return; + + for (i = 0; i < nhwentries; i++, sg++) { + addr = (unsigned long) page_address(sg->page); + if (!addr) + continue; + dma_cache_wback_inv(addr + sg->offset, sg->length); + } +} + +EXPORT_SYMBOL(dma_unmap_sg); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + dma_handle&=RAM_OFFSET_MASK; + addr = dma_handle + PAGE_OFFSET; + if(dma_handle>=256*1024*1024) + addr+=CRIME_HI_MEM_BASE; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + dma_handle&=RAM_OFFSET_MASK; + addr = dma_handle + PAGE_OFFSET; + if(dma_handle>=256*1024*1024) + addr+=CRIME_HI_MEM_BASE; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + dma_handle&=RAM_OFFSET_MASK; + addr = dma_handle + offset + PAGE_OFFSET; + if(dma_handle>=256*1024*1024) + addr+=CRIME_HI_MEM_BASE; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + dma_handle&=RAM_OFFSET_MASK; + addr = dma_handle + offset + PAGE_OFFSET; + if(dma_handle>=256*1024*1024) + addr+=CRIME_HI_MEM_BASE; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + /* Make sure that gcc doesn't leave the empty loop body. */ + for (i = 0; i < nelems; i++, sg++) + __dma_sync((unsigned long)page_address(sg->page), + sg->length, direction); +} + +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + /* Make sure that gcc doesn't leave the empty loop body. */ + for (i = 0; i < nelems; i++, sg++) + __dma_sync((unsigned long)page_address(sg->page), + sg->length, direction); +} + +EXPORT_SYMBOL(dma_sync_sg_for_device); + +int dma_mapping_error(dma_addr_t dma_addr) +{ + return 0; +} + +EXPORT_SYMBOL(dma_mapping_error); + +int dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + return 1; +} + +EXPORT_SYMBOL(dma_supported); + +int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +EXPORT_SYMBOL(dma_is_consistent); + +void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +{ + if (direction == DMA_NONE) + return; + + dma_cache_wback_inv((unsigned long)vaddr, size); +} + +EXPORT_SYMBOL(dma_cache_sync); + diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c new file mode 100644 index 00000000000..9895e32b0fc --- /dev/null +++ b/arch/mips/mm/dma-noncoherent.c @@ -0,0 +1,400 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi + * Copyright (C) 2000, 2001 Ralf Baechle + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Warning on the terminology - Linux calls an uncached area coherent; + * MIPS terminology calls memory areas with hardware maintained coherency + * coherent. + */ + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_phys(ret); + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_noncoherent); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, int gfp) +{ + void *ret; + + ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp); + if (ret) { + dma_cache_wback_inv((unsigned long) ret, size); + ret = UNCAC_ADDR(ret); + } + + return ret; +} + +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_noncoherent); + +void dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + unsigned long addr = (unsigned long) vaddr; + + addr = CAC_ADDR(addr); + free_pages(addr, get_order(size)); +} + +EXPORT_SYMBOL(dma_free_coherent); + +static inline void __dma_sync(unsigned long addr, size_t size, + enum dma_data_direction direction) +{ + switch (direction) { + case DMA_TO_DEVICE: + dma_cache_wback(addr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(addr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(addr, size); + break; + + default: + BUG(); + } +} + +dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + unsigned long addr = (unsigned long) ptr; + + switch (direction) { + case DMA_TO_DEVICE: + dma_cache_wback(addr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(addr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(addr, size); + break; + + default: + BUG(); + } + + return virt_to_phys(ptr); +} + +EXPORT_SYMBOL(dma_map_single); + +void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + unsigned long addr; + addr = dma_addr + PAGE_OFFSET; + + switch (direction) { + case DMA_TO_DEVICE: + //dma_cache_wback(addr, size); + break; + + case DMA_FROM_DEVICE: + //dma_cache_inv(addr, size); + break; + + case DMA_BIDIRECTIONAL: + //dma_cache_wback_inv(addr, size); + break; + + default: + BUG(); + } +} + +EXPORT_SYMBOL(dma_unmap_single); + +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + for (i = 0; i < nents; i++, sg++) { + unsigned long addr; + + addr = (unsigned long) page_address(sg->page); + if (addr) + __dma_sync(addr + sg->offset, sg->length, direction); + sg->dma_address = (dma_addr_t) + (page_to_phys(sg->page) + sg->offset); + } + + return nents; +} + +EXPORT_SYMBOL(dma_map_sg); + +dma_addr_t dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = (unsigned long) page_address(page) + offset; + dma_cache_wback_inv(addr, size); + + return page_to_phys(page) + offset; +} + +EXPORT_SYMBOL(dma_map_page); + +void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + if (direction != DMA_TO_DEVICE) { + unsigned long addr; + + addr = dma_address + PAGE_OFFSET; + dma_cache_wback_inv(addr, size); + } +} + +EXPORT_SYMBOL(dma_unmap_page); + +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + unsigned long addr; + int i; + + BUG_ON(direction == DMA_NONE); + + if (direction == DMA_TO_DEVICE) + return; + + for (i = 0; i < nhwentries; i++, sg++) { + addr = (unsigned long) page_address(sg->page); + if (!addr) + continue; + dma_cache_wback_inv(addr + sg->offset, sg->length); + } +} + +EXPORT_SYMBOL(dma_unmap_sg); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = dma_handle + PAGE_OFFSET; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = dma_handle + PAGE_OFFSET; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = dma_handle + offset + PAGE_OFFSET; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_cpu); + +void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + unsigned long addr; + + BUG_ON(direction == DMA_NONE); + + addr = dma_handle + offset + PAGE_OFFSET; + __dma_sync(addr, size, direction); +} + +EXPORT_SYMBOL(dma_sync_single_range_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + /* Make sure that gcc doesn't leave the empty loop body. */ + for (i = 0; i < nelems; i++, sg++) + __dma_sync((unsigned long)page_address(sg->page), + sg->length, direction); +} + +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + int i; + + BUG_ON(direction == DMA_NONE); + + /* Make sure that gcc doesn't leave the empty loop body. */ + for (i = 0; i < nelems; i++, sg++) + __dma_sync((unsigned long)page_address(sg->page), + sg->length, direction); +} + +EXPORT_SYMBOL(dma_sync_sg_for_device); + +int dma_mapping_error(dma_addr_t dma_addr) +{ + return 0; +} + +EXPORT_SYMBOL(dma_mapping_error); + +int dma_supported(struct device *dev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if (mask < 0x00ffffff) + return 0; + + return 1; +} + +EXPORT_SYMBOL(dma_supported); + +int dma_is_consistent(dma_addr_t dma_addr) +{ + return 1; +} + +EXPORT_SYMBOL(dma_is_consistent); + +void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +{ + if (direction == DMA_NONE) + return; + + dma_cache_wback_inv((unsigned long)vaddr, size); +} + +EXPORT_SYMBOL(dma_cache_sync); + +/* The DAC routines are a PCIism.. */ + +#ifdef CONFIG_PCI + +#include + +dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, + struct page *page, unsigned long offset, int direction) +{ + return (dma64_addr_t)page_to_phys(page) + offset; +} + +EXPORT_SYMBOL(pci_dac_page_to_dma); + +struct page *pci_dac_dma_to_page(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + return mem_map + (dma_addr >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(pci_dac_dma_to_page); + +unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, + dma64_addr_t dma_addr) +{ + return dma_addr & ~PAGE_MASK; +} + +EXPORT_SYMBOL(pci_dac_dma_to_offset); + +void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); + + dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); + +void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, + dma64_addr_t dma_addr, size_t len, int direction) +{ + BUG_ON(direction == PCI_DMA_NONE); + + dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len); +} + +EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); + +#endif /* CONFIG_PCI */ diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c new file mode 100644 index 00000000000..297fb9f390d --- /dev/null +++ b/arch/mips/mm/extable.c @@ -0,0 +1,21 @@ +/* + * linux/arch/mips/mm/extable.c + */ +#include +#include +#include +#include + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(exception_epc(regs)); + if (fixup) { + regs->cp0_epc = fixup->nextinsn; + + return 1; + } + + return 0; +} diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c new file mode 100644 index 00000000000..ec8077c74e9 --- /dev/null +++ b/arch/mips/mm/fault.c @@ -0,0 +1,236 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 - 2000 by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ +#include + +#include +#include +#include +#include +#include + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, + unsigned long address) +{ + struct vm_area_struct * vma = NULL; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + const int field = sizeof(unsigned long) * 2; + siginfo_t info; + +#if 0 + printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(), + current->comm, current->pid, field, address, write, + field, regs->cp0_epc); +#endif + + info.si_code = SEGV_MAPERR; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (unlikely(address >= VMALLOC_START)) + goto vmalloc_fault; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + +survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case VM_FAULT_MINOR: + tsk->min_flt++; + break; + case VM_FAULT_MAJOR: + tsk->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + tsk->thread.cp0_badvaddr = address; + tsk->thread.error_code = write; +#if 0 + printk("do_page_fault() #2: sending SIGSEGV to %s for " + "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address, + field, (unsigned long) regs->cp0_epc, + field, (unsigned long) regs->regs[31]); +#endif + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void *) address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) { + current->thread.cp0_baduaddr = address; + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " + "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", + smp_processor_id(), field, address, field, regs->cp0_epc, + field, regs->regs[31]); + die("Oops", regs); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (tsk->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", tsk->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.cp0_badvaddr = address; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *) address; + force_sig_info(SIGBUS, &info, tsk); + + return; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = __pgd_offset(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +} diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c new file mode 100644 index 00000000000..dd5e2e31885 --- /dev/null +++ b/arch/mips/mm/highmem.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +void *__kmap(struct page *page) +{ + void *addr; + + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + addr = kmap_high(page); + flush_tlb_one((unsigned long)addr); + + return addr; +} + +void __kunmap(struct page *page) +{ + if (in_interrupt()) + BUG(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +/* + * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because + * no global lock is needed and because the kmap code must perform a global TLB + * invalidation when the kmap pool wraps. + * + * However when holding an atomic kmap is is not legal to sleep, so atomic + * kmaps are appropriate for short, tight code paths only. + */ + +void *__kmap_atomic(struct page *page, enum km_type type) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + inc_preempt_count(); + if (!PageHighMem(page)) + return page_address(page); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + if (!pte_none(*(kmap_pte-idx))) + BUG(); +#endif + set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + local_flush_tlb_one((unsigned long)vaddr); + + return (void*) vaddr; +} + +void __kunmap_atomic(void *kvaddr, enum km_type type) +{ +#ifdef CONFIG_DEBUG_HIGHMEM + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + if (vaddr < FIXADDR_START) { // FIXME + dec_preempt_count(); + preempt_check_resched(); + return; + } + + if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + BUG(); + + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + local_flush_tlb_one(vaddr); +#endif + + dec_preempt_count(); + preempt_check_resched(); +} + +struct page *__kmap_atomic_to_page(void *ptr) +{ + unsigned long idx, vaddr = (unsigned long)ptr; + pte_t *pte; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + idx = virt_to_fix(vaddr); + pte = kmap_pte - (idx - FIX_KMAP_BEGIN); + return pte_page(*pte); +} + +EXPORT_SYMBOL(__kmap); +EXPORT_SYMBOL(__kunmap); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c new file mode 100644 index 00000000000..b027ce7efbc --- /dev/null +++ b/arch/mips/mm/init.c @@ -0,0 +1,304 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 - 2000 Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +unsigned long highstart_pfn, highend_pfn; + +/* + * We have up to 8 empty zeroed pages so we can map one of the right colour + * when needed. This is necessary only on R4000 / R4400 SC and MC versions + * where we have to avoid VCED / VECI exceptions for good performance at + * any price. Since page is never written to after the initialization we + * don't have to care about aliases on other CPUs. + */ +unsigned long empty_zero_page, zero_page_mask; + +/* + * Not static inline because used by IP27 special magic initialization code + */ +unsigned long setup_zero_pages(void) +{ + unsigned long order, size; + struct page *page; + + if (cpu_has_vce) + order = 3; + else + order = 0; + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Oh boy, that early out of memory?"); + + page = virt_to_page(empty_zero_page); + while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { + set_bit(PG_reserved, &page->flags); + set_page_count(page, 0); + page++; + } + + size = PAGE_SIZE << order; + zero_page_mask = (size - 1) & PAGE_MASK; + + return 1UL << order; +} + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +static void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +} + +#ifdef CONFIG_MIPS64 +static void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + pmd = (pmd_t *)pgd; + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(pte)); + if (pte != pte_offset_kernel(pmd, 0)) + BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } +} +#endif /* CONFIG_MIPS64 */ +#endif /* CONFIG_HIGHMEM */ + +#ifndef CONFIG_DISCONTIGMEM +extern void pagetable_init(void); + +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long max_dma, high, low; + + pagetable_init(); + +#ifdef CONFIG_HIGHMEM + kmap_init(); +#endif + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + +#ifdef CONFIG_ISA + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; + } +#else + zones_size[ZONE_DMA] = low; +#endif +#ifdef CONFIG_HIGHMEM + if (cpu_has_dc_aliases) { + printk(KERN_WARNING "This processor doesn't support highmem."); + if (high - low) + printk(" %ldk highmem ignored", high - low); + printk("\n"); + } else + zones_size[ZONE_HIGHMEM] = high - low; +#endif + + free_area_init(zones_size); +} + +#define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) + +static inline int page_is_ram(unsigned long pagenr) +{ + int i; + + for (i = 0; i < boot_mem_map.nr_map; i++) { + unsigned long addr, end; + + if (boot_mem_map.map[i].type != BOOT_MEM_RAM) + /* not usable memory */ + continue; + + addr = PFN_UP(boot_mem_map.map[i].addr); + end = PFN_DOWN(boot_mem_map.map[i].addr + + boot_mem_map.map[i].size); + + if (pagenr >= addr && pagenr < end) + return 1; + } + + return 0; +} + +void __init mem_init(void) +{ + unsigned long codesize, reservedpages, datasize, initsize; + unsigned long tmp, ram; + +#ifdef CONFIG_HIGHMEM +#ifdef CONFIG_DISCONTIGMEM +#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" +#endif + max_mapnr = num_physpages = highend_pfn; +#else + max_mapnr = num_physpages = max_low_pfn; +#endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + totalram_pages += free_all_bootmem(); + totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + + reservedpages = ram = 0; + for (tmp = 0; tmp < max_low_pfn; tmp++) + if (page_is_ram(tmp)) { + ram++; + if (PageReserved(mem_map+tmp)) + reservedpages++; + } + +#ifdef CONFIG_HIGHMEM + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = mem_map + tmp; + + if (!page_is_ram(tmp)) { + SetPageReserved(page); + continue; + } + ClearPageReserved(page); +#ifdef CONFIG_LIMITED_DMA + set_page_address(page, lowmem_page_address(page)); +#endif + set_bit(PG_highmem, &page->flags); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; +#endif + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " + "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + ram << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); +} +#endif /* !CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_MIPS64 + /* Switch from KSEG0 to XKPHYS addresses */ + start = (unsigned long)phys_to_virt(CPHYSADDR(start)); + end = (unsigned long)phys_to_virt(CPHYSADDR(end)); +#endif + if (start < end) + printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", + (end - start) >> 10); + + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +extern unsigned long prom_free_prom_memory(void); + +void free_initmem(void) +{ + unsigned long addr, page, freed; + + freed = prom_free_prom_memory(); + + addr = (unsigned long) &__init_begin; + while (addr < (unsigned long) &__init_end) { +#ifdef CONFIG_MIPS64 + page = PAGE_OFFSET | CPHYSADDR(addr); +#else + page = addr; +#endif + ClearPageReserved(virt_to_page(page)); + set_page_count(virt_to_page(page), 1); + free_page(page); + totalram_pages++; + freed += PAGE_SIZE; + addr += PAGE_SIZE; + } + printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n", + freed >> 10); +} diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c new file mode 100644 index 00000000000..adf352273f6 --- /dev/null +++ b/arch/mips/mm/ioremap.c @@ -0,0 +1,202 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 1995 1996 Linus Torvalds + * (C) Copyright 2001, 2002 Ralf Baechle + */ +#include +#include +#include + +#include +#include +#include +#include + +static inline void remap_area_pte(pte_t * pte, unsigned long address, + phys_t size, phys_t phys_addr, unsigned long flags) +{ + phys_t end; + unsigned long pfn; + pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE + | __WRITEABLE | flags); + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + pfn = phys_addr >> PAGE_SHIFT; + do { + if (!pte_none(*pte)) { + printk("remap_area_pte: page already exists\n"); + BUG(); + } + set_pte(pte, pfn_pte(pfn, pgprot)); + address += PAGE_SIZE; + pfn++; + pte++; + } while (address && (address < end)); +} + +static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, + phys_t size, phys_t phys_addr, unsigned long flags) +{ + phys_t end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + phys_addr -= address; + if (address >= end) + BUG(); + do { + pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + if (!pte) + return -ENOMEM; + remap_area_pte(pte, address, end - address, address + phys_addr, flags); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int remap_area_pages(unsigned long address, phys_t phys_addr, + phys_t size, unsigned long flags) +{ + int error; + pgd_t * dir; + unsigned long end = address + size; + + phys_addr -= address; + dir = pgd_offset(&init_mm, address); + flush_cache_all(); + if (address >= end) + BUG(); + spin_lock(&init_mm.page_table_lock); + do { + pmd_t *pmd; + pmd = pmd_alloc(&init_mm, dir, address); + error = -ENOMEM; + if (!pmd) + break; + if (remap_area_pmd(pmd, address, end - address, + phys_addr + address, flags)) + break; + error = 0; + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + spin_unlock(&init_mm.page_table_lock); + flush_tlb_all(); + return error; +} + +/* + * Allow physical addresses to be fixed up to help 36 bit peripherals. + */ +phys_t __attribute__ ((weak)) +fixup_bigphys_addr(phys_t phys_addr, phys_t size) +{ + return phys_addr; +} + +/* + * Generic mapping function (not visible outside): + */ + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ + +#define IS_LOW512(addr) (!((phys_t)(addr) & (phys_t) ~0x1fffffffULL)) + +void * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) +{ + struct vm_struct * area; + unsigned long offset; + phys_t last_addr; + void * addr; + + phys_addr = fixup_bigphys_addr(phys_addr, size); + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Map uncached objects in the low 512mb of address space using KSEG1, + * otherwise map using page tables. + */ + if (IS_LOW512(phys_addr) && IS_LOW512(last_addr) && + flags == _CACHE_UNCACHED) + return (void *) KSEG1ADDR(phys_addr); + + /* + * Don't allow anybody to remap normal RAM that we're using.. + */ + if (phys_addr < virt_to_phys(high_memory)) { + char *t_addr, *t_end; + struct page *page; + + t_addr = __va(phys_addr); + t_end = t_addr + (size - 1); + + for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) + if(!PageReserved(page)) + return NULL; + } + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + addr = area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + vunmap(addr); + return NULL; + } + + return (void *) (offset + (char *)addr); +} + +#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == KSEG1) + +void __iounmap(volatile void __iomem *addr) +{ + struct vm_struct *p; + + if (IS_KSEG1(addr)) + return; + + p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); + if (!p) { + printk(KERN_ERR "iounmap: bad address %p\n", addr); + return; + } + + kfree(p); +} + +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(__iounmap); diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c new file mode 100644 index 00000000000..9f8b1654157 --- /dev/null +++ b/arch/mips/mm/pg-r4k.c @@ -0,0 +1,489 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define half_scache_line_size() (cpu_scache_line_size() >> 1) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x58 bytes + * R4600 v1.7: 0x5c bytes + * R4600 v2.0: 0x60 bytes + * With prefetching, 16 byte strides 0xa0 bytes + */ + +static unsigned int clear_page_array[0x130 / 4]; + +void clear_page(void * page) __attribute__((alias("clear_page_array"))); + +EXPORT_SYMBOL(clear_page); + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 byte strides 0x0b8 bytes + */ +static unsigned int copy_page_array[0x148 / 4]; + +void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); + +EXPORT_SYMBOL(copy_page); + +/* + * This is suboptimal for 32-bit kernels; we assume that R10000 is only used + * with 64-bit kernels. The prefetch offsets have been experimentally tuned + * an Origin 200. + */ +static int pref_offset_clear __initdata = 512; +static int pref_offset_copy __initdata = 256; + +static unsigned int pref_src_mode __initdata; +static unsigned int pref_dst_mode __initdata; + +static int load_offset __initdata; +static int store_offset __initdata; + +static unsigned int __initdata *dest, *epc; + +static unsigned int instruction_pending; +static union mips_instruction delayed_mi; + +static void __init emit_instruction(union mips_instruction mi) +{ + if (instruction_pending) + *epc++ = delayed_mi.word; + + instruction_pending = 1; + delayed_mi = mi; +} + +static inline void flush_delay_slot_or_nop(void) +{ + if (instruction_pending) { + *epc++ = delayed_mi.word; + instruction_pending = 0; + return; + } + + *epc++ = 0; +} + +static inline unsigned int *label(void) +{ + if (instruction_pending) { + *epc++ = delayed_mi.word; + instruction_pending = 0; + } + + return epc; +} + +static inline void build_insn_word(unsigned int word) +{ + union mips_instruction mi; + + mi.word = word; + + emit_instruction(mi); +} + +static inline void build_nop(void) +{ + build_insn_word(0); /* nop */ +} + +static inline void build_src_pref(int advance) +{ + if (!(load_offset & (cpu_dcache_line_size() - 1))) { + union mips_instruction mi; + + mi.i_format.opcode = pref_op; + mi.i_format.rs = 5; /* $a1 */ + mi.i_format.rt = pref_src_mode; + mi.i_format.simmediate = load_offset + advance; + + emit_instruction(mi); + } +} + +static inline void __build_load_reg(int reg) +{ + union mips_instruction mi; + unsigned int width; + + if (cpu_has_64bit_gp_regs) { + mi.i_format.opcode = ld_op; + width = 8; + } else { + mi.i_format.opcode = lw_op; + width = 4; + } + mi.i_format.rs = 5; /* $a1 */ + mi.i_format.rt = reg; /* $reg */ + mi.i_format.simmediate = load_offset; + + load_offset += width; + emit_instruction(mi); +} + +static inline void build_load_reg(int reg) +{ + if (cpu_has_prefetch) + build_src_pref(pref_offset_copy); + + __build_load_reg(reg); +} + +static inline void build_dst_pref(int advance) +{ + if (!(store_offset & (cpu_dcache_line_size() - 1))) { + union mips_instruction mi; + + mi.i_format.opcode = pref_op; + mi.i_format.rs = 4; /* $a0 */ + mi.i_format.rt = pref_dst_mode; + mi.i_format.simmediate = store_offset + advance; + + emit_instruction(mi); + } +} + +static inline void build_cdex_s(void) +{ + union mips_instruction mi; + + if ((store_offset & (cpu_scache_line_size() - 1))) + return; + + mi.c_format.opcode = cache_op; + mi.c_format.rs = 4; /* $a0 */ + mi.c_format.c_op = 3; /* Create Dirty Exclusive */ + mi.c_format.cache = 3; /* Secondary Data Cache */ + mi.c_format.simmediate = store_offset; + + emit_instruction(mi); +} + +static inline void build_cdex_p(void) +{ + union mips_instruction mi; + + if (store_offset & (cpu_dcache_line_size() - 1)) + return; + + if (R4600_V1_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2010)) { + build_nop(); + build_nop(); + build_nop(); + build_nop(); + } + + if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020)) + build_insn_word(0x8c200000); /* lw $zero, ($at) */ + + mi.c_format.opcode = cache_op; + mi.c_format.rs = 4; /* $a0 */ + mi.c_format.c_op = 3; /* Create Dirty Exclusive */ + mi.c_format.cache = 1; /* Data Cache */ + mi.c_format.simmediate = store_offset; + + emit_instruction(mi); +} + +static void __init __build_store_reg(int reg) +{ + union mips_instruction mi; + unsigned int width; + + if (cpu_has_64bit_gp_regs || + (cpu_has_64bit_zero_reg && reg == 0)) { + mi.i_format.opcode = sd_op; + width = 8; + } else { + mi.i_format.opcode = sw_op; + width = 4; + } + mi.i_format.rs = 4; /* $a0 */ + mi.i_format.rt = reg; /* $reg */ + mi.i_format.simmediate = store_offset; + + store_offset += width; + emit_instruction(mi); +} + +static inline void build_store_reg(int reg) +{ + if (cpu_has_prefetch) + if (reg) + build_dst_pref(pref_offset_copy); + else + build_dst_pref(pref_offset_clear); + else if (cpu_has_cache_cdex_s) + build_cdex_s(); + else if (cpu_has_cache_cdex_p) + build_cdex_p(); + + __build_store_reg(reg); +} + +static inline void build_addiu_a2_a0(unsigned long offset) +{ + union mips_instruction mi; + + BUG_ON(offset > 0x7fff); + + mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; + mi.i_format.rs = 4; /* $a0 */ + mi.i_format.rt = 6; /* $a2 */ + mi.i_format.simmediate = offset; + + emit_instruction(mi); +} + +static inline void build_addiu_a1(unsigned long offset) +{ + union mips_instruction mi; + + BUG_ON(offset > 0x7fff); + + mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; + mi.i_format.rs = 5; /* $a1 */ + mi.i_format.rt = 5; /* $a1 */ + mi.i_format.simmediate = offset; + + load_offset -= offset; + + emit_instruction(mi); +} + +static inline void build_addiu_a0(unsigned long offset) +{ + union mips_instruction mi; + + BUG_ON(offset > 0x7fff); + + mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; + mi.i_format.rs = 4; /* $a0 */ + mi.i_format.rt = 4; /* $a0 */ + mi.i_format.simmediate = offset; + + store_offset -= offset; + + emit_instruction(mi); +} + +static inline void build_bne(unsigned int *dest) +{ + union mips_instruction mi; + + mi.i_format.opcode = bne_op; + mi.i_format.rs = 6; /* $a2 */ + mi.i_format.rt = 4; /* $a0 */ + mi.i_format.simmediate = dest - epc - 1; + + *epc++ = mi.word; + flush_delay_slot_or_nop(); +} + +static inline void build_jr_ra(void) +{ + union mips_instruction mi; + + mi.r_format.opcode = spec_op; + mi.r_format.rs = 31; + mi.r_format.rt = 0; + mi.r_format.rd = 0; + mi.r_format.re = 0; + mi.r_format.func = jr_op; + + *epc++ = mi.word; + flush_delay_slot_or_nop(); +} + +void __init build_clear_page(void) +{ + unsigned int loop_start; + + epc = (unsigned int *) &clear_page_array; + instruction_pending = 0; + store_offset = 0; + + if (cpu_has_prefetch) { + switch (current_cpu_data.cputype) { + case CPU_RM9000: + /* + * As a workaround for erratum G105 which make the + * PrepareForStore hint unusable we fall back to + * StoreRetained on the RM9000. Once it is known which + * versions of the RM9000 we'll be able to condition- + * alize this. + */ + + case CPU_R10000: + case CPU_R12000: + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + break; + + default: + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_PrepareForStore; + break; + } + } + + build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0)); + + if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020)) + build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ + +dest = label(); + do { + build_store_reg(0); + build_store_reg(0); + build_store_reg(0); + build_store_reg(0); + } while (store_offset < half_scache_line_size()); + build_addiu_a0(2 * store_offset); + loop_start = store_offset; + do { + build_store_reg(0); + build_store_reg(0); + build_store_reg(0); + build_store_reg(0); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_bne(dest); + + if (cpu_has_prefetch && pref_offset_clear) { + build_addiu_a2_a0(pref_offset_clear); + dest = label(); + loop_start = store_offset; + do { + __build_store_reg(0); + __build_store_reg(0); + __build_store_reg(0); + __build_store_reg(0); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_addiu_a0(2 * store_offset); + loop_start = store_offset; + do { + __build_store_reg(0); + __build_store_reg(0); + __build_store_reg(0); + __build_store_reg(0); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_bne(dest); + } + + build_jr_ra(); + + flush_icache_range((unsigned long)&clear_page_array, + (unsigned long) epc); + + BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array)); +} + +void __init build_copy_page(void) +{ + unsigned int loop_start; + + epc = (unsigned int *) ©_page_array; + store_offset = load_offset = 0; + instruction_pending = 0; + + build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0)); + + if (R4600_V2_HIT_CACHEOP_WAR && ((read_c0_prid() & 0xfff0) == 0x2020)) + build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ + +dest = label(); + loop_start = store_offset; + do { + build_load_reg( 8); + build_load_reg( 9); + build_load_reg(10); + build_load_reg(11); + build_store_reg( 8); + build_store_reg( 9); + build_store_reg(10); + build_store_reg(11); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_addiu_a0(2 * store_offset); + build_addiu_a1(2 * load_offset); + loop_start = store_offset; + do { + build_load_reg( 8); + build_load_reg( 9); + build_load_reg(10); + build_load_reg(11); + build_store_reg( 8); + build_store_reg( 9); + build_store_reg(10); + build_store_reg(11); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_bne(dest); + + if (cpu_has_prefetch && pref_offset_copy) { + build_addiu_a2_a0(pref_offset_copy); + dest = label(); + loop_start = store_offset; + do { + __build_load_reg( 8); + __build_load_reg( 9); + __build_load_reg(10); + __build_load_reg(11); + __build_store_reg( 8); + __build_store_reg( 9); + __build_store_reg(10); + __build_store_reg(11); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_addiu_a0(2 * store_offset); + build_addiu_a1(2 * load_offset); + loop_start = store_offset; + do { + __build_load_reg( 8); + __build_load_reg( 9); + __build_load_reg(10); + __build_load_reg(11); + __build_store_reg( 8); + __build_store_reg( 9); + __build_store_reg(10); + __build_store_reg(11); + } while ((store_offset - loop_start) < half_scache_line_size()); + build_bne(dest); + } + + build_jr_ra(); + + flush_icache_range((unsigned long)©_page_array, + (unsigned long) epc); + + BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array)); +} diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c new file mode 100644 index 00000000000..59d131b5e53 --- /dev/null +++ b/arch/mips/mm/pg-sb1.c @@ -0,0 +1,287 @@ +/* + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 2000 SiByte, Inc. + * Copyright (C) 2005 Thiemo Seufer + * + * Written by Justin Carlson of SiByte, Inc. + * and Kip Walker of Broadcom Corp. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS +#define SB1_PREF_LOAD_STREAMED_HINT "0" +#define SB1_PREF_STORE_STREAMED_HINT "1" +#else +#define SB1_PREF_LOAD_STREAMED_HINT "4" +#define SB1_PREF_STORE_STREAMED_HINT "5" +#endif + +static inline void clear_page_cpu(void *page) +{ + unsigned char *addr = (unsigned char *) page; + unsigned char *end = addr + PAGE_SIZE; + + /* + * JDCXXX - This should be bottlenecked by the write buffer, but these + * things tend to be mildly unpredictable...should check this on the + * performance model + * + * We prefetch 4 lines ahead. We're also "cheating" slightly here... + * since we know we're on an SB1, we force the assembler to take + * 64-bit operands to speed things up + */ + __asm__ __volatile__( + " .set push \n" + " .set mips4 \n" + " .set noreorder \n" +#ifdef CONFIG_CPU_HAS_PREFETCH + " daddiu %0, %0, 128 \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n" /* Prefetch the first 4 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" + "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */ + " sd $0, -120(%0) \n" + " sd $0, -112(%0) \n" + " sd $0, -104(%0) \n" + " daddiu %0, %0, 32 \n" + " bnel %0, %1, 1b \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" + " daddiu %0, %0, -128 \n" +#endif + " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ + "1: sd $0, 8(%0) \n" + " sd $0, 16(%0) \n" + " sd $0, 24(%0) \n" + " daddiu %0, %0, 32 \n" + " bnel %0, %1, 1b \n" + " sd $0, 0(%0) \n" + " .set pop \n" + : "+r" (addr) + : "r" (end) + : "memory"); +} + +static inline void copy_page_cpu(void *to, void *from) +{ + unsigned char *src = (unsigned char *)from; + unsigned char *dst = (unsigned char *)to; + unsigned char *end = src + PAGE_SIZE; + + /* + * The pref's used here are using "streaming" hints, which cause the + * copied data to be kicked out of the cache sooner. A page copy often + * ends up copying a lot more data than is commonly used, so this seems + * to make sense in terms of reducing cache pollution, but I've no real + * performance data to back this up + */ + __asm__ __volatile__( + " .set push \n" + " .set mips4 \n" + " .set noreorder \n" +#ifdef CONFIG_CPU_HAS_PREFETCH + " daddiu %0, %0, 128 \n" + " daddiu %1, %1, 128 \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n" /* Prefetch the first 4 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" + "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n" +# ifdef CONFIG_MIPS64 + " ld $8, -128(%0) \n" /* Block copy a cacheline */ + " ld $9, -120(%0) \n" + " ld $10, -112(%0) \n" + " ld $11, -104(%0) \n" + " sd $8, -128(%1) \n" + " sd $9, -120(%1) \n" + " sd $10, -112(%1) \n" + " sd $11, -104(%1) \n" +# else + " lw $2, -128(%0) \n" /* Block copy a cacheline */ + " lw $3, -124(%0) \n" + " lw $6, -120(%0) \n" + " lw $7, -116(%0) \n" + " lw $8, -112(%0) \n" + " lw $9, -108(%0) \n" + " lw $10, -104(%0) \n" + " lw $11, -100(%0) \n" + " sw $2, -128(%1) \n" + " sw $3, -124(%1) \n" + " sw $6, -120(%1) \n" + " sw $7, -116(%1) \n" + " sw $8, -112(%1) \n" + " sw $9, -108(%1) \n" + " sw $10, -104(%1) \n" + " sw $11, -100(%1) \n" +# endif + " daddiu %0, %0, 32 \n" + " daddiu %1, %1, 32 \n" + " bnel %0, %2, 1b \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" + " daddiu %0, %0, -128 \n" + " daddiu %1, %1, -128 \n" +#endif +#ifdef CONFIG_MIPS64 + " ld $8, 0(%0) \n" /* Block copy a cacheline */ + "1: ld $9, 8(%0) \n" + " ld $10, 16(%0) \n" + " ld $11, 24(%0) \n" + " sd $8, 0(%1) \n" + " sd $9, 8(%1) \n" + " sd $10, 16(%1) \n" + " sd $11, 24(%1) \n" +#else + " lw $2, 0(%0) \n" /* Block copy a cacheline */ + "1: lw $3, 4(%0) \n" + " lw $6, 8(%0) \n" + " lw $7, 12(%0) \n" + " lw $8, 16(%0) \n" + " lw $9, 20(%0) \n" + " lw $10, 24(%0) \n" + " lw $11, 28(%0) \n" + " sw $2, 0(%1) \n" + " sw $3, 4(%1) \n" + " sw $6, 8(%1) \n" + " sw $7, 12(%1) \n" + " sw $8, 16(%1) \n" + " sw $9, 20(%1) \n" + " sw $10, 24(%1) \n" + " sw $11, 28(%1) \n" +#endif + " daddiu %0, %0, 32 \n" + " daddiu %1, %1, 32 \n" + " bnel %0, %2, 1b \n" +#ifdef CONFIG_MIPS64 + " ld $8, 0(%0) \n" +#else + " lw $2, 0(%0) \n" +#endif + " .set pop \n" + : "+r" (src), "+r" (dst) + : "r" (end) +#ifdef CONFIG_MIPS64 + : "$8","$9","$10","$11","memory"); +#else + : "$2","$3","$6","$7","$8","$9","$10","$11","memory"); +#endif +} + + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS + +/* + * Pad descriptors to cacheline, since each is exclusively owned by a + * particular CPU. + */ +typedef struct dmadscr_s { + u64 dscr_a; + u64 dscr_b; + u64 pad_a; + u64 pad_b; +} dmadscr_t; + +static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES))); + +void sb1_dma_init(void) +{ + int cpu = smp_processor_id(); + u64 base_val = CPHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1); + + bus_writeq(base_val, + (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); + bus_writeq(base_val | M_DM_DSCR_BASE_RESET, + (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); + bus_writeq(base_val | M_DM_DSCR_BASE_ENABL, + (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +void clear_page(void *page) +{ + int cpu = smp_processor_id(); + + /* if the page is above Kseg0, use old way */ + if ((long)KSEGX(page) != (long)CKSEG0) + return clear_page_cpu(page); + + page_descr[cpu].dscr_a = CPHYSADDR(page) | M_DM_DSCRA_ZERO_MEM | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & + M_DM_DSCR_BASE_INTERRUPT)))) + ; + bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +void copy_page(void *to, void *from) +{ + unsigned long from_phys = CPHYSADDR(from); + unsigned long to_phys = CPHYSADDR(to); + int cpu = smp_processor_id(); + + /* if either page is above Kseg0, use old way */ + if ((long)KSEGX(to) != (long)CKSEG0 + || (long)KSEGX(from) != (long)CKSEG0) + return copy_page_cpu(to, from); + + page_descr[cpu].dscr_a = CPHYSADDR(to_phys) | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = CPHYSADDR(from_phys) | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & + M_DM_DSCR_BASE_INTERRUPT)))) + ; + bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */ + +void clear_page(void *page) +{ + return clear_page_cpu(page); +} + +void copy_page(void *to, void *from) +{ + return copy_page_cpu(to, from); +} + +#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */ + +EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c new file mode 100644 index 00000000000..4f07f81e850 --- /dev/null +++ b/arch/mips/mm/pgtable-32.c @@ -0,0 +1,97 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include + +void pgd_init(unsigned long page) +{ + unsigned long *p = (unsigned long *) page; + int i; + + for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { + p[i + 0] = (unsigned long) invalid_pte_table; + p[i + 1] = (unsigned long) invalid_pte_table; + p[i + 2] = (unsigned long) invalid_pte_table; + p[i + 3] = (unsigned long) invalid_pte_table; + p[i + 4] = (unsigned long) invalid_pte_table; + p[i + 5] = (unsigned long) invalid_pte_table; + p[i + 6] = (unsigned long) invalid_pte_table; + p[i + 7] = (unsigned long) invalid_pte_table; + } +} + +#ifdef CONFIG_HIGHMEM +static void __init fixrange_init (unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + pmd = (pmd_t *)pgd; + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd((unsigned long)pte)); + if (pte != pte_offset_kernel(pmd, 0)) + BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } +} +#endif + +void __init pagetable_init(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long vaddr; + pgd_t *pgd, *pgd_base; + pmd_t *pmd; + pte_t *pte; +#endif + + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); + pgd_init((unsigned long)swapper_pg_dir + + sizeof(pgd_t) * USER_PTRS_PER_PGD); + +#ifdef CONFIG_HIGHMEM + pgd_base = swapper_pg_dir; + + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, 0, pgd_base); + + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + __pgd_offset(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; +#endif +} diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c new file mode 100644 index 00000000000..44b5e97fff6 --- /dev/null +++ b/arch/mips/mm/pgtable-64.c @@ -0,0 +1,58 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999, 2000 by Silicon Graphics + * Copyright (C) 2003 by Ralf Baechle + */ +#include +#include +#include + +void pgd_init(unsigned long page) +{ + unsigned long *p, *end; + + p = (unsigned long *) page; + end = p + PTRS_PER_PGD; + + while (p < end) { + p[0] = (unsigned long) invalid_pmd_table; + p[1] = (unsigned long) invalid_pmd_table; + p[2] = (unsigned long) invalid_pmd_table; + p[3] = (unsigned long) invalid_pmd_table; + p[4] = (unsigned long) invalid_pmd_table; + p[5] = (unsigned long) invalid_pmd_table; + p[6] = (unsigned long) invalid_pmd_table; + p[7] = (unsigned long) invalid_pmd_table; + p += 8; + } +} + +void pmd_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *) addr; + end = p + PTRS_PER_PMD; + + while (p < end) { + p[0] = (unsigned long)pagetable; + p[1] = (unsigned long)pagetable; + p[2] = (unsigned long)pagetable; + p[3] = (unsigned long)pagetable; + p[4] = (unsigned long)pagetable; + p[5] = (unsigned long)pagetable; + p[6] = (unsigned long)pagetable; + p[7] = (unsigned long)pagetable; + p += 8; + } +} + +void __init pagetable_init(void) +{ + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); + pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); +} diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c new file mode 100644 index 00000000000..3b88fdeef32 --- /dev/null +++ b/arch/mips/mm/pgtable.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +void show_mem(void) +{ +#ifndef CONFIG_DISCONTIGMEM /* XXX(hch): later.. */ + int pfn, total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + struct page *page; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + pfn = max_mapnr; + while (pfn-- > 0) { + page = pfn_to_page(pfn); + total++; + if (PageHighMem(page)) + highmem++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); +#endif +} diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c new file mode 100644 index 00000000000..d236cf8b737 --- /dev/null +++ b/arch/mips/mm/sc-ip22.c @@ -0,0 +1,177 @@ +/* + * sc-ip22.c: Indy cache management functions. + * + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), + * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* Secondary cache size in bytes, if present. */ +static unsigned long scache_size; + +#undef DEBUG_CACHE + +#define SC_SIZE 0x00080000 +#define SC_LINE 32 +#define CI_MASK (SC_SIZE - SC_LINE) +#define SC_INDEX(n) ((n) & CI_MASK) + +static inline void indy_sc_wipe(unsigned long first, unsigned long last) +{ + unsigned long tmp; + + __asm__ __volatile__( + ".set\tpush\t\t\t# indy_sc_wipe\n\t" + ".set\tnoreorder\n\t" + ".set\tmips3\n\t" + ".set\tnoat\n\t" + "mfc0\t%2, $12\n\t" + "li\t$1, 0x80\t\t\t# Go 64 bit\n\t" + "mtc0\t$1, $12\n\t" + + "dli\t$1, 0x9000000080000000\n\t" + "or\t%0, $1\t\t\t# first line to flush\n\t" + "or\t%1, $1\t\t\t# last line to flush\n\t" + ".set\tat\n\t" + + "1:\tsw\t$0, 0(%0)\n\t" + "bne\t%0, %1, 1b\n\t" + " daddu\t%0, 32\n\t" + + "mtc0\t%2, $12\t\t\t# Back to 32 bit\n\t" + "nop; nop; nop; nop;\n\t" + ".set\tpop" + : "=r" (first), "=r" (last), "=&r" (tmp) + : "0" (first), "1" (last)); +} + +static void indy_sc_wback_invalidate(unsigned long addr, unsigned long size) +{ + unsigned long first_line, last_line; + unsigned long flags; + +#ifdef DEBUG_CACHE + printk("indy_sc_wback_invalidate[%08lx,%08lx]", addr, size); +#endif + + /* Catch bad driver code */ + BUG_ON(size == 0); + + /* Which lines to flush? */ + first_line = SC_INDEX(addr); + last_line = SC_INDEX(addr + size - 1); + + local_irq_save(flags); + if (first_line <= last_line) { + indy_sc_wipe(first_line, last_line); + goto out; + } + + indy_sc_wipe(first_line, SC_SIZE - SC_LINE); + indy_sc_wipe(0, last_line); +out: + local_irq_restore(flags); +} + +static void indy_sc_enable(void) +{ + unsigned long addr, tmp1, tmp2; + + /* This is really cool... */ +#ifdef DEBUG_CACHE + printk("Enabling R4600 SCACHE\n"); +#endif + __asm__ __volatile__( + ".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set\tmips3\n\t" + "mfc0\t%2, $12\n\t" + "nop; nop; nop; nop;\n\t" + "li\t%1, 0x80\n\t" + "mtc0\t%1, $12\n\t" + "nop; nop; nop; nop;\n\t" + "li\t%0, 0x1\n\t" + "dsll\t%0, 31\n\t" + "lui\t%1, 0x9000\n\t" + "dsll32\t%1, 0\n\t" + "or\t%0, %1, %0\n\t" + "sb\t$0, 0(%0)\n\t" + "mtc0\t$0, $12\n\t" + "nop; nop; nop; nop;\n\t" + "mtc0\t%2, $12\n\t" + "nop; nop; nop; nop;\n\t" + ".set\tpop" + : "=r" (tmp1), "=r" (tmp2), "=r" (addr)); +} + +static void indy_sc_disable(void) +{ + unsigned long tmp1, tmp2, tmp3; + +#ifdef DEBUG_CACHE + printk("Disabling R4600 SCACHE\n"); +#endif + __asm__ __volatile__( + ".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set\tmips3\n\t" + "li\t%0, 0x1\n\t" + "dsll\t%0, 31\n\t" + "lui\t%1, 0x9000\n\t" + "dsll32\t%1, 0\n\t" + "or\t%0, %1, %0\n\t" + "mfc0\t%2, $12\n\t" + "nop; nop; nop; nop\n\t" + "li\t%1, 0x80\n\t" + "mtc0\t%1, $12\n\t" + "nop; nop; nop; nop\n\t" + "sh\t$0, 0(%0)\n\t" + "mtc0\t$0, $12\n\t" + "nop; nop; nop; nop\n\t" + "mtc0\t%2, $12\n\t" + "nop; nop; nop; nop\n\t" + ".set\tpop" + : "=r" (tmp1), "=r" (tmp2), "=r" (tmp3)); +} + +static inline int __init indy_sc_probe(void) +{ + unsigned int size = ip22_eeprom_read(&sgimc->eeprom, 17); + if (size == 0) + return 0; + + size <<= PAGE_SHIFT; + printk(KERN_INFO "R4600/R5000 SCACHE size %dK, linesize 32 bytes.\n", + size >> 10); + scache_size = size; + + return 1; +} + +/* XXX Check with wje if the Indy caches can differenciate between + writeback + invalidate and just invalidate. */ +struct bcache_ops indy_sc_ops = { + .bc_enable = indy_sc_enable, + .bc_disable = indy_sc_disable, + .bc_wback_inv = indy_sc_wback_invalidate, + .bc_inv = indy_sc_wback_invalidate +}; + +void __init indy_sc_init(void) +{ + if (indy_sc_probe()) { + indy_sc_enable(); + bcops = &indy_sc_ops; + } +} diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c new file mode 100644 index 00000000000..d35b6c1103a --- /dev/null +++ b/arch/mips/mm/sc-r5k.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), + * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Secondary cache size in bytes, if present. */ +static unsigned long scache_size; + +#define SC_LINE 32 +#define SC_PAGE (128*SC_LINE) + +static inline void blast_r5000_scache(void) +{ + unsigned long start = INDEX_BASE; + unsigned long end = start + scache_size; + + while(start < end) { + cache_op(R5K_Page_Invalidate_S, start); + start += SC_PAGE; + } +} + +static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + /* Catch bad driver code */ + BUG_ON(size == 0); + + if (size >= scache_size) { + blast_r5000_scache(); + return; + } + + /* On the R5000 secondary cache we cannot + * invalidate less than a page at a time. + * The secondary cache is physically indexed, write-through. + */ + a = addr & ~(SC_PAGE - 1); + end = (addr + size - 1) & ~(SC_PAGE - 1); + while (a <= end) { + cache_op(R5K_Page_Invalidate_S, a); + a += SC_PAGE; + } +} + +static void r5k_sc_enable(void) +{ + unsigned long flags; + + local_irq_save(flags); + set_c0_config(R5K_CONF_SE); + blast_r5000_scache(); + local_irq_restore(flags); +} + +static void r5k_sc_disable(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_r5000_scache(); + clear_c0_config(R5K_CONF_SE); + local_irq_restore(flags); +} + +static inline int __init r5k_sc_probe(void) +{ + unsigned long config = read_c0_config(); + + if (config & CONF_SC) + return(0); + + scache_size = (512 * 1024) << ((config & R5K_CONF_SS) >> 20); + + printk("R5000 SCACHE size %ldkB, linesize 32 bytes.\n", + scache_size >> 10); + + return 1; +} + +static struct bcache_ops r5k_sc_ops = { + .bc_enable = r5k_sc_enable, + .bc_disable = r5k_sc_disable, + .bc_wback_inv = r5k_dma_cache_inv_sc, + .bc_inv = r5k_dma_cache_inv_sc +}; + +void __init r5k_sc_init(void) +{ + if (r5k_sc_probe()) { + r5k_sc_enable(); + bcops = &r5k_sc_ops; + } +} diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c new file mode 100644 index 00000000000..4e92f931aab --- /dev/null +++ b/arch/mips/mm/sc-rm7k.c @@ -0,0 +1,193 @@ +/* + * sc-rm7k.c: RM7000 cache management functions. + * + * Copyright (C) 1997, 2001, 2003, 2004 Ralf Baechle (ralf@linux-mips.org) + */ + +#undef DEBUG + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Primary cache parameters. */ +#define sc_lsize 32 +#define tc_pagesize (32*128) + +/* Secondary cache parameters. */ +#define scache_size (256*1024) /* Fixed to 256KiB on RM7000 */ + +extern unsigned long icache_way_size, dcache_way_size; + +#include + +int rm7k_tcache_enabled; + +/* + * Writeback and invalidate the primary cache dcache before DMA. + * (XXX These need to be fixed ...) + */ +static void rm7k_sc_wback_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + pr_debug("rm7k_sc_wback_inv[%08lx,%08lx]", addr, size); + + /* Catch bad driver code */ + BUG_ON(size == 0); + + a = addr & ~(sc_lsize - 1); + end = (addr + size - 1) & ~(sc_lsize - 1); + while (1) { + flush_scache_line(a); /* Hit_Writeback_Inv_SD */ + if (a == end) + break; + a += sc_lsize; + } + + if (!rm7k_tcache_enabled) + return; + + a = addr & ~(tc_pagesize - 1); + end = (addr + size - 1) & ~(tc_pagesize - 1); + while(1) { + invalidate_tcache_page(a); /* Page_Invalidate_T */ + if (a == end) + break; + a += tc_pagesize; + } +} + +static void rm7k_sc_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + pr_debug("rm7k_sc_inv[%08lx,%08lx]", addr, size); + + /* Catch bad driver code */ + BUG_ON(size == 0); + + a = addr & ~(sc_lsize - 1); + end = (addr + size - 1) & ~(sc_lsize - 1); + while (1) { + invalidate_scache_line(a); /* Hit_Invalidate_SD */ + if (a == end) + break; + a += sc_lsize; + } + + if (!rm7k_tcache_enabled) + return; + + a = addr & ~(tc_pagesize - 1); + end = (addr + size - 1) & ~(tc_pagesize - 1); + while(1) { + invalidate_tcache_page(a); /* Page_Invalidate_T */ + if (a == end) + break; + a += tc_pagesize; + } +} + +/* + * This function is executed in the uncached segment CKSEG1. + * It must not touch the stack, because the stack pointer still points + * into CKSEG0. + * + * Three options: + * - Write it in assembly and guarantee that we don't use the stack. + * - Disable caching for CKSEG0 before calling it. + * - Pray that GCC doesn't randomly start using the stack. + * + * This being Linux, we obviously take the least sane of those options - + * following DaveM's lead in c-r4k.c + * + * It seems we get our kicks from relying on unguaranteed behaviour in GCC + */ +static __init void __rm7k_sc_enable(void) +{ + int i; + + set_c0_config(1 << 3); /* CONF_SE */ + + write_c0_taglo(0); + write_c0_taghi(0); + + for (i = 0; i < scache_size; i += sc_lsize) { + __asm__ __volatile__ ( + ".set noreorder\n\t" + ".set mips3\n\t" + "cache %1, (%0)\n\t" + ".set mips0\n\t" + ".set reorder" + : + : "r" (KSEG0ADDR(i)), "i" (Index_Store_Tag_SD)); + } +} + +static __init void rm7k_sc_enable(void) +{ + void (*func)(void) = (void *) KSEG1ADDR(&__rm7k_sc_enable); + + if (read_c0_config() & 0x08) /* CONF_SE */ + return; + + printk(KERN_INFO "Enabling secondary cache..."); + func(); +} + +static void rm7k_sc_disable(void) +{ + clear_c0_config(1<<3); /* CONF_SE */ +} + +struct bcache_ops rm7k_sc_ops = { + .bc_enable = rm7k_sc_enable, + .bc_disable = rm7k_sc_disable, + .bc_wback_inv = rm7k_sc_wback_inv, + .bc_inv = rm7k_sc_inv +}; + +void __init rm7k_sc_init(void) +{ + unsigned int config = read_c0_config(); + + if ((config >> 31) & 1) /* Bit 31 set -> no S-Cache */ + return; + + printk(KERN_INFO "Secondary cache size %dK, linesize %d bytes.\n", + (scache_size >> 10), sc_lsize); + + if (!((config >> 3) & 1)) /* CONF_SE */ + rm7k_sc_enable(); + + /* + * While we're at it let's deal with the tertiary cache. + */ + if (!((config >> 17) & 1)) { + + /* + * We can't enable the L3 cache yet. There may be board-specific + * magic necessary to turn it on, and blindly asking the CPU to + * start using it would may give cache errors. + * + * Also, board-specific knowledge may allow us to use the + * CACHE Flash_Invalidate_T instruction if the tag RAM supports + * it, and may specify the size of the L3 cache so we don't have + * to probe it. + */ + printk(KERN_INFO "Tertiary cache present, %s enabled\n", + config&(1<<12) ? "already" : "not (yet)"); + + if ((config >> 12) & 1) + rm7k_tcache_enabled = 1; + } + + bcops = &rm7k_sc_ops; +} diff --git a/arch/mips/mm/tlb-andes.c b/arch/mips/mm/tlb-andes.c new file mode 100644 index 00000000000..167e08e9661 --- /dev/null +++ b/arch/mips/mm/tlb-andes.c @@ -0,0 +1,257 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1997, 1998, 1999 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2000 Kanoj Sarcar (kanoj@sgi.com) + */ +#include +#include +#include +#include +#include +#include +#include +#include + +extern void build_tlb_refill_handler(void); + +#define NTLB_ENTRIES 64 +#define NTLB_ENTRIES_HALF 32 + +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + unsigned long entry; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi() & ASID_MASK; + write_c0_entryhi(CKSEG0); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + + entry = read_c0_wired(); + + /* Blast 'em all away. */ + while (entry < NTLB_ENTRIES) { + write_c0_index(entry); + tlb_write_indexed(); + entry++; + } + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu = smp_processor_id(); + if (cpu_context(cpu, mm) != 0) { + drop_mmu_context(mm,cpu); + } +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= NTLB_ENTRIES_HALF) { + int oldpid = (read_c0_entryhi() & ASID_MASK); + int newpid = (cpu_context(smp_processor_id(), mm) + & ASID_MASK); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + while(start < end) { + int idx; + + write_c0_entryhi(start | newpid); + start += (PAGE_SIZE << 1); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_entryhi(CKSEG0); + if(idx < 0) + continue; + tlb_write_indexed(); + } + write_c0_entryhi(oldpid); + } else { + drop_mmu_context(mm, cpu); + } + local_irq_restore(flags); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + + local_irq_save(flags); + if (size <= NTLB_ENTRIES_HALF) { + int pid = read_c0_entryhi(); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += (PAGE_SIZE << 1); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT+1))); + if (idx < 0) + continue; + tlb_write_indexed(); + } + write_c0_entryhi(pid); + } else { + local_flush_tlb_all(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + if (cpu_context(smp_processor_id(), vma->vm_mm) != 0) { + unsigned long flags; + int oldpid, newpid, idx; + + newpid = (cpu_context(smp_processor_id(), vma->vm_mm) & + ASID_MASK); + page &= (PAGE_MASK << 1); + local_irq_save(flags); + oldpid = (read_c0_entryhi() & ASID_MASK); + write_c0_entryhi(page | newpid); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_entryhi(CKSEG0); + if (idx < 0) + goto finish; + tlb_write_indexed(); + + finish: + write_c0_entryhi(oldpid); + local_irq_restore(flags); + } +} + +/* + * This one is only used for pages with the global bit set so we don't care + * much about the ASID. + */ +void local_flush_tlb_one(unsigned long page) +{ + unsigned long flags; + int oldpid, idx; + + local_irq_save(flags); + page &= (PAGE_MASK << 1); + oldpid = read_c0_entryhi() & 0xff; + write_c0_entryhi(page); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx >= 0) { + /* Make sure all entries differ. */ + write_c0_entryhi(CKSEG0+(idx<<(PAGE_SHIFT+1))); + tlb_write_indexed(); + } + write_c0_entryhi(oldpid); + + local_irq_restore(flags); +} + +/* XXX Simplify this. On the R10000 writing a TLB entry for an virtual + address that already exists will overwrite the old entry and not result + in TLB malfunction or TLB shutdown. */ +void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + pid = read_c0_entryhi() & ASID_MASK; + + if ((pid != (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK)) + || (cpu_context(smp_processor_id(), vma->vm_mm) == 0)) { + printk(KERN_WARNING + "%s: Wheee, bogus tlbpid mmpid=%d tlbpid=%d\n", + __FUNCTION__, (int) (cpu_context(smp_processor_id(), + vma->vm_mm) & ASID_MASK), pid); + } + + local_irq_save(flags); + address &= (PAGE_MASK << 1); + write_c0_entryhi(address | (pid)); + pgdp = pgd_offset(vma->vm_mm, address); + tlb_probe(); + pmdp = pmd_offset(pgdp, address); + idx = read_c0_index(); + ptep = pte_offset_map(pmdp, address); + write_c0_entrylo0(pte_val(*ptep++) >> 6); + write_c0_entrylo1(pte_val(*ptep) >> 6); + write_c0_entryhi(address | pid); + if (idx < 0) { + tlb_write_random(); + } else { + tlb_write_indexed(); + } + write_c0_entryhi(pid); + local_irq_restore(flags); +} + +void __init tlb_init(void) +{ + /* + * You should never change this register: + * - On R4600 1.7 the tlbp never hits for pages smaller than + * the value in the c0_pagemask register. + * - The entire mm handling assumes the c0_pagemask register to + * be set for 4kb pages. + */ + write_c0_pagemask(PM_4K); + write_c0_wired(0); + write_c0_framemask(0); + + /* From this point on the ARC firmware is dead. */ + local_flush_tlb_all(); + + /* Did I tell you that ARC SUCKS? */ + + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c new file mode 100644 index 00000000000..7948e9a5e37 --- /dev/null +++ b/arch/mips/mm/tlb-r3k.c @@ -0,0 +1,289 @@ +/* + * r2300.c: R2000 and R3000 specific mmu/cache code. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * + * with a lot of changes to make this thing work for R3000s + * Tx39XX R4k style caches added. HK + * Copyright (C) 1998, 1999, 2000 Harald Koerfgen + * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov + * Copyright (C) 2002 Ralf Baechle + * Copyright (C) 2002 Maciej W. Rozycki + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_TLB + +extern void build_tlb_refill_handler(void); + +/* CP0 hazard avoidance. */ +#define BARRIER \ + __asm__ __volatile__( \ + ".set push\n\t" \ + ".set noreorder\n\t" \ + "nop\n\t" \ + ".set pop\n\t") + +int r3k_have_wired_reg; /* should be in cpu_data? */ + +/* TLB operations. */ +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + int entry; + +#ifdef DEBUG_TLB + printk("[tlball]"); +#endif + + local_irq_save(flags); + old_ctx = read_c0_entryhi() & ASID_MASK; + write_c0_entrylo0(0); + entry = r3k_have_wired_reg ? read_c0_wired() : 8; + for (; entry < current_cpu_data.tlbsize; entry++) { + write_c0_index(entry << 8); + write_c0_entryhi((entry | 0x80000) << 12); + BARRIER; + tlb_write_indexed(); + } + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { +#ifdef DEBUG_TLB + printk("[tlbmm<%lu>]", (unsigned long)cpu_context(cpu, mm)); +#endif + drop_mmu_context(mm, cpu); + } +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + unsigned long flags; + int size; + +#ifdef DEBUG_TLB + printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", + cpu_context(cpu, mm) & ASID_MASK, start, end); +#endif + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size <= current_cpu_data.tlbsize) { + int oldpid = read_c0_entryhi() & ASID_MASK; + int newpid = cpu_context(cpu, mm) & ASID_MASK; + + start &= PAGE_MASK; + end += PAGE_SIZE - 1; + end &= PAGE_MASK; + while (start < end) { + int idx; + + write_c0_entryhi(start | newpid); + start += PAGE_SIZE; /* BARRIER */ + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + continue; + tlb_write_indexed(); + } + write_c0_entryhi(oldpid); + } else { + drop_mmu_context(mm, cpu); + } + local_irq_restore(flags); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + +#ifdef DEBUG_TLB + printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); +#endif + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size <= current_cpu_data.tlbsize) { + int pid = read_c0_entryhi(); + + start &= PAGE_MASK; + end += PAGE_SIZE - 1; + end &= PAGE_MASK; + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += PAGE_SIZE; /* BARRIER */ + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + continue; + tlb_write_indexed(); + } + write_c0_entryhi(pid); + } else { + local_flush_tlb_all(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + + if (!vma || cpu_context(cpu, vma->vm_mm) != 0) { + unsigned long flags; + int oldpid, newpid, idx; + +#ifdef DEBUG_TLB + printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page); +#endif + newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK; + page &= PAGE_MASK; + local_irq_save(flags); + oldpid = read_c0_entryhi() & ASID_MASK; + write_c0_entryhi(page | newpid); + BARRIER; + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + goto finish; + tlb_write_indexed(); + +finish: + write_c0_entryhi(oldpid); + local_irq_restore(flags); + } +} + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + pid = read_c0_entryhi() & ASID_MASK; + +#ifdef DEBUG_TLB + if ((pid != (cpu_context(cpu, vma->vm_mm) & ASID_MASK)) || (cpu_context(cpu, vma->vm_mm) == 0)) { + printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n", + (cpu_context(cpu, vma->vm_mm)), pid); + } +#endif + + local_irq_save(flags); + address &= PAGE_MASK; + write_c0_entryhi(address | pid); + BARRIER; + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(pte_val(pte)); + write_c0_entryhi(address | pid); + if (idx < 0) { /* BARRIER */ + tlb_write_random(); + } else { + tlb_write_indexed(); + } + write_c0_entryhi(pid); + local_irq_restore(flags); +} + +void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + unsigned long flags; + unsigned long old_ctx; + static unsigned long wired = 0; + + if (r3k_have_wired_reg) { /* TX39XX */ + unsigned long old_pagemask; + unsigned long w; + +#ifdef DEBUG_TLB + printk("[tlbwired]\n", + entrylo0, entryhi, pagemask); +#endif + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi() & ASID_MASK; + old_pagemask = read_c0_pagemask(); + w = read_c0_wired(); + write_c0_wired(w + 1); + if (read_c0_wired() != w + 1) { + printk("[tlbwired] No WIRED reg?\n"); + return; + } + write_c0_index(w << 8); + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + BARRIER; + tlb_write_indexed(); + + write_c0_entryhi(old_ctx); + write_c0_pagemask(old_pagemask); + local_flush_tlb_all(); + local_irq_restore(flags); + + } else if (wired < 8) { +#ifdef DEBUG_TLB + printk("[tlbwired]\n", + entrylo0, entryhi); +#endif + + local_irq_save(flags); + old_ctx = read_c0_entryhi() & ASID_MASK; + write_c0_entrylo0(entrylo0); + write_c0_entryhi(entryhi); + write_c0_index(wired); + wired++; /* BARRIER */ + tlb_write_indexed(); + write_c0_entryhi(old_ctx); + local_flush_tlb_all(); + local_irq_restore(flags); + } +} + +void __init tlb_init(void) +{ + local_flush_tlb_all(); + + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c new file mode 100644 index 00000000000..59d38bc05b6 --- /dev/null +++ b/arch/mips/mm/tlb-r4k.c @@ -0,0 +1,419 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern void build_tlb_refill_handler(void); + +/* CP0 hazard avoidance. */ +#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \ + "nop; nop; nop; nop; nop; nop;\n\t" \ + ".set reorder\n\t") + +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + int entry; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + + entry = read_c0_wired(); + + /* Blast 'em all away. */ + while (entry < current_cpu_data.tlbsize) { + /* + * Make sure all entries differ. If they're not different + * MIPS32 will take revenge ... + */ + write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1))); + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + entry++; + } + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) + drop_mmu_context(mm,cpu); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= current_cpu_data.tlbsize/2) { + int oldpid = read_c0_entryhi(); + int newpid = cpu_asid(cpu, mm); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + while (start < end) { + int idx; + + write_c0_entryhi(start | newpid); + start += (PAGE_SIZE << 1); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + continue; + /* Make sure all entries differ. */ + write_c0_entryhi(CKSEG0 + + (idx << (PAGE_SHIFT + 1))); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + tlbw_use_hazard(); + write_c0_entryhi(oldpid); + } else { + drop_mmu_context(mm, cpu); + } + local_irq_restore(flags); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= current_cpu_data.tlbsize / 2) { + int pid = read_c0_entryhi(); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += (PAGE_SIZE << 1); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + continue; + /* Make sure all entries differ. */ + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + tlbw_use_hazard(); + write_c0_entryhi(pid); + } else { + local_flush_tlb_all(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + + if (cpu_context(cpu, vma->vm_mm) != 0) { + unsigned long flags; + int oldpid, newpid, idx; + + newpid = cpu_asid(cpu, vma->vm_mm); + page &= (PAGE_MASK << 1); + local_irq_save(flags); + oldpid = read_c0_entryhi(); + write_c0_entryhi(page | newpid); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + goto finish; + /* Make sure all entries differ. */ + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + finish: + write_c0_entryhi(oldpid); + local_irq_restore(flags); + } +} + +/* + * This one is only used for pages with the global bit set so we don't care + * much about the ASID. + */ +void local_flush_tlb_one(unsigned long page) +{ + unsigned long flags; + int oldpid, idx; + + local_irq_save(flags); + page &= (PAGE_MASK << 1); + oldpid = read_c0_entryhi(); + write_c0_entryhi(page); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx >= 0) { + /* Make sure all entries differ. */ + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + } + write_c0_entryhi(oldpid); + + local_irq_restore(flags); +} + +/* + * We will need multiple versions of update_mmu_cache(), one that just + * updates the TLB with the new pte(s), and another which also checks + * for the R4k "end of page" hardware bug and does the needy. + */ +void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + pid = read_c0_entryhi() & ASID_MASK; + + local_irq_save(flags); + address &= (PAGE_MASK << 1); + write_c0_entryhi(address | pid); + pgdp = pgd_offset(vma->vm_mm, address); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + pmdp = pmd_offset(pgdp, address); + idx = read_c0_index(); + ptep = pte_offset_map(pmdp, address); + + #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + write_c0_entrylo0(ptep->pte_high); + ptep++; + write_c0_entrylo1(ptep->pte_high); +#else + write_c0_entrylo0(pte_val(*ptep++) >> 6); + write_c0_entrylo1(pte_val(*ptep) >> 6); +#endif + write_c0_entryhi(address | pid); + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(pid); + local_irq_restore(flags); +} + +#if 0 +static void r4k_update_mmu_cache_hwbug(struct vm_area_struct * vma, + unsigned long address, pte_t pte) +{ + unsigned long flags; + unsigned int asid; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + int idx; + + local_irq_save(flags); + address &= (PAGE_MASK << 1); + asid = read_c0_entryhi() & ASID_MASK; + write_c0_entryhi(address | asid); + pgdp = pgd_offset(vma->vm_mm, address); + mtc0_tlbw_hazard(); + tlb_probe(); + BARRIER; + pmdp = pmd_offset(pgdp, address); + idx = read_c0_index(); + ptep = pte_offset_map(pmdp, address); + write_c0_entrylo0(pte_val(*ptep++) >> 6); + write_c0_entrylo1(pte_val(*ptep) >> 6); + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + tlbw_use_hazard(); + local_irq_restore(flags); +} +#endif + +void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + wired = read_c0_wired(); + write_c0_wired(wired + 1); + write_c0_index(wired); + BARRIER; + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + write_c0_entryhi(old_ctx); + BARRIER; + write_c0_pagemask(old_pagemask); + local_flush_tlb_all(); + local_irq_restore(flags); +} + +/* + * Used for loading TLB entries before trap_init() has started, when we + * don't actually want to add a wired entry which remains throughout the + * lifetime of the system + */ + +static int temp_tlb_entry __initdata; + +__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + int ret = 0; + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + wired = read_c0_wired(); + if (--temp_tlb_entry < wired) { + printk(KERN_WARNING "No TLB space left for add_temporary_entry\n"); + ret = -ENOSPC; + goto out; + } + + write_c0_index(temp_tlb_entry); + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + write_c0_entryhi(old_ctx); + write_c0_pagemask(old_pagemask); +out: + local_irq_restore(flags); + return ret; +} + +static void __init probe_tlb(unsigned long config) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int reg; + + /* + * If this isn't a MIPS32 / MIPS64 compliant CPU. Config 1 register + * is not supported, we assume R4k style. Cpu probing already figured + * out the number of tlb entries. + */ + if ((c->processor_id & 0xff0000) == PRID_COMP_LEGACY) + return; + + reg = read_c0_config1(); + if (!((config >> 7) & 3)) + panic("No TLB present"); + + c->tlbsize = ((reg >> 25) & 0x3f) + 1; +} + +void __init tlb_init(void) +{ + unsigned int config = read_c0_config(); + + /* + * You should never change this register: + * - On R4600 1.7 the tlbp never hits for pages smaller than + * the value in the c0_pagemask register. + * - The entire mm handling assumes the c0_pagemask register to + * be set for 4kb pages. + */ + probe_tlb(config); + write_c0_pagemask(PM_DEFAULT_MASK); + write_c0_wired(0); + temp_tlb_entry = current_cpu_data.tlbsize - 1; + local_flush_tlb_all(); + + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c new file mode 100644 index 00000000000..1bfb09198ce --- /dev/null +++ b/arch/mips/mm/tlb-r8k.c @@ -0,0 +1,250 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern void build_tlb_refill_handler(void); + +#define TFP_TLB_SIZE 384 +#define TFP_TLB_SET_SHIFT 7 + +/* CP0 hazard avoidance. */ +#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \ + "nop; nop; nop; nop; nop; nop;\n\t" \ + ".set reorder\n\t") + +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + int entry; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + write_c0_entrylo(0); + + for (entry = 0; entry < TFP_TLB_SIZE; entry++) { + write_c0_tlbset(entry >> TFP_TLB_SET_SHIFT); + write_c0_vaddr(entry << PAGE_SHIFT); + write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1))); + mtc0_tlbw_hazard(); + tlb_write(); + } + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) + drop_mmu_context(mm,cpu); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + unsigned long flags; + int oldpid, newpid, size; + + if (!cpu_context(cpu, mm)) + return; + + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + + local_irq_save(flags); + + if (size > TFP_TLB_SIZE / 2) { + drop_mmu_context(mm, cpu); + goto out_restore; + } + + oldpid = read_c0_entryhi(); + newpid = cpu_asid(cpu, mm); + + write_c0_entrylo(0); + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + while (start < end) { + signed long idx; + + write_c0_vaddr(start); + write_c0_entryhi(start); + start += PAGE_SIZE; + tlb_probe(); + idx = read_c0_tlbset(); + if (idx < 0) + continue; + + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + tlb_write(); + } + write_c0_entryhi(oldpid); + +out_restore: + local_irq_restore(flags); +} + +/* Usable for KV1 addresses only! */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + + if (size > TFP_TLB_SIZE / 2) { + local_flush_tlb_all(); + return; + } + + local_irq_save(flags); + + write_c0_entrylo(0); + + start &= PAGE_MASK; + end += (PAGE_SIZE - 1); + end &= PAGE_MASK; + while (start < end) { + signed long idx; + + write_c0_vaddr(start); + write_c0_entryhi(start); + start += PAGE_SIZE; + tlb_probe(); + idx = read_c0_tlbset(); + if (idx < 0) + continue; + + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + tlb_write(); + } + + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int oldpid, newpid; + signed long idx; + + if (!cpu_context(cpu, vma->vm_mm)) + return; + + newpid = cpu_asid(cpu, vma->vm_mm); + page &= PAGE_MASK; + local_irq_save(flags); + oldpid = read_c0_entryhi(); + write_c0_vaddr(page); + write_c0_entryhi(newpid); + tlb_probe(); + idx = read_c0_tlbset(); + if (idx < 0) + goto finish; + + write_c0_entrylo(0); + write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT + 1))); + tlb_write(); + +finish: + write_c0_entryhi(oldpid); + local_irq_restore(flags); +} + +/* + * We will need multiple versions of update_mmu_cache(), one that just + * updates the TLB with the new pte(s), and another which also checks + * for the R4k "end of page" hardware bug and does the needy. + */ +void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + int pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + pid = read_c0_entryhi() & ASID_MASK; + + local_irq_save(flags); + address &= PAGE_MASK; + write_c0_vaddr(address); + write_c0_entryhi(pid); + pgdp = pgd_offset(vma->vm_mm, address); + pmdp = pmd_offset(pgdp, address); + ptep = pte_offset_map(pmdp, address); + tlb_probe(); + + write_c0_entrylo(pte_val(*ptep++) >> 6); + tlb_write(); + + write_c0_entryhi(pid); + local_irq_restore(flags); +} + +static void __init probe_tlb(unsigned long config) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + + c->tlbsize = 3 * 128; /* 3 sets each 128 entries */ +} + +void __init tlb_init(void) +{ + unsigned int config = read_c0_config(); + unsigned long status; + + probe_tlb(config); + + status = read_c0_status(); + status &= ~(ST0_UPS | ST0_KPS); +#ifdef CONFIG_PAGE_SIZE_4KB + status |= (TFP_PAGESIZE_4K << 32) | (TFP_PAGESIZE_4K << 36); +#elif defined(CONFIG_PAGE_SIZE_8KB) + status |= (TFP_PAGESIZE_8K << 32) | (TFP_PAGESIZE_8K << 36); +#elif defined(CONFIG_PAGE_SIZE_16KB) + status |= (TFP_PAGESIZE_16K << 32) | (TFP_PAGESIZE_16K << 36); +#elif defined(CONFIG_PAGE_SIZE_64KB) + status |= (TFP_PAGESIZE_64K << 32) | (TFP_PAGESIZE_64K << 36); +#endif + write_c0_status(status); + + write_c0_wired(0); + + local_flush_tlb_all(); + + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlb-sb1.c b/arch/mips/mm/tlb-sb1.c new file mode 100644 index 00000000000..6256cafcf3a --- /dev/null +++ b/arch/mips/mm/tlb-sb1.c @@ -0,0 +1,376 @@ +/* + * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 2000, 2001, 2002, 2003 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include +#include +#include + +extern void build_tlb_refill_handler(void); + +#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) + +/* Dump the current entry* and pagemask registers */ +static inline void dump_cur_tlb_regs(void) +{ + unsigned int entryhihi, entryhilo, entrylo0hi, entrylo0lo, entrylo1hi; + unsigned int entrylo1lo, pagemask; + + __asm__ __volatile__ ( + ".set push \n" + ".set noreorder \n" + ".set mips64 \n" + ".set noat \n" + " tlbr \n" + " dmfc0 $1, $10 \n" + " dsrl32 %0, $1, 0 \n" + " sll %1, $1, 0 \n" + " dmfc0 $1, $2 \n" + " dsrl32 %2, $1, 0 \n" + " sll %3, $1, 0 \n" + " dmfc0 $1, $3 \n" + " dsrl32 %4, $1, 0 \n" + " sll %5, $1, 0 \n" + " mfc0 %6, $5 \n" + ".set pop \n" + : "=r" (entryhihi), "=r" (entryhilo), + "=r" (entrylo0hi), "=r" (entrylo0lo), + "=r" (entrylo1hi), "=r" (entrylo1lo), + "=r" (pagemask)); + + printk("%08X%08X %08X%08X %08X%08X %08X", + entryhihi, entryhilo, + entrylo0hi, entrylo0lo, + entrylo1hi, entrylo1lo, + pagemask); +} + +void sb1_dump_tlb(void) +{ + unsigned long old_ctx; + unsigned long flags; + int entry; + local_irq_save(flags); + old_ctx = read_c0_entryhi(); + printk("Current TLB registers state:\n" + " EntryHi EntryLo0 EntryLo1 PageMask Index\n" + "--------------------------------------------------------------------\n"); + dump_cur_tlb_regs(); + printk(" %08X\n", read_c0_index()); + printk("\n\nFull TLB Dump:\n" + "Idx EntryHi EntryLo0 EntryLo1 PageMask\n" + "--------------------------------------------------------------\n"); + for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { + write_c0_index(entry); + printk("\n%02i ", entry); + dump_cur_tlb_regs(); + } + printk("\n"); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + int entry; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi() & ASID_MASK; + write_c0_entrylo0(0); + write_c0_entrylo1(0); + + entry = read_c0_wired(); + while (entry < current_cpu_data.tlbsize) { + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_index(entry); + tlb_write_indexed(); + entry++; + } + write_c0_entryhi(old_ctx); + local_irq_restore(flags); +} + + +/* + * Use a bogus region of memory (starting at 0) to sanitize the TLB's. + * Use increments of the maximum page size (16MB), and check for duplicate + * entries before doing a given write. Then, when we're safe from collisions + * with the firmware, go back and give all the entries invalid addresses with + * the normal flush routine. Wired entries will be killed as well! + */ +static void __init sb1_sanitize_tlb(void) +{ + int entry; + long addr = 0; + + long inc = 1<<24; /* 16MB */ + /* Save old context and create impossible VPN2 value */ + write_c0_entrylo0(0); + write_c0_entrylo1(0); + for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { + do { + addr += inc; + write_c0_entryhi(addr); + tlb_probe(); + } while ((int)(read_c0_index()) >= 0); + write_c0_index(entry); + tlb_write_indexed(); + } + /* Now that we know we're safe from collisions, we can safely flush + the TLB with the "normal" routine. */ + local_flush_tlb_all(); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long flags; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + if (cpu_context(cpu, mm) != 0) { + int size; + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= (current_cpu_data.tlbsize/2)) { + int oldpid = read_c0_entryhi() & ASID_MASK; + int newpid = cpu_asid(cpu, mm); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + while (start < end) { + int idx; + + write_c0_entryhi(start | newpid); + start += (PAGE_SIZE << 1); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + if (idx < 0) + continue; + tlb_write_indexed(); + } + write_c0_entryhi(oldpid); + } else { + drop_mmu_context(mm, cpu); + } + } + local_irq_restore(flags); +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + int size; + + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + + local_irq_save(flags); + if (size <= (current_cpu_data.tlbsize/2)) { + int pid = read_c0_entryhi(); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += (PAGE_SIZE << 1); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + if (idx < 0) + continue; + tlb_write_indexed(); + } + write_c0_entryhi(pid); + } else { + local_flush_tlb_all(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + unsigned long flags; + int cpu = smp_processor_id(); + + local_irq_save(flags); + if (cpu_context(cpu, vma->vm_mm) != 0) { + int oldpid, newpid, idx; + newpid = cpu_asid(cpu, vma->vm_mm); + page &= (PAGE_MASK << 1); + oldpid = read_c0_entryhi() & ASID_MASK; + write_c0_entryhi(page | newpid); + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + goto finish; + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + tlb_write_indexed(); + finish: + write_c0_entryhi(oldpid); + } + local_irq_restore(flags); +} + +/* + * Remove one kernel space TLB entry. This entry is assumed to be marked + * global so we don't do the ASID thing. + */ +void local_flush_tlb_one(unsigned long page) +{ + unsigned long flags; + int oldpid, idx; + + page &= (PAGE_MASK << 1); + oldpid = read_c0_entryhi() & ASID_MASK; + + local_irq_save(flags); + write_c0_entryhi(page); + tlb_probe(); + idx = read_c0_index(); + if (idx >= 0) { + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + tlb_write_indexed(); + } + + write_c0_entryhi(oldpid); + local_irq_restore(flags); +} + +/* All entries common to a mm share an asid. To effectively flush + these entries, we just bump the asid. */ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu; + + preempt_disable(); + + cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + drop_mmu_context(mm, cpu); + } + + preempt_enable(); +} + +/* Stolen from mips32 routines */ + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + pid = read_c0_entryhi() & ASID_MASK; + address &= (PAGE_MASK << 1); + write_c0_entryhi(address | (pid)); + pgdp = pgd_offset(vma->vm_mm, address); + tlb_probe(); + pmdp = pmd_offset(pgdp, address); + idx = read_c0_index(); + ptep = pte_offset_map(pmdp, address); + write_c0_entrylo0(pte_val(*ptep++) >> 6); + write_c0_entrylo1(pte_val(*ptep) >> 6); + if (idx < 0) { + tlb_write_random(); + } else { + tlb_write_indexed(); + } + local_irq_restore(flags); +} + +void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + old_ctx = read_c0_entryhi() & 0xff; + old_pagemask = read_c0_pagemask(); + wired = read_c0_wired(); + write_c0_wired(wired + 1); + write_c0_index(wired); + + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + tlb_write_indexed(); + + write_c0_entryhi(old_ctx); + write_c0_pagemask(old_pagemask); + + local_flush_tlb_all(); + local_irq_restore(flags); +} + +/* + * This is called from loadmmu.c. We have to set up all the + * memory management function pointers, as well as initialize + * the caches and tlbs + */ +void tlb_init(void) +{ + write_c0_pagemask(PM_DEFAULT_MASK); + write_c0_wired(0); + + /* + * We don't know what state the firmware left the TLB's in, so this is + * the ultra-conservative way to flush the TLB's and avoid machine + * check exceptions due to duplicate TLB entries + */ + sb1_sanitize_tlb(); + + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S new file mode 100644 index 00000000000..9e7f4175b49 --- /dev/null +++ b/arch/mips/mm/tlbex-fault.S @@ -0,0 +1,28 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999 Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + */ +#include +#include +#include +#include + + .macro tlb_do_page_fault, write + NESTED(tlb_do_page_fault_\write, PT_SIZE, sp) + SAVE_ALL + MFC0 a2, CP0_BADVADDR + KMODE + move a0, sp + REG_S a2, PT_BVADDR(sp) + li a1, \write + jal do_page_fault + j ret_from_exception + END(tlb_do_page_fault_\write) + .endm + + tlb_do_page_fault 0 + tlb_do_page_fault 1 diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c new file mode 100644 index 00000000000..87e229f4d3d --- /dev/null +++ b/arch/mips/mm/tlbex.c @@ -0,0 +1,1815 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Synthesize TLB refill handlers at runtime. + * + * Copyright (C) 2004,2005 by Thiemo Seufer + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* #define DEBUG_TLB */ + +static __init int __attribute__((unused)) r45k_bvahwbug(void) +{ + /* XXX: We should probe for the presence of this bug, but we don't. */ + return 0; +} + +static __init int __attribute__((unused)) r4k_250MHZhwbug(void) +{ + /* XXX: We should probe for the presence of this bug, but we don't. */ + return 0; +} + +static __init int __attribute__((unused)) bcm1250_m3_war(void) +{ + return BCM1250_M3_WAR; +} + +static __init int __attribute__((unused)) r10000_llsc_war(void) +{ + return R10000_LLSC_WAR; +} + +/* + * A little micro-assembler, intended for TLB refill handler + * synthesizing. It is intentionally kept simple, does only support + * a subset of instructions, and does not try to hide pipeline effects + * like branch delay slots. + */ + +enum fields +{ + RS = 0x001, + RT = 0x002, + RD = 0x004, + RE = 0x008, + SIMM = 0x010, + UIMM = 0x020, + BIMM = 0x040, + JIMM = 0x080, + FUNC = 0x100, +}; + +#define OP_MASK 0x2f +#define OP_SH 26 +#define RS_MASK 0x1f +#define RS_SH 21 +#define RT_MASK 0x1f +#define RT_SH 16 +#define RD_MASK 0x1f +#define RD_SH 11 +#define RE_MASK 0x1f +#define RE_SH 6 +#define IMM_MASK 0xffff +#define IMM_SH 0 +#define JIMM_MASK 0x3ffffff +#define JIMM_SH 0 +#define FUNC_MASK 0x2f +#define FUNC_SH 0 + +enum opcode { + insn_invalid, + insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, + insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, + insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, + insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, + insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, + insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, + insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, + insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi, + insn_tlbwr, insn_xor, insn_xori +}; + +struct insn { + enum opcode opcode; + u32 match; + enum fields fields; +}; + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f) \ + ((a) << OP_SH \ + | (b) << RS_SH \ + | (c) << RT_SH \ + | (d) << RD_SH \ + | (e) << RE_SH \ + | (f) << FUNC_SH) + +static __initdata struct insn insn_table[] = { + { insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD }, + { insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD }, + { insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM }, + { insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM }, + { insn_beql, M(beql_op,0,0,0,0,0), RS | RT | BIMM }, + { insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM }, + { insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM }, + { insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM }, + { insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM }, + { insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM }, + { insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD }, + { insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD }, + { insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD }, + { insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE }, + { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE }, + { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE }, + { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE }, + { insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE }, + { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD }, + { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 }, + { insn_j, M(j_op,0,0,0,0,0), JIMM }, + { insn_jal, M(jal_op,0,0,0,0,0), JIMM }, + { insn_jr, M(spec_op,0,0,0,0,jr_op), RS }, + { insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_ll, M(ll_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_lld, M(lld_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM }, + { insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD }, + { insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD }, + { insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM }, + { insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 }, + { insn_sc, M(sc_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_scd, M(scd_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE }, + { insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE }, + { insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE }, + { insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD }, + { insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM }, + { insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 }, + { insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 }, + { insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 }, + { insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD }, + { insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM }, + { insn_invalid, 0, 0 } +}; + +#undef M + +static __init u32 build_rs(u32 arg) +{ + if (arg & ~RS_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return (arg & RS_MASK) << RS_SH; +} + +static __init u32 build_rt(u32 arg) +{ + if (arg & ~RT_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return (arg & RT_MASK) << RT_SH; +} + +static __init u32 build_rd(u32 arg) +{ + if (arg & ~RD_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return (arg & RD_MASK) << RD_SH; +} + +static __init u32 build_re(u32 arg) +{ + if (arg & ~RE_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return (arg & RE_MASK) << RE_SH; +} + +static __init u32 build_simm(s32 arg) +{ + if (arg > 0x7fff || arg < -0x8000) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return arg & 0xffff; +} + +static __init u32 build_uimm(u32 arg) +{ + if (arg & ~IMM_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return arg & IMM_MASK; +} + +static __init u32 build_bimm(s32 arg) +{ + if (arg > 0x1ffff || arg < -0x20000) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + if (arg & 0x3) + printk(KERN_WARNING "Invalid TLB synthesizer branch target\n"); + + return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); +} + +static __init u32 build_jimm(u32 arg) +{ + if (arg & ~((JIMM_MASK) << 2)) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return (arg >> 2) & JIMM_MASK; +} + +static __init u32 build_func(u32 arg) +{ + if (arg & ~FUNC_MASK) + printk(KERN_WARNING "TLB synthesizer field overflow\n"); + + return arg & FUNC_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void __init build_insn(u32 **buf, enum opcode opc, ...) +{ + struct insn *ip = NULL; + unsigned int i; + va_list ap; + u32 op; + + for (i = 0; insn_table[i].opcode != insn_invalid; i++) + if (insn_table[i].opcode == opc) { + ip = &insn_table[i]; + break; + } + + if (!ip) + panic("Unsupported TLB synthesizer instruction %d", opc); + + op = ip->match; + va_start(ap, opc); + if (ip->fields & RS) op |= build_rs(va_arg(ap, u32)); + if (ip->fields & RT) op |= build_rt(va_arg(ap, u32)); + if (ip->fields & RD) op |= build_rd(va_arg(ap, u32)); + if (ip->fields & RE) op |= build_re(va_arg(ap, u32)); + if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32)); + if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32)); + if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32)); + if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32)); + if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32)); + va_end(ap); + + **buf = op; + (*buf)++; +} + +#define I_u1u2u3(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b, unsigned int c) \ + { \ + build_insn(buf, insn##op, a, b, c); \ + } + +#define I_u2u1u3(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b, unsigned int c) \ + { \ + build_insn(buf, insn##op, b, a, c); \ + } + +#define I_u3u1u2(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b, unsigned int c) \ + { \ + build_insn(buf, insn##op, b, c, a); \ + } + +#define I_u1u2s3(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b, signed int c) \ + { \ + build_insn(buf, insn##op, a, b, c); \ + } + +#define I_u2s3u1(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + signed int b, unsigned int c) \ + { \ + build_insn(buf, insn##op, c, a, b); \ + } + +#define I_u2u1s3(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b, signed int c) \ + { \ + build_insn(buf, insn##op, b, a, c); \ + } + +#define I_u1u2(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + unsigned int b) \ + { \ + build_insn(buf, insn##op, a, b); \ + } + +#define I_u1s2(op) \ + static inline void i##op(u32 **buf, unsigned int a, \ + signed int b) \ + { \ + build_insn(buf, insn##op, a, b); \ + } + +#define I_u1(op) \ + static inline void i##op(u32 **buf, unsigned int a) \ + { \ + build_insn(buf, insn##op, a); \ + } + +#define I_0(op) \ + static inline void i##op(u32 **buf) \ + { \ + build_insn(buf, insn##op); \ + } + +I_u2u1s3(_addiu); +I_u3u1u2(_addu); +I_u2u1u3(_andi); +I_u3u1u2(_and); +I_u1u2s3(_beq); +I_u1u2s3(_beql); +I_u1s2(_bgez); +I_u1s2(_bgezl); +I_u1s2(_bltz); +I_u1s2(_bltzl); +I_u1u2s3(_bne); +I_u1u2(_dmfc0); +I_u1u2(_dmtc0); +I_u2u1s3(_daddiu); +I_u3u1u2(_daddu); +I_u2u1u3(_dsll); +I_u2u1u3(_dsll32); +I_u2u1u3(_dsra); +I_u2u1u3(_dsrl); +I_u2u1u3(_dsrl32); +I_u3u1u2(_dsubu); +I_0(_eret); +I_u1(_j); +I_u1(_jal); +I_u1(_jr); +I_u2s3u1(_ld); +I_u2s3u1(_ll); +I_u2s3u1(_lld); +I_u1s2(_lui); +I_u2s3u1(_lw); +I_u1u2(_mfc0); +I_u1u2(_mtc0); +I_u2u1u3(_ori); +I_0(_rfe); +I_u2s3u1(_sc); +I_u2s3u1(_scd); +I_u2s3u1(_sd); +I_u2u1u3(_sll); +I_u2u1u3(_sra); +I_u2u1u3(_srl); +I_u3u1u2(_subu); +I_u2s3u1(_sw); +I_0(_tlbp); +I_0(_tlbwi); +I_0(_tlbwr); +I_u3u1u2(_xor) +I_u2u1u3(_xori); + +/* + * handling labels + */ + +enum label_id { + label_invalid, + label_second_part, + label_leave, + label_vmalloc, + label_vmalloc_done, + label_tlbw_hazard, + label_split, + label_nopage_tlbl, + label_nopage_tlbs, + label_nopage_tlbm, + label_smp_pgtable_change, + label_r3000_write_probe_fail, + label_r3000_write_probe_ok +}; + +struct label { + u32 *addr; + enum label_id lab; +}; + +static __init void build_label(struct label **lab, u32 *addr, + enum label_id l) +{ + (*lab)->addr = addr; + (*lab)->lab = l; + (*lab)++; +} + +#define L_LA(lb) \ + static inline void l##lb(struct label **lab, u32 *addr) \ + { \ + build_label(lab, addr, label##lb); \ + } + +L_LA(_second_part) +L_LA(_leave) +L_LA(_vmalloc) +L_LA(_vmalloc_done) +L_LA(_tlbw_hazard) +L_LA(_split) +L_LA(_nopage_tlbl) +L_LA(_nopage_tlbs) +L_LA(_nopage_tlbm) +L_LA(_smp_pgtable_change) +L_LA(_r3000_write_probe_fail) +L_LA(_r3000_write_probe_ok) + +/* convenience macros for instructions */ +#ifdef CONFIG_MIPS64 +# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off) +# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off) +# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh) +# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh) +# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh) +# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd) +# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd) +# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val) +# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd) +# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd) +# define i_LL(buf, rs, rt, off) i_lld(buf, rs, rt, off) +# define i_SC(buf, rs, rt, off) i_scd(buf, rs, rt, off) +#else +# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off) +# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off) +# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh) +# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh) +# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh) +# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd) +# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd) +# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val) +# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd) +# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd) +# define i_LL(buf, rs, rt, off) i_ll(buf, rs, rt, off) +# define i_SC(buf, rs, rt, off) i_sc(buf, rs, rt, off) +#endif + +#define i_b(buf, off) i_beq(buf, 0, 0, off) +#define i_beqz(buf, rs, off) i_beq(buf, rs, 0, off) +#define i_beqzl(buf, rs, off) i_beql(buf, rs, 0, off) +#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off) +#define i_bnezl(buf, rs, off) i_bnel(buf, rs, 0, off) +#define i_move(buf, a, b) i_ADDU(buf, a, 0, b) +#define i_nop(buf) i_sll(buf, 0, 0, 0) +#define i_ssnop(buf) i_sll(buf, 0, 0, 1) +#define i_ehb(buf) i_sll(buf, 0, 0, 3) + +#ifdef CONFIG_MIPS64 +static __init int __attribute__((unused)) in_compat_space_p(long addr) +{ + /* Is this address in 32bit compat space? */ + return (((addr) & 0xffffffff00000000) == 0xffffffff00000000); +} + +static __init int __attribute__((unused)) rel_highest(long val) +{ + return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; +} + +static __init int __attribute__((unused)) rel_higher(long val) +{ + return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; +} +#endif + +static __init int rel_hi(long val) +{ + return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; +} + +static __init int rel_lo(long val) +{ + return ((val & 0xffff) ^ 0x8000) - 0x8000; +} + +static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr) +{ +#if CONFIG_MIPS64 + if (!in_compat_space_p(addr)) { + i_lui(buf, rs, rel_highest(addr)); + if (rel_higher(addr)) + i_daddiu(buf, rs, rs, rel_higher(addr)); + if (rel_hi(addr)) { + i_dsll(buf, rs, rs, 16); + i_daddiu(buf, rs, rs, rel_hi(addr)); + i_dsll(buf, rs, rs, 16); + } else + i_dsll32(buf, rs, rs, 0); + } else +#endif + i_lui(buf, rs, rel_hi(addr)); +} + +static __init void __attribute__((unused)) i_LA(u32 **buf, unsigned int rs, + long addr) +{ + i_LA_mostly(buf, rs, addr); + if (rel_lo(addr)) + i_ADDIU(buf, rs, rs, rel_lo(addr)); +} + +/* + * handle relocations + */ + +struct reloc { + u32 *addr; + unsigned int type; + enum label_id lab; +}; + +static __init void r_mips_pc16(struct reloc **rel, u32 *addr, + enum label_id l) +{ + (*rel)->addr = addr; + (*rel)->type = R_MIPS_PC16; + (*rel)->lab = l; + (*rel)++; +} + +static inline void __resolve_relocs(struct reloc *rel, struct label *lab) +{ + long laddr = (long)lab->addr; + long raddr = (long)rel->addr; + + switch (rel->type) { + case R_MIPS_PC16: + *rel->addr |= build_bimm(laddr - (raddr + 4)); + break; + + default: + panic("Unsupported TLB synthesizer relocation %d", + rel->type); + } +} + +static __init void resolve_relocs(struct reloc *rel, struct label *lab) +{ + struct label *l; + + for (; rel->lab != label_invalid; rel++) + for (l = lab; l->lab != label_invalid; l++) + if (rel->lab == l->lab) + __resolve_relocs(rel, l); +} + +static __init void move_relocs(struct reloc *rel, u32 *first, u32 *end, + long off) +{ + for (; rel->lab != label_invalid; rel++) + if (rel->addr >= first && rel->addr < end) + rel->addr += off; +} + +static __init void move_labels(struct label *lab, u32 *first, u32 *end, + long off) +{ + for (; lab->lab != label_invalid; lab++) + if (lab->addr >= first && lab->addr < end) + lab->addr += off; +} + +static __init void copy_handler(struct reloc *rel, struct label *lab, + u32 *first, u32 *end, u32 *target) +{ + long off = (long)(target - first); + + memcpy(target, first, (end - first) * sizeof(u32)); + + move_relocs(rel, first, end, off); + move_labels(lab, first, end, off); +} + +static __init int __attribute__((unused)) insn_has_bdelay(struct reloc *rel, + u32 *addr) +{ + for (; rel->lab != label_invalid; rel++) { + if (rel->addr == addr + && (rel->type == R_MIPS_PC16 + || rel->type == R_MIPS_26)) + return 1; + } + + return 0; +} + +/* convenience functions for labeled branches */ +static void __attribute__((unused)) il_bltz(u32 **p, struct reloc **r, + unsigned int reg, enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_bltz(p, reg, 0); +} + +static void __attribute__((unused)) il_b(u32 **p, struct reloc **r, + enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_b(p, 0); +} + +static void il_beqz(u32 **p, struct reloc **r, unsigned int reg, + enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_beqz(p, reg, 0); +} + +static void __attribute__((unused)) +il_beqzl(u32 **p, struct reloc **r, unsigned int reg, enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_beqzl(p, reg, 0); +} + +static void il_bnez(u32 **p, struct reloc **r, unsigned int reg, + enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_bnez(p, reg, 0); +} + +static void il_bgezl(u32 **p, struct reloc **r, unsigned int reg, + enum label_id l) +{ + r_mips_pc16(r, *p, l); + i_bgezl(p, reg, 0); +} + +/* The only general purpose registers allowed in TLB handlers. */ +#define K0 26 +#define K1 27 + +/* Some CP0 registers */ +#define C0_INDEX 0 +#define C0_ENTRYLO0 2 +#define C0_ENTRYLO1 3 +#define C0_CONTEXT 4 +#define C0_BADVADDR 8 +#define C0_ENTRYHI 10 +#define C0_EPC 14 +#define C0_XCONTEXT 20 + +#ifdef CONFIG_MIPS64 +# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT) +#else +# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT) +#endif + +/* The worst case length of the handler is around 18 instructions for + * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. + * Maximum space available is 32 instructions for R3000 and 64 + * instructions for R4000. + * + * We deliberately chose a buffer size of 128, so we won't scribble + * over anything important on overflow before we panic. + */ +static __initdata u32 tlb_handler[128]; + +/* simply assume worst case size for labels and relocs */ +static __initdata struct label labels[128]; +static __initdata struct reloc relocs[128]; + +/* + * The R3000 TLB handler is simple. + */ +static void __init build_r3000_tlb_refill_handler(void) +{ + long pgdc = (long)pgd_current; + u32 *p; + + memset(tlb_handler, 0, sizeof(tlb_handler)); + p = tlb_handler; + + i_mfc0(&p, K0, C0_BADVADDR); + i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */ + i_lw(&p, K1, rel_lo(pgdc), K1); + i_srl(&p, K0, K0, 22); /* load delay */ + i_sll(&p, K0, K0, 2); + i_addu(&p, K1, K1, K0); + i_mfc0(&p, K0, C0_CONTEXT); + i_lw(&p, K1, 0, K1); /* cp0 delay */ + i_andi(&p, K0, K0, 0xffc); /* load delay */ + i_addu(&p, K1, K1, K0); + i_lw(&p, K0, 0, K1); + i_nop(&p); /* load delay */ + i_mtc0(&p, K0, C0_ENTRYLO0); + i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ + i_tlbwr(&p); /* cp0 delay */ + i_jr(&p, K1); + i_rfe(&p); /* branch delay */ + + if (p > tlb_handler + 32) + panic("TLB refill handler space exceeded"); + + printk("Synthesized TLB handler (%u instructions).\n", + (unsigned int)(p - tlb_handler)); +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < (p - tlb_handler); i++) + printk("%08x\n", tlb_handler[i]); + } +#endif + + memcpy((void *)CAC_BASE, tlb_handler, 0x80); + flush_icache_range(CAC_BASE, CAC_BASE + 0x80); +} + +/* + * The R4000 TLB handler is much more complicated. We have two + * consecutive handler areas with 32 instructions space each. + * Since they aren't used at the same time, we can overflow in the + * other one.To keep things simple, we first assume linear space, + * then we relocate it to the final handler layout as needed. + */ +static __initdata u32 final_handler[64]; + +/* + * Hazards + * + * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: + * 2. A timing hazard exists for the TLBP instruction. + * + * stalling_instruction + * TLBP + * + * The JTLB is being read for the TLBP throughout the stall generated by the + * previous instruction. This is not really correct as the stalling instruction + * can modify the address used to access the JTLB. The failure symptom is that + * the TLBP instruction will use an address created for the stalling instruction + * and not the address held in C0_ENHI and thus report the wrong results. + * + * The software work-around is to not allow the instruction preceding the TLBP + * to stall - make it an NOP or some other instruction guaranteed not to stall. + * + * Errata 2 will not be fixed. This errata is also on the R5000. + * + * As if we MIPS hackers wouldn't know how to nop pipelines happy ... + */ +static __init void __attribute__((unused)) build_tlb_probe_entry(u32 **p) +{ + switch (current_cpu_data.cputype) { + case CPU_R5000: + case CPU_R5000A: + case CPU_NEVADA: + i_nop(p); + i_tlbp(p); + break; + + default: + i_tlbp(p); + break; + } +} + +/* + * Write random or indexed TLB entry, and care about the hazards from + * the preceeding mtc0 and for the following eret. + */ +enum tlb_write_entry { tlb_random, tlb_indexed }; + +static __init void build_tlb_write_entry(u32 **p, struct label **l, + struct reloc **r, + enum tlb_write_entry wmode) +{ + void(*tlbw)(u32 **) = NULL; + + switch (wmode) { + case tlb_random: tlbw = i_tlbwr; break; + case tlb_indexed: tlbw = i_tlbwi; break; + } + + switch (current_cpu_data.cputype) { + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + /* + * This branch uses up a mtc0 hazard nop slot and saves + * two nops after the tlbw instruction. + */ + il_bgezl(p, r, 0, label_tlbw_hazard); + tlbw(p); + l_tlbw_hazard(l, *p); + i_nop(p); + break; + + case CPU_R4600: + case CPU_R4700: + case CPU_R5000: + case CPU_R5000A: + case CPU_5KC: + case CPU_TX49XX: + case CPU_AU1000: + case CPU_AU1100: + case CPU_AU1500: + case CPU_AU1550: + i_nop(p); + tlbw(p); + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_4KC: + case CPU_SB1: + case CPU_4KSC: + case CPU_20KC: + case CPU_25KF: + tlbw(p); + break; + + case CPU_NEVADA: + i_nop(p); /* QED specifies 2 nops hazard */ + /* + * This branch uses up a mtc0 hazard nop slot and saves + * a nop after the tlbw instruction. + */ + il_bgezl(p, r, 0, label_tlbw_hazard); + tlbw(p); + l_tlbw_hazard(l, *p); + break; + + case CPU_RM7000: + i_nop(p); + i_nop(p); + i_nop(p); + i_nop(p); + tlbw(p); + break; + + case CPU_4KEC: + case CPU_24K: + i_ehb(p); + tlbw(p); + break; + + case CPU_RM9000: + /* + * When the JTLB is updated by tlbwi or tlbwr, a subsequent + * use of the JTLB for instructions should not occur for 4 + * cpu cycles and use for data translations should not occur + * for 3 cpu cycles. + */ + i_ssnop(p); + i_ssnop(p); + i_ssnop(p); + i_ssnop(p); + tlbw(p); + i_ssnop(p); + i_ssnop(p); + i_ssnop(p); + i_ssnop(p); + break; + + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4181: + case CPU_VR4181A: + i_nop(p); + i_nop(p); + tlbw(p); + i_nop(p); + i_nop(p); + break; + + case CPU_VR4131: + case CPU_VR4133: + i_nop(p); + i_nop(p); + tlbw(p); + break; + + default: + panic("No TLB refill handler yet (CPU type: %d)", + current_cpu_data.cputype); + break; + } +} + +#ifdef CONFIG_MIPS64 +/* + * TMP and PTR are scratch. + * TMP will be clobbered, PTR will hold the pmd entry. + */ +static __init void +build_get_pmde64(u32 **p, struct label **l, struct reloc **r, + unsigned int tmp, unsigned int ptr) +{ + long pgdc = (long)pgd_current; + + /* + * The vmalloc handling is not in the hotpath. + */ + i_dmfc0(p, tmp, C0_BADVADDR); + il_bltz(p, r, tmp, label_vmalloc); + /* No i_nop needed here, since the next insn doesn't touch TMP. */ + +#ifdef CONFIG_SMP + /* + * 64 bit SMP has the lower part of &pgd_current[smp_processor_id()] + * stored in CONTEXT. + */ + if (in_compat_space_p(pgdc)) { + i_dmfc0(p, ptr, C0_CONTEXT); + i_dsra(p, ptr, ptr, 23); + i_ld(p, ptr, 0, ptr); + } else { +#ifdef CONFIG_BUILD_ELF64 + i_dmfc0(p, ptr, C0_CONTEXT); + i_dsrl(p, ptr, ptr, 23); + i_dsll(p, ptr, ptr, 3); + i_LA_mostly(p, tmp, pgdc); + i_daddu(p, ptr, ptr, tmp); + i_dmfc0(p, tmp, C0_BADVADDR); + i_ld(p, ptr, rel_lo(pgdc), ptr); +#else + i_dmfc0(p, ptr, C0_CONTEXT); + i_lui(p, tmp, rel_highest(pgdc)); + i_dsll(p, ptr, ptr, 9); + i_daddiu(p, tmp, tmp, rel_higher(pgdc)); + i_dsrl32(p, ptr, ptr, 0); + i_and(p, ptr, ptr, tmp); + i_dmfc0(p, tmp, C0_BADVADDR); + i_ld(p, ptr, 0, ptr); +#endif + } +#else + i_LA_mostly(p, ptr, pgdc); + i_ld(p, ptr, rel_lo(pgdc), ptr); +#endif + + l_vmalloc_done(l, *p); + i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */ + i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); + i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ + i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + i_ld(p, ptr, 0, ptr); /* get pmd pointer */ + i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ + i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); + i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ +} + +/* + * BVADDR is the faulting address, PTR is scratch. + * PTR will hold the pgd for vmalloc. + */ +static __init void +build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, + unsigned int bvaddr, unsigned int ptr) +{ + long swpd = (long)swapper_pg_dir; + + l_vmalloc(l, *p); + i_LA(p, ptr, VMALLOC_START); + i_dsubu(p, bvaddr, bvaddr, ptr); + + if (in_compat_space_p(swpd) && !rel_lo(swpd)) { + il_b(p, r, label_vmalloc_done); + i_lui(p, ptr, rel_hi(swpd)); + } else { + i_LA_mostly(p, ptr, swpd); + il_b(p, r, label_vmalloc_done); + i_daddiu(p, ptr, ptr, rel_lo(swpd)); + } +} + +#else /* !CONFIG_MIPS64 */ + +/* + * TMP and PTR are scratch. + * TMP will be clobbered, PTR will hold the pgd entry. + */ +static __init void __attribute__((unused)) +build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) +{ + long pgdc = (long)pgd_current; + + /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ +#ifdef CONFIG_SMP + i_mfc0(p, ptr, C0_CONTEXT); + i_LA_mostly(p, tmp, pgdc); + i_srl(p, ptr, ptr, 23); + i_sll(p, ptr, ptr, 2); + i_addu(p, ptr, tmp, ptr); +#else + i_LA_mostly(p, ptr, pgdc); +#endif + i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + i_lw(p, ptr, rel_lo(pgdc), ptr); + i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ + i_sll(p, tmp, tmp, PGD_T_LOG2); + i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ +} + +#endif /* !CONFIG_MIPS64 */ + +static __init void build_adjust_context(u32 **p, unsigned int ctx) +{ + unsigned int shift = 4 - (PTE_T_LOG2 + 1); + unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); + + switch (current_cpu_data.cputype) { + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4181: + case CPU_VR4181A: + case CPU_VR4133: + shift += 2; + break; + + default: + break; + } + + if (shift) + i_SRL(p, ctx, ctx, shift); + i_andi(p, ctx, ctx, mask); +} + +static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +{ + /* + * Bug workaround for the Nevada. It seems as if under certain + * circumstances the move from cp0_context might produce a + * bogus result when the mfc0 instruction and its consumer are + * in a different cacheline or a load instruction, probably any + * memory reference, is between them. + */ + switch (current_cpu_data.cputype) { + case CPU_NEVADA: + i_LW(p, ptr, 0, ptr); + GET_CONTEXT(p, tmp); /* get context reg */ + break; + + default: + GET_CONTEXT(p, tmp); /* get context reg */ + i_LW(p, ptr, 0, ptr); + break; + } + + build_adjust_context(p, tmp); + i_ADDU(p, ptr, ptr, tmp); /* add in offset */ +} + +static __init void build_update_entries(u32 **p, unsigned int tmp, + unsigned int ptep) +{ + /* + * 64bit address support (36bit on a 32bit CPU) in a 32bit + * Kernel is a special case. Only a few CPUs use it. + */ +#ifdef CONFIG_64BIT_PHYS_ADDR + if (cpu_has_64bits) { + i_ld(p, tmp, 0, ptep); /* get even pte */ + i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ + i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */ + i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ + i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */ + i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ + } else { + int pte_off_even = sizeof(pte_t) / 2; + int pte_off_odd = pte_off_even + sizeof(pte_t); + + /* The pte entries are pre-shifted */ + i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ + i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ + i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ + i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ + } +#else + i_LW(p, tmp, 0, ptep); /* get even pte */ + i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ + if (r45k_bvahwbug()) + build_tlb_probe_entry(p); + i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */ + if (r4k_250MHZhwbug()) + i_mtc0(p, 0, C0_ENTRYLO0); + i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ + i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */ + if (r45k_bvahwbug()) + i_mfc0(p, tmp, C0_INDEX); + if (r4k_250MHZhwbug()) + i_mtc0(p, 0, C0_ENTRYLO1); + i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ +#endif +} + +static void __init build_r4000_tlb_refill_handler(void) +{ + u32 *p = tlb_handler; + struct label *l = labels; + struct reloc *r = relocs; + u32 *f; + unsigned int final_len; + + memset(tlb_handler, 0, sizeof(tlb_handler)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + memset(final_handler, 0, sizeof(final_handler)); + + /* + * create the plain linear handler + */ + if (bcm1250_m3_war()) { + i_MFC0(&p, K0, C0_BADVADDR); + i_MFC0(&p, K1, C0_ENTRYHI); + i_xor(&p, K0, K0, K1); + i_SRL(&p, K0, K0, PAGE_SHIFT + 1); + il_bnez(&p, &r, K0, label_leave); + /* No need for i_nop */ + } + +#ifdef CONFIG_MIPS64 + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +#else + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); + l_leave(&l, p); + i_eret(&p); /* return from trap */ + +#ifdef CONFIG_MIPS64 + build_get_pgd_vmalloc64(&p, &l, &r, K0, K1); +#endif + + /* + * Overflow check: For the 64bit handler, we need at least one + * free instruction slot for the wrap-around branch. In worst + * case, if the intended insertion point is a delay slot, we + * need three, with the the second nop'ed and the third being + * unused. + */ +#ifdef CONFIG_MIPS32 + if ((p - tlb_handler) > 64) + panic("TLB refill handler space exceeded"); +#else + if (((p - tlb_handler) > 63) + || (((p - tlb_handler) > 61) + && insn_has_bdelay(relocs, tlb_handler + 29))) + panic("TLB refill handler space exceeded"); +#endif + + /* + * Now fold the handler in the TLB refill handler space. + */ +#ifdef CONFIG_MIPS32 + f = final_handler; + /* Simplest case, just copy the handler. */ + copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; +#else /* CONFIG_MIPS64 */ + f = final_handler + 32; + if ((p - tlb_handler) <= 32) { + /* Just copy the handler. */ + copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + } else { + u32 *split = tlb_handler + 30; + + /* + * Find the split point. + */ + if (insn_has_bdelay(relocs, split - 1)) + split--; + + /* Copy first part of the handler. */ + copy_handler(relocs, labels, tlb_handler, split, f); + f += split - tlb_handler; + + /* Insert branch. */ + l_split(&l, final_handler); + il_b(&f, &r, label_split); + if (insn_has_bdelay(relocs, split)) + i_nop(&f); + else { + copy_handler(relocs, labels, split, split + 1, f); + move_labels(labels, f, f + 1, -1); + f++; + split++; + } + + /* Copy the rest of the handler. */ + copy_handler(relocs, labels, split, p, final_handler); + final_len = (f - (final_handler + 32)) + (p - split); + } +#endif /* CONFIG_MIPS64 */ + + resolve_relocs(relocs, labels); + printk("Synthesized TLB refill handler (%u instructions).\n", + final_len); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < 64; i++) + printk("%08x\n", final_handler[i]); + } +#endif + + memcpy((void *)CAC_BASE, final_handler, 0x100); + flush_icache_range(CAC_BASE, CAC_BASE + 0x100); +} + +/* + * TLB load/store/modify handlers. + * + * Only the fastpath gets synthesized at runtime, the slowpath for + * do_page_fault remains normal asm. + */ +extern void tlb_do_page_fault_0(void); +extern void tlb_do_page_fault_1(void); + +#define __tlb_handler_align \ + __attribute__((__aligned__(1 << CONFIG_MIPS_L1_CACHE_SHIFT))) + +/* + * 128 instructions for the fastpath handler is generous and should + * never be exceeded. + */ +#define FASTPATH_SIZE 128 + +u32 __tlb_handler_align handle_tlbl[FASTPATH_SIZE]; +u32 __tlb_handler_align handle_tlbs[FASTPATH_SIZE]; +u32 __tlb_handler_align handle_tlbm[FASTPATH_SIZE]; + +static void __init +iPTE_LW(u32 **p, struct label **l, unsigned int pte, int offset, + unsigned int ptr) +{ +#ifdef CONFIG_SMP +# ifdef CONFIG_64BIT_PHYS_ADDR + if (cpu_has_64bits) + i_lld(p, pte, offset, ptr); + else +# endif + i_LL(p, pte, offset, ptr); +#else +# ifdef CONFIG_64BIT_PHYS_ADDR + if (cpu_has_64bits) + i_ld(p, pte, offset, ptr); + else +# endif + i_LW(p, pte, offset, ptr); +#endif +} + +static void __init +iPTE_SW(u32 **p, struct reloc **r, unsigned int pte, int offset, + unsigned int ptr) +{ +#ifdef CONFIG_SMP +# ifdef CONFIG_64BIT_PHYS_ADDR + if (cpu_has_64bits) + i_scd(p, pte, offset, ptr); + else +# endif + i_SC(p, pte, offset, ptr); + + if (r10000_llsc_war()) + il_beqzl(p, r, pte, label_smp_pgtable_change); + else + il_beqz(p, r, pte, label_smp_pgtable_change); + +# ifdef CONFIG_64BIT_PHYS_ADDR + if (!cpu_has_64bits) { + /* no i_nop needed */ + i_ll(p, pte, sizeof(pte_t) / 2, ptr); + i_ori(p, pte, pte, _PAGE_VALID); + i_sc(p, pte, sizeof(pte_t) / 2, ptr); + il_beqz(p, r, pte, label_smp_pgtable_change); + /* no i_nop needed */ + i_lw(p, pte, 0, ptr); + } else + i_nop(p); +# else + i_nop(p); +# endif +#else +# ifdef CONFIG_64BIT_PHYS_ADDR + if (cpu_has_64bits) + i_sd(p, pte, offset, ptr); + else +# endif + i_SW(p, pte, offset, ptr); + +# ifdef CONFIG_64BIT_PHYS_ADDR + if (!cpu_has_64bits) { + i_lw(p, pte, sizeof(pte_t) / 2, ptr); + i_ori(p, pte, pte, _PAGE_VALID); + i_sw(p, pte, sizeof(pte_t) / 2, ptr); + i_lw(p, pte, 0, ptr); + } +# endif +#endif +} + +/* + * Check if PTE is present, if not then jump to LABEL. PTR points to + * the page table where this PTE is located, PTE will be re-loaded + * with it's original value. + */ +static void __init +build_pte_present(u32 **p, struct label **l, struct reloc **r, + unsigned int pte, unsigned int ptr, enum label_id lid) +{ + i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); + i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); + il_bnez(p, r, pte, lid); + iPTE_LW(p, l, pte, 0, ptr); +} + +/* Make PTE valid, store result in PTR. */ +static void __init +build_make_valid(u32 **p, struct reloc **r, unsigned int pte, + unsigned int ptr) +{ + i_ori(p, pte, pte, _PAGE_VALID | _PAGE_ACCESSED); + iPTE_SW(p, r, pte, 0, ptr); +} + +/* + * Check if PTE can be written to, if not branch to LABEL. Regardless + * restore PTE with value from PTR when done. + */ +static void __init +build_pte_writable(u32 **p, struct label **l, struct reloc **r, + unsigned int pte, unsigned int ptr, enum label_id lid) +{ + i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); + i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); + il_bnez(p, r, pte, lid); + iPTE_LW(p, l, pte, 0, ptr); +} + +/* Make PTE writable, update software status bits as well, then store + * at PTR. + */ +static void __init +build_make_write(u32 **p, struct reloc **r, unsigned int pte, + unsigned int ptr) +{ + i_ori(p, pte, pte, + _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); + iPTE_SW(p, r, pte, 0, ptr); +} + +/* + * Check if PTE can be modified, if not branch to LABEL. Regardless + * restore PTE with value from PTR when done. + */ +static void __init +build_pte_modifiable(u32 **p, struct label **l, struct reloc **r, + unsigned int pte, unsigned int ptr, enum label_id lid) +{ + i_andi(p, pte, pte, _PAGE_WRITE); + il_beqz(p, r, pte, lid); + iPTE_LW(p, l, pte, 0, ptr); +} + +/* + * R3000 style TLB load/store/modify handlers. + */ + +/* This places the pte in the page table at PTR into ENTRYLO0. */ +static void __init +build_r3000_pte_reload(u32 **p, unsigned int ptr) +{ + i_lw(p, ptr, 0, ptr); + i_nop(p); /* load delay */ + i_mtc0(p, ptr, C0_ENTRYLO0); + i_nop(p); /* cp0 delay */ +} + +/* + * The index register may have the probe fail bit set, + * because we would trap on access kseg2, i.e. without refill. + */ +static void __init +build_r3000_tlb_write(u32 **p, struct label **l, struct reloc **r, + unsigned int tmp) +{ + i_mfc0(p, tmp, C0_INDEX); + i_nop(p); /* cp0 delay */ + il_bltz(p, r, tmp, label_r3000_write_probe_fail); + i_nop(p); /* branch delay */ + i_tlbwi(p); + il_b(p, r, label_r3000_write_probe_ok); + i_nop(p); /* branch delay */ + l_r3000_write_probe_fail(l, *p); + i_tlbwr(p); + l_r3000_write_probe_ok(l, *p); +} + +static void __init +build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, + unsigned int ptr) +{ + long pgdc = (long)pgd_current; + + i_mfc0(p, pte, C0_BADVADDR); + i_lui(p, ptr, rel_hi(pgdc)); /* cp0 delay */ + i_lw(p, ptr, rel_lo(pgdc), ptr); + i_srl(p, pte, pte, 22); /* load delay */ + i_sll(p, pte, pte, 2); + i_addu(p, ptr, ptr, pte); + i_mfc0(p, pte, C0_CONTEXT); + i_lw(p, ptr, 0, ptr); /* cp0 delay */ + i_andi(p, pte, pte, 0xffc); /* load delay */ + i_addu(p, ptr, ptr, pte); + i_lw(p, pte, 0, ptr); + i_nop(p); /* load delay */ + i_tlbp(p); +} + +static void __init +build_r3000_tlbchange_handler_tail(u32 **p, unsigned int tmp) +{ + i_mfc0(p, tmp, C0_EPC); + i_nop(p); /* cp0 delay */ + i_jr(p, tmp); + i_rfe(p); /* branch delay */ +} + +static void __init build_r3000_tlb_load_handler(void) +{ + u32 *p = handle_tlbl; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); + build_make_valid(&p, &r, K0, K1); + build_r3000_pte_reload(&p, K1); + build_r3000_tlb_write(&p, &l, &r, K0); + build_r3000_tlbchange_handler_tail(&p, K0); + + l_nopage_tlbl(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbl) > FASTPATH_SIZE) + panic("TLB load handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbl)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbl[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbl, + (unsigned long)handle_tlbl + FASTPATH_SIZE * sizeof(u32)); +} + +static void __init build_r3000_tlb_store_handler(void) +{ + u32 *p = handle_tlbs; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); + build_make_write(&p, &r, K0, K1); + build_r3000_pte_reload(&p, K1); + build_r3000_tlb_write(&p, &l, &r, K0); + build_r3000_tlbchange_handler_tail(&p, K0); + + l_nopage_tlbs(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbs) > FASTPATH_SIZE) + panic("TLB store handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbs)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbs[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbs, + (unsigned long)handle_tlbs + FASTPATH_SIZE * sizeof(u32)); +} + +static void __init build_r3000_tlb_modify_handler(void) +{ + u32 *p = handle_tlbm; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); + build_make_write(&p, &r, K0, K1); + build_r3000_pte_reload(&p, K1); + i_tlbwi(&p); + build_r3000_tlbchange_handler_tail(&p, K0); + + l_nopage_tlbm(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbm) > FASTPATH_SIZE) + panic("TLB modify handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbm)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbm[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbm, + (unsigned long)handle_tlbm + FASTPATH_SIZE * sizeof(u32)); +} + +/* + * R4000 style TLB load/store/modify handlers. + */ +static void __init +build_r4000_tlbchange_handler_head(u32 **p, struct label **l, + struct reloc **r, unsigned int pte, + unsigned int ptr) +{ +#ifdef CONFIG_MIPS64 + build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */ +#else + build_get_pgde32(p, pte, ptr); /* get pgd in ptr */ +#endif + + i_MFC0(p, pte, C0_BADVADDR); + i_LW(p, ptr, 0, ptr); + i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); + i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2); + i_ADDU(p, ptr, ptr, pte); + +#ifdef CONFIG_SMP + l_smp_pgtable_change(l, *p); +# endif + iPTE_LW(p, l, pte, 0, ptr); /* get even pte */ + build_tlb_probe_entry(p); +} + +static void __init +build_r4000_tlbchange_handler_tail(u32 **p, struct label **l, + struct reloc **r, unsigned int tmp, + unsigned int ptr) +{ + i_ori(p, ptr, ptr, sizeof(pte_t)); + i_xori(p, ptr, ptr, sizeof(pte_t)); + build_update_entries(p, tmp, ptr); + build_tlb_write_entry(p, l, r, tlb_indexed); + l_leave(l, *p); + i_eret(p); /* return from trap */ + +#ifdef CONFIG_MIPS64 + build_get_pgd_vmalloc64(p, l, r, tmp, ptr); +#endif +} + +static void __init build_r4000_tlb_load_handler(void) +{ + u32 *p = handle_tlbl; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + if (bcm1250_m3_war()) { + i_MFC0(&p, K0, C0_BADVADDR); + i_MFC0(&p, K1, C0_ENTRYHI); + i_xor(&p, K0, K0, K1); + i_SRL(&p, K0, K0, PAGE_SHIFT + 1); + il_bnez(&p, &r, K0, label_leave); + /* No need for i_nop */ + } + + build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); + build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); + build_make_valid(&p, &r, K0, K1); + build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + + l_nopage_tlbl(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbl) > FASTPATH_SIZE) + panic("TLB load handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbl)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbl[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbl, + (unsigned long)handle_tlbl + FASTPATH_SIZE * sizeof(u32)); +} + +static void __init build_r4000_tlb_store_handler(void) +{ + u32 *p = handle_tlbs; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); + build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); + build_make_write(&p, &r, K0, K1); + build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + + l_nopage_tlbs(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbs) > FASTPATH_SIZE) + panic("TLB store handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbs)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbs[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbs, + (unsigned long)handle_tlbs + FASTPATH_SIZE * sizeof(u32)); +} + +static void __init build_r4000_tlb_modify_handler(void) +{ + u32 *p = handle_tlbm; + struct label *l = labels; + struct reloc *r = relocs; + + memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); + build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); + /* Present and writable bits set, set accessed and dirty bits. */ + build_make_write(&p, &r, K0, K1); + build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + + l_nopage_tlbm(&l, p); + i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + i_nop(&p); + + if ((p - handle_tlbm) > FASTPATH_SIZE) + panic("TLB modify handler fastpath space exceeded"); + + resolve_relocs(relocs, labels); + printk("Synthesized TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbm)); + +#ifdef DEBUG_TLB + { + int i; + + for (i = 0; i < FASTPATH_SIZE; i++) + printk("%08x\n", handle_tlbm[i]); + } +#endif + + flush_icache_range((unsigned long)handle_tlbm, + (unsigned long)handle_tlbm + FASTPATH_SIZE * sizeof(u32)); +} + +void __init build_tlb_refill_handler(void) +{ + /* + * The refill handler is generated per-CPU, multi-node systems + * may have local storage for it. The other handlers are only + * needed once. + */ + static int run_once = 0; + + switch (current_cpu_data.cputype) { + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3081E: + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: + build_r3000_tlb_refill_handler(); + if (!run_once) { + build_r3000_tlb_load_handler(); + build_r3000_tlb_store_handler(); + build_r3000_tlb_modify_handler(); + run_once++; + } + break; + + case CPU_R6000: + case CPU_R6000A: + panic("No R6000 TLB refill handler yet"); + break; + + case CPU_R8000: + panic("No R8000 TLB refill handler yet"); + break; + + default: + build_r4000_tlb_refill_handler(); + if (!run_once) { + build_r4000_tlb_load_handler(); + build_r4000_tlb_store_handler(); + build_r4000_tlb_modify_handler(); + run_once++; + } + } +} -- cgit v1.2.3