From e4707dd3e9d0cb57597b6568a5e51fea5d6fca41 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Thu, 12 Mar 2009 20:11:43 +0100
Subject: [ARM] 5422/1: ARM: MMU: add a Non-cacheable Normal executable memory
 type

This patch adds a Non-cacheable Normal ARM executable memory type,
MT_MEMORY_NONCACHED.

On OMAP3, this is used for rapid dynamic voltage/frequency scaling in
the VDD2 voltage domain. OMAP3's SDRAM controller (SDRC) is in the
VDD2 voltage domain, and its clock frequency must change along with
voltage. The SDRC clock change code cannot run from SDRAM itself,
since SDRAM accesses are paused during the clock change. So the
current implementation of the DVFS code executes from OMAP on-chip
SRAM, aka "OCM RAM."

If the OCM RAM pages are marked as Cacheable, the ARM cache controller
will attempt to flush dirty cache lines to the SDRC, so it can fill
those lines with OCM RAM instruction code. The problem is that the
SDRC is paused during DVFS, and so any SDRAM access causes the ARM MPU
subsystem to hang.

TI's original solution to this problem was to mark the OCM RAM
sections as Strongly Ordered memory, thus preventing caching. This is
overkill: since the memory is marked as non-bufferable, OCM RAM writes
become needlessly slow. The idea of "Strongly Ordered SRAM" is also
conceptually disturbing. Previous LAKML list discussion is here:

http://www.spinics.net/lists/arm-kernel/msg54312.html

This memory type MT_MEMORY_NONCACHED is used for OCM RAM by a future
patch.

Cc: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/mmu.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 9b36c5cb5e9..aa424e1da8a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -243,6 +243,10 @@ static struct mem_type mem_types[] = {
 		.prot_sect = PMD_TYPE_SECT,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_NONCACHED] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+		.domain    = DOMAIN_KERNEL,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
@@ -406,9 +410,28 @@ static void __init build_mem_type_table(void)
 		kern_pgprot |= L_PTE_SHARED;
 		vecs_pgprot |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 #endif
 	}
 
+	/*
+	 * Non-cacheable Normal - intended for memory areas that must
+	 * not cause dirty cache line writebacks when used
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv6) {
+		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+			/* Non-cacheable Normal is XCB = 001 */
+			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+				PMD_SECT_BUFFERED;
+		} else {
+			/* For both ARMv6 and non-TEX-remapping ARMv7 */
+			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+				PMD_SECT_TEX(1);
+		}
+	} else {
+		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+	}
+
 	for (i = 0; i < 16; i++) {
 		unsigned long v = pgprot_val(protection_map[i]);
 		protection_map[i] = __pgprot(v | user_pgprot);
-- 
cgit v1.2.3


From cb88214d726b337d49c1f65cbc5e5ac85837b11b Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Sun, 8 Feb 2009 02:00:50 +0100
Subject: [ARM] MX31/MX35: Add l2x0 cache support

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c0..0d8581f1121 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -704,7 +704,8 @@ config CACHE_FEROCEON_L2_WRITETHROUGH
 
 config CACHE_L2X0
 	bool "Enable the L2x0 outer cache controller"
-	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || REALVIEW_EB_A9MP
+	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \
+		   REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31
 	default y
 	select OUTER_CACHE
 	help
-- 
cgit v1.2.3


From 5f0fbf9ecaf354fa4bbf266fffdea2ea3d14a0ed Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 16 Sep 2008 13:05:53 -0400
Subject: [ARM] fixmap support

This is the minimum fixmap interface expected to be implemented by
architectures supporting highmem.

We have a second level page table already allocated and covering
0xfff00000-0xffffffff because the exception vector page is located
at 0xffff0000, and various cache tricks already use some entries above
0xffff0000.  Therefore the PTEs covering 0xfff00000-0xfffeffff are free
to be used.

However the XScale cache flushing code already uses virtual addresses
between 0xfffe0000 and 0xfffeffff.

So this reserves the 0xfff00000-0xfffdffff range for fixmap stuff.

The Documentation/arm/memory.txt information is updated accordingly,
including the information about the actual top of DMA memory mapping
region which didn't match the code.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/mm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 95bbe112965..c4f6f05198e 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -1,7 +1,6 @@
-/* the upper-most page table pointer */
-
 #ifdef CONFIG_MMU
 
+/* the upper-most page table pointer */
 extern pmd_t *top_pmd;
 
 #define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
-- 
cgit v1.2.3


From d73cd42893f4cdc06e6829fea2347bb92cb789d1 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 15 Sep 2008 16:44:55 -0400
Subject: [ARM] kmap support

The kmap virtual area borrows a 2MB range at the top of the 16MB area
below PAGE_OFFSET currently reserved for kernel modules and/or the
XIP kernel.  This 2MB corresponds to the range covered by 2 consecutive
second-level page tables, or a single pmd entry as seen by the Linux
page table abstraction.  Because XIP kernels are unlikely to be seen
on systems needing highmem support, there shouldn't be any shortage of
VM space for modules (14 MB for modules is still way more than twice the
typical usage).

Because the virtual mapping of highmem pages can go away at any moment
after kunmap() is called on them, we need to bypass the delayed cache
flushing provided by flush_dcache_page() in that case.

The atomic kmap versions are based on fixmaps, and
__cpuc_flush_dcache_page() is used directly in that case.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/Makefile  |   1 +
 arch/arm/mm/flush.c   |   2 +-
 arch/arm/mm/highmem.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/mmu.c     |  13 ++++++
 4 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/mm/highmem.c

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611..185e7dc7dcf 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES)		+= proc-syms.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
 
 obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
 obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 0fa9bf388f0..4e283481cee 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 #ifndef CONFIG_SMP
-	if (mapping && !mapping_mapped(mapping))
+	if (!PageHighMem(page) && mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
 #endif
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
new file mode 100644
index 00000000000..a34954d9df7
--- /dev/null
+++ b/arch/arm/mm/highmem.c
@@ -0,0 +1,116 @@
+/*
+ * arch/arm/mm/highmem.c -- ARM highmem support
+ *
+ * Author:	Nicolas Pitre
+ * Created:	september 8, 2008
+ * Copyright:	Marvell Semiconductors Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
+
+void *kmap(struct page *page)
+{
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+	unsigned int idx;
+	unsigned long vaddr;
+
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	/*
+	 * With debugging enabled, kunmap_atomic forces that entry to 0.
+	 * Make sure it was indeed properly unmapped.
+	 */
+	BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+	set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0);
+	/*
+	 * When debugging is off, kunmap_atomic leaves the previous mapping
+	 * in place, so this TLB flush ensures the TLB is updated with the
+	 * new mapping.
+	 */
+	local_flush_tlb_kernel_page(vaddr);
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	unsigned int idx = type + KM_TYPE_NR * smp_processor_id();
+
+	if (kvaddr >= (void *)FIXADDR_START) {
+		__cpuc_flush_dcache_page((void *)vaddr);
+#ifdef CONFIG_DEBUG_HIGHMEM
+		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+		set_pte_ext(TOP_PTE(vaddr), __pte(0), 0);
+		local_flush_tlb_kernel_page(vaddr);
+#else
+		(void) idx;  /* to kill a warning */
+#endif
+	}
+	pagefault_enable();
+}
+EXPORT_SYMBOL(kunmap_atomic);
+
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+	unsigned int idx;
+	unsigned long vaddr;
+
+	pagefault_disable();
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+	set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0);
+	local_flush_tlb_kernel_page(vaddr);
+
+	return (void *)vaddr;
+}
+
+struct page *kmap_atomic_to_page(const void *ptr)
+{
+	unsigned long vaddr = (unsigned long)ptr;
+	pte_t *pte;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	pte = TOP_PTE(vaddr);
+	return pte_page(*pte);
+}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d4d082c5c2d..4810a4c9ffc 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -21,6 +21,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/highmem.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -895,6 +896,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	flush_cache_all();
 }
 
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	pmd_t *pmd = pmd_off_k(PKMAP_BASE);
+	pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+	BUG_ON(!pmd_none(*pmd) || !pte);
+	__pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
+	pkmap_page_table = pte + PTRS_PER_PTE;
+#endif
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
@@ -908,6 +920,7 @@ void __init paging_init(struct machine_desc *mdesc)
 	prepare_page_table();
 	bootmem_init();
 	devicemaps_init(mdesc);
+	kmap_init();
 
 	top_pmd = pmd_off_k(0xffff0000);
 
-- 
cgit v1.2.3


From 3835f6cb645bdb9a58aa6e062fe1d5777f1a9748 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 17 Sep 2008 15:21:55 -0400
Subject: [ARM] mem_init(): make highmem pages available for use

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/init.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 80fd3b69ae1..8277802ec85 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -15,6 +15,7 @@
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
+#include <linux/highmem.h>
 
 #include <asm/mach-types.h>
 #include <asm/sections.h>
@@ -485,7 +486,7 @@ void __init mem_init(void)
 	int i, node;
 
 #ifndef CONFIG_DISCONTIGMEM
-	max_mapnr   = virt_to_page(high_memory) - mem_map;
+	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 #endif
 
 	/* this will put all unused low memory onto the freelists */
@@ -504,6 +505,19 @@ void __init mem_init(void)
 				    __phys_to_pfn(__pa(swapper_pg_dir)), NULL);
 #endif
 
+#ifdef CONFIG_HIGHMEM
+	/* set highmem page free */
+	for_each_online_node(node) {
+		for_each_nodebank (i, &meminfo, node) {
+			unsigned long start = bank_pfn_start(&meminfo.bank[i]);
+			unsigned long end = bank_pfn_end(&meminfo.bank[i]);
+			if (start >= max_low_pfn + PHYS_PFN_OFFSET)
+				totalhigh_pages += free_area(start, end, NULL);
+		}
+	}
+	totalram_pages += totalhigh_pages;
+#endif
+
 	/*
 	 * Since our memory may not be contiguous, calculate the
 	 * real number of pages we have in this system
@@ -521,9 +535,10 @@ void __init mem_init(void)
 	initsize = __init_end - __init_begin;
 
 	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
-		"%dK data, %dK init)\n",
+		"%dK data, %dK init, %luK highmem)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		codesize >> 10, datasize >> 10, initsize >> 10);
+		codesize >> 10, datasize >> 10, initsize >> 10,
+		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
 		extern int sysctl_overcommit_memory;
-- 
cgit v1.2.3


From 43377453af83b8ff8c1c731da1508bd6b84ebfea Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 12 Mar 2009 22:52:09 -0400
Subject: [ARM] introduce dma_cache_maint_page()

This is a helper to be used by the DMA mapping API to handle cache
maintenance for memory identified by a page structure instead of a
virtual address.  Those pages may or may not be highmem pages, and
when they're highmem pages, they may or may not be virtually mapped.
When they're not mapped then there is no L1 cache to worry about. But
even in that case the L2 cache must be processed since unmapped highmem
pages can still be L2 cached.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/dma-mapping.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f1ef5613ccd..510c179b0ac 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 
 #include <asm/memory.h>
+#include <asm/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
@@ -517,6 +518,74 @@ void dma_cache_maint(const void *start, size_t size, int direction)
 }
 EXPORT_SYMBOL(dma_cache_maint);
 
+static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
+				       size_t size, int direction)
+{
+	void *vaddr;
+	unsigned long paddr;
+	void (*inner_op)(const void *, const void *);
+	void (*outer_op)(unsigned long, unsigned long);
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		inner_op = dmac_inv_range;
+		outer_op = outer_inv_range;
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		inner_op = dmac_clean_range;
+		outer_op = outer_clean_range;
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		inner_op = dmac_flush_range;
+		outer_op = outer_flush_range;
+		break;
+	default:
+		BUG();
+	}
+
+	if (!PageHighMem(page)) {
+		vaddr = page_address(page) + offset;
+		inner_op(vaddr, vaddr + size);
+	} else {
+		vaddr = kmap_high_get(page);
+		if (vaddr) {
+			vaddr += offset;
+			inner_op(vaddr, vaddr + size);
+			kunmap_high(page);
+		}
+	}
+
+	paddr = page_to_phys(page) + offset;
+	outer_op(paddr, paddr + size);
+}
+
+void dma_cache_maint_page(struct page *page, unsigned long offset,
+			  size_t size, int dir)
+{
+	/*
+	 * A single sg entry may refer to multiple physically contiguous
+	 * pages.  But we still need to process highmem pages individually.
+	 * If highmem is not configured then the bulk of this loop gets
+	 * optimized out.
+	 */
+	size_t left = size;
+	do {
+		size_t len = left;
+		if (PageHighMem(page) && len + offset > PAGE_SIZE) {
+			if (offset >= PAGE_SIZE) {
+				page += offset / PAGE_SIZE;
+				offset %= PAGE_SIZE;
+			}
+			len = PAGE_SIZE - offset;
+		}
+		dma_cache_maint_contiguous(page, offset, len, dir);
+		offset = 0;
+		page++;
+		left -= len;
+	} while (left);
+}
+EXPORT_SYMBOL(dma_cache_maint_page);
+
 /**
  * dma_map_sg - map a set of SG buffers for streaming mode DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -614,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 			continue;
 
 		if (!arch_is_coherent())
-			dma_cache_maint(sg_virt(s), s->length, dir);
+			dma_cache_maint_page(sg_page(s), s->offset,
+					     s->length, dir);
 	}
 }
 EXPORT_SYMBOL(dma_sync_sg_for_device);
-- 
cgit v1.2.3


From 1bb772679ffb0ba1ff1d40d8c6b855ab029f177d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 12 Sep 2008 16:11:51 -0400
Subject: [ARM] Feroceon: add highmem support to L2 cache handling code

The choice is between looping over the physical range and performing
single cache line operations, or to map highmem pages somewhere, as
cache range ops are possible only on virtual addresses.

Because L2 range ops are much faster, we go with the later by factoring
the physical-to-virtual address conversion and use a fixmap entry for it
in the HIGHMEM case.

Possible future optimizations to avoid the pte setup cost:

 - do the pte setup for highmem pages only

 - determine a threshold for doing a line-by-line processing on physical
   addresses when the range is small

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/cache-feroceon-l2.c | 54 ++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 17 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 80cd207cbae..d6dd83826f8 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -14,8 +14,12 @@
 
 #include <linux/init.h>
 #include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include <plat/cache-feroceon-l2.h>
-
+#include "mm.h"
 
 /*
  * Low-level cache maintenance operations.
@@ -34,14 +38,36 @@
  * The range operations require two successive cp15 writes, in
  * between which we don't want to be preempted.
  */
+
+static inline unsigned long l2_start_va(unsigned long paddr)
+{
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Let's do our own fixmap stuff in a minimal way here.
+	 * Because range ops can't be done on physical addresses,
+	 * we simply install a virtual mapping for it only for the
+	 * TLB lookup to occur, hence no need to flush the untouched
+	 * memory mapping.  This is protected with the disabling of
+	 * interrupts by the caller.
+	 */
+	unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+	unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
+	local_flush_tlb_kernel_page(vaddr);
+	return vaddr + (paddr & ~PAGE_MASK);
+#else
+	return __phys_to_virt(paddr);
+#endif
+}
+
 static inline void l2_clean_pa(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
 }
 
-static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
+	unsigned long va_start, va_end, flags;
 
 	/*
 	 * Make sure 'start' and 'end' reference the same page, as
@@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
+	va_start = l2_start_va(start);
+	va_end = va_start + (end - start);
 	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
 		"mcr p15, 1, %1, c15, c9, 5"
-		: : "r" (start), "r" (end));
+		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
 }
 
-static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
-{
-	l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end));
-}
-
 static inline void l2_clean_inv_pa(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
@@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr)
 	__asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
 }
 
-static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
+	unsigned long va_start, va_end, flags;
 
 	/*
 	 * Make sure 'start' and 'end' reference the same page, as
@@ -84,17 +107,14 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
+	va_start = l2_start_va(start);
+	va_end = va_start + (end - start);
 	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
 		"mcr p15, 1, %1, c15, c11, 5"
-		: : "r" (start), "r" (end));
+		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
 }
 
-static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
-{
-	l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end));
-}
-
 
 /*
  * Linux primitives.
-- 
cgit v1.2.3


From 3902a15e784e9b1efa8e6ad246489c609e0ef880 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 18 Sep 2008 22:55:47 -0400
Subject: [ARM] xsc3: add highmem support to L2 cache handling code

On xsc3, L2 cache ops are possible only on virtual addresses.  The code
is rearranged so to have a linear progression requiring the least amount
of pte setups in the highmem case.  To protect the virtual mapping so
created, interrupts must be disabled currently up to a page worth of
address range.

The interrupt disabling is done in a way to minimize the overhead within
the inner loop.  The alternative would consist in separate code for
the highmem and non highmem compilation which is less preferable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/cache-xsc3l2.c | 107 +++++++++++++++++++++++++++++++++------------
 1 file changed, 80 insertions(+), 27 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index 464de893a98..5d180cb0bd9 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -17,12 +17,14 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-
 #include <asm/system.h>
 #include <asm/cputype.h>
 #include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
 
 #define CR_L2	(1 << 26)
 
@@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr)
 	__asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr));
 }
 
-static inline void xsc3_l2_clean_pa(unsigned long addr)
-{
-	xsc3_l2_clean_mva(__phys_to_virt(addr));
-}
-
 static inline void xsc3_l2_inv_mva(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr));
 }
 
-static inline void xsc3_l2_inv_pa(unsigned long addr)
-{
-	xsc3_l2_inv_mva(__phys_to_virt(addr));
-}
-
 static inline void xsc3_l2_inv_all(void)
 {
 	unsigned long l2ctype, set_way;
@@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void)
 	dsb();
 }
 
+#ifdef CONFIG_HIGHMEM
+#define l2_map_save_flags(x)		raw_local_save_flags(x)
+#define l2_map_restore_flags(x)		raw_local_irq_restore(x)
+#else
+#define l2_map_save_flags(x)		((x) = 0)
+#define l2_map_restore_flags(x)		((void)(x))
+#endif
+
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
+				      unsigned long flags)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long va = prev_va & PAGE_MASK;
+	unsigned long pa_offset = pa << (32 - PAGE_SHIFT);
+	if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) {
+		/*
+		 * Switching to a new page.  Because cache ops are
+		 * using virtual addresses only, we must put a mapping
+		 * in place for it.  We also enable interrupts for a
+		 * short while and disable them again to protect this
+		 * mapping.
+		 */
+		unsigned long idx;
+		raw_local_irq_restore(flags);
+		idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+		va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+		raw_local_irq_restore(flags | PSR_I_BIT);
+		set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
+		local_flush_tlb_kernel_page(va);
+	}
+	return va + (pa_offset >> (32 - PAGE_SHIFT));
+#else
+	return __phys_to_virt(pa);
+#endif
+}
+
 static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_inv_all();
 		return;
 	}
 
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	/*
 	 * Clean and invalidate partial first cache line.
 	 */
 	if (start & (CACHE_LINE_SIZE - 1)) {
-		xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1));
-		xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
 	}
 
 	/*
-	 * Clean and invalidate partial last cache line.
+	 * Invalidate all full cache lines between 'start' and 'end'.
 	 */
-	if (start < end && (end & (CACHE_LINE_SIZE - 1))) {
-		xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1));
-		xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
-		end &= ~(CACHE_LINE_SIZE - 1);
+	while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_inv_mva(vaddr);
+		start += CACHE_LINE_SIZE;
 	}
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * Clean and invalidate partial last cache line.
 	 */
-	while (start < end) {
-		xsc3_l2_inv_pa(start);
-		start += CACHE_LINE_SIZE;
+	if (start < end) {
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
 static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		xsc3_l2_clean_pa(start);
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
@@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void)
 
 static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_flush_all();
 		return;
 	}
 
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		xsc3_l2_clean_pa(start);
-		xsc3_l2_inv_pa(start);
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
-- 
cgit v1.2.3


From 3f973e22160257c5bda85815be5b1540d391a671 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 4 Nov 2008 00:48:42 -0500
Subject: [ARM] ignore high memory with VIPT aliasing caches

VIPT aliasing caches have issues of their own which are not yet handled.
Usage of discard_old_kernel_data() in copypage-v6.c is not highmem ready,
kmap/fixmap stuff doesn't take account of cache colouring, etc.
If/when those issues are handled then this could be reverted.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
 arch/arm/mm/mmu.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4810a4c9ffc..cf504885a5f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -18,6 +18,7 @@
 #include <asm/cputype.h>
 #include <asm/mach-types.h>
 #include <asm/sections.h>
+#include <asm/cachetype.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
@@ -678,6 +679,10 @@ static void __init sanity_check_meminfo(void)
 			if (meminfo.nr_banks >= NR_BANKS) {
 				printk(KERN_CRIT "NR_BANKS too low, "
 						 "ignoring high memory\n");
+			} else if (cache_is_vipt_aliasing()) {
+				printk(KERN_CRIT "HIGHMEM is not yet supported "
+						 "with VIPT aliasing cache, "
+						 "ignoring high memory\n");
 			} else {
 				memmove(bank + 1, bank,
 					(meminfo.nr_banks - i) * sizeof(*bank));
-- 
cgit v1.2.3


From 49cbe78637eb0503f45fc9b556ec08918a616534 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Tue, 20 Jan 2009 14:15:18 +0800
Subject: [ARM] pxa: add base support for Marvell's PXA168 processor line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"""The Marvell® PXA168 processor is the first in a family of application
processors targeted at mass market opportunities in computing and consumer
devices. It balances high computing and multimedia performance with low
power consumption to support extended battery life, and includes a wealth
of integrated peripherals to reduce overall BOM cost .... """

See http://www.marvell.com/featured/pxa168.jsp for more information.

  1. Marvell Mohawk core is a hybrid of xscale3 and its own ARM core,
     there are many enhancements like instructions for flushing the
     whole D-cache, and so on

  2. Clock reuses Russell's common clkdev, and added the basic support
     for UART1/2.

  3. Devices are a bit different from the 'mach-pxa' way, the platform
     devices are now dynamically allocated only when necessary (i.e.
     when pxa_register_device() is called). Description for each device
     are stored in an array of 'struct pxa_device_desc'. Now that:

     a. this array of device description is marked with __initdata and
        can be freed up system is fully up

     b. which means board code has to add all needed devices early in
        his initializing function

     c. platform specific data can now be marked as __initdata since
        they are allocated and copied by platform_device_add_data()

  4. only the basic UART1/2/3 are added, more devices will come later.

Signed-off-by: Jason Chagas <chagas@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>
---
 arch/arm/mm/Kconfig       |  15 +-
 arch/arm/mm/Makefile      |   1 +
 arch/arm/mm/proc-mohawk.S | 416 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 430 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/mm/proc-mohawk.S

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c0..64086f4f5fc 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -340,6 +340,17 @@ config CPU_XSC3
 	select CPU_TLB_V4WBI if MMU
 	select IO_36
 
+# Marvell PJ1 (Mohawk)
+config CPU_MOHAWK
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_PABRT_NOIFAR
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_TLB_V4WBI if MMU
+	select CPU_COPY_V4WB if MMU
+
 # Feroceon
 config CPU_FEROCEON
 	bool
@@ -569,7 +580,7 @@ comment "Processor Features"
 
 config ARM_THUMB
 	bool "Support Thumb user binaries"
-	depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7 || CPU_FEROCEON
+	depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON
 	default y
 	help
 	  Say Y if you want to include kernel support for running user space
@@ -653,7 +664,7 @@ config CPU_CACHE_ROUND_ROBIN
 
 config CPU_BPREDICT_DISABLE
 	bool "Disable branch prediction"
-	depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+	depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611..64149d9e55a 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_CPU_SA110)		+= proc-sa110.o
 obj-$(CONFIG_CPU_SA1100)	+= proc-sa1100.o
 obj-$(CONFIG_CPU_XSCALE)	+= proc-xscale.o
 obj-$(CONFIG_CPU_XSC3)		+= proc-xsc3.o
+obj-$(CONFIG_CPU_MOHAWK)	+= proc-mohawk.o
 obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
 obj-$(CONFIG_CPU_V7)		+= proc-v7.o
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
new file mode 100644
index 00000000000..540f5078496
--- /dev/null
+++ b/arch/arm/mm/proc-mohawk.S
@@ -0,0 +1,416 @@
+/*
+ *  linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core
+ *
+ *  PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core.
+ *
+ *  Heavily based on proc-arm926.S and proc-xsc3.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define CACHE_DLIMIT	32768
+
+/*
+ * The cache line size of the L1 D cache.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * cpu_mohawk_proc_init()
+ */
+ENTRY(cpu_mohawk_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_proc_fin()
+ */
+ENTRY(cpu_mohawk_proc_fin)
+	stmfd	sp!, {lr}
+	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	msr	cpsr_c, ip
+	bl	mohawk_flush_kern_cache_all
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...iz...........
+	bic	r0, r0, #0x0006			@ .............ca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ldmfd	sp!, {pc}
+
+/*
+ * cpu_mohawk_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ *
+ * (same as arm926)
+ */
+	.align	5
+ENTRY(cpu_mohawk_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x0007			@ .............cam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	mov	pc, r0
+
+/*
+ * cpu_mohawk_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	5
+ENTRY(cpu_mohawk_do_idle)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c0, 4		@ wait for interrupt
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(mohawk_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(mohawk_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 0		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	flush_kern_dcache_page(void *page)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- page aligned address
+ */
+ENTRY(mohawk_flush_kern_dcache_page)
+	add	r1, r0, #PAGE_SZ
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_inv_range)
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_clean_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+ENTRY(mohawk_cache_fns)
+	.long	mohawk_flush_kern_cache_all
+	.long	mohawk_flush_user_cache_all
+	.long	mohawk_flush_user_cache_range
+	.long	mohawk_coherent_kern_range
+	.long	mohawk_coherent_user_range
+	.long	mohawk_flush_kern_dcache_page
+	.long	mohawk_dma_inv_range
+	.long	mohawk_dma_clean_range
+	.long	mohawk_dma_flush_range
+
+ENTRY(cpu_mohawk_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_mohawk_switch_mm)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	orr	r0, r0, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_mohawk_set_pte_ext)
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+	__INIT
+
+	.type	__mohawk_setup, #function
+__mohawk_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs
+	orr	r4, r4, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+
+	mov	r0, #0				@ don't allow CP access
+	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
+
+	adr	r5, mohawk_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	mov	pc, lr
+
+	.size	__mohawk_setup, . - __mohawk_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..00 0101
+	 *
+	 */
+	.type	mohawk_crval, #object
+mohawk_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+	.type	mohawk_processor_functions, #object
+mohawk_processor_functions:
+	.word	v5t_early_abort
+	.word	pabort_noifar
+	.word	cpu_mohawk_proc_init
+	.word	cpu_mohawk_proc_fin
+	.word	cpu_mohawk_reset
+	.word	cpu_mohawk_do_idle
+	.word	cpu_mohawk_dcache_clean_area
+	.word	cpu_mohawk_switch_mm
+	.word	cpu_mohawk_set_pte_ext
+	.size	mohawk_processor_functions, . - mohawk_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv5te"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v5"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	.type	cpu_mohawk_name, #object
+cpu_mohawk_name:
+	.asciz	"Marvell 88SV331x"
+	.size	cpu_mohawk_name, . - cpu_mohawk_name
+
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	.type	__88sv331x_proc_info,#object
+__88sv331x_proc_info:
+	.long	0x56158000			@ Marvell 88SV331x (MOHAWK)
+	.long	0xfffff000
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__mohawk_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_mohawk_name
+	.long	mohawk_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	mohawk_cache_fns
+	.size	__88sv331x_proc_info, . - __88sv331x_proc_info
-- 
cgit v1.2.3


From 28853ac8fe5221de74a14f1182d7b2b383dfd85c Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Wed, 25 Mar 2009 13:10:01 +0200
Subject: ARM: Add support for FA526 v2

Adds support for Faraday FA526 core. This core is used at least by:
Cortina Systems Gemini and Centroid family
Cavium Networks ECONA family
Grain Media GM8120
Pixelplus ImageARM
Prolific PL-1029
Faraday IP evaluation boards

v2:
- move TLB_BTB to separate patch
- update copyrights

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
---
 arch/arm/mm/Kconfig       |  35 ++++++-
 arch/arm/mm/Makefile      |   4 +
 arch/arm/mm/cache-fa.S    | 220 ++++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/copypage-fa.c |  86 ++++++++++++++++
 arch/arm/mm/proc-fa526.S  | 248 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/tlb-fa.S      |  75 ++++++++++++++
 6 files changed, 666 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/mm/cache-fa.S
 create mode 100644 arch/arm/mm/copypage-fa.c
 create mode 100644 arch/arm/mm/proc-fa526.S
 create mode 100644 arch/arm/mm/tlb-fa.S

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c0..bc3331863d9 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -186,6 +186,24 @@ config CPU_ARM926T
 	  Say Y if you want support for the ARM926T processor.
 	  Otherwise, say N.
 
+# FA526
+config CPU_FA526
+	bool
+	select CPU_32v4
+	select CPU_ABRT_EV4
+	select CPU_PABRT_NOIFAR
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_CACHE_FA
+	select CPU_COPY_FA if MMU
+	select CPU_TLB_FA if MMU
+	help
+	  The FA526 is a version of the ARMv4 compatible processor with
+	  Branch Target Buffer, Unified TLB and cache line size 16.
+
+	  Say Y if you want support for the FA526 processor.
+	  Otherwise, say N.
+
 # ARM940T
 config CPU_ARM940T
 	bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -484,6 +502,9 @@ config CPU_CACHE_VIVT
 config CPU_CACHE_VIPT
 	bool
 
+config CPU_CACHE_FA
+	bool
+
 if MMU
 # The copy-page model
 config CPU_COPY_V3
@@ -498,6 +519,9 @@ config CPU_COPY_V4WB
 config CPU_COPY_FEROCEON
 	bool
 
+config CPU_COPY_FA
+	bool
+
 config CPU_COPY_V6
 	bool
 
@@ -528,6 +552,13 @@ config CPU_TLB_FEROCEON
 	help
 	  Feroceon TLB (v4wbi with non-outer-cachable page table walks).
 
+config CPU_TLB_FA
+	bool
+	help
+	  Faraday ARM FA526 architecture, unified TLB with writeback cache
+	  and invalidate instruction cache entry. Branch target buffer is
+	  also supported.
+
 config CPU_TLB_V6
 	bool
 
@@ -638,7 +669,7 @@ config CPU_DCACHE_SIZE
 
 config CPU_DCACHE_WRITETHROUGH
 	bool "Force write through D-cache"
-	depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+	depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
 	default y if CPU_ARM925T
 	help
 	  Say Y here to use the data cache in writethrough mode. Unless you
@@ -653,7 +684,7 @@ config CPU_CACHE_ROUND_ROBIN
 
 config CPU_BPREDICT_DISABLE
 	bool "Disable branch prediction"
-	depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+	depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611..40f941c2245 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT)	+= cache-v4wt.o
 obj-$(CONFIG_CPU_CACHE_V4WB)	+= cache-v4wb.o
 obj-$(CONFIG_CPU_CACHE_V6)	+= cache-v6.o
 obj-$(CONFIG_CPU_CACHE_V7)	+= cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA)	+= cache-fa.o
 
 obj-$(CONFIG_CPU_COPY_V3)	+= copypage-v3.o
 obj-$(CONFIG_CPU_COPY_V4WT)	+= copypage-v4wt.o
@@ -41,6 +42,7 @@ obj-$(CONFIG_CPU_COPY_V6)	+= copypage-v6.o context.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 obj-$(CONFIG_CPU_XSC3)		+= copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA)	+= copypage-fa.o
 
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
@@ -49,6 +51,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI)	+= tlb-v4wbi.o
 obj-$(CONFIG_CPU_TLB_FEROCEON)	+= tlb-v4wbi.o	# reuse v4wbi TLB functions
 obj-$(CONFIG_CPU_TLB_V6)	+= tlb-v6.o
 obj-$(CONFIG_CPU_TLB_V7)	+= tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA)	+= tlb-fa.o
 
 obj-$(CONFIG_CPU_ARM610)	+= proc-arm6_7.o
 obj-$(CONFIG_CPU_ARM710)	+= proc-arm6_7.o
@@ -62,6 +65,7 @@ obj-$(CONFIG_CPU_ARM925T)	+= proc-arm925.o
 obj-$(CONFIG_CPU_ARM926T)	+= proc-arm926.o
 obj-$(CONFIG_CPU_ARM940T)	+= proc-arm940.o
 obj-$(CONFIG_CPU_ARM946E)	+= proc-arm946.o
+obj-$(CONFIG_CPU_FA526)		+= proc-fa526.o
 obj-$(CONFIG_CPU_ARM1020)	+= proc-arm1020.o
 obj-$(CONFIG_CPU_ARM1020E)	+= proc-arm1020e.o
 obj-$(CONFIG_CPU_ARM1022)	+= proc-arm1022.o
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 00000000000..b63a8f7b95c
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,220 @@
+/*
+ *  linux/arch/arm/mm/cache-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Processors: FA520 FA526 FA626	
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE	8192
+#else
+#define CACHE_DSIZE	16384 
+#endif 
+
+/* FIXME: put optimal value here. Current one is just estimation */
+#define CACHE_DLIMIT	(CACHE_DSIZE * 2)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(fa_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+	mov	ip, #0
+	mov	r2, #VM_EXEC
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean/invalidate D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (inclusive, page aligned)
+ *	- end	- end address (exclusive, page aligned)
+ *	- flags	- vma_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT		@ total size >= limit?
+	bhs	__flush_whole_cache		@ flush whole D cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I line
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+	/* fall through */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	flush_kern_dcache_page(kaddr)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- kaddr   - kernel address (guaranteed to be page aligned)
+ */
+ENTRY(fa_flush_kern_dcache_page)
+	add	r1, r0, #PAGE_SZ
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_dma_inv_range)
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	bic	r1, r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean (write back) the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_dma_clean_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	fa_cache_fns, #object
+ENTRY(fa_cache_fns)
+	.long	fa_flush_kern_cache_all
+	.long	fa_flush_user_cache_all
+	.long	fa_flush_user_cache_range
+	.long	fa_coherent_kern_range
+	.long	fa_coherent_user_range
+	.long	fa_flush_kern_dcache_page
+	.long	fa_dma_inv_range
+	.long	fa_dma_clean_range
+	.long	fa_dma_flush_range
+	.size	fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 00000000000..b2a6008b011
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ *  linux/arch/arm/lib/copypage-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r2, %0				@ 1\n\
+1:	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	subs	r2, r2, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r2, c7, c10, 4		@ 1   drain WB\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to, KM_USER0);
+	kfrom = kmap_atomic(from, KM_USER1);
+	fa_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom, KM_USER1);
+	kunmap_atomic(kto, KM_USER0);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page, KM_USER0);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ 1   drain WB"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr, KM_USER0);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+	.cpu_clear_user_highpage = fa_clear_user_highpage,
+	.cpu_copy_user_highpage	= fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 00000000000..08b8a955d5d
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,248 @@
+/*
+ *  linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ *  Written by : Luke Lee
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE	16
+
+	.text
+/*
+ * cpu_fa526_proc_init()
+ */
+ENTRY(cpu_fa526_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_fa526_proc_fin()
+ */
+ENTRY(cpu_fa526_proc_fin)
+	stmfd	sp!, {lr}
+	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	msr	cpsr_c, ip
+	bl	fa_flush_kern_cache_all
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	nop
+	nop
+	ldmfd	sp!, {pc}
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	4
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	bic	ip, ip, #0x0800			@ BTB off
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	nop
+	nop
+	mov	pc, r0
+
+/*
+ * cpu_fa526_do_idle()
+ */
+	.align	4
+ENTRY(cpu_fa526_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	mov	pc, lr
+
+
+ENTRY(cpu_fa526_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+	mcr	p15, 0, ip, c7, c14, 0		@ clean and invalidate whole D cache
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c5, 6		@ invalidate BTB since mm changed
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate UTLB
+#endif
+	mov	pc, lr
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	mov	pc, lr
+
+	__INIT
+
+	.type	__fa526_setup, #function
+__fa526_setup:
+	/* On return of this routine, r0 must carry correct flags for CFG register */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	mcr	p15, 0, r0, c7, c5, 5		@ invalidate IScratchpad RAM
+
+	mov	r0, #1
+	mcr	p15, 0, r0, c1, c1, 0		@ turn-on ECR
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB All
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+
+	mov	r0, #0x1f			@ Domains 0, 1 = manager, 2 = client
+	mcr	p15, 0, r0, c3, c0		@ load domain access register
+
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	ldr	r5, fa526_cr1_clear
+	bic	r0, r0, r5
+	ldr	r5, fa526_cr1_set
+	orr	r0, r0, r5
+	mov	pc, lr
+	.size	__fa526_setup, . - __fa526_setup
+
+	/*
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 1001 .111 1101
+	 *
+	 */
+	.type	fa526_cr1_clear, #object
+	.type	fa526_cr1_set, #object
+fa526_cr1_clear:
+	.word	0x3f3f
+fa526_cr1_set:
+	.word	0x397D
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+	.type	fa526_processor_functions, #object
+fa526_processor_functions:
+	.word	v4_early_abort
+	.word	pabort_noifar
+	.word	cpu_fa526_proc_init
+	.word	cpu_fa526_proc_fin
+	.word	cpu_fa526_reset
+	.word   cpu_fa526_do_idle
+	.word	cpu_fa526_dcache_clean_area
+	.word	cpu_fa526_switch_mm
+	.word	cpu_fa526_set_pte_ext
+	.size	fa526_processor_functions, . - fa526_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv4"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v4"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	.type	cpu_fa526_name, #object
+cpu_fa526_name:
+	.asciz	"FA526"
+	.size	cpu_fa526_name, . - cpu_fa526_name
+
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	.type	__fa526_proc_info,#object
+__fa526_proc_info:
+	.long	0x66015261
+	.long	0xff01fff1
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__fa526_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF
+	.long	cpu_fa526_name
+	.long	fa526_processor_functions
+	.long	fa_tlb_fns
+	.long	fa_user_fns
+	.long	fa_cache_fns
+	.size	__fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 00000000000..9694f1f6f48
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,75 @@
+/*
+ *  linux/arch/arm/mm/tlb-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4, Faraday variation.
+ *  This assume an unified TLBs, with a write buffer, and branch target buffer (BTB)
+ *
+ *  Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ *	flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	4
+ENTRY(fa_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	movne	pc, lr				@ no, we dont do anything
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	mov	pc, lr
+
+
+ENTRY(fa_flush_kern_tlb_range)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	fa_tlb_fns, #object
+ENTRY(fa_tlb_fns)
+	.long	fa_flush_user_tlb_range
+	.long	fa_flush_kern_tlb_range
+	.long	fa_tlb_flags
+	.size	fa_tlb_fns, . - fa_tlb_fns
-- 
cgit v1.2.3


From f0bba9f934517533acbda7329be93f55d5a01c03 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Sat, 28 Mar 2009 19:18:05 +0100
Subject: [ARM] 5435/1: fix compile warning in sanity_check_meminfo()

Compiling recent 2.6.29-rc kernels for ARM gives me the following warning:

arch/arm/mm/mmu.c: In function 'sanity_check_meminfo':
arch/arm/mm/mmu.c:697: warning: comparison between pointer and integer

This is because commit 3fd9825c42c784a59b3b90bdf073f49d4bb42a8d
"[ARM] 5402/1: fix a case of wrap-around in sanity_check_meminfo()"
in 2.6.29-rc5-git4 added a comparison of a pointer with PAGE_OFFSET,
which is an integer.

Fixed by casting PAGE_OFFSET to void *.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Acked-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d4d082c5c2d..5a89e57e342 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -694,7 +694,7 @@ static void __init sanity_check_meminfo(void)
 		 * the vmalloc area.
 		 */
 		if (__va(bank->start) >= VMALLOC_MIN ||
-		    __va(bank->start) < PAGE_OFFSET) {
+		    __va(bank->start) < (void *)PAGE_OFFSET) {
 			printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
 			       "(vmalloc region overlap).\n",
 			       bank->start, bank->start + bank->size - 1);
-- 
cgit v1.2.3