aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/mm/init.c2
-rw-r--r--arch/arm/mach-pxa/mainstone.c4
-rw-r--r--arch/arm/mm/consistent.c4
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm26/mm/init.c2
-rw-r--r--arch/cris/mm/init.c2
-rw-r--r--arch/frv/kernel/frv_ksyms.c1
-rw-r--r--arch/frv/mm/dma-alloc.c4
-rw-r--r--arch/frv/mm/init.c6
-rw-r--r--arch/h8300/kernel/h8300_ksyms.c1
-rw-r--r--arch/h8300/mm/init.c4
-rw-r--r--arch/i386/kernel/efi.c2
-rw-r--r--arch/i386/kernel/smp.c28
-rw-r--r--arch/i386/kernel/sys_i386.c25
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c2
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c2
-rw-r--r--arch/i386/mm/hugetlbpage.c12
-rw-r--r--arch/i386/mm/init.c6
-rw-r--r--arch/i386/mm/pageattr.c20
-rw-r--r--arch/ia64/Kconfig19
-rw-r--r--arch/ia64/configs/tiger_defconfig2
-rw-r--r--arch/ia64/kernel/acpi.c14
-rw-r--r--arch/ia64/kernel/entry.S14
-rw-r--r--arch/ia64/kernel/iosapic.c6
-rw-r--r--arch/ia64/kernel/irq.c13
-rw-r--r--arch/ia64/kernel/mca.c90
-rw-r--r--arch/ia64/kernel/perfmon.c5
-rw-r--r--arch/ia64/kernel/signal.c101
-rw-r--r--arch/ia64/kernel/smpboot.c114
-rw-r--r--arch/ia64/kernel/time.c9
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/ia64/mm/contig.c4
-rw-r--r--arch/ia64/mm/discontig.c9
-rw-r--r--arch/ia64/mm/hugetlbpage.c5
-rw-r--r--arch/ia64/mm/init.c6
-rw-r--r--arch/ia64/sn/kernel/Makefile3
-rw-r--r--arch/ia64/sn/kernel/pio_phys.S71
-rw-r--r--arch/ia64/sn/kernel/setup.c6
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c21
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c102
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c1
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c28
-rw-r--r--arch/ia64/sn/pci/tioce_provider.c326
-rw-r--r--arch/m32r/mm/init.c4
-rw-r--r--arch/m68k/mm/init.c2
-rw-r--r--arch/m68k/mm/memory.c2
-rw-r--r--arch/m68k/mm/motorola.c2
-rw-r--r--arch/m68knommu/kernel/m68k_ksyms.c1
-rw-r--r--arch/m68knommu/mm/init.c4
-rw-r--r--arch/mips/arc/memory.c2
-rw-r--r--arch/mips/dec/prom/memory.c2
-rw-r--r--arch/mips/mips-boards/generic/memory.c2
-rw-r--r--arch/mips/mips-boards/sim/sim_mem.c2
-rw-r--r--arch/mips/mm/init.c11
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c2
-rw-r--r--arch/parisc/mm/init.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c15
-rw-r--r--arch/powerpc/mm/init_32.c4
-rw-r--r--arch/powerpc/mm/init_64.c4
-rw-r--r--arch/powerpc/mm/mem.c6
-rw-r--r--arch/powerpc/platforms/cell/setup.c2
-rw-r--r--arch/ppc/kernel/dma-mapping.c4
-rw-r--r--arch/ppc/mm/init.c6
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/sh/mm/consistent.c3
-rw-r--r--arch/sh/mm/hugetlbpage.c12
-rw-r--r--arch/sh/mm/init.c4
-rw-r--r--arch/sh64/mm/hugetlbpage.c12
-rw-r--r--arch/sh64/mm/init.c4
-rw-r--r--arch/sparc/kernel/sun4d_smp.c6
-rw-r--r--arch/sparc/kernel/sun4m_smp.c6
-rw-r--r--arch/sparc/mm/generic.c1
-rw-r--r--arch/sparc/mm/init.c6
-rw-r--r--arch/sparc/mm/loadmmu.c2
-rw-r--r--arch/sparc/mm/srmmu.c9
-rw-r--r--arch/sparc/mm/sun4c.c15
-rw-r--r--arch/sparc64/Kconfig4
-rw-r--r--arch/sparc64/kernel/pci.c2
-rw-r--r--arch/sparc64/kernel/sun4v_tlb_miss.S39
-rw-r--r--arch/sparc64/kernel/traps.c21
-rw-r--r--arch/sparc64/kernel/tsb.S210
-rw-r--r--arch/sparc64/mm/fault.c15
-rw-r--r--arch/sparc64/mm/generic.c1
-rw-r--r--arch/sparc64/mm/hugetlbpage.c40
-rw-r--r--arch/sparc64/mm/init.c25
-rw-r--r--arch/sparc64/mm/tsb.c234
-rw-r--r--arch/um/kernel/mem.c4
-rw-r--r--arch/um/kernel/physmem.c2
-rw-r--r--arch/x86_64/kernel/time.c2
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c1
-rw-r--r--arch/x86_64/mm/init.c6
-rw-r--r--arch/x86_64/mm/pageattr.c63
-rw-r--r--arch/xtensa/mm/init.c2
-rw-r--r--arch/xtensa/mm/pgtable.c24
94 files changed, 1307 insertions, 653 deletions
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 486d7945583..544ac5dc09e 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -357,7 +357,7 @@ free_reserved_mem(void *start, void *end)
void *__start = start;
for (; __start < end; __start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(__start));
- set_page_count(virt_to_page(__start), 1);
+ init_page_count(virt_to_page(__start));
free_page((long)__start);
totalram_pages++;
}
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index d5bda60209e..98356f81000 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -157,14 +157,14 @@ static struct platform_device smc91x_device = {
.resource = smc91x_resources,
};
-static int mst_audio_startup(snd_pcm_substream_t *substream, void *priv)
+static int mst_audio_startup(struct snd_pcm_substream *substream, void *priv)
{
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
MST_MSCWR2 &= ~MST_MSCWR2_AC97_SPKROFF;
return 0;
}
-static void mst_audio_shutdown(snd_pcm_substream_t *substream, void *priv)
+static void mst_audio_shutdown(struct snd_pcm_substream *substream, void *priv)
{
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
MST_MSCWR2 |= MST_MSCWR2_AC97_SPKROFF;
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index c2ee18d2075..8a1bfcd5008 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -223,6 +223,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
pte = consistent_pte[idx] + off;
c->vm_pages = page;
+ split_page(page, order);
+
/*
* Set the "dma handle"
*/
@@ -231,7 +233,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
do {
BUG_ON(!pte_none(*pte));
- set_page_count(page, 1);
/*
* x86 does not mark the pages reserved...
*/
@@ -250,7 +251,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
* Free the otherwise unused pages.
*/
while (page < end) {
- set_page_count(page, 1);
__free_page(page);
page++;
}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index efda9710ee6..88279124317 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -530,7 +530,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
index 1f09a9d0fb8..e3ecaa45374 100644
--- a/arch/arm26/mm/init.c
+++ b/arch/arm26/mm/init.c
@@ -324,7 +324,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 31a0018b525..b7842ff213a 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -216,7 +216,7 @@ free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 0f1c6cbc4f5..aa6b7d0a210 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -27,6 +27,7 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index 342823aad75..636b2f8b5d9 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -115,9 +115,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
*/
if (order > 0) {
struct page *rpage = virt_to_page(page);
-
- for (i = 1; i < (1 << order); i++)
- set_page_count(rpage + i, 1);
+ split_page(rpage, order);
}
err = 0;
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index 765088ea8a5..8899aa1a4f0 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -169,7 +169,7 @@ void __init mem_init(void)
struct page *page = &mem_map[pfn];
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
}
@@ -210,7 +210,7 @@ void __init free_initmem(void)
/* next to check that the page we free is not a partial page */
for (addr = start; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -230,7 +230,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
index 5cc76efaf7a..69d6ad32d56 100644
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ b/arch/h8300/kernel/h8300_ksyms.c
@@ -25,6 +25,7 @@ extern char h8300_debug_device[];
/* platform dependent support */
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 1e0929ddc8c..09efc4b1f03 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -196,7 +196,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
@@ -219,7 +219,7 @@ free_initmem()
/* next to check that the page we free is not a partial page */
for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index c9cad7ba0d2..aeabb419686 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void)
unsigned long cr4;
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
- cpu_gdt_descr->address = __va(cpu_gdt_descr->address);
+ cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address);
load_gdt(cpu_gdt_descr);
cr4 = read_cr4();
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 218d725a5a1..d134e9643a5 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void)
spin_unlock_irq(&call_lock);
}
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
+static struct call_data_struct *call_data;
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Does not return until
* remote CPUs are nearly ready to execute <<func>> or are or have executed.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
{
struct call_data_struct data;
int cpus;
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index a4a61976ecb..8fdb1fb17a5 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes)
return error;
}
-/* common code for old and new mmaps */
-static inline long do_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
{
int error = -EBADF;
- struct file * file = NULL;
+ struct file *file = NULL;
+ struct mm_struct *mm = current->mm;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {
@@ -56,9 +55,9 @@ static inline long do_mmap2(
goto out;
}
- down_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&current->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
if (file)
fput(file);
@@ -66,13 +65,6 @@ out:
return error;
}
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
/*
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls. Linux/i386 didn't use to be able to handle more than
@@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
if (a.offset & ~PAGE_MASK)
goto out;
- err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+ err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+ a.fd, a.offset >> PAGE_SHIFT);
out:
return err;
}
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index be242723c33..17a6fe7166e 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale;
+static unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index a7f5a2aceba..5e41ee29c8c 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation);
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale;
+static unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index d524127c9af..a7d89158541 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -48,18 +48,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmd;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 2700f01994b..7ba55a6e2db 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -270,7 +270,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
static void __meminit free_new_highpage(struct page *page)
{
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -727,7 +727,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)addr, 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
@@ -766,7 +766,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index d0cadb33b54..92c3d9f0e73 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -51,6 +51,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
if (!base)
return NULL;
+ /*
+ * page_private is used to track the number of entries in
+ * the page table page that have non standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -143,11 +150,12 @@ __change_page_attr(struct page *page, pgprot_t prot)
return -ENOMEM;
set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
kpte_page = split;
- }
- get_page(kpte_page);
+ }
+ page_private(kpte_page)++;
} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
@@ -157,10 +165,8 @@ __change_page_attr(struct page *page, pgprot_t prot)
* replace it with a largepage.
*/
if (!PageReserved(kpte_page)) {
- /* memleak and potential failed 2M page regeneration */
- BUG_ON(!page_count(kpte_page));
-
- if (cpu_has_pse && (page_count(kpte_page) == 1)) {
+ if (cpu_has_pse && (page_private(kpte_page) == 0)) {
+ ClearPagePrivate(kpte_page);
list_add(&kpte_page->lru, &df_list);
revert_page(kpte_page, address);
}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a85ea9d37f0..ff7ae6b664e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -271,6 +271,25 @@ config SCHED_SMT
Intel IA64 chips with MultiThreading at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PERMIT_BSP_REMOVE
+ bool "Support removal of Bootstrap Processor"
+ depends on HOTPLUG_CPU
+ default n
+ ---help---
+ Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
+ support.
+
+config FORCE_CPEI_RETARGET
+ bool "Force assumption that CPEI can be re-targetted"
+ depends on PERMIT_BSP_REMOVE
+ default n
+ ---help---
+ Say Y if you need to force the assumption that CPEI can be re-targetted to
+ any cpu in the system. This hint is available via ACPI 3.0 specifications.
+ Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+ This option is useful to enable this feature on older BIOS's as well.
+ You can also enable this by using boot command line option force_cpei=1.
+
config PREEMPT
bool "Preemptible Kernel"
help
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 125568118b8..766bf495543 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc839..4722ec51c70 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int can_cpei_retarget(void)
{
extern int cpe_vector;
+ extern unsigned int force_cpei_retarget;
/*
* Only if CPEI is supported and the override flag
* is present, otherwise return that it's re-targettable
* if we are in polling mode.
*/
- if (cpe_vector > 0 && !acpi_cpei_override)
- return 0;
- else
- return 1;
+ if (cpe_vector > 0) {
+ if (acpi_cpei_override || force_cpei_retarget)
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
}
unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
{
acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
}
+#endif
unsigned int get_cpei_target_cpu(void)
{
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6dd..0e3eda99e54 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343f..8832c553230 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector)
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c3275..5ce908ef9c9 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
+ extern void ia64_disable_timer(void);
+ extern volatile int time_keeper_id;
+
+ ia64_disable_timer();
+
+ /*
+ * Find a new timesync master
+ */
+ if (smp_processor_id() == time_keeper_id) {
+ time_keeper_id = first_cpu(cpu_online_map);
+ printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+ }
- ia64_set_itv(1<<16);
/*
* Phase 1: Locate irq's bound to this cpu and
* relocate them for cpu removal.
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee57..b57e723f194 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type)
ia64_sal_clear_state_info(sal_info_type);
}
-/*
- * platform dependent error handling
- */
-#ifndef PLATFORM_MCA_HANDLERS
-
#ifdef CONFIG_ACPI
int cpe_vector = -1;
+int ia64_cpe_irq = -1;
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev)
}
#endif /* CONFIG_ACPI */
-#endif /* PLATFORM_MCA_HANDLERS */
-
/*
* ia64_mca_cmc_vector_setup
*
@@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
*tnat |= (nat << tslot);
}
+/* Change the comm field on the MCA/INIT task to include the pid that
+ * was interrupted, it makes for easier debugging. If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const task_t *previous_current)
+{
+ char *p, comm[sizeof(current->comm)];
+ if (previous_current->pid)
+ snprintf(comm, sizeof(comm), "%s %d",
+ current->comm, previous_current->pid);
+ else {
+ int l;
+ if ((p = strchr(previous_current->comm, ' ')))
+ l = p - previous_current->comm;
+ else
+ l = strlen(previous_current->comm);
+ snprintf(comm, sizeof(comm), "%s %*s %d",
+ current->comm, l, previous_current->comm,
+ task_thread_info(previous_current)->cpu);
+ }
+ memcpy(current->comm, comm, sizeof(current->comm));
+}
+
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm.h. The original stack has not been touched by this event. Some of
* the original stack's registers will be in the RBS on this stack. This stack
@@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
struct ia64_sal_os_state *sos,
const char *type)
{
- char *p, comm[sizeof(current->comm)];
+ char *p;
ia64_va va;
extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
/* Verify the previous stack state before we change it */
if (user_mode(regs)) {
msg = "occurred in user space";
+ /* previous_current is guaranteed to be valid when the task was
+ * in user space, so ...
+ */
+ ia64_mca_modify_comm(previous_current);
goto no_mod;
}
if (r13 != sos->prev_IA64_KR_CURRENT) {
@@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
goto no_mod;
}
- /* Change the comm field on the MCA/INT task to include the pid that
- * was interrupted, it makes for easier debugging. If that pid was 0
- * (swapper or nested MCA/INIT) then use the start of the previous comm
- * field suffixed with its cpu.
- */
- if (previous_current->pid)
- snprintf(comm, sizeof(comm), "%s %d",
- current->comm, previous_current->pid);
- else {
- int l;
- if ((p = strchr(previous_current->comm, ' ')))
- l = p - previous_current->comm;
- else
- l = strlen(previous_current->comm);
- snprintf(comm, sizeof(comm), "%s %*s %d",
- current->comm, l, previous_current->comm,
- task_thread_info(previous_current)->cpu);
- }
- memcpy(current->comm, comm, sizeof(current->comm));
+ ia64_mca_modify_comm(previous_current);
/* Make the original task look blocked. First stack a struct pt_regs,
* describing the state at the time of interrupt. mca_asm.S built a
@@ -908,7 +914,7 @@ no_mod:
static void
ia64_wait_for_slaves(int monarch)
{
- int c, wait = 0;
+ int c, wait = 0, missing = 0;
for_each_online_cpu(c) {
if (c == monarch)
continue;
@@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch)
}
}
if (!wait)
- return;
+ goto all_in;
for_each_online_cpu(c) {
if (c == monarch)
continue;
if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ missing = 1;
break;
}
}
+ if (!missing)
+ goto all_in;
+ printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
+ for_each_online_cpu(c) {
+ if (c == monarch)
+ continue;
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ printk(" %d", c);
+ }
+ printk("\n");
+ return;
+
+all_in:
+ printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
+ return;
}
/*
@@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
task_t *previous_current;
oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
+ console_loglevel = 15; /* make sure printks make it to console */
+ printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
+ sos->proc_state_param, cpu, sos->monarch);
+
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
monarch_cpu = cpu;
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1444,11 +1471,13 @@ void __devinit
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
+ static int first_time = 1;
- if (smp_processor_id() == 0) {
+ if (first_time) {
void *mca_data;
int cpu;
+ first_time = 0;
mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
* NR_CPUS + KERNEL_STACK_SIZE);
mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1733,7 @@ ia64_mca_late_init(void)
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
setup_irq(irq, &mca_cpe_irqaction);
+ ia64_cpe_irq = irq;
}
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385d..077f21216b6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
void
pfm_init_percpu (void)
{
+ static int first_time=1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ first_time=0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d0..1d7903ee212 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
}
return 0;
}
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered. It is important that the target process is not
- * allowed to do any more work in user space. Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon. Store the data in the current task,
- * the signal will be sent when the current task returns from the next
- * interrupt.
- *
- * - It is running in user context. Store the data in the current task, the
- * signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- * user context. Store the data in the target task, the signal will be sent
- * to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- * returning to user context. Because this is also the current task, the
- * signal will not get delivered and the task could sleep indefinitely.
- * Store the data in the idle task for this cpu, the signal will be sent
- * after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu. Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work. Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled. It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time. To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
- return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
- struct task_struct *t;
- unsigned long start_time = 0;
- int i;
-
- for (i = 1; i <= 3; ++i) {
- switch (i) {
- case 1:
- t = find_task_by_pid(pid);
- if (t)
- start_time = start_time_ul(t);
- break;
- case 2:
- t = current;
- break;
- default:
- t = idle_task(smp_processor_id());
- break;
- }
-
- if (!t)
- return;
- task_thread_info(t)->sigdelayed.signo = signo;
- task_thread_info(t)->sigdelayed.code = code;
- task_thread_info(t)->sigdelayed.addr = addr;
- task_thread_info(t)->sigdelayed.start_time = start_time;
- task_thread_info(t)->sigdelayed.pid = pid;
- wmb();
- set_tsk_thread_flag(t, TIF_SIGDELAYED);
- }
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
- struct siginfo siginfo;
- pid_t pid;
- struct task_struct *t;
-
- clear_thread_flag(TIF_SIGDELAYED);
- memset(&siginfo, 0, sizeof(siginfo));
- siginfo.si_signo = current_thread_info()->sigdelayed.signo;
- siginfo.si_code = current_thread_info()->sigdelayed.code;
- siginfo.si_addr = current_thread_info()->sigdelayed.addr;
- pid = current_thread_info()->sigdelayed.pid;
- t = find_task_by_pid(pid);
- if (!t)
- return;
- if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
- return;
- force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86..c4b633b36da 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
#endif
#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok 1
+#else
+#define bsp_remove_ok 0
+#endif
+
/*
* Store all idle threads, this can be reused instead of creating
* a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
/*
* ITC synchronization related stuff:
*/
-#define MASTER 0
+#define MASTER (0)
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT (1)
+#else
+#define CPEI_OVERRIDE_DEFAULT (0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+/*
+ * Handler for the "force_cpei=" boot option: parse the integer that
+ * follows the option name and store it in force_cpei_retarget,
+ * replacing the build-time default.  Always returns 1.
+ */
+static int __init
+cmdl_force_cpei(char *str)
+{
+	int requested = 0;	/* value stored if get_option() sets nothing */
+
+	get_option (&str, &requested);
+	force_cpei_retarget = requested;
+
+	return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
static int __init
nointroute (char *str)
{
@@ -161,6 +188,27 @@ nointroute (char *str)
__setup("nointroute", nointroute);
+/*
+ * Copy the saved br[0] of the calling CPU's SAL rendezvous state into
+ * slot 0 of sal_boot_rendez_state.  This happens once only, and only
+ * when called on a CPU whose id is non-zero; every other call is a
+ * no-op.  Compiles to an empty function without CONFIG_HOTPLUG_CPU.
+ */
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	static int b0_fix_pending = 1;	/* cleared after the one-time copy */
+	int this_cpu = smp_processor_id();
+
+	/*
+	 * Cache the b0 value on the first AP that comes up
+	 */
+	if (!b0_fix_pending || this_cpu == 0)
+		return;
+
+	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[this_cpu].br[0];
+	printk ("Fixed BSP b0 value from CPU %d\n", this_cpu);
+
+	b0_fix_pending = 0;
+#endif
+}
+
void
sync_master (void *arg)
{
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
static void __devinit
smp_callin (void)
{
- int cpuid, phys_id;
+ int cpuid, phys_id, itc_master;
extern void ia64_init_itm(void);
+ extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
+ itc_master = time_keeper_id;
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
BUG();
}
+ fix_b0_for_bsp();
+
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
* calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
- Dprintk("Going to syncup ITC with BP.\n");
- ia64_sync_itc(0);
+ Dprintk("Going to syncup ITC with ITC Master.\n");
+ ia64_sync_itc(itc_master);
}
/*
@@ -635,6 +687,47 @@ remove_siblinginfo(int cpu)
}
extern void fixup_irqs(void);
+
+/*
+ * Move platform interrupts away from @cpu before it goes offline.
+ *
+ * Only the CPEI is handled: if a CPE vector exists and @cpu is its
+ * current target, try to point it at some other online CPU.
+ *
+ * Returns 0 when nothing had to be done or the retarget succeeded,
+ * -EBUSY when @cpu is the CPEI target and the interrupt could not be
+ * moved (retargeting not allowed, or no irq descriptor).
+ */
+int migrate_platform_irqs(unsigned int cpu)
+{
+	irq_desc_t *cpe_desc;
+	cpumask_t target_mask;
+	int target_cpu;
+
+	/*
+	 * Nothing to migrate unless this CPU is the CPEI target.
+	 */
+	if (cpe_vector <= 0 || !is_cpu_cpei_target(cpu))
+		return 0;
+
+	printk ("CPU (%d) is CPEI Target\n", cpu);
+
+	if (can_cpei_retarget()) {
+		/*
+		 * Pick a new target from the online map and record it.
+		 */
+		target_cpu = any_online_cpu(cpu_online_map);
+		target_mask = cpumask_of_cpu(target_cpu);
+		set_cpei_target_cpu(target_cpu);
+		cpe_desc = irq_descp(ia64_cpe_irq);
+		/*
+		 * Switch immediately for now.  Other interrupts are moved
+		 * with a fake interrupt; CPEI behaviour with polling needs
+		 * study before doing the same here.
+		 */
+		if (cpe_desc) {
+			cpe_desc->handler->disable(ia64_cpe_irq);
+			cpe_desc->handler->set_affinity(ia64_cpe_irq, target_mask);
+			cpe_desc->handler->enable(ia64_cpe_irq);
+			printk ("Re-targetting CPEI to cpu %d\n", target_cpu);
+			return 0;
+		}
+	}
+
+	/* Either retargeting is not permitted or there is no descriptor. */
+	printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+	return -EBUSY;
+}
+
/* must be called with cpucontrol mutex held */
int __cpu_disable(void)
{
@@ -643,8 +736,17 @@ int __cpu_disable(void)
/*
* dont permit boot processor for now
*/
- if (cpu == 0)
- return -EBUSY;
+ if (cpu == 0 && !bsp_remove_ok) {
+ printk ("Your platform does not support removal of BSP\n");
+ return (-EBUSY);
+ }
+
+ cpu_clear(cpu, cpu_online_map);
+
+ if (migrate_platform_irqs(cpu)) {
+ cpu_set(cpu, cpu_online_map);
+ return (-EBUSY);
+ }
remove_siblinginfo(cpu);
cpu_clear(cpu, cpu_online_map);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2..ac167436e93 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
extern unsigned long wall_jiffies;
-#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == TIME_KEEPER_ID) {
+ if (smp_processor_id() == time_keeper_id) {
/*
* Here we are in the timer irq handler. We have irqs locally
* disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
+/*
+ * Write the value (1 << 16) to the ITV register through ia64_set_itv().
+ * NOTE(review): bit 16 is presumably the ITV mask bit, so this would
+ * mask the local timer interrupt - confirm against the Itanium manual.
+ */
+void __devinit ia64_disable_timer(void)
+{
+ ia64_set_itv(1 << 16);
+}
+
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa6..3b6fd798c4d 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
/*
* If CPEI cannot be re-targetted, and this is
* CPEI target, then dont create the control file
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4e468..9855ba31809 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -181,13 +181,15 @@ per_cpu_init (void)
{
void *cpu_data;
int cpu;
+ static int first_time=1;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP
* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
* get_zeroed_page().
*/
- if (smp_processor_id() == 0) {
+ if (first_time) {
+ first_time=0;
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1d581..573d5cc63e2 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -528,12 +528,17 @@ void __init find_memory(void)
void *per_cpu_init(void)
{
int cpu;
+ static int first_time = 1;
+
if (smp_processor_id() != 0)
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ if (first_time) {
+ first_time = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ }
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 2d13889d0a9..9dbc7dadd16 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
/*
- * This function checks for proper alignment of input addr and len parameters.
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
*/
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b38b6d213c1..08d94e6bfa1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -197,7 +197,7 @@ free_initmem (void)
eaddr = (unsigned long) ia64_imva(__init_end);
while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
++totalram_pages;
addr += PAGE_SIZE;
@@ -252,7 +252,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
continue;
page = virt_to_page(start);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(start);
++totalram_pages;
}
@@ -640,7 +640,7 @@ mem_init (void)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 3e9b4eea741..ab9c48c8801 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,7 +10,8 @@
CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
- huberror.o io_init.o iomv.o klconflib.o sn2/
+ huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
+ sn2/
obj-$(CONFIG_IA64_GENERIC) += machvec.o
obj-$(CONFIG_SGI_TIOCX) += tiocx.o
obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 00000000000..3c7d48d6ecb
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ * pio_phys_read_mmr - read an MMR
+ * pio_phys_write_mmr - write an MMR
+ * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ * Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * ie., 0x80000....
+ */
+
+
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+/*
+ * pio_phys_read_mmr - read an MMR through an uncached physical address.
+ * One input register (r32) holds the address; the 8-byte value loaded
+ * from it is left in r8.  psr is saved in r2 around the access and
+ * written back afterwards.
+ */
+GLOBAL_ENTRY(pio_phys_read_mmr)
+ .prologue
+ .regstk 1,0,0,0
+ .body
+ mov r2=psr	// keep the incoming psr so it can be restored below
+ rsm psr.i | psr.dt	// clear the psr.i and psr.dt bits for the access
+ ;;
+ srlz.d	// serialize so the psr change is in effect before the load
+ ld8.acq r8=[r32]	// the actual MMR read
+ ;;
+ mov psr.l=r2;;	// put back the saved psr bits
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_read_mmr)
+
+/*
+ * pio_phys_write_mmr - write an MMR through an uncached physical address.
+ * Two input registers: r32 holds the address, r33 the 8-byte value to
+ * store.  psr is saved in r2 around the access and written back
+ * afterwards.
+ */
+GLOBAL_ENTRY(pio_phys_write_mmr)
+ .prologue
+ .regstk 2,0,0,0
+ .body
+ mov r2=psr	// keep the incoming psr so it can be restored below
+ rsm psr.i | psr.dt	// clear the psr.i and psr.dt bits for the access
+ ;;
+ srlz.d	// serialize so the psr change is in effect before the store
+ st8.rel [r32]=r33	// the actual MMR write
+ ;;
+ mov psr.l=r2;;	// put back the saved psr bits
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_write_mmr)
+
+/*
+ * pio_atomic_phys_write_mmrs - write one or two MMRs with psr.ic cleared.
+ * Four input registers: r32/r33 are the first address/value pair and
+ * r34/r35 the second.  The second store is predicated on r34 being
+ * non-zero, so passing a NULL second address skips it.  psr is saved in
+ * r2 and written back after the stores.
+ */
+GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
+ .prologue
+ .regstk 4,0,0,0
+ .body
+ mov r2=psr	// keep the incoming psr so it can be restored below
+ cmp.ne p9,p0=r34,r0;	// p9 = (second address != 0)
+ rsm psr.i | psr.dt | psr.ic	// additionally clears psr.ic, unlike the single-MMR routines
+ ;;
+ srlz.d	// serialize so the psr change is in effect before the stores
+ st8.rel [r32]=r33	// first MMR write, unconditional
+(p9) st8.rel [r34]=r35	// second MMR write, only when p9 is set
+ ;;
+ mov psr.l=r2;;	// put back the saved psr bits
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_atomic_phys_write_mmrs)
+
+
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5b84836c217..8b6d5c84470 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/config.h>
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p)
* for sn.
*/
pm_power_off = ia64_sn_power_down;
+ current->thread.flags |= IA64_THREAD_MIGRATION;
}
/**
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void)
SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
u64 *pio;
pio = is_shub1() ? pio1 : pio2;
- pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+ pda->pio_write_status_addr =
+ (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b2e1e746b47..d9d306c79f2 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
}
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 does not guarantee that PIO writes issued from different CPUs
+ * arrive in order.  A user thread with memory-mapped MMIO that is
+ * context switched onto another CPU could therefore issue PIOs from
+ * separate CPUs, so spin here until the Shub of the CPU the task last
+ * ran on reports its PIO writes as drained.
+ */
+void sn_migrate(struct task_struct *task)
+{
+	pda_t *prev_pda = pdacpu(task_thread_info(task)->last_cpu);
+	volatile unsigned long *status = prev_pda->pio_write_status_addr;
+	unsigned long drained = prev_pda->pio_write_status_val;
+
+	/* Drain PIO writes from old CPU's Shub */
+	for (;;) {
+		unsigned long pending;
+
+		/* re-read the status register on every pass */
+		pending = *status & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK;
+		if (unlikely(pending != drained)) {
+			cpu_relax();
+			continue;
+		}
+		break;
+	}
+}
+
void sn_tlb_migrate_finish(struct mm_struct *mm)
{
/* flush_tlb_mm is inefficient if more than 1 users of mm */
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index cdf6856ce08..d0abddd9ffe 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -21,7 +21,6 @@
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <asm/sn/bte.h>
@@ -30,6 +29,31 @@
/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	void *raw;
+
+	/*
+	 * On return *base holds the pointer obtained from kzalloc() (the
+	 * one to hand to kfree() later); the return value is the cacheline
+	 * aligned address inside that allocation, or NULL on failure.
+	 */
+
+	/* first try: kzalloc may hand back cacheline aligned memory as is */
+	raw = kzalloc(size, flags);
+	*base = raw;
+	if (raw == NULL)
+		return NULL;
+	if ((u64) raw == L1_CACHE_ALIGN((u64) raw))
+		return raw;
+	kfree(raw);
+
+	/* it did not: allocate one extra cacheline and round up ourselves */
+	raw = kzalloc(size + L1_CACHE_BYTES, flags);
+	*base = raw;
+	if (raw == NULL)
+		return NULL;
+	return (void *) L1_CACHE_ALIGN((u64) raw);
+}
+
+
+/*
* Set up the initial values for the XPartition Communication channels.
*/
static void
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
* Allocate all of the channel structures as a contiguous chunk of
* memory.
*/
- part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
GFP_KERNEL);
if (part->channels == NULL) {
dev_err(xpc_chan, "can't get memory for channels\n");
return xpcNoMemory;
}
- memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
part->nchannels = XPC_NCHANNELS;
/* allocate all the required GET/PUT values */
- part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->local_GPs_base);
if (part->local_GPs == NULL) {
kfree(part->channels);
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part)
"values\n");
return xpcNoMemory;
}
- memset(part->local_GPs, 0, XPC_GP_SIZE);
- part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->remote_GPs_base);
if (part->remote_GPs == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
dev_err(xpc_chan, "can't get memory for remote get/put "
"values\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_GPs, 0, XPC_GP_SIZE);
/* allocate all the required open and close args */
- part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->local_openclose_args_base);
if (part->local_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
kfree(part->remote_GPs_base);
part->remote_GPs = NULL;
- dev_err(xpc_chan, "can't get memory for local connect args\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
- part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->remote_openclose_args_base);
if (part->remote_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
kfree(part->local_openclose_args_base);
part->local_openclose_args = NULL;
- dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
xpc_initialize_channels(part, partid);
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
part->IPI_owner, (void *) (u64) partid);
if (ret != 0) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_openclose_args_base);
- part->remote_openclose_args = NULL;
dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
"errno=%d\n", -ret);
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcLackOfResources;
}
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
for (nentries = ch->local_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->local_msgqueue_base);
if (ch->local_msgqueue == NULL) {
continue;
}
- memset(ch->local_msgqueue, 0, nbytes);
nbytes = nentries * sizeof(struct xpc_notify);
- ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
+ ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
if (ch->notify_queue == NULL) {
kfree(ch->local_msgqueue_base);
ch->local_msgqueue = NULL;
continue;
}
- memset(ch->notify_queue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->local_nentries) {
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->remote_msgqueue_base);
if (ch->remote_msgqueue == NULL) {
continue;
}
- memset(ch->remote_msgqueue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->remote_nentries) {
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 8cbf1643257..99b123a6421 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -52,7 +52,6 @@
#include <linux/syscalls.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/completion.h>
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 88a730e6cfd..94211429fd0 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	void *raw;
+
+	/*
+	 * On return *base holds the pointer obtained from kmalloc() (the
+	 * one to hand to kfree() later); the return value is the cacheline
+	 * aligned address inside that allocation, or NULL on failure.
+	 */
+
+	/* first try: kmalloc may hand back cacheline aligned memory as is */
+	raw = kmalloc(size, flags);
+	*base = raw;
+	if (raw == NULL)
+		return NULL;
+	if ((u64) raw == L1_CACHE_ALIGN((u64) raw))
+		return raw;
+	kfree(raw);
+
+	/* it did not: allocate one extra cacheline and round up ourselves */
+	raw = kmalloc(size + L1_CACHE_BYTES, flags);
+	*base = raw;
+	if (raw == NULL)
+		return NULL;
+	return (void *) L1_CACHE_ALIGN((u64) raw);
+}
+
+
+/*
* Given a nasid, get the physical address of the partition's reserved page
* for that nasid. This function returns 0 on any error.
*/
@@ -1038,13 +1063,12 @@ xpc_discovery(void)
remote_vars = (struct xpc_vars *) remote_rp;
- discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
+ discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
- memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831ed93e..fa073cc4b56 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
#include <asm/sn/pcidev.h>
#include <asm/sn/pcibus_provider_defs.h>
#include <asm/sn/tioce_provider.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ * if(mmr address < 0x45000) {
+ * if(mmr address == 0 or 0x80)
+ * mmr wrt or read address 0xc0
+ * else if(mmr address == 0x148 or 0x200)
+ * mmr wrt or read address 0x28
+ * else
+ * mmr wrt or read address 0x158
+ *
+ * do desired mmr access (rd or wrt)
+ *
+ * if(mmr address == 0x100)
+ * mmr wrt or read address 0x38
+ * mmr wrt or read address 0xb050
+ * } else
+ * do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above addresses.
+ *
+ * Note this WAR can only be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+/*
+ * tioce_mmr_war_pre - access to perform before touching a TIOCE MMR.
+ * @kern: TIOCE kernel context owning the MMR
+ * @mmr_addr: address of the MMR about to be accessed
+ *
+ * Does nothing unless the part is TIOCE_REV_A.  For rev A, when the
+ * MMR's offset from the TIOCE base is below 0x45000, issue a dummy read
+ * of another MMR chosen by the target offset:
+ *   offset 0 or 0x80      -> read 0xc0
+ *   offset 0x148 or 0x200 -> read 0x28
+ *   anything else         -> read 0x158
+ */
+static inline void
+tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		u64 mmr_war_offset;
+
+		if (mmr_offset == 0 || mmr_offset == 0x80)
+			mmr_war_offset = 0xc0;
+		else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+			mmr_war_offset = 0x28;
+		else
+			mmr_war_offset = 0x158;
+
+		/* the value read is irrelevant; only the access matters */
+		readq_relaxed((void *)(mmr_base + mmr_war_offset));
+	}
+}
+
+/*
+ * tioce_mmr_war_post - access to perform after touching a TIOCE MMR.
+ * @kern: TIOCE kernel context owning the MMR
+ * @mmr_addr: address of the MMR that was just accessed
+ *
+ * Does nothing unless the part is TIOCE_REV_A.  For rev A, when the
+ * MMR's offset from the TIOCE base is below 0x45000, issue a dummy read
+ * of offset 0xb050, preceded by a dummy read of offset 0x38 if the MMR
+ * just accessed was at offset 0x100.
+ */
+static inline void
+tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		/* the values read are irrelevant; only the accesses matter */
+		if (mmr_offset == 0x100)
+			readq_relaxed((void *)(mmr_base + 0x38));
+		readq_relaxed((void *)(mmr_base + 0xb050));
+	}
+}
+
+/*
+ * MMR accessors that bracket every access with tioce_mmr_war_pre() and
+ * tioce_mmr_war_post().  All of them expand their mmrp argument more
+ * than once, so pass expressions without side effects.
+ */
+
+/* load mmr contents into a variable; varp points at the destination */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	*(varp) = readq_relaxed(mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr; varp points at the source */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(*(varp), mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq((val), mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/*
+ * set bits (immediate value) in mmr: read-modify-write.  The temporary
+ * has a name unlikely to clash with identifiers used in the arguments.
+ */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+	u64 tioce_tmp_; \
+	tioce_mmr_load(kern, mmrp, &tioce_tmp_); \
+	tioce_tmp_ |= (bits); \
+	tioce_mmr_store(kern, mmrp, &tioce_tmp_); \
+} while (0)
+
+/* clear bits (immediate value) in mmr: read-modify-write */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+	u64 tioce_tmp_; \
+	tioce_mmr_load(kern, mmrp, &tioce_tmp_); \
+	tioce_tmp_ &= ~(bits); \
+	tioce_mmr_store(kern, mmrp, &tioce_tmp_); \
+} while (0)
/**
* Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
#define TIOCE_ATE_M40 2
#define TIOCE_ATE_M40S 3
-#define KB(x) ((x) << 10)
-#define MB(x) ((x) << 20)
-#define GB(x) ((x) << 30)
+#define KB(x) ((u64)(x) << 10)
+#define MB(x) ((u64)(x) << 20)
+#define GB(x) ((u64)(x) << 30)
/**
* tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
int last;
int entries;
int nates;
- int pagesize;
+ u64 pagesize;
u64 *ate_shadow;
u64 *ate_reg;
u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
ate = ATE_MAKE(addr, pagesize);
ate_shadow[i + j] = ate;
- writeq(ate, &ate_reg[i + j]);
+ tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
addr += pagesize;
}
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
u64 tmp;
ce_kern->ce_port[port].dirmap_shadow = ct_upper;
- writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ ct_upper);
tmp = ce_mmr->ce_ure_dir_map[port];
dma_ok = 1;
} else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
if (TIOCE_D32_ADDR(bus_addr)) {
if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
ce_kern->ce_port[port].dirmap_shadow = 0;
- writeq(0, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ 0);
}
} else {
struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
} else if (--map->refcnt == 0) {
for (i = 0; i < map->ate_count; i++) {
map->ate_shadow[i] = 0;
- map->ate_hw[i] = 0;
+ tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
}
list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
dma_map_done:
- if (mapaddr & barrier)
+ if (mapaddr && barrier)
mapaddr = tioce_dma_barrier(mapaddr, 1);
return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
soft->ce_pcibus.bs_persist_segment,
soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
+ if (ret_stuff.v0)
+ panic("tioce_error_intr_handler: Fatal TIOCE error");
+
return IRQ_HANDLED;
}
/**
+ * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ate's for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+	int ate_index, last_ate, ps;
+	struct tioce *ce_mmr;
+
+	/* nothing to reserve unless the range starts inside M32 space */
+	if (!TIOCE_M32_ADDR(base))
+		return;
+
+	ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ps = ce_kern->ce_ate3240_pagesize;
+	ate_index = ATE_PAGE(base, ps);
+	last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+	/*
+	 * Entries below index 64 are never written here.
+	 * NOTE(review): presumably they are not part of the M32 range -
+	 * confirm against the CE programmer's reference.
+	 */
+	if (ate_index < 64)
+		ate_index = 64;
+
+	/*
+	 * Only base was checked against M32 space, so limit may reach past
+	 * the end of the ATE table.  Clamp to the last valid entry so the
+	 * loop cannot write beyond ce_ate3240_shadow[] / ce_ure_ate3240[].
+	 */
+	if (last_ate >= TIOCE_NUM_M3240_ATES)
+		last_ate = TIOCE_NUM_M3240_ATES - 1;
+
+	while (ate_index <= last_ate) {
+		u64 ate;
+
+		/* 0xdeadbeef is a placeholder address marking the entry as taken */
+		ate = ATE_MAKE(0xdeadbeef, ps);
+		ce_kern->ce_ate3240_shadow[ate_index] = ate;
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+				 ate);
+		ate_index++;
+	}
+}
+
+/**
* tioce_kern_init - init kernel structures related to a given TIOCE
* @tioce_common: ptr to a cached tioce_common struct that originated in prom
- */ static struct tioce_kernel *
+ */
+static struct tioce_kernel *
tioce_kern_init(struct tioce_common *tioce_common)
{
int i;
+ int ps;
+ int dev;
u32 tmp;
+ unsigned int seg, bus;
struct tioce *tioce_mmr;
struct tioce_kernel *tioce_kern;
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common)
* here to use pci_read_config_xxx() so use the raw_pci_ops vector.
*/
- raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment,
- tioce_common->ce_pcibus.bs_persist_busnum,
- PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp);
+ seg = tioce_common->ce_pcibus.bs_persist_segment;
+ bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
tioce_kern->ce_port1_secondary = (u8) tmp;
/*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common)
*/
tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
- __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
- __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
- tioce_kern->ce_ate3240_pagesize = KB(256);
+ tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_PAGESIZE_MASK);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_256K_PAGESIZE);
+ ps = tioce_kern->ce_ate3240_pagesize = KB(256);
for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
tioce_kern->ce_ate40_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate40[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
}
for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
tioce_kern->ce_ate3240_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+ }
+
+ /*
+ * Reserve ATE's corresponding to reserved address ranges. These
+ * include:
+ *
+ * Memory space covered by each PPB mem base/limit register
+ * Memory space covered by each PPB prefetch base/limit register
+ *
+ * These bus ranges are for pio (downstream) traffic only, and so
+ * cannot be used for DMA.
+ */
+
+ for (dev = 1; dev <= 2; dev++) {
+ u64 base, limit;
+
+ /* mem base/limit */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_BASE, 2, &tmp);
+ base = (u64)tmp << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_LIMIT, 2, &tmp);
+ limit = (u64)tmp << 16;
+ limit |= 0xfffffUL;
+
+ if (base < limit)
+ tioce_reserve_m32(tioce_kern, base, limit);
+
+ /*
+ * prefetch mem base/limit. The tioce ppb's have 64-bit
+ * decoders, so read the upper portions w/o checking the
+ * attributes.
+ */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_BASE, 2, &tmp);
+ base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_BASE_UPPER32, 4, &tmp);
+ base |= (u64)tmp << 32;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+ limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+ limit |= 0xfffffUL;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+ limit |= (u64)tmp << 32;
+
+ if ((base < limit) && TIOCE_M32_ADDR(base))
+ tioce_reserve_m32(tioce_kern, base, limit);
}
return tioce_kern;
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
u64 force_int_val;
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+ /*
+ * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+ * the TIO_INTx register directly (1/26/2006)
+ */
+ if (ce_common->ce_rev == TIOCE_REV_A) {
+ u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+ u64 status;
+
+ tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+ if (status & int_bit_mask) {
+ u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+ u64 ctalk = sn_irq_info->irq_xtalkaddr;
+ u64 nasid, offset;
+
+ nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+ offset = (ctalk & CTALK_NODE_OFFSET);
+ HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+ }
+
+ return;
+ }
/*
* irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
default:
return;
}
- writeq(force_int_val, &ce_mmr->ce_adm_force_int);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
}
/**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
int bit;
u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
bit = sn_irq_info->irq_int_bit;
- __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
vector |= sn_irq_info->irq_xtalkaddr;
- writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
- __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+ tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
tioce_force_interrupt(sn_irq_info);
}
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
static void *
tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
{
+ int my_nasid;
+ cnodeid_t my_cnode, mem_cnode;
struct tioce_common *tioce_common;
+ struct tioce_kernel *tioce_kern;
+ struct tioce *tioce_mmr;
/*
* Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
- if (tioce_kern_init(tioce_common) == NULL) {
+ tioce_kern = tioce_kern_init(tioce_common);
+ if (tioce_kern == NULL) {
kfree(tioce_common);
return NULL;
}
+ /*
+ * Clear out any transient errors before registering the error
+ * interrupt handler.
+ */
+
+ tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+ ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+
if (request_irq(SGI_PCIASIC_ERROR,
tioce_error_intr_handler,
SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
tioce_common->ce_pcibus.bs_persist_segment,
tioce_common->ce_pcibus.bs_persist_busnum);
+ /*
+ * identify closest nasid for memory allocations
+ */
+
+ my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
+ my_cnode = nasid_to_cnodeid(my_nasid);
+
+ if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
+ printk(KERN_WARNING "tioce_bus_fixup: failed to find "
+ "closest node with MEM to TIO node %d\n", my_cnode);
+ mem_cnode = (cnodeid_t)-1; /* use any node */
+ }
+
+ controller->node = mem_cnode;
+
return tioce_common;
}
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 6facf15b04f..c9e7dad860b 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -226,7 +226,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -244,7 +244,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index c45beb95594..a190e39c907 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -137,7 +137,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index 559942ce0e1..d6d582a5abb 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -54,7 +54,7 @@ void __init init_pointer_table(unsigned long ptable)
/* unreserve the page so it's possible to free that page */
PD_PAGE(dp)->flags &= ~(1 << PG_reserved);
- set_page_count(PD_PAGE(dp), 1);
+ init_page_count(PD_PAGE(dp));
return;
}
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index d855fec2631..afb57eeafdc 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -276,7 +276,7 @@ void free_initmem(void)
addr = (unsigned long)&__init_begin;
for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) {
virt_to_page(addr)->flags &= ~(1 << PG_reserved);
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c
index eddb8d3e130..d844c755945 100644
--- a/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/arch/m68knommu/kernel/m68k_ksyms.c
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 89f0b554ffb..d79503fe6e4 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -195,7 +195,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
@@ -218,7 +218,7 @@ free_initmem()
/* next to check that the page we free is not a partial page */
for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/mips/arc/memory.c b/arch/mips/arc/memory.c
index 958d2eb7886..8a9ef58cc39 100644
--- a/arch/mips/arc/memory.c
+++ b/arch/mips/arc/memory.c
@@ -158,7 +158,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c
index 81cb5a76cfb..1edaf3074ee 100644
--- a/arch/mips/dec/prom/memory.c
+++ b/arch/mips/dec/prom/memory.c
@@ -118,7 +118,7 @@ unsigned long __init prom_free_prom_memory(void)
addr = PAGE_SIZE;
while (addr < end) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
}
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c
index 2c8afd77a20..ee5e70c95cf 100644
--- a/arch/mips/mips-boards/generic/memory.c
+++ b/arch/mips/mips-boards/generic/memory.c
@@ -174,7 +174,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/mips-boards/sim/sim_mem.c b/arch/mips/mips-boards/sim/sim_mem.c
index 0dbd7435bb2..1ec4e75656b 100644
--- a/arch/mips/mips-boards/sim/sim_mem.c
+++ b/arch/mips/mips-boards/sim/sim_mem.c
@@ -117,7 +117,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0ff9a348b84..52f7d59fe61 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -54,7 +54,8 @@ unsigned long empty_zero_page, zero_page_mask;
*/
unsigned long setup_zero_pages(void)
{
- unsigned long order, size;
+ unsigned int order;
+ unsigned long size;
struct page *page;
if (cpu_has_vce)
@@ -67,9 +68,9 @@ unsigned long setup_zero_pages(void)
panic("Oh boy, that early out of memory?");
page = virt_to_page(empty_zero_page);
+ split_page(page, order);
while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) {
SetPageReserved(page);
- set_page_count(page, 1);
page++;
}
@@ -244,7 +245,7 @@ void __init mem_init(void)
#ifdef CONFIG_LIMITED_DMA
set_page_address(page, lowmem_page_address(page));
#endif
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -291,7 +292,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
@@ -314,7 +315,7 @@ void free_initmem(void)
page = addr;
#endif
ClearPageReserved(virt_to_page(page));
- set_page_count(virt_to_page(page), 1);
+ init_page_count(virt_to_page(page));
free_page(page);
totalram_pages++;
freed += PAGE_SIZE;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index ed93a979295..e0d095daa5e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -559,7 +559,7 @@ void __init mem_init(void)
/* if (!page_is_ram(pgnr)) continue; */
/* commented out until page_is_ram works */
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
totalram_pages++;
}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 7847ca13d6c..852eda3953d 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -398,7 +398,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
num_physpages++;
totalram_pages++;
@@ -1018,7 +1018,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
num_physpages++;
totalram_pages++;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b51bb28c054..7370f9f33e2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -133,21 +133,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return __pte(old);
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- if (! (within_hugepage_low_range(addr, len)
- || within_hugepage_high_range(addr, len)) )
- return -EINVAL;
- return 0;
-}
-
struct slb_flush_info {
struct mm_struct *mm;
u16 newareas;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 7d0d75c1184..b57fb3a2b7b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -216,7 +216,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name)
while (start < end) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
cnt++;
start += PAGE_SIZE;
@@ -248,7 +248,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 81cfb0c2ec5..bacb71c8981 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -140,7 +140,7 @@ void free_initmem(void)
for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
memset((void *)addr, 0xcc, PAGE_SIZE);
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -155,7 +155,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 550517c2dd4..454cac01d8c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -108,8 +108,8 @@ EXPORT_SYMBOL(phys_mem_access_prot);
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 0);
- free_cold_page(page);
+ init_page_count(page);
+ __free_page(page);
totalram_pages++;
num_physpages++;
}
@@ -376,7 +376,7 @@ void __init mem_init(void)
struct page *page = pfn_to_page(pfn);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index b33a4443f5a..fec8e65b36e 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -115,7 +115,7 @@ static void __init cell_spuprop_present(struct device_node *spe,
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page = pfn_to_page(pfn);
set_page_links(page, ZONE_DMA, node_id, pfn);
- set_page_count(page, 1);
+ init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 685fd0defe2..61465ec88bc 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -223,6 +223,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
struct page *end = page + (1 << order);
+ split_page(page, order);
+
/*
* Set the "dma handle"
*/
@@ -231,7 +233,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
do {
BUG_ON(!pte_none(*pte));
- set_page_count(page, 1);
SetPageReserved(page);
set_pte_at(&init_mm, vaddr,
pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
@@ -244,7 +245,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
* Free the otherwise unused pages.
*/
while (page < end) {
- set_page_count(page, 1);
__free_page(page);
page++;
}
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
index 134db5c0420..cb1c294fb93 100644
--- a/arch/ppc/mm/init.c
+++ b/arch/ppc/mm/init.c
@@ -140,7 +140,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name)
while (start < end) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
cnt++;
start += PAGE_SIZE;
@@ -172,7 +172,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
@@ -441,7 +441,7 @@ void __init mem_init(void)
struct page *page = mem_map + pfn;
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index df953383724..a055894f3bd 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -292,7 +292,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -307,7 +307,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index df3a9e452cc..ee73e30263a 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -23,6 +23,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
page = alloc_pages(gfp, order);
if (!page)
return NULL;
+ split_page(page, order);
ret = page_address(page);
*handle = virt_to_phys(ret);
@@ -37,8 +38,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
end = page + (1 << order);
while (++page < end) {
- set_page_count(page, 1);
-
/* Free any unused pages */
if (page >= free) {
__free_page(page);
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 6b7a7688c98..a3568fd5150 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index e342565f75f..77b4a838fe1 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -273,7 +273,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -286,7 +286,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index ed6a505b3ee..3d89f2a6c78 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
index a65e8bb2c3c..1169757fb38 100644
--- a/arch/sh64/mm/init.c
+++ b/arch/sh64/mm/init.c
@@ -173,7 +173,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -186,7 +186,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 40d426cce82..4219dd2ce3a 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -266,19 +266,19 @@ void __init smp4d_boot_cpus(void)
/* Free unneeded trap tables */
ClearPageReserved(virt_to_page(trapbase_cpu1));
- set_page_count(virt_to_page(trapbase_cpu1), 1);
+ init_page_count(virt_to_page(trapbase_cpu1));
free_page((unsigned long)trapbase_cpu1);
totalram_pages++;
num_physpages++;
ClearPageReserved(virt_to_page(trapbase_cpu2));
- set_page_count(virt_to_page(trapbase_cpu2), 1);
+ init_page_count(virt_to_page(trapbase_cpu2));
free_page((unsigned long)trapbase_cpu2);
totalram_pages++;
num_physpages++;
ClearPageReserved(virt_to_page(trapbase_cpu3));
- set_page_count(virt_to_page(trapbase_cpu3), 1);
+ init_page_count(virt_to_page(trapbase_cpu3));
free_page((unsigned long)trapbase_cpu3);
totalram_pages++;
num_physpages++;
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index a21f27d10e5..fbbd8a474c4 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -233,21 +233,21 @@ void __init smp4m_boot_cpus(void)
/* Free unneeded trap tables */
if (!cpu_isset(i, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu1));
- set_page_count(virt_to_page(trapbase_cpu1), 1);
+ init_page_count(virt_to_page(trapbase_cpu1));
free_page((unsigned long)trapbase_cpu1);
totalram_pages++;
num_physpages++;
}
if (!cpu_isset(2, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu2));
- set_page_count(virt_to_page(trapbase_cpu2), 1);
+ init_page_count(virt_to_page(trapbase_cpu2));
free_page((unsigned long)trapbase_cpu2);
totalram_pages++;
num_physpages++;
}
if (!cpu_isset(3, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu3));
- set_page_count(virt_to_page(trapbase_cpu3), 1);
+ init_page_count(virt_to_page(trapbase_cpu3));
free_page((unsigned long)trapbase_cpu3);
totalram_pages++;
num_physpages++;
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 2cb0728cee0..1ef7fa03fef 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -76,7 +76,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
vma->vm_pgoff = (offset >> PAGE_SHIFT) |
((unsigned long)space << 28UL);
- prot = __pgprot(pg_iobits);
offset -= from;
dir = pgd_offset(mm, from);
flush_cache_range(vma, beg, end);
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index c03babaa049..89866973246 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -383,7 +383,7 @@ void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
struct page *page = pfn_to_page(tmp);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -480,7 +480,7 @@ void free_initmem (void)
p = virt_to_page(addr);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
totalram_pages++;
num_physpages++;
@@ -497,7 +497,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
struct page *p = virt_to_page(start);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
}
diff --git a/arch/sparc/mm/loadmmu.c b/arch/sparc/mm/loadmmu.c
index e9f9571601b..36b4d24988f 100644
--- a/arch/sparc/mm/loadmmu.c
+++ b/arch/sparc/mm/loadmmu.c
@@ -22,8 +22,6 @@ struct ctx_list *ctx_list_pool;
struct ctx_list ctx_free;
struct ctx_list ctx_used;
-unsigned int pg_iobits;
-
extern void ld_mmu_sun4c(void);
extern void ld_mmu_srmmu(void);
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c664b962987..27b0e0ba858 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -2130,6 +2130,13 @@ static unsigned long srmmu_pte_to_pgoff(pte_t pte)
return pte_val(pte) >> SRMMU_PTE_FILE_SHIFT;
}
+static pgprot_t srmmu_pgprot_noncached(pgprot_t prot)
+{
+ prot &= ~__pgprot(SRMMU_CACHE);
+
+ return prot;
+}
+
/* Load up routines and constants for sun4m and sun4d mmu */
void __init ld_mmu_srmmu(void)
{
@@ -2150,9 +2157,9 @@ void __init ld_mmu_srmmu(void)
BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY));
BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL));
page_kernel = pgprot_val(SRMMU_PAGE_KERNEL);
- pg_iobits = SRMMU_VALID | SRMMU_WRITE | SRMMU_REF;
/* Functions */
+ BTFIXUPSET_CALL(pgprot_noncached, srmmu_pgprot_noncached, BTFIXUPCALL_NORM);
#ifndef CONFIG_SMP
BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4md, BTFIXUPCALL_SWAPG1G2);
#endif
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 731f19603ca..49f28c1bdc6 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1589,7 +1589,10 @@ static void sun4c_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
static inline void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr)
{
- unsigned long page_entry;
+ unsigned long page_entry, pg_iobits;
+
+ pg_iobits = _SUN4C_PAGE_PRESENT | _SUN4C_READABLE | _SUN4C_WRITEABLE |
+ _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE;
page_entry = ((physaddr >> PAGE_SHIFT) & SUN4C_PFN_MASK);
page_entry |= ((pg_iobits | _SUN4C_PAGE_PRIV) & ~(_SUN4C_PAGE_PRESENT));
@@ -2134,6 +2137,13 @@ void __init sun4c_paging_init(void)
printk("SUN4C: %d mmu entries for the kernel\n", cnt);
}
+static pgprot_t sun4c_pgprot_noncached(pgprot_t prot)
+{
+ prot |= __pgprot(_SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE);
+
+ return prot;
+}
+
/* Load up routines and constants for sun4c mmu */
void __init ld_mmu_sun4c(void)
{
@@ -2156,10 +2166,9 @@ void __init ld_mmu_sun4c(void)
BTFIXUPSET_INT(page_readonly, pgprot_val(SUN4C_PAGE_READONLY));
BTFIXUPSET_INT(page_kernel, pgprot_val(SUN4C_PAGE_KERNEL));
page_kernel = pgprot_val(SUN4C_PAGE_KERNEL);
- pg_iobits = _SUN4C_PAGE_PRESENT | _SUN4C_READABLE | _SUN4C_WRITEABLE |
- _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE;
/* Functions */
+ BTFIXUPSET_CALL(pgprot_noncached, sun4c_pgprot_noncached, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4c, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(do_check_pgt_cache, sun4c_check_pgt_cache, BTFIXUPCALL_NORM);
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index c3685b314d7..267afddf63c 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -175,11 +175,11 @@ config HUGETLB_PAGE_SIZE_4MB
bool "4MB"
config HUGETLB_PAGE_SIZE_512K
- depends on !SPARC64_PAGE_SIZE_4MB
+ depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB
bool "512K"
config HUGETLB_PAGE_SIZE_64K
- depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB
+ depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB && !SPARC64_PAGE_SIZE_64K
bool "64K"
endchoice
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 95ffa941862..dfccff29e18 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -656,6 +656,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
__pci_mmap_set_flags(dev, vma, mmap_state);
__pci_mmap_set_pgprot(dev, vma, mmap_state);
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = io_remap_pfn_range(vma, vma->vm_start,
vma->vm_pgoff,
vma->vm_end - vma->vm_start,
@@ -663,7 +664,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
if (ret)
return ret;
- vma->vm_flags |= VM_IO;
return 0;
}
diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S
index ab23ddb7116..b731881224e 100644
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -29,15 +29,15 @@
*
* index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
* tsb_base = tsb_reg & ~0x7UL;
- * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask);
+ * tsb_index = ((vaddr >> HASH_SHIFT) & index_mask);
* tsb_ptr = tsb_base + (tsb_index * 16);
*/
-#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, TMP1, TMP2) \
+#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \
and TSB_PTR, 0x7, TMP1; \
mov 512, TMP2; \
andn TSB_PTR, 0x7, TSB_PTR; \
sllx TMP2, TMP1, TMP2; \
- srlx VADDR, PAGE_SHIFT, TMP1; \
+ srlx VADDR, HASH_SHIFT, TMP1; \
sub TMP2, 1, TMP2; \
and TMP1, TMP2, TMP1; \
sllx TMP1, 4, TMP1; \
@@ -53,7 +53,7 @@ sun4v_itlb_miss:
LOAD_ITLB_INFO(%g2, %g4, %g5)
COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v)
- COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7)
+ COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
/* Load TSB tag/pte into %g2/%g3 and compare the tag. */
ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -99,7 +99,7 @@ sun4v_dtlb_miss:
LOAD_DTLB_INFO(%g2, %g4, %g5)
COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v)
- COMPUTE_TSB_PTR(%g1, %g4, %g3, %g7)
+ COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
/* Load TSB tag/pte into %g2/%g3 and compare the tag. */
ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -171,21 +171,26 @@ sun4v_dtsb_miss:
/* fallthrough */
- /* Create TSB pointer into %g1. This is something like:
- *
- * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
- * tsb_base = tsb_reg & ~0x7UL;
- * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask);
- * tsb_ptr = tsb_base + (tsb_index * 16);
- */
sun4v_tsb_miss_common:
- COMPUTE_TSB_PTR(%g1, %g4, %g5, %g7)
+ COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7)
- /* Branch directly to page table lookup. We have SCRATCHPAD_MMU_MISS
- * still in %g2, so it's quite trivial to get at the PGD PHYS value
- * so we can preload it into %g7.
- */
sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+
+#ifdef CONFIG_HUGETLB_PAGE
+ mov SCRATCHPAD_UTSBREG2, %g5
+ ldxa [%g5] ASI_SCRATCHPAD, %g5
+ cmp %g5, -1
+ be,pt %xcc, 80f
+ nop
+ COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7)
+
+ /* That clobbered %g2, reload it. */
+ ldxa [%g0] ASI_SCRATCHPAD, %g2
+ sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
+
+80: stx %g5, [%g2 + TRAP_PER_CPU_TSB_HUGE_TEMP]
+#endif
+
ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath
ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 7f7dba0ca96..df612e4f75f 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2482,6 +2482,7 @@ void init_cur_cpu_trap(struct thread_info *t)
extern void thread_info_offsets_are_bolixed_dave(void);
extern void trap_per_cpu_offsets_are_bolixed_dave(void);
+extern void tsb_config_offsets_are_bolixed_dave(void);
/* Only invoked on boot processor. */
void __init trap_init(void)
@@ -2535,9 +2536,27 @@ void __init trap_init(void)
(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
(TRAP_PER_CPU_CPU_LIST_PA !=
- offsetof(struct trap_per_cpu, cpu_list_pa)))
+ offsetof(struct trap_per_cpu, cpu_list_pa)) ||
+ (TRAP_PER_CPU_TSB_HUGE !=
+ offsetof(struct trap_per_cpu, tsb_huge)) ||
+ (TRAP_PER_CPU_TSB_HUGE_TEMP !=
+ offsetof(struct trap_per_cpu, tsb_huge_temp)))
trap_per_cpu_offsets_are_bolixed_dave();
+ if ((TSB_CONFIG_TSB !=
+ offsetof(struct tsb_config, tsb)) ||
+ (TSB_CONFIG_RSS_LIMIT !=
+ offsetof(struct tsb_config, tsb_rss_limit)) ||
+ (TSB_CONFIG_NENTRIES !=
+ offsetof(struct tsb_config, tsb_nentries)) ||
+ (TSB_CONFIG_REG_VAL !=
+ offsetof(struct tsb_config, tsb_reg_val)) ||
+ (TSB_CONFIG_MAP_VADDR !=
+ offsetof(struct tsb_config, tsb_map_vaddr)) ||
+ (TSB_CONFIG_MAP_PTE !=
+ offsetof(struct tsb_config, tsb_map_pte)))
+ tsb_config_offsets_are_bolixed_dave();
+
/* Attach to the address space of init_task. On SMP we
* do this in smp.c:smp_callin for other cpus.
*/
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 118baea44f6..a0c8ba58920 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -3,8 +3,13 @@
* Copyright (C) 2006 David S. Miller <davem@davemloft.net>
*/
+#include <linux/config.h>
+
#include <asm/tsb.h>
#include <asm/hypervisor.h>
+#include <asm/page.h>
+#include <asm/cpudata.h>
+#include <asm/mmu.h>
.text
.align 32
@@ -34,34 +39,124 @@ tsb_miss_itlb:
ldxa [%g4] ASI_IMMU, %g4
/* At this point we have:
- * %g1 -- TSB entry address
+ * %g1 -- PAGE_SIZE TSB entry address
* %g3 -- FAULT_CODE_{D,I}TLB
* %g4 -- missing virtual address
* %g6 -- TAG TARGET (vaddr >> 22)
*/
tsb_miss_page_table_walk:
- TRAP_LOAD_PGD_PHYS(%g7, %g5)
+ TRAP_LOAD_TRAP_BLOCK(%g7, %g5)
- /* And now we have the PGD base physical address in %g7. */
-tsb_miss_page_table_walk_sun4v_fastpath:
- USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
+ /* Before committing to a full page table walk,
+ * check the huge page TSB.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+
+661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
+ nop
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ mov SCRATCHPAD_UTSBREG2, %g5
+ ldxa [%g5] ASI_SCRATCHPAD, %g5
+ .previous
+
+ cmp %g5, -1
+ be,pt %xcc, 80f
+ nop
+
+ /* We need an aligned pair of registers containing 2 values
+ * which can be easily rematerialized. %g6 and %g7 fit the
+ * bill just nicely. We'll save %g6 away into %g2 for the
+ * huge page TSB TAG comparison.
+ *
+ * Perform a huge page TSB lookup.
+ */
+ mov %g6, %g2
+ and %g5, 0x7, %g6
+ mov 512, %g7
+ andn %g5, 0x7, %g5
+ sllx %g7, %g6, %g7
+ srlx %g4, HPAGE_SHIFT, %g6
+ sub %g7, 1, %g7
+ and %g6, %g7, %g6
+ sllx %g6, 4, %g6
+ add %g5, %g6, %g5
+
+ TSB_LOAD_QUAD(%g5, %g6)
+ cmp %g6, %g2
+ be,a,pt %xcc, tsb_tlb_reload
+ mov %g7, %g5
+
+ /* No match, remember the huge page TSB entry address,
+ * and restore %g6 and %g7.
+ */
+ TRAP_LOAD_TRAP_BLOCK(%g7, %g6)
+ srlx %g4, 22, %g6
+80: stx %g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP]
+
+#endif
+
+ ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7
/* At this point we have:
* %g1 -- TSB entry address
* %g3 -- FAULT_CODE_{D,I}TLB
- * %g5 -- physical address of PTE in Linux page tables
+ * %g4 -- missing virtual address
* %g6 -- TAG TARGET (vaddr >> 22)
+ * %g7 -- page table physical address
+ *
+ * We know that the base PAGE_SIZE TSB and the HPAGE_SIZE
+ * TSB both lack a matching entry.
*/
-tsb_reload:
- TSB_LOCK_TAG(%g1, %g2, %g7)
+tsb_miss_page_table_walk_sun4v_fastpath:
+ USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
- mov 1, %g7
- sllx %g7, TSB_TAG_INVALID_BIT, %g7
- brgez,a,pn %g5, tsb_do_fault
- TSB_STORE(%g1, %g7)
+ brgez,pn %g5, tsb_do_fault
+ nop
+
+#ifdef CONFIG_HUGETLB_PAGE
+661: sethi %uhi(_PAGE_SZALL_4U), %g7
+ sllx %g7, 32, %g7
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ mov _PAGE_SZALL_4V, %g7
+ nop
+ .previous
+
+ and %g5, %g7, %g2
+
+661: sethi %uhi(_PAGE_SZHUGE_4U), %g7
+ sllx %g7, 32, %g7
+ .section .sun4v_2insn_patch, "ax"
+ .word 661b
+ mov _PAGE_SZHUGE_4V, %g7
+ nop
+ .previous
+
+ cmp %g2, %g7
+ bne,pt %xcc, 60f
+ nop
+
+ /* It is a huge page, use huge page TSB entry address we
+ * calculated above.
+ */
+ TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
+ ldx [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2
+ cmp %g2, -1
+ movne %xcc, %g2, %g1
+60:
+#endif
+ /* At this point we have:
+ * %g1 -- TSB entry address
+ * %g3 -- FAULT_CODE_{D,I}TLB
+ * %g5 -- valid PTE
+ * %g6 -- TAG TARGET (vaddr >> 22)
+ */
+tsb_reload:
+ TSB_LOCK_TAG(%g1, %g2, %g7)
TSB_WRITE(%g1, %g5, %g6)
/* Finally, load TLB and return from trap. */
@@ -240,10 +335,9 @@ tsb_flush:
* schedule() time.
*
* %o0: page table physical address
- * %o1: TSB register value
- * %o2: TSB virtual address
- * %o3: TSB mapping locked PTE
- * %o4: Hypervisor TSB descriptor physical address
+ * %o1: TSB base config pointer
+ * %o2: TSB huge config pointer, or NULL if none
+ * %o3: Hypervisor TSB descriptor physical address
*
* We have to run this whole thing with interrupts
* disabled so that the current cpu doesn't change
@@ -253,63 +347,79 @@ tsb_flush:
.globl __tsb_context_switch
.type __tsb_context_switch,#function
__tsb_context_switch:
- rdpr %pstate, %o5
- wrpr %o5, PSTATE_IE, %pstate
+ rdpr %pstate, %g1
+ wrpr %g1, PSTATE_IE, %pstate
+
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
- ldub [%g6 + TI_CPU], %g1
- sethi %hi(trap_block), %g2
- sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1
- or %g2, %lo(trap_block), %g2
- add %g2, %g1, %g2
stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
- sethi %hi(tlb_type), %g1
- lduw [%g1 + %lo(tlb_type)], %g1
- cmp %g1, 3
- bne,pt %icc, 1f
+ ldx [%o1 + TSB_CONFIG_REG_VAL], %o0
+ brz,pt %o2, 1f
+ mov -1, %g3
+
+ ldx [%o2 + TSB_CONFIG_REG_VAL], %g3
+
+1: stx %g3, [%g2 + TRAP_PER_CPU_TSB_HUGE]
+
+ sethi %hi(tlb_type), %g2
+ lduw [%g2 + %lo(tlb_type)], %g2
+ cmp %g2, 3
+ bne,pt %icc, 50f
nop
/* Hypervisor TSB switch. */
- mov SCRATCHPAD_UTSBREG1, %g1
- stxa %o1, [%g1] ASI_SCRATCHPAD
- mov -1, %g2
- mov SCRATCHPAD_UTSBREG2, %g1
- stxa %g2, [%g1] ASI_SCRATCHPAD
-
- /* Save away %o5's %pstate, we have to use %o5 for
- * the hypervisor call.
- */
- mov %o5, %g1
+ mov SCRATCHPAD_UTSBREG1, %o5
+ stxa %o0, [%o5] ASI_SCRATCHPAD
+ mov SCRATCHPAD_UTSBREG2, %o5
+ stxa %g3, [%o5] ASI_SCRATCHPAD
+
+ mov 2, %o0
+ cmp %g3, -1
+ move %xcc, 1, %o0
mov HV_FAST_MMU_TSB_CTXNON0, %o5
- mov 1, %o0
- mov %o4, %o1
+ mov %o3, %o1
ta HV_FAST_TRAP
- /* Finish up and restore %o5. */
+ /* Finish up. */
ba,pt %xcc, 9f
- mov %g1, %o5
+ nop
/* SUN4U TSB switch. */
-1: mov TSB_REG, %g1
- stxa %o1, [%g1] ASI_DMMU
+50: mov TSB_REG, %o5
+ stxa %o0, [%o5] ASI_DMMU
membar #Sync
- stxa %o1, [%g1] ASI_IMMU
+ stxa %o0, [%o5] ASI_IMMU
membar #Sync
-2: brz %o2, 9f
- nop
+2: ldx [%o1 + TSB_CONFIG_MAP_VADDR], %o4
+ brz %o4, 9f
+ ldx [%o1 + TSB_CONFIG_MAP_PTE], %o5
sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2
- mov TLB_TAG_ACCESS, %g1
+ mov TLB_TAG_ACCESS, %g3
lduw [%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
- stxa %o2, [%g1] ASI_DMMU
+ stxa %o4, [%g3] ASI_DMMU
membar #Sync
sllx %g2, 3, %g2
- stxa %o3, [%g2] ASI_DTLB_DATA_ACCESS
+ stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS
+ membar #Sync
+
+ brz,pt %o2, 9f
+ nop
+
+ ldx [%o2 + TSB_CONFIG_MAP_VADDR], %o4
+ ldx [%o2 + TSB_CONFIG_MAP_PTE], %o5
+ mov TLB_TAG_ACCESS, %g3
+ stxa %o4, [%g3] ASI_DMMU
+ membar #Sync
+ sub %g2, (1 << 3), %g2
+ stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS
membar #Sync
+
9:
- wrpr %o5, %pstate
+ wrpr %g1, %pstate
retl
nop
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 63b6cc0cd5d..d21ff3230c0 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -410,9 +410,18 @@ good_area:
up_read(&mm->mmap_sem);
mm_rss = get_mm_rss(mm);
- if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
- tsb_grow(mm, mm_rss);
-
+#ifdef CONFIG_HUGETLB_PAGE
+ mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
+#endif
+ if (unlikely(mm_rss >=
+ mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
+ tsb_grow(mm, MM_TSB_BASE, mm_rss);
+#ifdef CONFIG_HUGETLB_PAGE
+ mm_rss = mm->context.huge_pte_count;
+ if (unlikely(mm_rss >=
+ mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
+ tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+#endif
return;
/*
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index 5fc5c579e35..8cb06205d26 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -140,7 +140,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
vma->vm_pgoff = phys_base >> PAGE_SHIFT;
- prot = __pgprot(pg_iobits);
offset -= from;
dir = pgd_offset(mm, from);
flush_cache_range(vma, beg, end);
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index a7a24869d04..074620d413d 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -199,13 +199,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- if (pgd) {
- pud = pud_offset(pgd, addr);
- if (pud) {
- pmd = pmd_alloc(mm, pud, addr);
- if (pmd)
- pte = pte_alloc_map(mm, pmd, addr);
- }
+ pud = pud_alloc(mm, pgd, addr);
+ if (pud) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (pmd)
+ pte = pte_alloc_map(mm, pmd, addr);
}
return pte;
}
@@ -231,13 +229,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return pte;
}
-#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
int i;
+ if (!pte_present(*ptep) && pte_present(entry))
+ mm->context.huge_pte_count++;
+
for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
set_pte_at(mm, addr, ptep, entry);
ptep++;
@@ -253,6 +252,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
int i;
entry = *ptep;
+ if (pte_present(entry))
+ mm->context.huge_pte_count--;
for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
pte_clear(mm, addr, ptep);
@@ -263,18 +264,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
@@ -302,6 +291,15 @@ static void context_reload(void *__data)
void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
+ struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+
+ if (likely(tp->tsb != NULL))
+ return;
+
+ tsb_grow(mm, MM_TSB_HUGE, 0);
+ tsb_context_switch(mm);
+ smp_tsb_sync(mm);
+
/* On UltraSPARC-III+ and later, configure the second half of
* the Data-TLB for huge pages.
*/
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index c2b556106fc..ded63ee9c4f 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -283,6 +283,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
struct mm_struct *mm;
struct tsb *tsb;
unsigned long tag, flags;
+ unsigned long tsb_index, tsb_hash_shift;
if (tlb_type != hypervisor) {
unsigned long pfn = pte_pfn(pte);
@@ -312,10 +313,26 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
mm = vma->vm_mm;
+ tsb_index = MM_TSB_BASE;
+ tsb_hash_shift = PAGE_SHIFT;
+
spin_lock_irqsave(&mm->context.lock, flags);
- tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
- (mm->context.tsb_nentries - 1UL)];
+#ifdef CONFIG_HUGETLB_PAGE
+ if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
+ if ((tlb_type == hypervisor &&
+ (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
+ (tlb_type != hypervisor &&
+ (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) {
+ tsb_index = MM_TSB_HUGE;
+ tsb_hash_shift = HPAGE_SHIFT;
+ }
+ }
+#endif
+
+ tsb = mm->context.tsb_block[tsb_index].tsb;
+ tsb += ((address >> tsb_hash_shift) &
+ (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
tag = (address >> 22UL);
tsb_insert(tsb, tag, pte_val(pte));
@@ -1461,7 +1478,7 @@ void free_initmem(void)
p = virt_to_page(page);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
@@ -1477,7 +1494,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
struct page *p = virt_to_page(start);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index b2064e2a44d..beaa02810f0 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -15,9 +15,9 @@
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
-static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries)
+static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
- vaddr >>= PAGE_SHIFT;
+ vaddr >>= hash_shift;
return vaddr & (nentries - 1);
}
@@ -36,7 +36,8 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
unsigned long v;
for (v = start; v < end; v += PAGE_SIZE) {
- unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
+ unsigned long hash = tsb_hash(v, PAGE_SHIFT,
+ KERNEL_TSB_NENTRIES);
struct tsb *ent = &swapper_tsb[hash];
if (tag_compare(ent->tag, v)) {
@@ -46,49 +47,91 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
}
}
-void flush_tsb_user(struct mmu_gather *mp)
+static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
{
- struct mm_struct *mm = mp->mm;
- unsigned long nentries, base, flags;
- struct tsb *tsb;
- int i;
-
- spin_lock_irqsave(&mm->context.lock, flags);
-
- tsb = mm->context.tsb;
- nentries = mm->context.tsb_nentries;
+ unsigned long i;
- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
- base = __pa(tsb);
- else
- base = (unsigned long) tsb;
-
for (i = 0; i < mp->tlb_nr; i++) {
unsigned long v = mp->vaddrs[i];
unsigned long tag, ent, hash;
v &= ~0x1UL;
- hash = tsb_hash(v, nentries);
- ent = base + (hash * sizeof(struct tsb));
+ hash = tsb_hash(v, hash_shift, nentries);
+ ent = tsb + (hash * sizeof(struct tsb));
tag = (v >> 22UL);
tsb_flush(ent, tag);
}
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+ struct mm_struct *mm = mp->mm;
+ unsigned long nentries, base, flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+ base = __pa(base);
+ __flush_tsb_one(mp, PAGE_SHIFT, base, nentries);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+ base = __pa(base);
+ __flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
+ }
+#endif
spin_unlock_irqrestore(&mm->context.lock, flags);
}
-static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
+#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
+#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
+#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
+#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
+#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB)
+#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_512K
+#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_512K
+#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB)
+#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_4MB
+#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_4MB
+#else
+#error Broken base page size setting...
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
+#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
+#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
+#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
+#else
+#error Broken huge page size setting...
+#endif
+#endif
+
+static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
unsigned long tsb_reg, base, tsb_paddr;
unsigned long page_sz, tte;
- mm->context.tsb_nentries = tsb_bytes / sizeof(struct tsb);
+ mm->context.tsb_block[tsb_idx].tsb_nentries =
+ tsb_bytes / sizeof(struct tsb);
base = TSBMAP_BASE;
tte = pgprot_val(PAGE_KERNEL_LOCKED);
- tsb_paddr = __pa(mm->context.tsb);
+ tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
/* Use the smallest page size that can map the whole TSB
@@ -147,61 +190,49 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
/* Physical mapping, no locked TLB entry for TSB. */
tsb_reg |= tsb_paddr;
- mm->context.tsb_reg_val = tsb_reg;
- mm->context.tsb_map_vaddr = 0;
- mm->context.tsb_map_pte = 0;
+ mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
+ mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
+ mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
} else {
tsb_reg |= base;
tsb_reg |= (tsb_paddr & (page_sz - 1UL));
tte |= (tsb_paddr & ~(page_sz - 1UL));
- mm->context.tsb_reg_val = tsb_reg;
- mm->context.tsb_map_vaddr = base;
- mm->context.tsb_map_pte = tte;
+ mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
+ mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
+ mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
}
/* Setup the Hypervisor TSB descriptor. */
if (tlb_type == hypervisor) {
- struct hv_tsb_descr *hp = &mm->context.tsb_descr;
+ struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
- switch (PAGE_SIZE) {
- case 8192:
- default:
- hp->pgsz_idx = HV_PGSZ_IDX_8K;
+ switch (tsb_idx) {
+ case MM_TSB_BASE:
+ hp->pgsz_idx = HV_PGSZ_IDX_BASE;
break;
-
- case 64 * 1024:
- hp->pgsz_idx = HV_PGSZ_IDX_64K;
- break;
-
- case 512 * 1024:
- hp->pgsz_idx = HV_PGSZ_IDX_512K;
- break;
-
- case 4 * 1024 * 1024:
- hp->pgsz_idx = HV_PGSZ_IDX_4MB;
+#ifdef CONFIG_HUGETLB_PAGE
+ case MM_TSB_HUGE:
+ hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
break;
+#endif
+ default:
+ BUG();
};
hp->assoc = 1;
hp->num_ttes = tsb_bytes / 16;
hp->ctx_idx = 0;
- switch (PAGE_SIZE) {
- case 8192:
- default:
- hp->pgsz_mask = HV_PGSZ_MASK_8K;
- break;
-
- case 64 * 1024:
- hp->pgsz_mask = HV_PGSZ_MASK_64K;
- break;
-
- case 512 * 1024:
- hp->pgsz_mask = HV_PGSZ_MASK_512K;
+ switch (tsb_idx) {
+ case MM_TSB_BASE:
+ hp->pgsz_mask = HV_PGSZ_MASK_BASE;
break;
-
- case 4 * 1024 * 1024:
- hp->pgsz_mask = HV_PGSZ_MASK_4MB;
+#ifdef CONFIG_HUGETLB_PAGE
+ case MM_TSB_HUGE:
+ hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
break;
+#endif
+ default:
+ BUG();
};
hp->tsb_base = tsb_paddr;
hp->resv = 0;
@@ -241,11 +272,11 @@ void __init tsb_cache_init(void)
}
}
-/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
- * do_sparc64_fault() invokes this routine to try and grow the TSB.
+/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
+ * do_sparc64_fault() invokes this routine to try and grow it.
*
* When we reach the maximum TSB size supported, we stick ~0UL into
- * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
+ * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
* will not trigger any longer.
*
* The TSB can be anywhere from 8K to 1MB in size, in increasing powers
@@ -257,7 +288,7 @@ void __init tsb_cache_init(void)
* the number of entries that the current TSB can hold at once. Currently,
* we trigger when the RSS hits 3/4 of the TSB capacity.
*/
-void tsb_grow(struct mm_struct *mm, unsigned long rss)
+void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
unsigned long max_tsb_size = 1 * 1024 * 1024;
unsigned long new_size, old_size, flags;
@@ -297,7 +328,8 @@ retry_tsb_alloc:
* down to a 0-order allocation and force no TSB
* growing for this address space.
*/
- if (mm->context.tsb == NULL && new_cache_index > 0) {
+ if (mm->context.tsb_block[tsb_index].tsb == NULL &&
+ new_cache_index > 0) {
new_cache_index = 0;
new_size = 8192;
new_rss_limit = ~0UL;
@@ -307,8 +339,8 @@ retry_tsb_alloc:
/* If we failed on a TSB grow, we are under serious
* memory pressure so don't try to grow any more.
*/
- if (mm->context.tsb != NULL)
- mm->context.tsb_rss_limit = ~0UL;
+ if (mm->context.tsb_block[tsb_index].tsb != NULL)
+ mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
return;
}
@@ -339,23 +371,26 @@ retry_tsb_alloc:
*/
spin_lock_irqsave(&mm->context.lock, flags);
- old_tsb = mm->context.tsb;
- old_cache_index = (mm->context.tsb_reg_val & 0x7UL);
- old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+ old_tsb = mm->context.tsb_block[tsb_index].tsb;
+ old_cache_index =
+ (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
+ old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
+ sizeof(struct tsb));
/* Handle multiple threads trying to grow the TSB at the same time.
* One will get in here first, and bump the size and the RSS limit.
* The others will get in here next and hit this check.
*/
- if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
+ if (unlikely(old_tsb &&
+ (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
spin_unlock_irqrestore(&mm->context.lock, flags);
kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
return;
}
- mm->context.tsb_rss_limit = new_rss_limit;
+ mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
if (old_tsb) {
extern void copy_tsb(unsigned long old_tsb_base,
@@ -372,8 +407,8 @@ retry_tsb_alloc:
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
}
- mm->context.tsb = new_tsb;
- setup_tsb_params(mm, new_size);
+ mm->context.tsb_block[tsb_index].tsb = new_tsb;
+ setup_tsb_params(mm, tsb_index, new_size);
spin_unlock_irqrestore(&mm->context.lock, flags);
@@ -394,40 +429,65 @@ retry_tsb_alloc:
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
+#ifdef CONFIG_HUGETLB_PAGE
+ unsigned long huge_pte_count;
+#endif
+ unsigned int i;
+
spin_lock_init(&mm->context.lock);
mm->context.sparc64_ctx_val = 0UL;
+#ifdef CONFIG_HUGETLB_PAGE
+ /* We reset it to zero because the fork() page copying
+ * will re-increment the counters as the parent PTEs are
+ * copied into the child address space.
+ */
+ huge_pte_count = mm->context.huge_pte_count;
+ mm->context.huge_pte_count = 0;
+#endif
+
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
* will be confused and think there is an older TSB to free up.
*/
- mm->context.tsb = NULL;
+ for (i = 0; i < MM_NUM_TSBS; i++)
+ mm->context.tsb_block[i].tsb = NULL;
/* If this is fork, inherit the parent's TSB size. We would
* grow it to that size on the first page fault anyways.
*/
- tsb_grow(mm, get_mm_rss(mm));
+ tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
- if (unlikely(!mm->context.tsb))
+#ifdef CONFIG_HUGETLB_PAGE
+ if (unlikely(huge_pte_count))
+ tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
+#endif
+
+ if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
return -ENOMEM;
return 0;
}
-void destroy_context(struct mm_struct *mm)
+static void tsb_destroy_one(struct tsb_config *tp)
{
- unsigned long flags, cache_index;
+ unsigned long cache_index;
- cache_index = (mm->context.tsb_reg_val & 0x7UL);
- kmem_cache_free(tsb_caches[cache_index], mm->context.tsb);
+ if (!tp->tsb)
+ return;
+ cache_index = tp->tsb_reg_val & 0x7UL;
+ kmem_cache_free(tsb_caches[cache_index], tp->tsb);
+ tp->tsb = NULL;
+ tp->tsb_reg_val = 0UL;
+}
- /* We can remove these later, but for now it's useful
- * to catch any bogus post-destroy_context() references
- * to the TSB.
- */
- mm->context.tsb = NULL;
- mm->context.tsb_reg_val = 0UL;
+void destroy_context(struct mm_struct *mm)
+{
+ unsigned long flags, i;
+
+ for (i = 0; i < MM_NUM_TSBS; i++)
+ tsb_destroy_one(&mm->context.tsb_block[i]);
spin_lock_irqsave(&ctx_alloc_lock, flags);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index fa4f915be5c..92cce96b5e2 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -57,7 +57,7 @@ static void setup_highmem(unsigned long highmem_start,
for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){
page = &mem_map[highmem_pfn + i];
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
}
}
@@ -296,7 +296,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
(end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 544665e0451..0e65340eee3 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -279,7 +279,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
for(i = 0; i < total_pages; i++){
p = &map[i];
- set_page_count(p, 0);
+ memset(p, 0, sizeof(struct page));
SetPageReserved(p);
INIT_LIST_HEAD(&p->lru);
}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 3080f84bf7b..ee5ce3d3cbc 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -477,7 +477,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
return IRQ_HANDLED;
}
-static unsigned int cyc2ns_scale;
+static unsigned int cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 3496abc8d37..c9dc7e46731 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -124,6 +124,7 @@ extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 7af1742aa95..40ed13d263c 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -486,7 +486,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
@@ -592,7 +592,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
@@ -632,7 +632,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 35f1f1aab06..531ad21447b 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
pte_t *pbase;
if (!base)
return NULL;
+ /*
+ * page_private is used to track the number of entries in
+ * the page table page that have non-standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address)
on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
}
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
- unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
-static inline void save_page(unsigned long address, struct page *fpage)
+static inline void save_page(struct page *fpage)
{
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map(address);
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df->address = address;
- df_list = df;
- }
+ fpage->lru.next = (struct list_head *)deferred_pages;
+ deferred_pages = fpage;
}
/*
@@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte, pfn_pte(pfn, prot));
} else {
/*
- * split_large_page will take the reference for this change_page_attr
- * on the split page.
+ * split_large_page will take the reference for this
+ * change_page_attr on the split page.
*/
struct page *split;
@@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte,mk_pte(split, ref_prot2));
kpte_page = split;
}
- get_page(kpte_page);
+ page_private(kpte_page)++;
} else if ((kpte_flags & _PAGE_PSE) == 0) {
set_pte(kpte, pfn_pte(pfn, ref_prot));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
/* on x86-64 the direct mapping set at boot is not using 4k pages */
BUG_ON(PageReserved(kpte_page));
- switch (page_count(kpte_page)) {
- case 1:
- save_page(address, kpte_page);
+ if (page_private(kpte_page) == 0) {
+ save_page(kpte_page);
revert_page(address, ref_prot);
- break;
- case 0:
- BUG(); /* memleak and failed 2M page regeneration */
}
return 0;
}
@@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
void global_flush_tlb(void)
{
- struct deferred_page *df, *next_df;
+ struct page *dpage;
down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
+ dpage = xchg(&deferred_pages, NULL);
up_read(&init_mm.mmap_sem);
- flush_map((df && !df->next) ? df->address : 0);
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
+
+ flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
+ while (dpage) {
+ struct page *tmp = dpage;
+ dpage = (struct page *)dpage->lru.next;
+ ClearPagePrivate(tmp);
+ __free_page(tmp);
}
}
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 5a91d6c9e66..e1be4235f36 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -272,7 +272,7 @@ free_reserved_mem(void *start, void *end)
{
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page((unsigned long)start);
totalram_pages++;
}
diff --git a/arch/xtensa/mm/pgtable.c b/arch/xtensa/mm/pgtable.c
index e5e119c820e..7d28914d11c 100644
--- a/arch/xtensa/mm/pgtable.c
+++ b/arch/xtensa/mm/pgtable.c
@@ -14,25 +14,21 @@
pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte, p;
+ pte_t *pte = NULL, *p;
int color = ADDR_COLOR(address);
int i;
p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER);
if (likely(p)) {
- struct page *page;
-
- for (i = 0; i < COLOR_SIZE; i++, p++) {
- page = virt_to_page(pte);
-
- set_page_count(page, 1);
- ClearPageCompound(page);
+ split_page(virt_to_page(p), COLOR_ORDER);
+ for (i = 0; i < COLOR_SIZE; i++) {
if (ADDR_COLOR(p) == color)
pte = p;
else
free_page(p);
+ p += PTRS_PER_PTE;
}
clear_page(pte);
}
@@ -49,20 +45,20 @@ int flush;
struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page, p;
+ struct page *page = NULL, *p;
int color = ADDR_COLOR(address);
p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
if (likely(p)) {
- for (i = 0; i < PAGE_ORDER; i++) {
- set_page_count(p, 1);
- ClearPageCompound(p);
+ split_page(p, COLOR_ORDER);
- if (PADDR_COLOR(page_address(pg)) == color)
+ for (i = 0; i < PAGE_ORDER; i++) {
+ if (PADDR_COLOR(page_address(p)) == color)
page = p;
else
- free_page(p);
+ __free_page(p);
+ p++;
}
clear_highpage(page);
}