about | summary | refs | log | tree | commit | diff
path: root/arch/i386/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/mm')
-rw-r--r-- arch/i386/mm/boot_ioremap.c 1
-rw-r--r-- arch/i386/mm/discontig.c 30
-rw-r--r-- arch/i386/mm/fault.c 12
-rw-r--r-- arch/i386/mm/highmem.c 26
-rw-r--r-- arch/i386/mm/hugetlbpage.c 112
-rw-r--r-- arch/i386/mm/init.c 20
-rw-r--r-- arch/i386/mm/pageattr.c 24
-rw-r--r-- arch/i386/mm/pgtable.c 13
8 files changed, 189 insertions, 49 deletions
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 4de11f508c3..4de95a17a7d 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -16,6 +16,7 @@
*/
#undef CONFIG_X86_PAE
+#undef CONFIG_PARAVIRT
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index ddbdb0336f2..e0c390d6ceb 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -168,7 +168,7 @@ static void __init allocate_pgdat(int nid)
if (nid && node_has_online_mem(nid))
NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
else {
- NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
+ NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
min_low_pfn += PFN_UP(sizeof(pg_data_t));
}
}
@@ -405,3 +405,31 @@ void __init set_highmem_pages_init(int bad_ppro)
totalram_pages += totalhigh_pages;
#endif
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* Return the id of the node whose pfn range contains addr, or -1 if none. */
+int paddr_to_nid(u64 addr)
+{
+	unsigned long pfn = PFN_DOWN(addr);
+	int nid;
+
+	for_each_node(nid) {
+		/* below this node's first pfn: try the next node */
+		if (pfn < node_start_pfn[nid])
+			continue;
+		/* node_end_pfn[] is exclusive */
+		if (pfn < node_end_pfn[nid])
+			return nid;
+	}
+
+	return -1;
+}
+
+/*
+ * Resolve a physical address to a node id BEFORE memmap and mem_section
+ * have been initialized (pfn_to_nid() can't be used yet).
+ * This is how the node id is found when ACPI's DSDT does not define _PXM.
+ * A negative result from paddr_to_nid() is mapped to node 0.
+ */
+int memory_add_physaddr_to_nid(u64 addr)
+{
+	int nid = paddr_to_nid(addr);
+
+	if (nid < 0)
+		nid = 0;
+	return nid;
+}
+
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 2581575786c..aaaa4d225f7 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -22,9 +22,9 @@
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/kprobes.h>
+#include <linux/uaccess.h>
#include <asm/system.h>
-#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/segment.h>
@@ -167,7 +167,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
{
unsigned long limit;
- unsigned long instr = get_segment_eip (regs, &limit);
+ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
int scan_more = 1;
int prefetch = 0;
int i;
@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
unsigned char instr_hi;
unsigned char instr_lo;
- if (instr > limit)
+ if (instr > (unsigned char *)limit)
break;
- if (__get_user(opcode, (unsigned char __user *) instr))
+ if (probe_kernel_address(instr, opcode))
break;
instr_hi = opcode & 0xf0;
@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
case 0x00:
/* Prefetch instruction is 0x0F0D or 0x0F18 */
scan_more = 0;
- if (instr > limit)
+ if (instr > (unsigned char *)limit)
break;
- if (__get_user(opcode, (unsigned char __user *) instr))
+ if (probe_kernel_address(instr, opcode))
break;
prefetch = (instr_lo == 0xF) &&
(opcode == 0x0D || opcode == 0x18);
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index f9f647cdbc7..e0fa6cb655a 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -32,7 +32,7 @@ void *kmap_atomic(struct page *page, enum km_type type)
unsigned long vaddr;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
- inc_preempt_count();
+ pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -50,26 +50,22 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
-#ifdef CONFIG_DEBUG_HIGHMEM
- if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
- dec_preempt_count();
- preempt_check_resched();
- return;
- }
-
- if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
- BUG();
-#endif
/*
* Force other mappings to Oops if they'll try to access this pte
* without first remap it. Keeping stale mappings around is a bad idea
* also, in case the page changes cacheability attributes or becomes
* a protected page in a hypervisor.
*/
- kpte_clear_flush(kmap_pte-idx, vaddr);
+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ kpte_clear_flush(kmap_pte-idx, vaddr);
+ else {
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(vaddr < PAGE_OFFSET);
+ BUG_ON(vaddr >= (unsigned long)high_memory);
+#endif
+ }
- dec_preempt_count();
- preempt_check_resched();
+ pagefault_enable();
}
/* This is the same as kmap_atomic() but can map memory that doesn't
@@ -80,7 +76,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
enum fixed_addresses idx;
unsigned long vaddr;
- inc_preempt_count();
+ pagefault_disable();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 1719a8141f8..34728e4afe4 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -17,6 +17,113 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+/*
+ * Check whether svma's page table can be reused for page offset idx, which
+ * vma maps at addr.  Returns the address at which svma maps idx, or 0 when
+ * the pmd index, the vm_flags or svma's coverage of the PUD-sized window
+ * around that address does not match.
+ */
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	/* address at which svma maps page offset idx */
+	unsigned long saddr = svma->vm_start +
+				((idx - svma->vm_pgoff) << PAGE_SHIFT);
+	/* PUD-aligned, PUD_SIZE window around saddr */
+	unsigned long win_start = saddr & PUD_MASK;
+	unsigned long win_end = win_start + PUD_SIZE;
+
+	/* addr and saddr must have the same pmd index */
+	if (pmd_index(addr) != pmd_index(saddr))
+		return 0;
+	/* the two vmas must carry identical vm_flags */
+	if (vma->vm_flags != svma->vm_flags)
+		return 0;
+	/* svma has to span the entire window */
+	if (win_start < svma->vm_start || svma->vm_end < win_end)
+		return 0;
+
+	return saddr;
+}
+
+/*
+ * Return 1 when vma is VM_MAYSHARE and fully covers the PUD-aligned,
+ * PUD_SIZE range containing addr; otherwise return 0.
+ */
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long pud_start = addr & PUD_MASK;
+	unsigned long pud_end = pud_start + PUD_SIZE;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return 0;
+	if (pud_start < vma->vm_start || vma->vm_end < pud_end)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * search for a shareable pmd page for hugetlb.
+ *
+ * Walks the other vmas that map the same page offset of the backing file.
+ * The first one accepted by page_table_shareable() for which
+ * huge_pte_offset() finds an entry has the page holding that entry
+ * referenced; that page is installed in *pud if *pud is still empty,
+ * otherwise the reference is dropped again.
+ */
+static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	/* NOTE(review): vma and vma->vm_file are used unchecked - confirm callers */
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = vma->vm_pgoff +
+			((addr - vma->vm_start) >> PAGE_SHIFT);
+	struct vm_area_struct *svma;
+	struct prio_tree_iter iter;
+	pte_t *spte = NULL;
+
+	if (!vma_shareable(vma, addr))
+		return;
+
+	/* i_mmap_lock stays held until the pud has been dealt with */
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+		unsigned long saddr;
+
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (!saddr)
+			continue;
+
+		spte = huge_pte_offset(svma->vm_mm, saddr);
+		if (!spte)
+			continue;
+
+		/* take a reference on the page holding spte, then stop */
+		get_page(virt_to_page(spte));
+		break;
+	}
+
+	if (spte) {
+		spin_lock(&mm->page_table_lock);
+		if (pud_none(*pud))
+			pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK);
+		else
+			put_page(virt_to_page(spte));
+		spin_unlock(&mm->page_table_lock);
+	}
+
+	spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * The page holding a hugetlb pte is reference counted when it is mapped.
+ * page_count > 1 means the pte page is shared: it is unmapped by clearing
+ * the pud and dropping one reference.  page_count == 1 means it is not
+ * shared and nothing is done here.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * On success *addr is rewritten to
+ * ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE.
+ * NOTE(review): assuming ALIGN() leaves an already aligned value unchanged,
+ * an aligned *addr steps back by HPAGE_SIZE - confirm callers cope.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	struct page *pg = virt_to_page(ptep);
+	pud_t *pud = pud_offset(pgd_offset(mm, *addr), *addr);
+
+	BUG_ON(page_count(pg) == 0);
+	if (page_count(pg) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(pg);
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
@@ -25,8 +132,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- if (pud)
+ if (pud) {
+ if (pud_none(*pud))
+ huge_pmd_share(mm, addr, pud);
pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ }
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
return pte;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 167416155ee..c5c5ea700cc 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -192,8 +192,6 @@ static inline int page_kills_ppro(unsigned long pagenr)
return 0;
}
-extern int is_available_memory(efi_memory_desc_t *);
-
int page_is_ram(unsigned long pagenr)
{
int i;
@@ -285,7 +283,7 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
SetPageReserved(page);
}
-static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
free_new_highpage(page);
totalram_pages++;
@@ -302,7 +300,7 @@ static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
* has been added dynamically that would be
* onlined here is in HIGHMEM
*/
-void online_page(struct page *page)
+void __meminit online_page(struct page *page)
{
ClearPageReserved(page);
add_one_highpage_hotplug(page, page_to_pfn(page));
@@ -675,16 +673,10 @@ void __init mem_init(void)
#endif
}
-/*
- * this is for the non-NUMA, single node SMP system case.
- * Specifically, in the case of x86, we will always add
- * memory to the highmem for now.
- */
#ifdef CONFIG_MEMORY_HOTPLUG
-#ifndef CONFIG_NEED_MULTIPLE_NODES
int arch_add_memory(int nid, u64 start, u64 size)
{
- struct pglist_data *pgdata = &contig_page_data;
+ struct pglist_data *pgdata = NODE_DATA(nid);
struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -696,11 +688,11 @@ int remove_memory(u64 start, u64 size)
{
return -EINVAL;
}
-#endif
+EXPORT_SYMBOL_GPL(remove_memory);
#endif
-kmem_cache_t *pgd_cache;
-kmem_cache_t *pmd_cache;
+struct kmem_cache *pgd_cache;
+struct kmem_cache *pmd_cache;
void __init pgtable_cache_init(void)
{
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 8564b6ae17e..ad91528bdc1 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -67,11 +67,17 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
return base;
}
-static void flush_kernel_map(void *dummy)
+static void flush_kernel_map(void *arg)
{
- /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
- if (boot_cpu_data.x86_model >= 4)
+ unsigned long adr = (unsigned long)arg;
+
+ if (adr && cpu_has_clflush) {
+ int i;
+ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+ asm volatile("clflush (%0)" :: "r" (adr + i));
+ } else if (boot_cpu_data.x86_model >= 4)
wbinvd();
+
/* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
@@ -173,9 +179,9 @@ __change_page_attr(struct page *page, pgprot_t prot)
return 0;
}
-static inline void flush_map(void)
+static inline void flush_map(void *adr)
{
- on_each_cpu(flush_kernel_map, NULL, 1, 1);
+ on_each_cpu(flush_kernel_map, adr, 1, 1);
}
/*
@@ -217,9 +223,13 @@ void global_flush_tlb(void)
spin_lock_irq(&cpa_lock);
list_replace_init(&df_list, &l);
spin_unlock_irq(&cpa_lock);
- flush_map();
- list_for_each_entry_safe(pg, next, &l, lru)
+ if (!cpu_has_clflush)
+ flush_map(0);
+ list_for_each_entry_safe(pg, next, &l, lru) {
+ if (cpu_has_clflush)
+ flush_map(page_address(pg));
__free_page(pg);
+ }
}
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 10126e3f817..f349eaf450b 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -95,8 +95,11 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
return;
}
pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
+ if (pgprot_val(flags))
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte(pfn, flags));
+ else
+ pte_clear(&init_mm, vaddr, pte);
/*
* It's enough to flush this one mapping.
@@ -193,7 +196,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
return pte;
}
-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
{
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
@@ -233,7 +236,7 @@ static inline void pgd_list_del(pgd_t *pgd)
set_page_private(next, (unsigned long)pprev);
}
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags;
@@ -253,7 +256,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
}
/* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */