aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb.c11
-rw-r--r--mm/madvise.c13
-rw-r--r--mm/memory.c73
-rw-r--r--mm/mempolicy.c4
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/nommu.c6
-rw-r--r--mm/page_alloc.c25
8 files changed, 85 insertions, 55 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbd1111ea11..6bf720bc662 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -301,6 +301,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
+ pte_t *ptep;
pte_t pte;
struct page *page;
@@ -309,9 +310,17 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
BUG_ON(end & ~HPAGE_MASK);
for (address = start; address < end; address += HPAGE_SIZE) {
- pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address));
+ ptep = huge_pte_offset(mm, address);
+ if (! ptep)
+ /* This can happen on truncate, or if an
+ * mmap() is aborted due to an error before
+ * the prefault */
+ continue;
+
+ pte = huge_ptep_get_and_clear(mm, address, ptep);
if (pte_none(pte))
continue;
+
page = pte_page(pte);
put_page(page);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 73180a22877..c8c01a12fea 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -83,9 +83,6 @@ static long madvise_willneed(struct vm_area_struct * vma,
{
struct file *file = vma->vm_file;
- if (!file)
- return -EBADF;
-
if (file->f_mapping->a_ops->get_xip_page) {
/* no bad return value, but ignore advice */
return 0;
@@ -140,11 +137,16 @@ static long madvise_dontneed(struct vm_area_struct * vma,
return 0;
}
-static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- unsigned long start, unsigned long end, int behavior)
+static long
+madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+ unsigned long start, unsigned long end, int behavior)
{
+ struct file *filp = vma->vm_file;
long error = -EBADF;
+ if (!filp)
+ goto out;
+
switch (behavior) {
case MADV_NORMAL:
case MADV_SEQUENTIAL:
@@ -165,6 +167,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev
break;
}
+out:
return error;
}
diff --git a/mm/memory.c b/mm/memory.c
index beabdefa625..e046b7e4b53 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
* Do a quick page-table lookup for a single page.
* mm->page_table_lock must be held.
*/
-static struct page *
-__follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
+static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
+ int read, int write, int accessed)
{
pgd_t *pgd;
pud_t *pud;
@@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write)
pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- if (write && !pte_dirty(pte) && !PageDirty(page))
- set_page_dirty(page);
- mark_page_accessed(page);
+ if (accessed) {
+ if (write && !pte_dirty(pte) &&!PageDirty(page))
+ set_page_dirty(page);
+ mark_page_accessed(page);
+ }
return page;
}
}
@@ -829,16 +831,19 @@ out:
return NULL;
}
-struct page *
+inline struct page *
follow_page(struct mm_struct *mm, unsigned long address, int write)
{
- return __follow_page(mm, address, /*read*/0, write);
+ return __follow_page(mm, address, 0, write, 1);
}
-int
-check_user_page_readable(struct mm_struct *mm, unsigned long address)
+/*
+ * check_user_page_readable() can be called frm niterrupt context by oprofile,
+ * so we need to avoid taking any non-irq-safe locks
+ */
+int check_user_page_readable(struct mm_struct *mm, unsigned long address)
{
- return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL;
+ return __follow_page(mm, address, 1, 0, 0) != NULL;
}
EXPORT_SYMBOL(check_user_page_readable);
@@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pud = pud_offset(pgd, pg);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, pg);
- BUG_ON(pmd_none(*pmd));
+ if (pmd_none(*pmd))
+ return i ? : -EFAULT;
pte = pte_offset_map(pmd, pg);
- BUG_ON(pte_none(*pte));
+ if (pte_none(*pte)) {
+ pte_unmap(pte);
+ return i ? : -EFAULT;
+ }
if (pages) {
pages[i] = pte_page(*pte);
get_page(pages[i]);
@@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
}
spin_lock(&mm->page_table_lock);
do {
+ int write_access = write;
struct page *page;
- int lookup_write = write;
cond_resched_lock(&mm->page_table_lock);
- while (!(page = follow_page(mm, start, lookup_write))) {
+ while (!(page = follow_page(mm, start, write_access))) {
+ int ret;
+
/*
* Shortcut for anonymous pages. We don't want
* to force the creation of pages tables for
@@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
* nobody touched so far. This is important
* for doing a core dump for these mappings.
*/
- if (!lookup_write &&
- untouched_anonymous_page(mm,vma,start)) {
+ if (!write && untouched_anonymous_page(mm,vma,start)) {
page = ZERO_PAGE(start);
break;
}
spin_unlock(&mm->page_table_lock);
- switch (handle_mm_fault(mm,vma,start,write)) {
+ ret = __handle_mm_fault(mm, vma, start, write_access);
+
+ /*
+ * The VM_FAULT_WRITE bit tells us that do_wp_page has
+ * broken COW when necessary, even if maybe_mkwrite
+ * decided not to set pte_write. We can thus safely do
+ * subsequent page lookups as if they were reads.
+ */
+ if (ret & VM_FAULT_WRITE)
+ write_access = 0;
+
+ switch (ret & ~VM_FAULT_WRITE) {
case VM_FAULT_MINOR:
tsk->min_flt++;
break;
@@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
default:
BUG();
}
- /*
- * Now that we have performed a write fault
- * and surely no longer have a shared page we
- * shouldn't write, we shouldn't ignore an
- * unwritable page in the page table if
- * we are forcing write access.
- */
- lookup_write = write && !force;
spin_lock(&mm->page_table_lock);
}
if (pages) {
@@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
struct page *old_page, *new_page;
unsigned long pfn = pte_pfn(pte);
pte_t entry;
+ int ret;
if (unlikely(!pfn_valid(pfn))) {
/*
@@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
lazy_mmu_prot_update(entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR;
+ return VM_FAULT_MINOR|VM_FAULT_WRITE;
}
}
pte_unmap(page_table);
@@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
/*
* Re-check the pte - we dropped the lock
*/
+ ret = VM_FAULT_MINOR;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, pte))) {
@@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
/* Free the old page.. */
new_page = old_page;
+ ret |= VM_FAULT_WRITE;
}
pte_unmap(page_table);
page_cache_release(new_page);
page_cache_release(old_page);
spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR;
+ return ret;
no_new_page:
page_cache_release(old_page);
@@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, pmd, entry);
-
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
@@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cb41c31e7c8..b4eababc819 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -443,7 +443,7 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
struct mempolicy *new;
DECLARE_BITMAP(nodes, MAX_NUMNODES);
- if (mode > MPOL_MAX)
+ if (mode < 0 || mode > MPOL_MAX)
return -EINVAL;
err = get_nodes(nodes, nmask, maxnode, mode);
if (err)
@@ -1138,11 +1138,11 @@ void mpol_free_shared_policy(struct shared_policy *p)
while (next) {
n = rb_entry(next, struct sp_node, nd);
next = rb_next(&n->nd);
+ rb_erase(&n->nd, &p->root);
mpol_free(n->policy);
kmem_cache_free(sn_cache, n);
}
spin_unlock(&p->lock);
- p->root = RB_ROOT;
}
/* assumes fs == KERNEL_DS */
diff --git a/mm/mmap.c b/mm/mmap.c
index da3fa90a0aa..404319477e7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -143,7 +143,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
leave 3% of the size of this process for other processes */
allowed -= current->mm->total_vm / 32;
- if (atomic_read(&vm_committed_space) < allowed)
+ /*
+ * cast `allowed' as a signed long because vm_committed_space
+ * sometimes has a negative value
+ */
+ if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
vm_unacct_memory(pages);
diff --git a/mm/mremap.c b/mm/mremap.c
index ec7238a78f3..fc45dc9a617 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -229,6 +229,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
* since do_munmap() will decrement it by old_len == new_len
*/
mm->total_vm += new_len >> PAGE_SHIFT;
+ __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */
@@ -243,7 +244,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
vma->vm_next->vm_flags |= VM_ACCOUNT;
}
- __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
if (new_len > old_len)
diff --git a/mm/nommu.c b/mm/nommu.c
index ce74452c02d..fd4e8df0f02 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1167,7 +1167,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
leave 3% of the size of this process for other processes */
allowed -= current->mm->total_vm / 32;
- if (atomic_read(&vm_committed_space) < allowed)
+ /*
+ * cast `allowed' as a signed long because vm_committed_space
+ * sometimes has a negative value
+ */
+ if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
vm_unacct_memory(pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1d6ba6a4b59..8d088371196 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1061,20 +1061,19 @@ unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
static unsigned int nr_free_zone_pages(int offset)
{
- pg_data_t *pgdat;
+ /* Just pick one node, since fallback list is circular */
+ pg_data_t *pgdat = NODE_DATA(numa_node_id());
unsigned int sum = 0;
- for_each_pgdat(pgdat) {
- struct zonelist *zonelist = pgdat->node_zonelists + offset;
- struct zone **zonep = zonelist->zones;
- struct zone *zone;
+ struct zonelist *zonelist = pgdat->node_zonelists + offset;
+ struct zone **zonep = zonelist->zones;
+ struct zone *zone;
- for (zone = *zonep++; zone; zone = *zonep++) {
- unsigned long size = zone->present_pages;
- unsigned long high = zone->pages_high;
- if (size > high)
- sum += size - high;
- }
+ for (zone = *zonep++; zone; zone = *zonep++) {
+ unsigned long size = zone->present_pages;
+ unsigned long high = zone->pages_high;
+ if (size > high)
+ sum += size - high;
}
return sum;
@@ -1861,7 +1860,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
unsigned long i, j;
- const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
int cpu, nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
@@ -1934,9 +1932,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
zone->zone_mem_map = pfn_to_page(zone_start_pfn);
zone->zone_start_pfn = zone_start_pfn;
- if ((zone_start_pfn) & (zone_required_alignment-1))
- printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n");
-
memmap_init(size, nid, j, zone_start_pfn);
zonetable_add(zone, nid, j, zone_start_pfn, size);