diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 11 | ||||
-rw-r--r-- | mm/madvise.c | 13 | ||||
-rw-r--r-- | mm/memory.c | 73 | ||||
-rw-r--r-- | mm/mempolicy.c | 4 | ||||
-rw-r--r-- | mm/mmap.c | 6 | ||||
-rw-r--r-- | mm/mremap.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 25 |
8 files changed, 85 insertions, 55 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fbd1111ea11..6bf720bc662 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -301,6 +301,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; unsigned long address; + pte_t *ptep; pte_t pte; struct page *page; @@ -309,9 +310,17 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(end & ~HPAGE_MASK); for (address = start; address < end; address += HPAGE_SIZE) { - pte = huge_ptep_get_and_clear(mm, address, huge_pte_offset(mm, address)); + ptep = huge_pte_offset(mm, address); + if (! ptep) + /* This can happen on truncate, or if an + * mmap() is aborted due to an error before + * the prefault */ + continue; + + pte = huge_ptep_get_and_clear(mm, address, ptep); if (pte_none(pte)) continue; + page = pte_page(pte); put_page(page); } diff --git a/mm/madvise.c b/mm/madvise.c index 73180a22877..c8c01a12fea 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -83,9 +83,6 @@ static long madvise_willneed(struct vm_area_struct * vma, { struct file *file = vma->vm_file; - if (!file) - return -EBADF; - if (file->f_mapping->a_ops->get_xip_page) { /* no bad return value, but ignore advice */ return 0; @@ -140,11 +137,16 @@ static long madvise_dontneed(struct vm_area_struct * vma, return 0; } -static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, int behavior) +static long +madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) { + struct file *filp = vma->vm_file; long error = -EBADF; + if (!filp) + goto out; + switch (behavior) { case MADV_NORMAL: case MADV_SEQUENTIAL: @@ -165,6 +167,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev break; } +out: return error; } diff --git a/mm/memory.c b/mm/memory.c index beabdefa625..e046b7e4b53 100644 --- a/mm/memory.c +++ b/mm/memory.c 
@@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. */ -static struct page * -__follow_page(struct mm_struct *mm, unsigned long address, int read, int write) +static struct page *__follow_page(struct mm_struct *mm, unsigned long address, + int read, int write, int accessed) { pgd_t *pgd; pud_t *pud; @@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write) pfn = pte_pfn(pte); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - if (write && !pte_dirty(pte) && !PageDirty(page)) - set_page_dirty(page); - mark_page_accessed(page); + if (accessed) { + if (write && !pte_dirty(pte) &&!PageDirty(page)) + set_page_dirty(page); + mark_page_accessed(page); + } return page; } } @@ -829,16 +831,19 @@ out: return NULL; } -struct page * +inline struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) { - return __follow_page(mm, address, /*read*/0, write); + return __follow_page(mm, address, 0, write, 1); } -int -check_user_page_readable(struct mm_struct *mm, unsigned long address) +/* + * check_user_page_readable() can be called from interrupt context by oprofile, + * so we need to avoid taking any non-irq-safe locks + */ +int check_user_page_readable(struct mm_struct *mm, unsigned long address) { - return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL; + return __follow_page(mm, address, 1, 0, 0) != NULL; } EXPORT_SYMBOL(check_user_page_readable); @@ -908,9 +913,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, pud = pud_offset(pgd, pg); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, pg); - BUG_ON(pmd_none(*pmd)); + if (pmd_none(*pmd)) + return i ? : -EFAULT; pte = pte_offset_map(pmd, pg); - BUG_ON(pte_none(*pte)); + if (pte_none(*pte)) { + pte_unmap(pte); + return i ?
: -EFAULT; + } if (pages) { pages[i] = pte_page(*pte); get_page(pages[i]); @@ -935,11 +944,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } spin_lock(&mm->page_table_lock); do { + int write_access = write; struct page *page; - int lookup_write = write; cond_resched_lock(&mm->page_table_lock); - while (!(page = follow_page(mm, start, lookup_write))) { + while (!(page = follow_page(mm, start, write_access))) { + int ret; + /* * Shortcut for anonymous pages. We don't want * to force the creation of pages tables for @@ -947,13 +958,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * nobody touched so far. This is important * for doing a core dump for these mappings. */ - if (!lookup_write && - untouched_anonymous_page(mm,vma,start)) { + if (!write && untouched_anonymous_page(mm,vma,start)) { page = ZERO_PAGE(start); break; } spin_unlock(&mm->page_table_lock); - switch (handle_mm_fault(mm,vma,start,write)) { + ret = __handle_mm_fault(mm, vma, start, write_access); + + /* + * The VM_FAULT_WRITE bit tells us that do_wp_page has + * broken COW when necessary, even if maybe_mkwrite + * decided not to set pte_write. We can thus safely do + * subsequent page lookups as if they were reads. + */ + if (ret & VM_FAULT_WRITE) + write_access = 0; + + switch (ret & ~VM_FAULT_WRITE) { case VM_FAULT_MINOR: tsk->min_flt++; break; @@ -967,14 +988,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } - /* - * Now that we have performed a write fault - * and surely no longer have a shared page we - * shouldn't write, we shouldn't ignore an - * unwritable page in the page table if - * we are forcing write access. 
- */ - lookup_write = write && !force; spin_lock(&mm->page_table_lock); } if (pages) { @@ -1224,6 +1237,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); pte_t entry; + int ret; if (unlikely(!pfn_valid(pfn))) { /* @@ -1251,7 +1265,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, lazy_mmu_prot_update(entry); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; + return VM_FAULT_MINOR|VM_FAULT_WRITE; } } pte_unmap(page_table); @@ -1278,6 +1292,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* * Re-check the pte - we dropped the lock */ + ret = VM_FAULT_MINOR; spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, address); if (likely(pte_same(*page_table, pte))) { @@ -1294,12 +1309,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* Free the old page.. */ new_page = old_page; + ret |= VM_FAULT_WRITE; } pte_unmap(page_table); page_cache_release(new_page); page_cache_release(old_page); spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; + return ret; no_new_page: page_cache_release(old_page); @@ -1991,7 +2007,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, if (write_access) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, pmd, entry); - entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); @@ -2006,7 +2021,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, /* * By the time we get here, we already hold the mm semaphore */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, +int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t *pgd; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cb41c31e7c8..b4eababc819 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -443,7 +443,7 @@ asmlinkage long 
sys_set_mempolicy(int mode, unsigned long __user *nmask, struct mempolicy *new; DECLARE_BITMAP(nodes, MAX_NUMNODES); - if (mode > MPOL_MAX) + if (mode < 0 || mode > MPOL_MAX) return -EINVAL; err = get_nodes(nodes, nmask, maxnode, mode); if (err) @@ -1138,11 +1138,11 @@ void mpol_free_shared_policy(struct shared_policy *p) while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); + rb_erase(&n->nd, &p->root); mpol_free(n->policy); kmem_cache_free(sn_cache, n); } spin_unlock(&p->lock); - p->root = RB_ROOT; } /* assumes fs == KERNEL_DS */ diff --git a/mm/mmap.c b/mm/mmap.c index da3fa90a0aa..404319477e7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -143,7 +143,11 @@ int __vm_enough_memory(long pages, int cap_sys_admin) leave 3% of the size of this process for other processes */ allowed -= current->mm->total_vm / 32; - if (atomic_read(&vm_committed_space) < allowed) + /* + * cast `allowed' as a signed long because vm_committed_space + * sometimes has a negative value + */ + if (atomic_read(&vm_committed_space) < (long)allowed) return 0; vm_unacct_memory(pages); diff --git a/mm/mremap.c b/mm/mremap.c index ec7238a78f3..fc45dc9a617 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -229,6 +229,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, * since do_munmap() will decrement it by old_len == new_len */ mm->total_vm += new_len >> PAGE_SHIFT; + __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (do_munmap(mm, old_addr, old_len) < 0) { /* OOM: unable to split vma, just get accounts right */ @@ -243,7 +244,6 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma->vm_next->vm_flags |= VM_ACCOUNT; } - __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); if (vm_flags & VM_LOCKED) { mm->locked_vm += new_len >> PAGE_SHIFT; if (new_len > old_len) diff --git a/mm/nommu.c b/mm/nommu.c index ce74452c02d..fd4e8df0f02 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1167,7 +1167,11 @@ int 
__vm_enough_memory(long pages, int cap_sys_admin) leave 3% of the size of this process for other processes */ allowed -= current->mm->total_vm / 32; - if (atomic_read(&vm_committed_space) < allowed) + /* + * cast `allowed' as a signed long because vm_committed_space + * sometimes has a negative value + */ + if (atomic_read(&vm_committed_space) < (long)allowed) return 0; vm_unacct_memory(pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1d6ba6a4b59..8d088371196 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1061,20 +1061,19 @@ unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) static unsigned int nr_free_zone_pages(int offset) { - pg_data_t *pgdat; + /* Just pick one node, since fallback list is circular */ + pg_data_t *pgdat = NODE_DATA(numa_node_id()); unsigned int sum = 0; - for_each_pgdat(pgdat) { - struct zonelist *zonelist = pgdat->node_zonelists + offset; - struct zone **zonep = zonelist->zones; - struct zone *zone; + struct zonelist *zonelist = pgdat->node_zonelists + offset; + struct zone **zonep = zonelist->zones; + struct zone *zone; - for (zone = *zonep++; zone; zone = *zonep++) { - unsigned long size = zone->present_pages; - unsigned long high = zone->pages_high; - if (size > high) - sum += size - high; - } + for (zone = *zonep++; zone; zone = *zonep++) { + unsigned long size = zone->present_pages; + unsigned long high = zone->pages_high; + if (size > high) + sum += size - high; } return sum; @@ -1861,7 +1860,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long i, j; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); int cpu, nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; @@ -1934,9 +1932,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, zone->zone_mem_map = pfn_to_page(zone_start_pfn); zone->zone_start_pfn = zone_start_pfn; - if ((zone_start_pfn) & (zone_required_alignment-1)) - 
printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n"); - memmap_init(size, nid, j, zone_start_pfn); zonetable_add(zone, nid, j, zone_start_pfn, size); |