aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig3
-rw-r--r--mm/Makefile1
-rw-r--r--mm/filemap.c26
-rw-r--r--mm/filemap_xip.c3
-rw-r--r--mm/fremap.c3
-rw-r--r--mm/hugetlb.c13
-rw-r--r--mm/madvise.c4
-rw-r--r--mm/memcontrol.c3
-rw-r--r--mm/memory.c76
-rw-r--r--mm/migrate.c4
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mm_init.c8
-rw-r--r--mm/mmap.c162
-rw-r--r--mm/mmu_notifier.c277
-rw-r--r--mm/mprotect.c3
-rw-r--r--mm/mremap.c6
-rw-r--r--mm/nommu.c21
-rw-r--r--mm/page_alloc.c21
-rw-r--r--mm/rmap.c18
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/swap.c9
-rw-r--r--mm/swap_state.c8
-rw-r--r--mm/swapfile.c6
-rw-r--r--mm/truncate.c6
-rw-r--r--mm/vmscan.c8
25 files changed, 614 insertions, 84 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index efee5d379df..446c6588c75 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -208,3 +208,6 @@ config NR_QUICK
config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS
+
+config MMU_NOTIFIER
+ bool
diff --git a/mm/Makefile b/mm/Makefile
index 06ca2381fef..da4ccf015ae 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
obj-$(CONFIG_SLOB) += slob.o
+obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 5de7633e1db..54e96865085 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
*
* The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
*/
void unlock_page(struct page *page)
{
smp_mb__before_clear_bit();
- if (!TestClearPageLocked(page))
+ if (!test_and_clear_bit(PG_locked, &page->flags))
BUG();
smp_mb__after_clear_bit();
wake_up_page(page, PG_locked);
@@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
struct page *page = find_get_page(mapping, index);
if (page) {
- if (!TestSetPageLocked(page))
+ if (trylock_page(page))
return page;
page_cache_release(page);
return NULL;
@@ -1023,8 +1023,17 @@ find_page:
ra, filp, page,
index, last_index - index);
}
- if (!PageUptodate(page))
- goto page_not_up_to_date;
+ if (!PageUptodate(page)) {
+ if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+ !mapping->a_ops->is_partially_uptodate)
+ goto page_not_up_to_date;
+ if (!trylock_page(page))
+ goto page_not_up_to_date;
+ if (!mapping->a_ops->is_partially_uptodate(page,
+ desc, offset))
+ goto page_not_up_to_date_locked;
+ unlock_page(page);
+ }
page_ok:
/*
* i_size must be checked after we know the page is Uptodate.
@@ -1094,6 +1103,7 @@ page_not_up_to_date:
if (lock_page_killable(page))
goto readpage_eio;
+page_not_up_to_date_locked:
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
unlock_page(page);
@@ -1869,7 +1879,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
* The !iov->iov_len check ensures we skip over unlikely
* zero-length segments (without overruning the iovec).
*/
- while (bytes || unlikely(!iov->iov_len && i->count)) {
+ while (bytes || unlikely(i->count && !iov->iov_len)) {
int copy;
copy = min(bytes, iov->iov_len - base);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 98a3f31ccd6..380ab402d71 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
@@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
page_remove_rmap(page, vma);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
diff --git a/mm/fremap.c b/mm/fremap.c
index 07a9c82ce1a..7881638e4a1 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -15,6 +15,7 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
spin_unlock(&mapping->i_mmap_lock);
}
+ mmu_notifier_invalidate_range_start(mm, start, start + size);
err = populate_range(mm, vma, start, size, pgoff);
+ mmu_notifier_invalidate_range_end(mm, start, start + size);
if (!err && !(flags & MAP_NONBLOCK)) {
if (unlikely(has_write_lock)) {
downgrade_write(&mm->mmap_sem);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3be79dc18c5..28a2980ee43 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
+#include <linux/mmu_notifier.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
@@ -16,9 +17,10 @@
#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/sysfs.h>
-
+#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/io.h>
#include <linux/hugetlb.h>
#include "internal.h"
@@ -1281,7 +1283,12 @@ module_exit(hugetlb_exit);
static int __init hugetlb_init(void)
{
- BUILD_BUG_ON(HPAGE_SHIFT == 0);
+ /* Some platforms decide whether they support huge pages at boot
+ * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
+ * there is no such support.
+ */
+ if (HPAGE_SHIFT == 0)
+ return 0;
if (!size_to_hstate(default_hstate_size)) {
default_hstate_size = HPAGE_SIZE;
@@ -1672,6 +1679,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
BUG_ON(start & ~huge_page_mask(h));
BUG_ON(end & ~huge_page_mask(h));
+ mmu_notifier_invalidate_range_start(mm, start, end);
spin_lock(&mm->page_table_lock);
for (address = start; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
@@ -1713,6 +1721,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
}
spin_unlock(&mm->page_table_lock);
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range_end(mm, start, end);
list_for_each_entry_safe(page, tmp, &page_list, lru) {
list_del(&page->lru);
put_page(page);
diff --git a/mm/madvise.c b/mm/madvise.c
index 23a0ec3e0ea..f9349c18a1b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -132,10 +132,10 @@ static long madvise_willneed(struct vm_area_struct * vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for refill_inactive to actually free
+ * zap_page_range call sets things up for shrink_active_list to actually free
* these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
- * refill_inactive to pick up before reclaiming other pages.
+ * shrink_active_list to pick up before reclaiming other pages.
*
* NB: This interface discards data rather than pushes it out to swap,
* as some implementations do. This has performance implications for
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fba566c5132..7056c3bdb47 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1168,9 +1168,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
mem = mem_cgroup_from_cont(cont);
old_mem = mem_cgroup_from_cont(old_cont);
- if (mem == old_mem)
- goto out;
-
/*
* Only thread group leaders are allowed to migrate, the mm_struct is
* in effect owned by the leader
diff --git a/mm/memory.c b/mm/memory.c
index a8ca04faaea..1002f473f49 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
#include <linux/init.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
unsigned long next;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
+ int ret;
/*
* Don't copy ptes where a page fault will fill them correctly.
@@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+ /*
+ * We need to invalidate the secondary MMU mappings only when
+ * there could be a permission downgrade on the ptes of the
+ * parent mm. And a permission downgrade will only happen if
+ * is_cow_mapping() returns true.
+ */
+ if (is_cow_mapping(vma->vm_flags))
+ mmu_notifier_invalidate_range_start(src_mm, addr, end);
+
+ ret = 0;
dst_pgd = pgd_offset(dst_mm, addr);
src_pgd = pgd_offset(src_mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(src_pgd))
continue;
- if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
- vma, addr, next))
- return -ENOMEM;
+ if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+ vma, addr, next))) {
+ ret = -ENOMEM;
+ break;
+ }
} while (dst_pgd++, src_pgd++, addr = next, addr != end);
- return 0;
+
+ if (is_cow_mapping(vma->vm_flags))
+ mmu_notifier_invalidate_range_end(src_mm,
+ vma->vm_start, end);
+ return ret;
}
static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
int fullmm = (*tlbp)->fullmm;
+ struct mm_struct *mm = vma->vm_mm;
+ mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
unsigned long end;
@@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
}
}
out:
+ mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
return start; /* which is now the end (or restart) address */
}
@@ -973,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
return end;
}
+/**
+ * zap_vma_ptes - remove ptes mapping the vma
+ * @vma: vm_area_struct holding ptes to be zapped
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ *
+ * This function only unmaps ptes assigned to VM_PFNMAP vmas.
+ *
+ * The entire address range must be fully contained within the vma.
+ *
+ * Returns 0 if successful, or -1 if the range or the vma type is invalid.
+ */
+int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size)
+{
+ if (address < vma->vm_start || address + size > vma->vm_end ||
+ !(vma->vm_flags & VM_PFNMAP))
+ return -1;
+ zap_page_range(vma, address, size, NULL);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(zap_vma_ptes);
+
/*
* Do a quick page-table lookup for a single page.
*/
@@ -1616,10 +1660,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
{
pgd_t *pgd;
unsigned long next;
- unsigned long end = addr + size;
+ unsigned long start = addr, end = addr + size;
int err;
BUG_ON(addr >= end);
+ mmu_notifier_invalidate_range_start(mm, start, end);
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
@@ -1627,6 +1672,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
if (err)
break;
} while (pgd++, addr = next, addr != end);
+ mmu_notifier_invalidate_range_end(mm, start, end);
return err;
}
EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1743,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* not dirty accountable.
*/
if (PageAnon(old_page)) {
- if (!TestSetPageLocked(old_page)) {
+ if (trylock_page(old_page)) {
reuse = can_share_swap_page(old_page);
unlock_page(old_page);
}
@@ -1839,7 +1885,7 @@ gotten:
* seen in the presence of one thread doing SMC and another
* thread doing COW.
*/
- ptep_clear_flush(vma, address, page_table);
+ ptep_clear_flush_notify(vma, address, page_table);
set_pte_at(mm, address, page_table, entry);
update_mmu_cache(vma, address, entry);
lru_cache_add_active(new_page);
@@ -2719,16 +2765,26 @@ int make_pages_present(unsigned long addr, unsigned long end)
vma = find_vma(current->mm, addr);
if (!vma)
- return -1;
+ return -ENOMEM;
write = (vma->vm_flags & VM_WRITE) != 0;
BUG_ON(addr >= end);
BUG_ON(end > vma->vm_end);
len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
- if (ret < 0)
+ if (ret < 0) {
+ /*
+ SUS requires strange return values from mlock:
+ - an invalid addr generates ENOMEM.
+ - out of memory should generate EAGAIN.
+ */
+ if (ret == -EFAULT)
+ ret = -ENOMEM;
+ else if (ret == -ENOMEM)
+ ret = -EAGAIN;
return ret;
- return ret == len ? 0 : -1;
+ }
+ return ret == len ? 0 : -ENOMEM;
}
#if !defined(__HAVE_ARCH_GATE_AREA)
diff --git a/mm/migrate.c b/mm/migrate.c
index 153572fb60b..2a80136b23b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -605,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
* establishing additional references. We are the only one
* holding a reference to the new page at this point.
*/
- if (TestSetPageLocked(newpage))
+ if (!trylock_page(newpage))
BUG();
/* Prepare mapping for the new page.*/
@@ -667,7 +667,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
BUG_ON(charge);
rc = -EAGAIN;
- if (TestSetPageLocked(page)) {
+ if (!trylock_page(page)) {
if (!force)
goto move_newpage;
lock_page(page);
diff --git a/mm/mlock.c b/mm/mlock.c
index 7b2656055d6..01fbe93eff5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -78,8 +78,6 @@ success:
mm->locked_vm -= pages;
out:
- if (ret == -ENOMEM)
- ret = -EAGAIN;
return ret;
}
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c6af41ea999..936ef2efd89 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -14,6 +14,10 @@
#ifdef CONFIG_DEBUG_MEMORY_INIT
int __meminitdata mminit_loglevel;
+#ifndef SECTIONS_SHIFT
+#define SECTIONS_SHIFT 0
+#endif
+
/* The zonelists are simply reported, validation is manual. */
void mminit_verify_zonelist(void)
{
@@ -74,11 +78,7 @@ void __init mminit_verify_pageflags_layout(void)
NR_PAGEFLAGS);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
"Section %d Node %d Zone %d\n",
-#ifdef SECTIONS_SHIFT
SECTIONS_SHIFT,
-#else
- 0,
-#endif
NODES_SHIFT,
ZONES_SHIFT);
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets",
diff --git a/mm/mmap.c b/mm/mmap.c
index 5e0cc99e9cd..971d0eda754 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,6 +26,7 @@
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -369,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
if (vma_tmp->vm_end > addr) {
vma = vma_tmp;
if (vma_tmp->vm_start <= addr)
- return vma;
+ break;
__rb_link = &__rb_parent->rb_left;
} else {
rb_prev = __rb_parent;
@@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and its about to be pulled down */
arch_exit_mmap(mm);
+ mmu_notifier_release(mm);
lru_add_drain();
flush_cache_mm(mm);
@@ -2268,3 +2270,161 @@ int install_special_mapping(struct mm_struct *mm,
return 0;
}
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+ if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ /*
+ * The LSB of head.next can't change from under us
+ * because we hold the mm_all_locks_mutex.
+ */
+ spin_lock(&anon_vma->lock);
+ /*
+ * We can safely modify head.next after taking the
+ * anon_vma->lock. If some other vma in this mm shares
+ * the same anon_vma we won't take it again.
+ *
+ * No need of atomic instructions here, head.next
+ * can't change from under us thanks to the
+ * anon_vma->lock.
+ */
+ if (__test_and_set_bit(0, (unsigned long *)
+ &anon_vma->head.next))
+ BUG();
+ }
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+ if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+ /*
+ * AS_MM_ALL_LOCKS can't change from under us because
+ * we hold the mm_all_locks_mutex.
+ *
+ * Operations on ->flags have to be atomic because
+ * even if AS_MM_ALL_LOCKS is stable thanks to the
+ * mm_all_locks_mutex, there may be other cpus
+ * changing other bitflags in parallel to us.
+ */
+ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+ BUG();
+ spin_lock(&mapping->i_mmap_lock);
+ }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to prevent new
+ * anon_vmas from being associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid taking the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
+ * that may have to take thousands of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ int ret = -EINTR;
+
+ BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+ mutex_lock(&mm_all_locks_mutex);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (signal_pending(current))
+ goto out_unlock;
+ if (vma->anon_vma)
+ vm_lock_anon_vma(vma->anon_vma);
+ if (vma->vm_file && vma->vm_file->f_mapping)
+ vm_lock_mapping(vma->vm_file->f_mapping);
+ }
+ ret = 0;
+
+out_unlock:
+ if (ret)
+ mm_drop_all_locks(mm);
+
+ return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+ if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ /*
+ * The LSB of head.next can't change to 0 from under
+ * us because we hold the mm_all_locks_mutex.
+ *
+ * We must however clear the bitflag before unlocking
+ * the vma so the users using the anon_vma->head will
+ * never see our bitflag.
+ *
+ * No need of atomic instructions here, head.next
+ * can't change from under us until we release the
+ * anon_vma->lock.
+ */
+ if (!__test_and_clear_bit(0, (unsigned long *)
+ &anon_vma->head.next))
+ BUG();
+ spin_unlock(&anon_vma->lock);
+ }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+ if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+ /*
+ * AS_MM_ALL_LOCKS can't change to 0 from under us
+ * because we hold the mm_all_locks_mutex.
+ */
+ spin_unlock(&mapping->i_mmap_lock);
+ if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+ &mapping->flags))
+ BUG();
+ }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (vma->anon_vma)
+ vm_unlock_anon_vma(vma->anon_vma);
+ if (vma->vm_file && vma->vm_file->f_mapping)
+ vm_unlock_mapping(vma->vm_file->f_mapping);
+ }
+
+ mutex_unlock(&mm_all_locks_mutex);
+}
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
new file mode 100644
index 00000000000..5f4ef0250be
--- /dev/null
+++ b/mm/mmu_notifier.c
@@ -0,0 +1,277 @@
+/*
+ * linux/mm/mmu_notifier.c
+ *
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright (C) 2008 SGI
+ * Christoph Lameter <clameter@sgi.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/rculist.h>
+#include <linux/mmu_notifier.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+/*
+ * This function can't run concurrently against mmu_notifier_register
+ * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
+ * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
+ * in parallel despite there being no task using this mm any more,
+ * through the vmas outside of the exit_mmap context, such as with
+ * vmtruncate. This serializes against mmu_notifier_unregister with
+ * the mmu_notifier_mm->lock in addition to RCU and it serializes
+ * against the other mmu notifiers with RCU. struct mmu_notifier_mm
+ * can't go away from under us as exit_mmap holds an mm_count pin
+ * itself.
+ */
+void __mmu_notifier_release(struct mm_struct *mm)
+{
+ struct mmu_notifier *mn;
+
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
+ mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+ struct mmu_notifier,
+ hlist);
+ /*
+ * We arrived before mmu_notifier_unregister so
+ * mmu_notifier_unregister will do nothing other than
+ * wait for ->release to finish before
+ * mmu_notifier_unregister returns.
+ */
+ hlist_del_init_rcu(&mn->hlist);
+ /*
+ * RCU here will block mmu_notifier_unregister until
+ * ->release returns.
+ */
+ rcu_read_lock();
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+ /*
+ * if ->release runs before mmu_notifier_unregister it
+ * must be handled as it's the only way for the driver
+ * to flush all existing sptes and stop the driver
+ * from establishing any more sptes before all the
+ * pages in the mm are freed.
+ */
+ if (mn->ops->release)
+ mn->ops->release(mn, mm);
+ rcu_read_unlock();
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ }
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+
+ /*
+ * synchronize_rcu here prevents mmu_notifier_release from
+ * returning to exit_mmap (which would proceed freeing all pages
+ * in the mm) until the ->release method returns, if it was
+ * invoked by mmu_notifier_unregister.
+ *
+ * The mmu_notifier_mm can't go away from under us because one
+ * mm_count is held by exit_mmap.
+ */
+ synchronize_rcu();
+}
+
+/*
+ * If no young bitflag is supported by the hardware, ->clear_flush_young can
+ * unmap the address and return 1 or 0 depending if the mapping previously
+ * existed or not.
+ */
+int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct mmu_notifier *mn;
+ struct hlist_node *n;
+ int young = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->clear_flush_young)
+ young |= mn->ops->clear_flush_young(mn, mm, address);
+ }
+ rcu_read_unlock();
+
+ return young;
+}
+
+void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct mmu_notifier *mn;
+ struct hlist_node *n;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->invalidate_page)
+ mn->ops->invalidate_page(mn, mm, address);
+ }
+ rcu_read_unlock();
+}
+
+void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct mmu_notifier *mn;
+ struct hlist_node *n;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->invalidate_range_start)
+ mn->ops->invalidate_range_start(mn, mm, start, end);
+ }
+ rcu_read_unlock();
+}
+
+void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct mmu_notifier *mn;
+ struct hlist_node *n;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->invalidate_range_end)
+ mn->ops->invalidate_range_end(mn, mm, start, end);
+ }
+ rcu_read_unlock();
+}
+
+static int do_mmu_notifier_register(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ int take_mmap_sem)
+{
+ struct mmu_notifier_mm *mmu_notifier_mm;
+ int ret;
+
+ BUG_ON(atomic_read(&mm->mm_users) <= 0);
+
+ ret = -ENOMEM;
+ mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+ if (unlikely(!mmu_notifier_mm))
+ goto out;
+
+ if (take_mmap_sem)
+ down_write(&mm->mmap_sem);
+ ret = mm_take_all_locks(mm);
+ if (unlikely(ret))
+ goto out_cleanup;
+
+ if (!mm_has_notifiers(mm)) {
+ INIT_HLIST_HEAD(&mmu_notifier_mm->list);
+ spin_lock_init(&mmu_notifier_mm->lock);
+ mm->mmu_notifier_mm = mmu_notifier_mm;
+ mmu_notifier_mm = NULL;
+ }
+ atomic_inc(&mm->mm_count);
+
+ /*
+ * Serialize the update against mmu_notifier_unregister. A
+ * side note: mmu_notifier_release can't run concurrently with
+ * us because we hold the mm_users pin (either implicitly as
+ * current->mm or explicitly with get_task_mm() or similar).
+ * We can't race against any other mmu notifier method either
+ * thanks to mm_take_all_locks().
+ */
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+
+ mm_drop_all_locks(mm);
+out_cleanup:
+ if (take_mmap_sem)
+ up_write(&mm->mmap_sem);
+ /* kfree() does nothing if mmu_notifier_mm is NULL */
+ kfree(mmu_notifier_mm);
+out:
+ BUG_ON(atomic_read(&mm->mm_users) <= 0);
+ return ret;
+}
+
+/*
+ * Must not hold mmap_sem nor any other VM related lock when calling
+ * this registration function. Must also ensure mm_users can't go down
+ * to zero while this runs to avoid races with mmu_notifier_release,
+ * so mm has to be current->mm or the mm should be pinned safely such
+ * as with get_task_mm(). If the mm is not current->mm, the mm_users
+ * pin should be released by calling mmput after mmu_notifier_register
+ * returns. mmu_notifier_unregister must be always called to
+ * unregister the notifier. mm_count is automatically pinned to allow
+ * mmu_notifier_unregister to safely run at any time later, before or
+ * after exit_mmap. ->release will always be called before exit_mmap
+ * frees the pages.
+ */
+int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ return do_mmu_notifier_register(mn, mm, 1);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_register);
+
+/*
+ * Same as mmu_notifier_register but here the caller must hold the
+ * mmap_sem in write mode.
+ */
+int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ return do_mmu_notifier_register(mn, mm, 0);
+}
+EXPORT_SYMBOL_GPL(__mmu_notifier_register);
+
+/* this is called after the last mmu_notifier_unregister() returned */
+void __mmu_notifier_mm_destroy(struct mm_struct *mm)
+{
+ BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
+ kfree(mm->mmu_notifier_mm);
+ mm->mmu_notifier_mm = LIST_POISON1; /* debug */
+}
+
+/*
+ * This releases the mm_count pin automatically and frees the mm
+ * structure if it was the last user of it. It serializes against
+ * running mmu notifiers with RCU and against mmu_notifier_unregister
+ * with the unregister lock + RCU. All sptes must be dropped before
+ * calling mmu_notifier_unregister. ->release or any other notifier
+ * method may be invoked concurrently with mmu_notifier_unregister,
+ * and only after mmu_notifier_unregister returned we're guaranteed
+ * that ->release or any other method can't run anymore.
+ */
+void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ BUG_ON(atomic_read(&mm->mm_count) <= 0);
+
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ if (!hlist_unhashed(&mn->hlist)) {
+ hlist_del_rcu(&mn->hlist);
+
+ /*
+ * RCU here will force exit_mmap to wait for ->release to finish
+ * before freeing the pages.
+ */
+ rcu_read_lock();
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+ /*
+ * exit_mmap will block in mmu_notifier_release to
+ * guarantee ->release is called before freeing the
+ * pages.
+ */
+ if (mn->ops->release)
+ mn->ops->release(mn, mm);
+ rcu_read_unlock();
+ } else
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+
+ /*
+ * Wait for any running method to finish, of course including
+ * ->release if it was run by mmu_notifier_release instead of us.
+ */
+ synchronize_rcu();
+
+ BUG_ON(atomic_read(&mm->mm_count) <= 0);
+
+ mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abd645a3b0a..fded06f923f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -21,6 +21,7 @@
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
@@ -203,10 +204,12 @@ success:
dirty_accountable = 1;
}
+ mmu_notifier_invalidate_range_start(mm, start, end);
if (is_vm_hugetlb_page(vma))
hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
else
change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+ mmu_notifier_invalidate_range_end(mm, start, end);
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 08e3c7f2bd1..1a7743923c8 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -18,6 +18,7 @@
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
+ unsigned long old_start;
+ old_start = old_addr;
+ mmu_notifier_invalidate_range_start(vma->vm_mm,
+ old_start, old_end);
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
@@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
pte_unmap_unlock(old_pte - 1, old_ptl);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
+ mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
}
#define LATENCY_LIMIT (64 * PAGE_SIZE)
diff --git a/mm/nommu.c b/mm/nommu.c
index 5edccd9c921..ed75bc962fb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -266,6 +266,27 @@ void *vmalloc_node(unsigned long size, int node)
}
EXPORT_SYMBOL(vmalloc_node);
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
+/**
+ * vmalloc_exec - allocate virtually contiguous, executable memory
+ * @size: allocation size
+ *
+ * Kernel-internal function to allocate enough pages to cover @size
+ * from the page level allocator and map them into contiguous and
+ * executable kernel virtual space.
+ *
+ * For tight control over page level allocator and protection flags
+ * use __vmalloc() instead.
+ */
+
+void *vmalloc_exec(unsigned long size)
+{
+ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+}
+
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
* @size: allocation size
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6da667274df..401d104d2bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2372,7 +2372,7 @@ static void build_zonelist_cache(pg_data_t *pgdat)
#endif /* CONFIG_NUMA */
-/* return values int ....just for stop_machine_run() */
+/* return values int ....just for stop_machine() */
static int __build_all_zonelists(void *dummy)
{
int nid;
@@ -2397,7 +2397,7 @@ void build_all_zonelists(void)
} else {
/* we have to stop all cpus to guarantee there is no user
of zonelist */
- stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
+ stop_machine(__build_all_zonelists, NULL, NULL);
/* cpuset refresh routine should be here */
}
vm_total_pages = nr_free_pagecache_pages();
@@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void)
return find_min_pfn_for_node(MAX_NUMNODES);
}
-/**
- * find_max_pfn_with_active_regions - Find the maximum PFN registered
- *
- * It returns the maximum PFN based on information provided via
- * add_active_range().
- */
-unsigned long __init find_max_pfn_with_active_regions(void)
-{
- int i;
- unsigned long max_pfn = 0;
-
- for (i = 0; i < nr_nodemap_entries; i++)
- max_pfn = max(max_pfn, early_node_map[i].end_pfn);
-
- return max_pfn;
-}
-
/*
* early_calculate_totalpages()
* Sum pages in active regions for movable zone.
diff --git a/mm/rmap.c b/mm/rmap.c
index 39ae5a9bf38..1ea4e6fcee7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlbflush.h>
@@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page,
if (vma->vm_flags & VM_LOCKED) {
referenced++;
*mapcount = 1; /* break early from loop */
- } else if (ptep_clear_flush_young(vma, address, pte))
+ } else if (ptep_clear_flush_young_notify(vma, address, pte))
referenced++;
/* Pretend the page is referenced if the task has the
@@ -421,7 +422,7 @@ int page_referenced(struct page *page, int is_locked,
referenced += page_referenced_anon(page, mem_cont);
else if (is_locked)
referenced += page_referenced_file(page, mem_cont);
- else if (TestSetPageLocked(page))
+ else if (!trylock_page(page))
referenced++;
else {
if (page->mapping)
@@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
pte_t entry;
flush_cache_page(vma, address, pte_pfn(*pte));
- entry = ptep_clear_flush(vma, address, pte);
+ entry = ptep_clear_flush_notify(vma, address, pte);
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
set_pte_at(mm, address, pte, entry);
@@ -666,7 +667,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
- if (page_test_dirty(page)) {
+ if ((!PageAnon(page) || PageSwapCache(page)) &&
+ page_test_dirty(page)) {
page_clear_dirty(page);
set_page_dirty(page);
}
@@ -705,14 +707,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* skipped over this mm) then we should reactivate it.
*/
if (!migration && ((vma->vm_flags & VM_LOCKED) ||
- (ptep_clear_flush_young(vma, address, pte)))) {
+ (ptep_clear_flush_young_notify(vma, address, pte)))) {
ret = SWAP_FAIL;
goto out_unmap;
}
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))
@@ -837,12 +839,12 @@ static void try_to_unmap_cluster(unsigned long cursor,
page = vm_normal_page(vma, address, *pte);
BUG_ON(!page || PageAnon(page));
- if (ptep_clear_flush_young(vma, address, pte))
+ if (ptep_clear_flush_young_notify(vma, address, pte))
continue;
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address))
diff --git a/mm/shmem.c b/mm/shmem.c
index 952d361774b..04fb4f1ab88 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1265,7 +1265,7 @@ repeat:
}
/* We have to do this with page locked to prevent races */
- if (TestSetPageLocked(swappage)) {
+ if (!trylock_page(swappage)) {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
wait_on_page_locked(swappage);
@@ -1329,7 +1329,7 @@ repeat:
shmem_swp_unmap(entry);
filepage = find_get_page(mapping, idx);
if (filepage &&
- (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
+ (!PageUptodate(filepage) || !trylock_page(filepage))) {
spin_unlock(&info->lock);
wait_on_page_locked(filepage);
page_cache_release(filepage);
@@ -1513,7 +1513,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_blocks = 0;
- inode->i_mapping->a_ops = &shmem_aops;
inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = get_seconds();
@@ -1528,6 +1527,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
init_special_inode(inode, mode, dev);
break;
case S_IFREG:
+ inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations;
mpol_shared_policy_init(&info->policy,
@@ -1929,6 +1929,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
return error;
}
unlock_page(page);
+ inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len);
diff --git a/mm/swap.c b/mm/swap.c
index dd89234ee51..9e0cb311807 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -278,9 +278,10 @@ int lru_add_drain_all(void)
* Avoid taking zone->lru_lock if possible, but if it is taken, retain it
* for the remainder of the operation.
*
- * The locking in this function is against shrink_cache(): we recheck the
- * page count inside the lock to see whether shrink_cache grabbed the page
- * via the LRU. If it did, give up: shrink_cache will free it.
+ * The locking in this function is against shrink_inactive_list(): we recheck
+ * the page count inside the lock to see whether shrink_inactive_list()
+ * grabbed the page via the LRU. If it did, give up: shrink_inactive_list()
+ * will free it.
*/
void release_pages(struct page **pages, int nr, int cold)
{
@@ -443,7 +444,7 @@ void pagevec_strip(struct pagevec *pvec)
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- if (PagePrivate(page) && !TestSetPageLocked(page)) {
+ if (PagePrivate(page) && trylock_page(page)) {
if (PagePrivate(page))
try_to_release_page(page, 0);
unlock_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b8035b05512..167cf2dc8a0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -201,7 +201,7 @@ void delete_from_swap_cache(struct page *page)
*/
static inline void free_swap_cache(struct page *page)
{
- if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+ if (PageSwapCache(page) && trylock_page(page)) {
remove_exclusive_swap_page(page);
unlock_page(page);
}
@@ -302,9 +302,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* re-using the just freed swap entry for an existing page.
* May fail (-ENOMEM) if radix-tree node allocation failed.
*/
- SetPageLocked(new_page);
+ set_page_locked(new_page);
err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
- if (!err) {
+ if (likely(!err)) {
/*
* Initiate read into locked page and return.
*/
@@ -312,7 +312,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
swap_readpage(NULL, new_page);
return new_page;
}
- ClearPageLocked(new_page);
+ clear_page_locked(new_page);
swap_free(entry);
} while (err != -ENOMEM);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6beb6251e99..1e330f2998f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -403,7 +403,7 @@ void free_swap_and_cache(swp_entry_t entry)
if (p) {
if (swap_entry_free(p, swp_offset(entry)) == 1) {
page = find_get_page(&swapper_space, entry.val);
- if (page && unlikely(TestSetPageLocked(page))) {
+ if (page && unlikely(!trylock_page(page))) {
page_cache_release(page);
page = NULL;
}
@@ -656,8 +656,8 @@ static int unuse_mm(struct mm_struct *mm,
if (!down_read_trylock(&mm->mmap_sem)) {
/*
- * Activate page so shrink_cache is unlikely to unmap its
- * ptes while lock is dropped, so swapoff can make progress.
+ * Activate page so shrink_inactive_list is unlikely to unmap
+ * its ptes while lock is dropped, so swapoff can make progress.
*/
activate_page(page);
unlock_page(page);
diff --git a/mm/truncate.c b/mm/truncate.c
index e68443d7456..250505091d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -104,7 +104,6 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
cancel_dirty_page(page, PAGE_CACHE_SIZE);
remove_from_page_cache(page);
- ClearPageUptodate(page);
ClearPageMappedToDisk(page);
page_cache_release(page); /* pagecache ref */
}
@@ -188,7 +187,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
if (page_index > next)
next = page_index;
next++;
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
continue;
if (PageWriteback(page)) {
unlock_page(page);
@@ -281,7 +280,7 @@ unsigned long __invalidate_mapping_pages(struct address_space *mapping,
pgoff_t index;
int lock_failed;
- lock_failed = TestSetPageLocked(page);
+ lock_failed = !trylock_page(page);
/*
* We really shouldn't be looking at the ->index of an
@@ -356,7 +355,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
BUG_ON(PagePrivate(page));
__remove_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
- ClearPageUptodate(page);
page_cache_release(page); /* pagecache ref */
return 1;
failed:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f71761bc4b..1ff1a58e7c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -496,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
page = lru_to_page(page_list);
list_del(&page->lru);
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto keep;
VM_BUG_ON(PageActive(page));
@@ -582,7 +582,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* A synchronous write - probably a ramdisk. Go
* ahead and try to reclaim the page.
*/
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto keep;
if (PageDirty(page) || PageWriteback(page))
goto keep_locked;
@@ -1408,7 +1408,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10);
}
- /* top priority shrink_caches still had more to do? don't OOM, then */
+ /* top priority shrink_zones still had more to do? don't OOM, then */
if (!sc->all_unreclaimable && scan_global_lru(sc))
ret = nr_reclaimed;
out:
@@ -1979,7 +1979,7 @@ module_init(kswapd_init)
int zone_reclaim_mode __read_mostly;
#define RECLAIM_OFF 0
-#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */