aboutsummaryrefslogtreecommitdiff
path: root/mm/swapfile.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c113
1 files changed, 76 insertions, 37 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f071648e136..eade24da931 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free)
* just let do_wp_page work it out if a write is requested later - to
* force COW, vm_page_prot omits write permission from any private vma.
*/
-static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, swp_entry_t entry, struct page *page)
{
+ spinlock_t *ptl;
+ pte_t *pte;
+ int found = 1;
+
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+ found = 0;
+ goto out;
+ }
+
inc_mm_counter(vma->vm_mm, anon_rss);
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
@@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
* immediately swapped out again after swapon.
*/
activate_page(page);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return found;
}
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
{
pte_t swp_pte = swp_entry_to_pte(entry);
pte_t *pte;
- spinlock_t *ptl;
int found = 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ /*
+ * We don't actually need pte lock while scanning for swp_pte: since
+ * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
+ * page table while we're scanning; though it could get zapped, and on
+ * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
+ * of unmatched parts which look like swp_pte, so unuse_pte must
+ * recheck under pte lock. Scanning without pte lock lets it be
+ * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
+ */
+ pte = pte_offset_map(pmd, addr);
do {
/*
* swapoff spends a _lot_ of time in this loop!
* Test inline before going to call unuse_pte.
*/
if (unlikely(pte_same(*pte, swp_pte))) {
- unuse_pte(vma, pte++, addr, entry, page);
- found = 1;
- break;
+ pte_unmap(pte);
+ found = unuse_pte(vma, pmd, addr, entry, page);
+ if (found)
+ goto out;
+ pte = pte_offset_map(pmd, addr);
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(pte - 1, ptl);
+ pte_unmap(pte - 1);
+out:
return found;
}
@@ -730,7 +754,8 @@ static int try_to_unuse(unsigned int type)
*/
swap_map = &si->swap_map[i];
entry = swp_entry(type, i);
- page = read_swap_cache_async(entry, NULL, 0);
+ page = read_swap_cache_async(entry,
+ GFP_HIGHUSER_MOVABLE, NULL, 0);
if (!page) {
/*
* Either swap_duplicate() failed because entry
@@ -789,7 +814,7 @@ static int try_to_unuse(unsigned int type)
atomic_inc(&new_start_mm->mm_users);
atomic_inc(&prev_mm->mm_users);
spin_lock(&mmlist_lock);
- while (*swap_map > 1 && !retval &&
+ while (*swap_map > 1 && !retval && !shmem &&
(p = p->next) != &start_mm->mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
if (!atomic_inc_not_zero(&mm->mm_users))
@@ -821,6 +846,13 @@ static int try_to_unuse(unsigned int type)
mmput(start_mm);
start_mm = new_start_mm;
}
+ if (shmem) {
+ /* page has already been unlocked and released */
+ if (shmem > 0)
+ continue;
+ retval = shmem;
+ break;
+ }
if (retval) {
unlock_page(page);
page_cache_release(page);
@@ -859,12 +891,6 @@ static int try_to_unuse(unsigned int type)
* read from disk into another page. Splitting into two
* pages would be incorrect if swap supported "shared
* private" pages, but they are handled by tmpfs files.
- *
- * Note shmem_unuse already deleted a swappage from
- * the swap cache, unless the move to filepage failed:
- * in which case it left swappage in cache, lowered its
- * swap count to pass quickly through the loops above,
- * and now we must reincrement count to try again later.
*/
if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
struct writeback_control wbc = {
@@ -875,12 +901,8 @@ static int try_to_unuse(unsigned int type)
lock_page(page);
wait_on_page_writeback(page);
}
- if (PageSwapCache(page)) {
- if (shmem)
- swap_duplicate(entry);
- else
- delete_from_swap_cache(page);
- }
+ if (PageSwapCache(page))
+ delete_from_swap_cache(page);
/*
* So we could skip searching mms once swap count went
@@ -1768,31 +1790,48 @@ get_swap_info_struct(unsigned type)
*/
int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
{
+ struct swap_info_struct *si;
int our_page_cluster = page_cluster;
- int ret = 0, i = 1 << our_page_cluster;
- unsigned long toff;
- struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
+ pgoff_t target, toff;
+ pgoff_t base, end;
+ int nr_pages = 0;
if (!our_page_cluster) /* no readahead */
return 0;
- toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
- if (!toff) /* first page is swap header */
- toff++, i--;
- *offset = toff;
+
+ si = &swap_info[swp_type(entry)];
+ target = swp_offset(entry);
+ base = (target >> our_page_cluster) << our_page_cluster;
+ end = base + (1 << our_page_cluster);
+ if (!base) /* first page is swap header */
+ base++;
spin_lock(&swap_lock);
- do {
- /* Don't read-ahead past the end of the swap area */
- if (toff >= swapdev->max)
+ if (end > si->max) /* don't go beyond end of map */
+ end = si->max;
+
+ /* Count contiguous allocated slots above our target */
+ for (toff = target; ++toff < end; nr_pages++) {
+ /* Don't read in free or bad pages */
+ if (!si->swap_map[toff])
+ break;
+ if (si->swap_map[toff] == SWAP_MAP_BAD)
break;
+ }
+ /* Count contiguous allocated slots below our target */
+ for (toff = target; --toff >= base; nr_pages++) {
/* Don't read in free or bad pages */
- if (!swapdev->swap_map[toff])
+ if (!si->swap_map[toff])
break;
- if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
+ if (si->swap_map[toff] == SWAP_MAP_BAD)
break;
- toff++;
- ret++;
- } while (--i);
+ }
spin_unlock(&swap_lock);
- return ret;
+
+ /*
+ * Indicate starting offset, and return number of pages to get:
+ * if only 1, say 0, since there's then no readahead to be done.
+ */
+ *offset = ++toff;
+ return nr_pages? ++nr_pages: 0;
}