aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c36
-rw-r--r--mm/filemap.h26
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/migrate.c30
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/pdflush.c15
-rw-r--r--mm/readahead.c16
-rw-r--r--mm/rmap.c9
8 files changed, 95 insertions, 46 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 807a463fd5e..9c7334bafda 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -828,6 +828,32 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
}
EXPORT_SYMBOL(grab_cache_page_nowait);
+/*
+ * CD/DVDs are error prone. When a medium error occurs, the driver may fail
+ * a _large_ part of the i/o request. Imagine the worst scenario:
+ *
+ * ---R__________________________________________B__________
+ * ^ reading here ^ bad block(assume 4k)
+ *
+ * read(R) => miss => readahead(R...B) => media error => frustrating retries
+ * => failing the whole request => read(R) => read(R+1) =>
+ * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
+ * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
+ * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
+ *
+ * It is going insane. Fix it by quickly scaling down the readahead size.
+ */
+static void shrink_readahead_size_eio(struct file *filp,
+ struct file_ra_state *ra)
+{
+ if (!ra->ra_pages)
+ return;
+
+ ra->ra_pages /= 4;
+ printk(KERN_WARNING "Reducing readahead size to %luK\n",
+ ra->ra_pages << (PAGE_CACHE_SHIFT - 10));
+}
+
/**
* do_generic_mapping_read - generic file read routine
* @mapping: address_space to be read
@@ -985,6 +1011,7 @@ readpage:
}
unlock_page(page);
error = -EIO;
+ shrink_readahead_size_eio(filp, &ra);
goto readpage_error;
}
unlock_page(page);
@@ -1522,6 +1549,7 @@ page_not_uptodate:
* Things didn't work out. Return zero to tell the
* mm layer so, possibly freeing the page cache page first.
*/
+ shrink_readahead_size_eio(file, ra);
page_cache_release(page);
return NULL;
}
@@ -1892,7 +1920,7 @@ int remove_suid(struct dentry *dentry)
EXPORT_SYMBOL(remove_suid);
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
size_t copied = 0, left = 0;
@@ -1908,12 +1936,8 @@ __filemap_copy_from_user_iovec(char *vaddr,
vaddr += copy;
iov++;
- if (unlikely(left)) {
- /* zero the rest of the target like __copy_from_user */
- if (bytes)
- memset(vaddr, 0, bytes);
+ if (unlikely(left))
break;
- }
}
return copied - left;
}
diff --git a/mm/filemap.h b/mm/filemap.h
index 5683cde2205..536979fb4ba 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -16,15 +16,23 @@
#include <linux/uaccess.h>
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
- const struct iovec *iov,
- size_t base,
- size_t bytes);
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+ const struct iovec *iov,
+ size_t base,
+ size_t bytes);
/*
* Copy as much as we can into the page and return the number of bytes which
* were sucessfully copied. If a fault is encountered then clear the page
* out to (offset+bytes) and return the number of bytes which were copied.
+ *
+ * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache
+ * to *NOT* zero any tail of the buffer that it failed to copy. If it does,
+ * and if the following non-atomic copy succeeds, then there is a small window
+ * where the target page contains neither the data before the write, nor the
+ * data after the write (it contains zero). A read at this time will see
+ * data that is inconsistent with any ordering of the read and the write.
+ * (This has been detected in practice).
*/
static inline size_t
filemap_copy_from_user(struct page *page, unsigned long offset,
@@ -60,13 +68,15 @@ filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
size_t copied;
kaddr = kmap_atomic(page, KM_USER0);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
kunmap_atomic(kaddr, KM_USER0);
if (copied != bytes) {
kaddr = kmap(page);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
+ if (bytes - copied)
+ memset(kaddr + offset + copied, 0, bytes - copied);
kunmap(page);
}
return copied;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df..73e0f23b7f5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -632,6 +632,10 @@ int do_migrate_pages(struct mm_struct *mm,
down_read(&mm->mmap_sem);
+ err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+ if (err)
+ goto out;
+
/*
* Find a 'source' bit set in 'tmp' whose corresponding 'dest'
* bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +695,7 @@ int do_migrate_pages(struct mm_struct *mm,
if (err < 0)
break;
}
-
+out:
up_read(&mm->mmap_sem);
if (err < 0)
return err;
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c2a71aa05c..3f1e0c2c942 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -616,15 +616,13 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
/*
* Establish migration ptes or remove ptes
*/
- if (try_to_unmap(page, 1) != SWAP_FAIL) {
- if (!page_mapped(page))
- rc = move_to_new_page(newpage, page);
- } else
- /* A vma has VM_LOCKED set -> permanent failure */
- rc = -EPERM;
+ try_to_unmap(page, 1);
+ if (!page_mapped(page))
+ rc = move_to_new_page(newpage, page);
if (rc)
remove_migration_ptes(page, page);
+
unlock:
unlock_page(page);
@@ -976,3 +974,23 @@ out2:
}
#endif
+/*
+ * Call migration functions in the vma_ops that may prepare
+ * memory in a vm for migration. migration functions may perform
+ * the migration for vmas that do not have an underlying page struct.
+ */
+int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
+ const nodemask_t *from, unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ int err = 0;
+
+ for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+ if (vma->vm_ops && vma->vm_ops->migrate) {
+ err = vma->vm_ops->migrate(vma, to, from, flags);
+ if (err)
+ break;
+ }
+ }
+ return err;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 423db0db7c0..6c1174fcf52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -957,8 +957,7 @@ restart:
goto got_pg;
do {
- if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
- wakeup_kswapd(*z, order);
+ wakeup_kswapd(*z, order);
} while (*(++z));
/*
diff --git a/mm/pdflush.c b/mm/pdflush.c
index df7e50b8f70..b02102feeb4 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -104,21 +104,20 @@ static int __pdflush(struct pdflush_work *my_work)
list_move(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
spin_unlock_irq(&pdflush_lock);
-
schedule();
- if (try_to_freeze()) {
- spin_lock_irq(&pdflush_lock);
- continue;
- }
-
+ try_to_freeze();
spin_lock_irq(&pdflush_lock);
if (!list_empty(&my_work->list)) {
- printk("pdflush: bogus wakeup!\n");
+ /*
+ * Someone woke us up, but without removing our control
+ * structure from the global list. swsusp will do this
+ * in try_to_freeze()->refrigerator(). Handle it.
+ */
my_work->fn = NULL;
continue;
}
if (my_work->fn == NULL) {
- printk("pdflush: NULL work function\n");
+ printk("pdflush: bogus wakeup\n");
continue;
}
spin_unlock_irq(&pdflush_lock);
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f142a40984..e39e416860d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -118,8 +118,7 @@ static inline unsigned long get_next_ra_size(struct file_ra_state *ra)
#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
/**
- * read_cache_pages - populate an address space with some pages, and
- * start reads against them.
+ * read_cache_pages - populate an address space with some pages & start reads against them
* @mapping: the address_space
* @pages: The address of a list_head which contains the target pages. These
* pages have their ->index populated and are otherwise uninitialised.
@@ -182,14 +181,11 @@ static int read_pages(struct address_space *mapping, struct file *filp,
list_del(&page->lru);
if (!add_to_page_cache(page, mapping,
page->index, GFP_KERNEL)) {
- ret = mapping->a_ops->readpage(filp, page);
- if (ret != AOP_TRUNCATED_PAGE) {
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- continue;
- } /* else fall through to release */
- }
- page_cache_release(page);
+ mapping->a_ops->readpage(filp, page);
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add(&lru_pvec);
+ } else
+ page_cache_release(page);
}
pagevec_lru_add(&lru_pvec);
ret = 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 882a85826bb..e76909e880c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -562,9 +562,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
- if ((vma->vm_flags & VM_LOCKED) ||
- (ptep_clear_flush_young(vma, address, pte)
- && !migration)) {
+ if (!migration && ((vma->vm_flags & VM_LOCKED) ||
+ (ptep_clear_flush_young(vma, address, pte)))) {
ret = SWAP_FAIL;
goto out_unmap;
}
@@ -771,7 +770,7 @@ static int try_to_unmap_file(struct page *page, int migration)
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !migration)
continue;
cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor)
@@ -805,7 +804,7 @@ static int try_to_unmap_file(struct page *page, int migration)
do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !migration)
continue;
cursor = (unsigned long) vma->vm_private_data;
while ( cursor < max_nl_cursor &&