aboutsummaryrefslogtreecommitdiff
path: root/mm/migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c212
1 files changed, 116 insertions, 96 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 385db89f0c3..2bb4e1d6352 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
goto out;
- /*
- * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
- * Failure is not an option here: we're now expected to remove every
- * migration pte, and will cause crashes otherwise. Normally this
- * is not an issue: mem_cgroup_prepare_migration bumped up the old
- * page_cgroup count for safety, that's now attached to the new page,
- * so this charge should just be another incrementation of the count,
- * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
- * there's been a force_empty, those reference counts may no longer
- * be reliable, and this charge can actually fail: oh well, we don't
- * make the situation any worse by proceeding as if it had succeeded.
- */
- mem_cgroup_charge(new, mm, GFP_ATOMIC);
-
get_page(new);
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
if (is_write_migration_entry(entry))
@@ -300,12 +286,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
* Now we know that no one else is looking at the page.
*/
get_page(newpage); /* add cache reference */
-#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
-#endif
radix_tree_replace_slot(pslot, newpage);
@@ -373,18 +357,13 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
mlock_migrate_page(newpage, page);
-#ifdef CONFIG_SWAP
ClearPageSwapCache(page);
-#endif
ClearPagePrivate(page);
set_page_private(page, 0);
/* page->mapping contains a flag for PageAnon() */
anon = PageAnon(page);
page->mapping = NULL;
- if (!anon) /* This page was removed from radix-tree. */
- mem_cgroup_uncharge_cache_page(page);
-
/*
* If any waiters have accumulated on the new page then
* wake them up.
@@ -522,15 +501,12 @@ static int writeout(struct address_space *mapping, struct page *page)
remove_migration_ptes(page, page);
rc = mapping->a_ops->writepage(page, &wbc);
- if (rc < 0)
- /* I/O Error writing */
- return -EIO;
if (rc != AOP_WRITEPAGE_ACTIVATE)
/* unlocked. Relock */
lock_page(page);
- return -EAGAIN;
+ return (rc < 0) ? -EIO : -EAGAIN;
}
/*
@@ -621,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
struct page *newpage = get_new_page(page, private, &result);
int rcu_locked = 0;
int charge = 0;
+ struct mem_cgroup *mem;
if (!newpage)
return -ENOMEM;
@@ -630,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
goto move_newpage;
}
- charge = mem_cgroup_prepare_migration(page, newpage);
- if (charge == -ENOMEM) {
- rc = -ENOMEM;
- goto move_newpage;
- }
/* prepare cgroup just returns 0 or -ENOMEM */
- BUG_ON(charge);
-
rc = -EAGAIN;
+
if (!trylock_page(page)) {
if (!force)
goto move_newpage;
lock_page(page);
}
+ /* charge against new page */
+ charge = mem_cgroup_prepare_migration(page, &mem);
+ if (charge == -ENOMEM) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+ BUG_ON(charge);
+
if (PageWriteback(page)) {
if (!force)
- goto unlock;
+ goto uncharge;
wait_on_page_writeback(page);
}
/*
@@ -700,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
rcu_unlock:
if (rcu_locked)
rcu_read_unlock();
-
+uncharge:
+ if (!charge)
+ mem_cgroup_end_migration(mem, page, newpage);
unlock:
unlock_page(page);
@@ -716,8 +697,6 @@ unlock:
}
move_newpage:
- if (!charge)
- mem_cgroup_end_migration(newpage);
/*
* Move the new page to the LRU. If migration was not successful
@@ -851,12 +830,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
struct vm_area_struct *vma;
struct page *page;
- /*
- * A valid page pointer that will not match any of the
- * pages that will be moved.
- */
- pp->page = ZERO_PAGE(0);
-
err = -EFAULT;
vma = find_vma(mm, pp->addr);
if (!vma || !vma_migratable(vma))
@@ -922,41 +895,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
const int __user *nodes,
int __user *status, int flags)
{
- struct page_to_node *pm = NULL;
+ struct page_to_node *pm;
nodemask_t task_nodes;
- int err = 0;
- int i;
+ unsigned long chunk_nr_pages;
+ unsigned long chunk_start;
+ int err;
task_nodes = cpuset_mems_allowed(task);
- /* Limit nr_pages so that the multiplication may not overflow */
- if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
- err = -E2BIG;
+ err = -ENOMEM;
+ pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
+ if (!pm)
goto out;
- }
-
- pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
- if (!pm) {
- err = -ENOMEM;
- goto out;
- }
-
/*
- * Get parameters from user space and initialize the pm
- * array. Return various errors if the user did something wrong.
+ * Store a chunk of page_to_node array in a page,
+ * but keep the last one as a marker
*/
- for (i = 0; i < nr_pages; i++) {
- const void __user *p;
+ chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
- err = -EFAULT;
- if (get_user(p, pages + i))
- goto out_pm;
+ for (chunk_start = 0;
+ chunk_start < nr_pages;
+ chunk_start += chunk_nr_pages) {
+ int j;
+
+ if (chunk_start + chunk_nr_pages > nr_pages)
+ chunk_nr_pages = nr_pages - chunk_start;
- pm[i].addr = (unsigned long)p;
- if (nodes) {
+ /* fill the chunk pm with addrs and nodes from user-space */
+ for (j = 0; j < chunk_nr_pages; j++) {
+ const void __user *p;
int node;
- if (get_user(node, nodes + i))
+ err = -EFAULT;
+ if (get_user(p, pages + j + chunk_start))
+ goto out_pm;
+ pm[j].addr = (unsigned long) p;
+
+ if (get_user(node, nodes + j + chunk_start))
goto out_pm;
err = -ENODEV;
@@ -967,22 +942,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
if (!node_isset(node, task_nodes))
goto out_pm;
- pm[i].node = node;
- } else
- pm[i].node = 0; /* anything to not match MAX_NUMNODES */
- }
- /* End marker */
- pm[nr_pages].node = MAX_NUMNODES;
+ pm[j].node = node;
+ }
+
+ /* End marker for this chunk */
+ pm[chunk_nr_pages].node = MAX_NUMNODES;
+
+ /* Migrate this chunk */
+ err = do_move_page_to_node_array(mm, pm,
+ flags & MPOL_MF_MOVE_ALL);
+ if (err < 0)
+ goto out_pm;
- err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
- if (err >= 0)
/* Return status information */
- for (i = 0; i < nr_pages; i++)
- if (put_user(pm[i].status, status + i))
+ for (j = 0; j < chunk_nr_pages; j++)
+ if (put_user(pm[j].status, status + j + chunk_start)) {
err = -EFAULT;
+ goto out_pm;
+ }
+ }
+ err = 0;
out_pm:
- vfree(pm);
+ free_page((unsigned long)pm);
out:
return err;
}
@@ -990,25 +972,18 @@ out:
/*
* Determine the nodes of an array of pages and store it in an array of status.
*/
-static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
- const void __user * __user *pages,
- int __user *status)
+static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user **pages, int *status)
{
unsigned long i;
- int err;
down_read(&mm->mmap_sem);
for (i = 0; i < nr_pages; i++) {
- const void __user *p;
- unsigned long addr;
+ unsigned long addr = (unsigned long)(*pages);
struct vm_area_struct *vma;
struct page *page;
-
- err = -EFAULT;
- if (get_user(p, pages+i))
- goto out;
- addr = (unsigned long) p;
+ int err = -EFAULT;
vma = find_vma(mm, addr);
if (!vma)
@@ -1027,12 +1002,52 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
err = page_to_nid(page);
set_status:
- put_user(err, status+i);
+ *status = err;
+
+ pages++;
+ status++;
+ }
+
+ up_read(&mm->mmap_sem);
+}
+
+/*
+ * Determine the nodes of a user array of pages and store it in
+ * a user array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user * __user *pages,
+ int __user *status)
+{
+#define DO_PAGES_STAT_CHUNK_NR 16
+ const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
+ int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+ unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+ int err;
+
+ for (i = 0; i < nr_pages; i += chunk_nr) {
+ if (chunk_nr + i > nr_pages)
+ chunk_nr = nr_pages - i;
+
+ err = copy_from_user(chunk_pages, &pages[i],
+ chunk_nr * sizeof(*chunk_pages));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
+
+ err = copy_to_user(&status[i], chunk_status,
+ chunk_nr * sizeof(*chunk_status));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
}
err = 0;
out:
- up_read(&mm->mmap_sem);
return err;
}
@@ -1040,11 +1055,12 @@ out:
* Move a list of pages in the address space of the currently executing
* process.
*/
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
- const void __user * __user *pages,
- const int __user *nodes,
- int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+ const void __user * __user *, pages,
+ const int __user *, nodes,
+ int __user *, status, int, flags)
{
+ const struct cred *cred = current_cred(), *tcred;
struct task_struct *task;
struct mm_struct *mm;
int err;
@@ -1075,12 +1091,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- if ((current->euid != task->suid) && (current->euid != task->uid) &&
- (current->uid != task->suid) && (current->uid != task->uid) &&
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_movememory(task);
if (err)