Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 335
1 file changed, 158 insertions(+), 177 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 4b0144b24c1..0d14d1e58a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/writeback.h>
+#include <linux/memcontrol.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -82,7 +83,18 @@ void * high_memory;
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
-int randomize_va_space __read_mostly = 1;
+/*
+ * Randomize the address space (stacks, mmaps, brk, etc.).
+ *
+ * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization,
+ * as ancient (libc5 based) binaries can segfault. )
+ */
+int randomize_va_space __read_mostly =
+#ifdef CONFIG_COMPAT_BRK
+ 1;
+#else
+ 2;
+#endif
static int __init disable_randmaps(char *s)
{
@@ -122,11 +134,9 @@ void pmd_clear_bad(pmd_t *pmd)
*/
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
- struct page *page = pmd_page(*pmd);
+ pgtable_t token = pmd_pgtable(*pmd);
pmd_clear(pmd);
- pte_lock_deinit(page);
- pte_free_tlb(tlb, page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ pte_free_tlb(tlb, token);
tlb->mm->nr_ptes--;
}
@@ -297,21 +307,19 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
- struct page *new = pte_alloc_one(mm, address);
+ pgtable_t new = pte_alloc_one(mm, address);
if (!new)
return -ENOMEM;
- pte_lock_init(new);
spin_lock(&mm->page_table_lock);
- if (pmd_present(*pmd)) { /* Another has populated it */
- pte_lock_deinit(new);
- pte_free(new);
- } else {
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
mm->nr_ptes++;
- inc_zone_page_state(new, NR_PAGETABLE);
pmd_populate(mm, pmd, new);
+ new = NULL;
}
spin_unlock(&mm->page_table_lock);
+ if (new)
+ pte_free(mm, new);
return 0;
}
@@ -322,11 +330,13 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
return -ENOMEM;
spin_lock(&init_mm.page_table_lock);
- if (pmd_present(*pmd)) /* Another has populated it */
- pte_free_kernel(new);
- else
+ if (!pmd_present(*pmd)) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
spin_unlock(&init_mm.page_table_lock);
+ if (new)
+ pte_free_kernel(&init_mm, new);
return 0;
}
@@ -513,8 +523,7 @@ again:
if (progress >= 32) {
progress = 0;
if (need_resched() ||
- need_lockbreak(src_ptl) ||
- need_lockbreak(dst_ptl))
+ spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
break;
}
if (pte_none(*src_pte)) {
@@ -853,7 +862,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
tlb_finish_mmu(*tlbp, tlb_start, start);
if (need_resched() ||
- (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+ (i_mmap_lock && spin_needbreak(i_mmap_lock))) {
if (i_mmap_lock) {
*tlbp = NULL;
goto out;
@@ -980,6 +989,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
int i;
unsigned int vm_flags;
+ if (len <= 0)
+ return 0;
/*
* Require read or write permissions.
* If 'force' is set, we only require the "MAY" flags.
@@ -1110,7 +1121,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
}
EXPORT_SYMBOL(get_user_pages);
-pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
+ spinlock_t **ptl)
{
pgd_t * pgd = pgd_offset(mm, addr);
pud_t * pud = pud_alloc(mm, pgd, addr);
@@ -1133,16 +1145,20 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
{
int retval;
pte_t *pte;
- spinlock_t *ptl;
+ spinlock_t *ptl;
+
+ retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
+ if (retval)
+ goto out;
retval = -EINVAL;
if (PageAnon(page))
- goto out;
+ goto out_uncharge;
retval = -ENOMEM;
flush_dcache_page(page);
pte = get_locked_pte(mm, addr, &ptl);
if (!pte)
- goto out;
+ goto out_uncharge;
retval = -EBUSY;
if (!pte_none(*pte))
goto out_unlock;
@@ -1154,8 +1170,12 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
set_pte_at(mm, addr, pte, mk_pte(page, prot));
retval = 0;
+ pte_unmap_unlock(pte, ptl);
+ return retval;
out_unlock:
pte_unmap_unlock(pte, ptl);
+out_uncharge:
+ mem_cgroup_uncharge_page(page);
out:
return retval;
}
@@ -1370,7 +1390,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
int err;
- struct page *pmd_page;
+ pgtable_t token;
spinlock_t *uninitialized_var(ptl);
pte = (mm == &init_mm) ?
@@ -1381,10 +1401,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
BUG_ON(pmd_huge(*pmd));
- pmd_page = pmd_page(*pmd);
+ token = pmd_pgtable(*pmd);
do {
- err = fn(pte, pmd_page, addr, data);
+ err = fn(pte, token, addr, data);
if (err)
break;
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1518,10 +1538,8 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
memset(kaddr, 0, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(dst);
- return;
-
- }
- copy_user_highpage(dst, src, va, vma);
+ } else
+ copy_user_highpage(dst, src, va, vma);
}
/*
@@ -1630,6 +1648,10 @@ gotten:
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address, vma);
+ __SetPageUptodate(new_page);
+
+ if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+ goto oom_free_new;
/*
* Re-check the pte - we dropped the lock
@@ -1662,7 +1684,9 @@ gotten:
/* Free the old page.. */
new_page = old_page;
ret |= VM_FAULT_WRITE;
- }
+ } else
+ mem_cgroup_uncharge_page(new_page);
+
if (new_page)
page_cache_release(new_page);
if (old_page)
@@ -1686,6 +1710,8 @@ unlock:
put_page(dirty_page);
}
return ret;
+oom_free_new:
+ page_cache_release(new_page);
oom:
if (old_page)
page_cache_release(old_page);
@@ -1768,8 +1794,7 @@ again:
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
- need_break = need_resched() ||
- need_lockbreak(details->i_mmap_lock);
+ need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
@@ -1911,50 +1936,49 @@ EXPORT_SYMBOL(unmap_mapping_range);
*/
int vmtruncate(struct inode * inode, loff_t offset)
{
- struct address_space *mapping = inode->i_mapping;
- unsigned long limit;
+ if (inode->i_size < offset) {
+ unsigned long limit;
- if (inode->i_size < offset)
- goto do_expand;
- /*
- * truncation of in-use swapfiles is disallowed - it would cause
- * subsequent swapout to scribble on the now-freed blocks.
- */
- if (IS_SWAPFILE(inode))
- goto out_busy;
- i_size_write(inode, offset);
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (limit != RLIM_INFINITY && offset > limit)
+ goto out_sig;
+ if (offset > inode->i_sb->s_maxbytes)
+ goto out_big;
+ i_size_write(inode, offset);
+ } else {
+ struct address_space *mapping = inode->i_mapping;
+
+ /*
+ * truncation of in-use swapfiles is disallowed - it would
+ * cause subsequent swapout to scribble on the now-freed
+ * blocks.
+ */
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+ i_size_write(inode, offset);
+
+ /*
+ * unmap_mapping_range is called twice, first simply for
+ * efficiency so that truncate_inode_pages does fewer
+ * single-page unmaps. However after this first call, and
+ * before truncate_inode_pages finishes, it is possible for
+ * private pages to be COWed, which remain after
+ * truncate_inode_pages finishes, hence the second
+ * unmap_mapping_range call must be made for correctness.
+ */
+ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(mapping, offset);
+ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ }
- /*
- * unmap_mapping_range is called twice, first simply for efficiency
- * so that truncate_inode_pages does fewer single-page unmaps. However
- * after this first call, and before truncate_inode_pages finishes,
- * it is possible for private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second unmap_mapping_range
- * call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- goto out_truncate;
-
-do_expand:
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- i_size_write(inode, offset);
-
-out_truncate:
if (inode->i_op && inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
+
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
-out_busy:
- return -ETXTBSY;
}
EXPORT_SYMBOL(vmtruncate);
@@ -1982,67 +2006,6 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
return 0;
}
-/**
- * swapin_readahead - swap in pages in hope we need them soon
- * @entry: swap entry of this memory
- * @addr: address to start
- * @vma: user vma this addresses belong to
- *
- * Primitive swap readahead code. We simply read an aligned block of
- * (1 << page_cluster) entries in the swap area. This method is chosen
- * because it doesn't cost us any seek time. We also make sure to queue
- * the 'original' request together with the readahead ones...
- *
- * This has been extended to use the NUMA policies from the mm triggering
- * the readahead.
- *
- * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
- */
-void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
-{
-#ifdef CONFIG_NUMA
- struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
-#endif
- int i, num;
- struct page *new_page;
- unsigned long offset;
-
- /*
- * Get the number of handles we should do readahead io to.
- */
- num = valid_swaphandles(entry, &offset);
- for (i = 0; i < num; offset++, i++) {
- /* Ok, do the async read-ahead now */
- new_page = read_swap_cache_async(swp_entry(swp_type(entry),
- offset), vma, addr);
- if (!new_page)
- break;
- page_cache_release(new_page);
-#ifdef CONFIG_NUMA
- /*
- * Find the next applicable VMA for the NUMA policy.
- */
- addr += PAGE_SIZE;
- if (addr == 0)
- vma = NULL;
- if (vma) {
- if (addr >= vma->vm_end) {
- vma = next_vma;
- next_vma = vma ? vma->vm_next : NULL;
- }
- if (vma && addr < vma->vm_start)
- vma = NULL;
- } else {
- if (next_vma && addr >= next_vma->vm_start) {
- vma = next_vma;
- next_vma = vma->vm_next;
- }
- }
-#endif
- }
- lru_add_drain(); /* Push any new pages onto the LRU now */
-}
-
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
@@ -2070,8 +2033,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = lookup_swap_cache(entry);
if (!page) {
grab_swap_token(); /* Contend for token _before_ read-in */
- swapin_readahead(entry, address, vma);
- page = read_swap_cache_async(entry, vma, address);
+ page = swapin_readahead(entry,
+ GFP_HIGHUSER_MOVABLE, vma, address);
if (!page) {
/*
* Back out if somebody else faulted in this pte
@@ -2089,6 +2052,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGMAJFAULT);
}
+ if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+
mark_page_accessed(page);
lock_page(page);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2124,10 +2093,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unlock_page(page);
if (write_access) {
- /* XXX: We could OR the do_wp_page code with this one? */
- if (do_wp_page(mm, vma, address,
- page_table, pmd, ptl, pte) & VM_FAULT_OOM)
- ret = VM_FAULT_OOM;
+ ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
+ if (ret & VM_FAULT_ERROR)
+ ret &= VM_FAULT_ERROR;
goto out;
}
@@ -2138,6 +2106,7 @@ unlock:
out:
return ret;
out_nomap:
+ mem_cgroup_uncharge_page(page);
pte_unmap_unlock(page_table, ptl);
unlock_page(page);
page_cache_release(page);
@@ -2165,6 +2134,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = alloc_zeroed_user_highpage_movable(vma, address);
if (!page)
goto oom;
+ __SetPageUptodate(page);
+
+ if (mem_cgroup_charge(page, mm, GFP_KERNEL))
+ goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2183,8 +2156,11 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
+ mem_cgroup_uncharge_page(page);
page_cache_release(page);
goto unlock;
+oom_free_page:
+ page_cache_release(page);
oom:
return VM_FAULT_OOM;
}
@@ -2265,6 +2241,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto out;
}
copy_user_highpage(page, vmf.page, address, vma);
+ __SetPageUptodate(page);
} else {
/*
* If the page will be shareable, see if the backing
@@ -2297,6 +2274,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
+ if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/*
@@ -2332,6 +2314,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, address, entry);
} else {
+ mem_cgroup_uncharge_page(page);
if (anon)
page_cache_release(page);
else
@@ -2565,7 +2548,7 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
spin_lock(&mm->page_table_lock);
if (pgd_present(*pgd)) /* Another has populated it */
- pud_free(new);
+ pud_free(mm, new);
else
pgd_populate(mm, pgd, new);
spin_unlock(&mm->page_table_lock);
@@ -2587,12 +2570,12 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
spin_lock(&mm->page_table_lock);
#ifndef __ARCH_HAS_4LEVEL_HACK
if (pud_present(*pud)) /* Another has populated it */
- pmd_free(new);
+ pmd_free(mm, new);
else
pud_populate(mm, pud, new);
#else
if (pgd_present(*pud)) /* Another has populated it */
- pmd_free(new);
+ pmd_free(mm, new);
else
pgd_populate(mm, pud, new);
#endif /* __ARCH_HAS_4LEVEL_HACK */
@@ -2620,46 +2603,6 @@ int make_pages_present(unsigned long addr, unsigned long end)
return ret == len ? 0 : -1;
}
-/*
- * Map a vmalloc()-space virtual address to the physical page.
- */
-struct page * vmalloc_to_page(void * vmalloc_addr)
-{
- unsigned long addr = (unsigned long) vmalloc_addr;
- struct page *page = NULL;
- pgd_t *pgd = pgd_offset_k(addr);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *ptep, pte;
-
- if (!pgd_none(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_map(pmd, addr);
- pte = *ptep;
- if (pte_present(pte))
- page = pte_page(pte);
- pte_unmap(ptep);
- }
- }
- }
- return page;
-}
-
-EXPORT_SYMBOL(vmalloc_to_page);
-
-/*
- * Map a vmalloc()-space virtual address to the physical page frame number.
- */
-unsigned long vmalloc_to_pfn(void * vmalloc_addr)
-{
- return page_to_pfn(vmalloc_to_page(vmalloc_addr));
-}
-
-EXPORT_SYMBOL(vmalloc_to_pfn);
-
#if !defined(__HAVE_ARCH_GATE_AREA)
#if defined(AT_SYSINFO_EHDR)
@@ -2756,3 +2699,41 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
return buf - old_buf;
}
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ /*
+ * Do not print if we are in atomic
+ * contexts (in exception stacks, etc.):
+ */
+ if (preempt_count())
+ return;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma && vma->vm_file) {
+ struct file *f = vma->vm_file;
+ char *buf = (char *)__get_free_page(GFP_KERNEL);
+ if (buf) {
+ char *p, *s;
+
+ p = d_path(&f->f_path, buf, PAGE_SIZE);
+ if (IS_ERR(p))
+ p = "?";
+ s = strrchr(p, '/');
+ if (s)
+ p = s+1;
+ printk("%s%s[%lx+%lx]", prefix, p,
+ vma->vm_start,
+ vma->vm_end - vma->vm_start);
+ free_page((unsigned long)buf);
+ }
+ }
+ up_read(&current->mm->mmap_sem);
+}