aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/mempolicy.c132
-rw-r--r--mm/nommu.c8
-rw-r--r--mm/oom_kill.c5
-rw-r--r--mm/rmap.c18
-rw-r--r--mm/slab.c57
-rw-r--r--mm/swap.c25
6 files changed, 182 insertions, 63 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 67af4cea1e2..954981b1430 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
return policy;
}
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
if (flags & MPOL_MF_STATS)
- gather_stats(page, private);
+ gather_stats(page, private, pte_dirty(*pte));
else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
migrate_page_add(page, private, flags);
else
@@ -954,7 +954,8 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
goto out;
}
- err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+ err = do_migrate_pages(mm, &old, &new,
+ capable(CAP_SYS_ADMIN) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
out:
mmput(mm);
return err;
@@ -1752,66 +1753,145 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
struct numa_maps {
unsigned long pages;
unsigned long anon;
- unsigned long mapped;
+ unsigned long active;
+ unsigned long writeback;
unsigned long mapcount_max;
+ unsigned long dirty;
+ unsigned long swapcache;
unsigned long node[MAX_NUMNODES];
};
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty)
{
struct numa_maps *md = private;
int count = page_mapcount(page);
- if (count)
- md->mapped++;
+ md->pages++;
+ if (pte_dirty || PageDirty(page))
+ md->dirty++;
- if (count > md->mapcount_max)
- md->mapcount_max = count;
+ if (PageSwapCache(page))
+ md->swapcache++;
- md->pages++;
+ if (PageActive(page))
+ md->active++;
+
+ if (PageWriteback(page))
+ md->writeback++;
if (PageAnon(page))
md->anon++;
+ if (count > md->mapcount_max)
+ md->mapcount_max = count;
+
md->node[page_to_nid(page)]++;
cond_resched();
}
+#ifdef CONFIG_HUGETLB_PAGE
+static void check_huge_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct numa_maps *md)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = start; addr < end; addr += HPAGE_SIZE) {
+ pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+ pte_t pte;
+
+ if (!ptep)
+ continue;
+
+ pte = *ptep;
+ if (pte_none(pte))
+ continue;
+
+ page = pte_page(pte);
+ if (!page)
+ continue;
+
+ gather_stats(page, md, pte_dirty(*ptep));
+ }
+}
+#else
+static inline void check_huge_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct numa_maps *md)
+{
+}
+#endif
+
int show_numa_map(struct seq_file *m, void *v)
{
struct task_struct *task = m->private;
struct vm_area_struct *vma = v;
struct numa_maps *md;
+ struct file *file = vma->vm_file;
+ struct mm_struct *mm = vma->vm_mm;
int n;
char buffer[50];
- if (!vma->vm_mm)
+ if (!mm)
return 0;
md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
if (!md)
return 0;
- check_pgd_range(vma, vma->vm_start, vma->vm_end,
- &node_online_map, MPOL_MF_STATS, md);
+ mpol_to_str(buffer, sizeof(buffer),
+ get_vma_policy(task, vma, vma->vm_start));
- if (md->pages) {
- mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(task, vma, vma->vm_start));
+ seq_printf(m, "%08lx %s", vma->vm_start, buffer);
- seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
- vma->vm_start, buffer, md->pages,
- md->mapped, md->mapcount_max);
+ if (file) {
+ seq_printf(m, " file=");
+ seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= ");
+ } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+ seq_printf(m, " heap");
+ } else if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ seq_printf(m, " stack");
+ }
- if (md->anon)
- seq_printf(m," anon=%lu",md->anon);
+ if (is_vm_hugetlb_page(vma)) {
+ check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+ seq_printf(m, " huge");
+ } else {
+ check_pgd_range(vma, vma->vm_start, vma->vm_end,
+ &node_online_map, MPOL_MF_STATS, md);
+ }
- for_each_online_node(n)
- if (md->node[n])
- seq_printf(m, " N%d=%lu", n, md->node[n]);
+ if (!md->pages)
+ goto out;
- seq_putc(m, '\n');
- }
+ if (md->anon)
+ seq_printf(m," anon=%lu",md->anon);
+
+ if (md->dirty)
+ seq_printf(m," dirty=%lu",md->dirty);
+
+ if (md->pages != md->anon && md->pages != md->dirty)
+ seq_printf(m, " mapped=%lu", md->pages);
+
+ if (md->mapcount_max > 1)
+ seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+ if (md->swapcache)
+ seq_printf(m," swapcache=%lu", md->swapcache);
+
+ if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+ seq_printf(m," active=%lu", md->active);
+
+ if (md->writeback)
+ seq_printf(m," writeback=%lu", md->writeback);
+
+ for_each_online_node(n)
+ if (md->node[n])
+ seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+ seq_putc(m, '\n');
kfree(md);
if (m->count < m->size)
diff --git a/mm/nommu.c b/mm/nommu.c
index 99d21020ec9..4951f4786f2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -53,7 +53,6 @@ DECLARE_RWSEM(nommu_vma_sem);
struct vm_operations_struct generic_file_vm_ops = {
};
-EXPORT_SYMBOL(vmalloc);
EXPORT_SYMBOL(vfree);
EXPORT_SYMBOL(vmalloc_to_page);
EXPORT_SYMBOL(vmalloc_32);
@@ -205,6 +204,13 @@ void *vmalloc(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
+EXPORT_SYMBOL(vmalloc);
+
+void *vmalloc_node(unsigned long size, int node)
+{
+ return vmalloc(size);
+}
+EXPORT_SYMBOL(vmalloc_node);
/*
* vmalloc_32 - allocate virtually continguos memory (32bit addressable)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8123fad5a48..78747afad6b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -302,7 +302,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
{
struct mm_struct *mm = NULL;
task_t *p;
- unsigned long points;
+ unsigned long points = 0;
if (printk_ratelimit()) {
printk("oom-killer: gfp_mask=0x%x, order=%d\n",
@@ -355,6 +355,7 @@ retry:
}
out:
+ read_unlock(&tasklist_lock);
cpuset_unlock();
if (mm)
mmput(mm);
@@ -364,5 +365,5 @@ out:
* retry to allocate memory unless "p" is current
*/
if (!test_thread_flag(TIF_MEMDIE))
- schedule_timeout_interruptible(1);
+ schedule_timeout_uninterruptible(1);
}
diff --git a/mm/rmap.c b/mm/rmap.c
index df2c41c2a9a..d8ce5ff6145 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -212,25 +212,33 @@ out:
* through real pte's pointing to valid pages and then releasing
* the page from the swap cache.
*
- * Must hold page lock on page.
+ * Must hold page lock on page and mmap_sem of one vma that contains
+ * the page.
*/
void remove_from_swap(struct page *page)
{
struct anon_vma *anon_vma;
struct vm_area_struct *vma;
+ unsigned long mapping;
- if (!PageAnon(page) || !PageSwapCache(page))
+ if (!PageSwapCache(page))
return;
- anon_vma = page_lock_anon_vma(page);
- if (!anon_vma)
+ mapping = (unsigned long)page->mapping;
+
+ if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
return;
+ /*
+ * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
+ */
+ anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+ spin_lock(&anon_vma->lock);
+
list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
remove_vma_swap(vma, page);
spin_unlock(&anon_vma->lock);
-
delete_from_swap_cache(page);
}
EXPORT_SYMBOL(remove_from_swap);
diff --git a/mm/slab.c b/mm/slab.c
index add05d808a4..61800b88e24 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1124,6 +1124,7 @@ void __init kmem_cache_init(void)
struct cache_sizes *sizes;
struct cache_names *names;
int i;
+ int order;
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
@@ -1167,11 +1168,15 @@ void __init kmem_cache_init(void)
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
- cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0,
- &left_over, &cache_cache.num);
+ for (order = 0; order < MAX_ORDER; order++) {
+ cache_estimate(order, cache_cache.buffer_size,
+ cache_line_size(), 0, &left_over, &cache_cache.num);
+ if (cache_cache.num)
+ break;
+ }
if (!cache_cache.num)
BUG();
-
+ cache_cache.gfporder = order;
cache_cache.colour = left_over / cache_cache.colour_off;
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
sizeof(struct slab), cache_line_size());
@@ -1628,36 +1633,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
size_t size, size_t align, unsigned long flags)
{
size_t left_over = 0;
+ int gfporder;
- for (;; cachep->gfporder++) {
+ for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
unsigned int num;
size_t remainder;
- if (cachep->gfporder > MAX_GFP_ORDER) {
- cachep->num = 0;
- break;
- }
-
- cache_estimate(cachep->gfporder, size, align, flags,
- &remainder, &num);
+ cache_estimate(gfporder, size, align, flags, &remainder, &num);
if (!num)
continue;
+
/* More than offslab_limit objects will cause problems */
- if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+ if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
break;
+ /* Found something acceptable - save it away */
cachep->num = num;
+ cachep->gfporder = gfporder;
left_over = remainder;
/*
+ * A VFS-reclaimable slab tends to have most allocations
+ * as GFP_NOFS and we really don't want to have to be allocating
+ * higher-order pages when we are unable to shrink dcache.
+ */
+ if (flags & SLAB_RECLAIM_ACCOUNT)
+ break;
+
+ /*
* Large number of objects is good, but very large slabs are
* currently bad for the gfp()s.
*/
- if (cachep->gfporder >= slab_break_gfp_order)
+ if (gfporder >= slab_break_gfp_order)
break;
- if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
- /* Acceptable internal fragmentation */
+ /*
+ * Acceptable internal fragmentation?
+ */
+ if ((left_over * 8) <= (PAGE_SIZE << gfporder))
break;
}
return left_over;
@@ -1869,17 +1882,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
size = ALIGN(size, align);
- if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
- /*
- * A VFS-reclaimable slab tends to have most allocations
- * as GFP_NOFS and we really don't want to have to be allocating
- * higher-order pages when we are unable to shrink dcache.
- */
- cachep->gfporder = 0;
- cache_estimate(cachep->gfporder, size, align, flags,
- &left_over, &cachep->num);
- } else
- left_over = calculate_slab_order(cachep, size, align, flags);
+ left_over = calculate_slab_order(cachep, size, align, flags);
if (!cachep->num) {
printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -2554,7 +2557,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
"slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
cachep->name, cachep->num, slabp, slabp->inuse);
for (i = 0;
- i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t);
+ i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
i++) {
if ((i % 16) == 0)
printk("\n%03x:", i);
diff --git a/mm/swap.c b/mm/swap.c
index cce3dda59c5..e9ec06d845e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount)
if (count >= FBC_BATCH || count <= -FBC_BATCH) {
spin_lock(&fbc->lock);
fbc->count += count;
+ *pcount = 0;
spin_unlock(&fbc->lock);
- count = 0;
+ } else {
+ *pcount = count;
}
- *pcount = count;
put_cpu();
}
EXPORT_SYMBOL(percpu_counter_mod);
+
+/*
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+long percpu_counter_sum(struct percpu_counter *fbc)
+{
+ long ret;
+ int cpu;
+
+ spin_lock(&fbc->lock);
+ ret = fbc->count;
+ for_each_cpu(cpu) {
+ long *pcount = per_cpu_ptr(fbc->counters, cpu);
+ ret += *pcount;
+ }
+ spin_unlock(&fbc->lock);
+ return ret < 0 ? 0 : ret;
+}
+EXPORT_SYMBOL(percpu_counter_sum);
#endif
/*