Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c         |  9
-rw-r--r--  mm/filemap.c         | 32
-rw-r--r--  mm/highmem.c         | 15
-rw-r--r--  mm/madvise.c         |  3
-rw-r--r--  mm/mempolicy.c       |  1
-rw-r--r--  mm/migrate.c         |  8
-rw-r--r--  mm/mmap.c            | 43
-rw-r--r--  mm/nommu.c           | 18
-rw-r--r--  mm/oom_kill.c        | 71
-rw-r--r--  mm/page-writeback.c  |  7
-rw-r--r--  mm/page_alloc.c      | 82
-rw-r--r--  mm/shmem.c           |  3
-rw-r--r--  mm/slab.c            | 53
-rw-r--r--  mm/slob.c            | 10
-rw-r--r--  mm/swap_state.c      |  3
-rw-r--r--  mm/vmalloc.c         |  3
-rw-r--r--  mm/vmscan.c          |  2
17 files changed, 243 insertions(+), 120 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2ffce..d213feded10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
return(free_all_bootmem_core(NODE_DATA(0)));
}
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
list_for_each_entry(bdata, &bdata_list, list)
if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
return(ptr);
+ return NULL;
+}
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+ void *mem = __alloc_bootmem_nopanic(size,align,goal);
+ if (mem)
+ return mem;
/*
* Whoops, we cannot satisfy the allocation request.
*/
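
For reference, a minimal sketch (not part of the patch) of how early-boot code might use the new non-panicking variant: an optional allocation falls back gracefully, while mandatory allocations keep using __alloc_bootmem(), which still panics on failure. The helper name below is invented for illustration.

	#include <linux/init.h>
	#include <linux/cache.h>
	#include <linux/bootmem.h>

	/* Hypothetical early-boot helper, for illustration only. */
	static void * __init setup_optional_table(unsigned long size)
	{
		void *table;

		/* Optional allocation: get NULL back instead of a panic. */
		table = __alloc_bootmem_nopanic(size, SMP_CACHE_BYTES, 0);
		if (!table)
			return NULL;	/* the feature simply stays disabled */

		return table;
	}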
diff --git a/mm/filemap.c b/mm/filemap.c
index 3ef20739e72..fd57442186c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -697,6 +697,38 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
return ret;
}
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping: The address_space to search
+ * @index: The starting page index
+ * @nr_pages: The maximum number of pages
+ * @pages: Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+ unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ unsigned int ret;
+
+ read_lock_irq(&mapping->tree_lock);
+ ret = radix_tree_gang_lookup(&mapping->page_tree,
+ (void **)pages, index, nr_pages);
+ for (i = 0; i < ret; i++) {
+ if (pages[i]->mapping == NULL || pages[i]->index != index)
+ break;
+
+ page_cache_get(pages[i]);
+ index++;
+ }
+ read_unlock_irq(&mapping->tree_lock);
+ return i;
+}
+
/*
* Like find_get_pages, except we only return pages which are tagged with
* `tag'. We update *index to index the next page for the traversal.
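
A minimal caller sketch (not part of the patch), assuming a filesystem that wants up to 16 contiguous pagecache pages starting at 'index'; the helper name is invented:

	#include <linux/pagemap.h>

	static unsigned read_contig_range(struct address_space *mapping,
					  pgoff_t index)
	{
		struct page *pages[16];
		unsigned nr, i;

		nr = find_get_pages_contig(mapping, index, 16, pages);

		/* pages[0..nr-1] have indices index..index+nr-1, and each
		 * carries a reference taken under the tree lock. */
		for (i = 0; i < nr; i++) {
			/* ... read or map the page here ... */
			page_cache_release(pages[i]);
		}
		return nr;
	}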
diff --git a/mm/highmem.c b/mm/highmem.c
index 55885f64af4..9b274fdf9d0 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -74,8 +74,7 @@ static void flush_all_zero_pkmaps(void)
pkmap_count[i] = 0;
/* sanity check */
- if (pte_none(pkmap_page_table[i]))
- BUG();
+ BUG_ON(pte_none(pkmap_page_table[i]));
/*
* Don't need an atomic fetch-and-clear op here;
@@ -158,8 +157,7 @@ void fastcall *kmap_high(struct page *page)
if (!vaddr)
vaddr = map_new_virtual(page);
pkmap_count[PKMAP_NR(vaddr)]++;
- if (pkmap_count[PKMAP_NR(vaddr)] < 2)
- BUG();
+ BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
spin_unlock(&kmap_lock);
return (void*) vaddr;
}
@@ -174,8 +172,7 @@ void fastcall kunmap_high(struct page *page)
spin_lock(&kmap_lock);
vaddr = (unsigned long)page_address(page);
- if (!vaddr)
- BUG();
+ BUG_ON(!vaddr);
nr = PKMAP_NR(vaddr);
/*
@@ -220,8 +217,7 @@ static __init int init_emergency_pool(void)
return 0;
page_pool = mempool_create_page_pool(POOL_SIZE, 0);
- if (!page_pool)
- BUG();
+ BUG_ON(!page_pool);
printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
return 0;
@@ -264,8 +260,7 @@ int init_emergency_isa_pool(void)
isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
mempool_free_pages, (void *) 0);
- if (!isa_page_pool)
- BUG();
+ BUG_ON(!isa_page_pool);
printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
diff --git a/mm/madvise.c b/mm/madvise.c
index af3d573b014..4e196155a0c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_area_struct *vma,
return -EINVAL;
}
+ if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+ return -EACCES;
+
mapping = vma->vm_file->f_mapping;
offset = (loff_t)(start - vma->vm_start)
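
From userspace, the new check means MADV_REMOVE now fails with EACCES unless the mapping is both shared and writable. A small sketch under that assumption ('fd' is assumed to refer to a tmpfs file; the MADV_REMOVE fallback define uses the x86 value, in case older headers lack it):

	#include <sys/types.h>
	#include <sys/mman.h>
	#include <errno.h>
	#include <stdio.h>

	#ifndef MADV_REMOVE
	#define MADV_REMOVE 9	/* x86 value, provided only for older headers */
	#endif

	static int punch_hole(int fd, size_t len)
	{
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return -1;

		if (madvise(p, len, MADV_REMOVE) < 0) {
			if (errno == EACCES)
				fprintf(stderr,
					"mapping must be MAP_SHARED and writable\n");
			munmap(p, len);
			return -1;
		}
		munmap(p, len);
		return 0;
	}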
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index dec8249e972..8778f58880c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1761,7 +1761,6 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
md->mapcount_max = count;
md->node[page_to_nid(page)]++;
- cond_resched();
}
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4aa87f..d444229f259 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h> /* for try_to_release_page(),
- buffer_heads_over_limit */
+#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
@@ -28,8 +27,6 @@
#include "internal.h"
-#include "internal.h"
-
/* The maximum number of pages to take off the LRU for migration */
#define MIGRATE_CHUNK_SIZE 256
@@ -176,7 +173,6 @@ unlock_retry:
retry:
return -EAGAIN;
}
-EXPORT_SYMBOL(swap_page);
/*
* Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
if (!page_mapping(page) || page_count(page) != nr_refs ||
*radix_pointer != page) {
write_unlock_irq(&mapping->tree_lock);
- return 1;
+ return -EAGAIN;
}
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 4f5b5709136..e6ee12344b1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+ * Leave out the reserved pages; they are not available for anonymous pages.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
if (brk < mm->end_code)
goto out;
+
+ /*
+ * Check against rlimit here. If this check is done later after the test
+ * of oldbrk with newbrk then it can escape the test and let the data
+ * segment grow beyond its set limit in the case where the limit is
+ * not page aligned -Ram Gupta
+ */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
goto out;
}
- /* Check against rlimit.. */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
- goto out;
-
/* Check against existing mmap mappings. */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
@@ -294,8 +312,7 @@ void validate_mm(struct mm_struct *mm)
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count)
printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
- if (bug)
- BUG();
+ BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
@@ -432,8 +449,7 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
struct rb_node ** rb_link, * rb_parent;
__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
- if (__vma && __vma->vm_start < vma->vm_end)
- BUG();
+ BUG_ON(__vma && __vma->vm_start < vma->vm_end);
__vma_link(mm, vma, prev, rb_link, rb_parent);
mm->map_count++;
}
@@ -813,8 +829,7 @@ try_prev:
* (e.g. stash info in next's anon_vma_node when assigning
* an anon_vma, or when trying vma_merge). Another time.
*/
- if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
- BUG();
+ BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
if (!near)
goto none;
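
To see the effect of the reordering in __vm_enough_memory(), assume (numbers invented) nr_free_pages() returns 100000 and totalreserve_pages is 12000: a non-root caller now adds 88000 - 88000/32 = 85250 pages to 'free', instead of 100000 - 3125 = 96875 as before, so the guess-mode heuristic refuses allocations noticeably earlier on machines with large per-zone reserves.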
diff --git a/mm/nommu.c b/mm/nommu.c
index db45efac17c..029fadac0fb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+ * Leave out the reserved pages; they are not available for anonymous pages.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 78747afad6b..042e6436c3e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -46,15 +46,25 @@
unsigned long badness(struct task_struct *p, unsigned long uptime)
{
unsigned long points, cpu_time, run_time, s;
- struct list_head *tsk;
+ struct mm_struct *mm;
+ struct task_struct *child;
- if (!p->mm)
+ task_lock(p);
+ mm = p->mm;
+ if (!mm) {
+ task_unlock(p);
return 0;
+ }
/*
* The memory size of the process is the basis for the badness.
*/
- points = p->mm->total_vm;
+ points = mm->total_vm;
+
+ /*
+ * After this unlock we can no longer dereference local variable `mm'
+ */
+ task_unlock(p);
/*
* Processes which fork a lot of child processes are likely
@@ -64,11 +74,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
* child is eating the vast majority of memory, adding only half
* to the parents will make the child our kill candidate of choice.
*/
- list_for_each(tsk, &p->children) {
- struct task_struct *chld;
- chld = list_entry(tsk, struct task_struct, sibling);
- if (chld->mm != p->mm && chld->mm)
- points += chld->mm->total_vm/2 + 1;
+ list_for_each_entry(child, &p->children, sibling) {
+ task_lock(child);
+ if (child->mm != mm && child->mm)
+ points += child->mm->total_vm/2 + 1;
+ task_unlock(child);
}
/*
@@ -244,17 +254,24 @@ static void __oom_kill_task(task_t *p, const char *message)
force_sig(SIGKILL, p);
}
-static struct mm_struct *oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(task_t *p, const char *message)
{
- struct mm_struct *mm = get_task_mm(p);
+ struct mm_struct *mm;
task_t * g, * q;
- if (!mm)
- return NULL;
- if (mm == &init_mm) {
- mmput(mm);
- return NULL;
- }
+ mm = p->mm;
+
+ /* WARNING: mm may not be dereferenced since we did not obtain its
+ * value from get_task_mm(p). This is OK since all we need to do is
+ * compare mm to q->mm below.
+ *
+ * Furthermore, even if mm contains a non-NULL value, p->mm may
+ * change to NULL at any time since we do not hold task_lock(p).
+ * However, this is of no concern to us.
+ */
+
+ if (mm == NULL || mm == &init_mm)
+ return 1;
__oom_kill_task(p, message);
/*
@@ -266,13 +283,12 @@ static struct mm_struct *oom_kill_task(task_t *p, const char *message)
__oom_kill_task(q, message);
while_each_thread(g, q);
- return mm;
+ return 0;
}
-static struct mm_struct *oom_kill_process(struct task_struct *p,
- unsigned long points, const char *message)
+static int oom_kill_process(struct task_struct *p, unsigned long points,
+ const char *message)
{
- struct mm_struct *mm;
struct task_struct *c;
struct list_head *tsk;
@@ -283,9 +299,8 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
c = list_entry(tsk, struct task_struct, sibling);
if (c->mm == p->mm)
continue;
- mm = oom_kill_task(c, message);
- if (mm)
- return mm;
+ if (!oom_kill_task(c, message))
+ return 0;
}
return oom_kill_task(p, message);
}
@@ -300,7 +315,6 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
*/
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
{
- struct mm_struct *mm = NULL;
task_t *p;
unsigned long points = 0;
@@ -320,12 +334,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
*/
switch (constrained_alloc(zonelist, gfp_mask)) {
case CONSTRAINT_MEMORY_POLICY:
- mm = oom_kill_process(current, points,
+ oom_kill_process(current, points,
"No available memory (MPOL_BIND)");
break;
case CONSTRAINT_CPUSET:
- mm = oom_kill_process(current, points,
+ oom_kill_process(current, points,
"No available memory in cpuset");
break;
@@ -347,8 +361,7 @@ retry:
panic("Out of memory and no killable processes...\n");
}
- mm = oom_kill_process(p, points, "Out of memory");
- if (!mm)
+ if (oom_kill_process(p, points, "Out of memory"))
goto retry;
break;
@@ -357,8 +370,6 @@ retry:
out:
read_unlock(&tasklist_lock);
cpuset_unlock();
- if (mm)
- mmput(mm);
/*
* Give "p" a good chance of killing itself before we
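
The badness() change follows the usual pattern for reading another task's mm safely; a standalone sketch of that pattern (not part of the patch, helper name invented):

	#include <linux/sched.h>

	/* p->mm can become NULL at any moment, so dereference it only under
	 * task_lock(p) and keep nothing but copied-out values afterwards. */
	static unsigned long read_total_vm(struct task_struct *p)
	{
		struct mm_struct *mm;
		unsigned long vm = 0;

		task_lock(p);
		mm = p->mm;
		if (mm)
			vm = mm->total_vm;
		task_unlock(p);	/* mm must not be touched past this point */

		return vm;
	}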
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 893d7677579..75d7f48b79b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
int vm_dirty_ratio = 40;
/*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
*/
int dirty_writeback_interval = 5 * HZ;
/*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
*/
int dirty_expire_interval = 30 * HZ;
@@ -258,7 +257,7 @@ static void balance_dirty_pages(struct address_space *mapping)
/**
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
* @mapping: address_space which was dirtied
- * @nr_pages: number of pages which the caller has just dirtied
+ * @nr_pages_dirtied: number of pages which the caller has just dirtied
*
* Processes which are dirtying memory should call in here once for each page
* which was newly dirtied. The function will periodically check the system's
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270..ea77c999047 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback );
+ 1 << PG_writeback |
+ 1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -230,18 +232,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
* zone->lock is already acquired when we use these.
* So, we don't need atomic page->flags operations here.
*/
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
return page_private(page);
}
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
set_page_private(page, order);
- __SetPagePrivate(page);
+ __SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
- __ClearPagePrivate(page);
+ __ClearPageBuddy(page);
set_page_private(page, 0);
}
@@ -280,11 +284,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
*
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ *
+ * For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
@@ -293,11 +299,11 @@ static inline int page_is_buddy(struct page *page, int order)
return 0;
#endif
- if (PagePrivate(page) &&
- (page_order(page) == order) &&
- page_count(page) == 0)
- return 1;
- return 0;
+ if (PageBuddy(page) && page_order(page) == order) {
+ BUG_ON(page_count(page) != 0);
+ return 1;
+ }
+ return 0;
}
/*
@@ -313,7 +319,7 @@ static inline int page_is_buddy(struct page *page, int order)
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
@@ -376,7 +382,8 @@ static inline int free_pages_check(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -524,7 +531,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
/*
@@ -1954,7 +1962,7 @@ static inline void free_zone_pagesets(int cpu)
}
}
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -2472,6 +2480,38 @@ void __init page_alloc_init(void)
}
/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+ struct pglist_data *pgdat;
+ unsigned long reserve_pages = 0;
+ int i, j;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+ unsigned long max = 0;
+
+ /* Find valid and maximum lowmem_reserve in the zone */
+ for (j = i; j < MAX_NR_ZONES; j++) {
+ if (zone->lowmem_reserve[j] > max)
+ max = zone->lowmem_reserve[j];
+ }
+
+ /* we treat pages_high as reserved pages. */
+ max += zone->pages_high;
+
+ if (max > zone->present_pages)
+ max = zone->present_pages;
+ reserve_pages += max;
+ }
+ }
+ totalreserve_pages = reserve_pages;
+}
+
+/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
* has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2542,9 @@ static void setup_per_zone_lowmem_reserve(void)
}
}
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
@@ -2556,6 +2599,9 @@ void setup_per_zone_pages_min(void)
zone->pages_high = zone->pages_min + tmp / 2;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
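
As a worked example of the new bookkeeping (numbers invented): a zone whose lowmem_reserve[] holds { 0, 784, 800 } and whose pages_high is 32 contributes max(0, 784, 800) + 32 = 832 pages, capped at that zone's present_pages; summing this over every zone of every online node yields totalreserve_pages, which the __vm_enough_memory() changes above subtract from nr_free_pages().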
diff --git a/mm/shmem.c b/mm/shmem.c
index 37eaf42ed2c..4c5e68e4e9a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -46,6 +46,8 @@
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
+#include <linux/migrate.h>
+
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/pgtable.h>
@@ -2173,6 +2175,7 @@ static struct address_space_operations shmem_aops = {
.prepare_write = shmem_prepare_write,
.commit_write = simple_commit_write,
#endif
+ .migratepage = migrate_page,
};
static struct file_operations shmem_file_operations = {
diff --git a/mm/slab.c b/mm/slab.c
index 4cbf8bb1355..c32af7e7581 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
unsigned long max_freeable;
unsigned long node_allocs;
unsigned long node_frees;
+ unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
#define STATS_INC_ERR(x) ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
#define STATS_SET_FREEABLE(x, i) \
do { \
if ((x)->max_freeable < i) \
@@ -484,6 +486,7 @@ struct kmem_cache {
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x) do { } while (0)
+#define STATS_INC_ACOVERFLOW(x) do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x) do { } while (0)
#define STATS_INC_ALLOCMISS(x) do { } while (0)
@@ -976,7 +979,8 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
* That way we could avoid the overhead of putting the objects
* into the free lists and getting them back later.
*/
- transfer_objects(rl3->shared, ac, ac->limit);
+ if (rl3->shared)
+ transfer_objects(rl3->shared, ac, ac->limit);
free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0;
@@ -1033,7 +1037,7 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
#endif
-static int __devinit cpuup_callback(struct notifier_block *nfb,
+static int cpuup_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1297,8 +1301,7 @@ void __init kmem_cache_init(void)
if (cache_cache.num)
break;
}
- if (!cache_cache.num)
- BUG();
+ BUG_ON(!cache_cache.num);
cache_cache.gfporder = order;
cache_cache.colour = left_over / cache_cache.colour_off;
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
@@ -1454,7 +1457,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
int i;
flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+ /* nommu uses slabs for process anonymous memory allocations, so
+ * requires __GFP_COMP to properly refcount higher order allocations
+ */
+ page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
if (!page)
return NULL;
addr = page_address(page);
@@ -1974,8 +1984,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* Always checks flags, a caller might be expecting debug support which
* isn't available.
*/
- if (flags & ~CREATE_MASK)
- BUG();
+ BUG_ON(flags & ~CREATE_MASK);
/*
* Check that size is in terms of words. This is needed to avoid
@@ -2206,8 +2215,7 @@ static int __node_shrink(struct kmem_cache *cachep, int node)
slabp = list_entry(l3->slabs_free.prev, struct slab, list);
#if DEBUG
- if (slabp->inuse)
- BUG();
+ BUG_ON(slabp->inuse);
#endif
list_del(&slabp->list);
@@ -2248,8 +2256,7 @@ static int __cache_shrink(struct kmem_cache *cachep)
*/
int kmem_cache_shrink(struct kmem_cache *cachep)
{
- if (!cachep || in_interrupt())
- BUG();
+ BUG_ON(!cachep || in_interrupt());
return __cache_shrink(cachep);
}
@@ -2277,8 +2284,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
int i;
struct kmem_list3 *l3;
- if (!cachep || in_interrupt())
- BUG();
+ BUG_ON(!cachep || in_interrupt());
/* Don't let CPUs to come and go */
lock_cpu_hotplug();
@@ -2323,13 +2329,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
/* Get the memory for a slab management obj. */
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags)
+ int colour_off, gfp_t local_flags,
+ int nodeid)
{
struct slab *slabp;
if (OFF_SLAB(cachep)) {
/* Slab management obj is off-slab. */
- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+ slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+ local_flags, nodeid);
if (!slabp)
return NULL;
} else {
@@ -2339,6 +2347,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
slabp->inuse = 0;
slabp->colouroff = colour_off;
slabp->s_mem = objp + colour_off;
+ slabp->nodeid = nodeid;
return slabp;
}
@@ -2477,8 +2486,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
* Be lazy and only check for valid flags here, keeping it out of the
* critical path in kmem_cache_alloc().
*/
- if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
- BUG();
+ BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW));
if (flags & SLAB_NO_GROW)
return 0;
@@ -2525,7 +2533,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
goto failed;
/* Get slab management. */
- slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+ slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
if (!slabp)
goto opps1;
@@ -3086,9 +3094,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit))
+ if (unlikely(alien->avail == alien->limit)) {
+ STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep,
alien, nodeid);
+ }
alien->entry[alien->avail++] = objp;
spin_unlock(&alien->lock);
} else {
@@ -3766,7 +3776,7 @@ static void print_slabinfo_header(struct seq_file *m)
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
- "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+ "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
@@ -3880,11 +3890,12 @@ static int s_show(struct seq_file *m, void *p)
unsigned long max_freeable = cachep->max_freeable;
unsigned long node_allocs = cachep->node_allocs;
unsigned long node_frees = cachep->node_frees;
+ unsigned long overflows = cachep->node_overflow;
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
- %4lu %4lu %4lu %4lu", allocs, high, grown,
+ %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
reaped, errors, max_freeable, node_allocs,
- node_frees);
+ node_frees, overflows);
}
/* cpu stats */
{
diff --git a/mm/slob.c b/mm/slob.c
index 9bcc7e2cabf..a68255ba455 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -354,9 +354,7 @@ void *__alloc_percpu(size_t size)
if (!pdata)
return NULL;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
+ for_each_possible_cpu(i) {
pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
if (!pdata->ptrs[i])
goto unwind_oom;
@@ -383,11 +381,9 @@ free_percpu(const void *objp)
int i;
struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
+ for_each_possible_cpu(i)
kfree(p->ptrs[i]);
- }
+
kfree(p);
}
EXPORT_SYMBOL(free_percpu);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d7af296833f..e0e1583f32c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -148,8 +148,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
swp_entry_t entry;
int err;
- if (!PageLocked(page))
- BUG();
+ BUG_ON(!PageLocked(page));
for (;;) {
entry = get_swap_page();
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 729eb3eec75..c0504f1e34e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -321,8 +321,7 @@ void __vunmap(void *addr, int deallocate_pages)
int i;
for (i = 0; i < area->nr_pages; i++) {
- if (unlikely(!area->pages[i]))
- BUG();
+ BUG_ON(!area->pages[i]);
__free_page(area->pages[i]);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index acdf001d694..4649a63a8cb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1328,7 +1328,7 @@ repeat:
not required for correctness. So if the last cpu in a node goes
away, we get changed to run anywhere: as the first one comes back,
restore their cpu bindings. */
-static int __devinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
pg_data_t *pgdat;