Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c         |  9
-rw-r--r--  mm/migrate.c         |  8
-rw-r--r--  mm/mmap.c            | 34
-rw-r--r--  mm/nommu.c           | 18
-rw-r--r--  mm/page-writeback.c  |  5
-rw-r--r--  mm/page_alloc.c      | 70
-rw-r--r--  mm/slab.c            | 30
7 files changed, 133 insertions, 41 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d3e3bd2ffce..d213feded10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -401,7 +401,7 @@ unsigned long __init free_all_bootmem (void)
return(free_all_bootmem_core(NODE_DATA(0)));
}
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
@@ -409,7 +409,14 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
list_for_each_entry(bdata, &bdata_list, list)
if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
return(ptr);
+ return NULL;
+}
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+ void *mem = __alloc_bootmem_nopanic(size, align, goal);
+ if (mem)
+ return mem;
/*
* Whoops, we cannot satisfy the allocation request.
*/
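The split above lets early-boot code handle allocation failure itself instead of panicking. A minimal sketch of how a caller might use the new nopanic variant (the function name alloc_hash_table and the fallback policy are illustrative, not part of this patch):

	static void * __init alloc_hash_table(unsigned long *size)
	{
		void *table = NULL;

		/* Try progressively smaller tables; __alloc_bootmem_nopanic()
		 * returns NULL on failure instead of calling panic(). */
		while (!table && *size >= PAGE_SIZE) {
			table = __alloc_bootmem_nopanic(*size, SMP_CACHE_BYTES, 0);
			if (!table)
				*size >>= 1;
		}

		/* Last resort: the panicking variant reports the failure. */
		if (!table)
			table = __alloc_bootmem(*size, SMP_CACHE_BYTES, 0);
		return table;
	}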
diff --git a/mm/migrate.c b/mm/migrate.c
index 09f6e4aa87f..d444229f259 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -16,8 +16,7 @@
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h> /* for try_to_release_page(),
- buffer_heads_over_limit */
+#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
@@ -28,8 +27,6 @@
#include "internal.h"
-#include "internal.h"
-
/* The maximum number of pages to take off the LRU for migration */
#define MIGRATE_CHUNK_SIZE 256
@@ -176,7 +173,6 @@ unlock_retry:
retry:
return -EAGAIN;
}
-EXPORT_SYMBOL(swap_page);
/*
* Remove references for a page and establish the new page with the correct
@@ -234,7 +230,7 @@ int migrate_page_remove_references(struct page *newpage,
if (!page_mapping(page) || page_count(page) != nr_refs ||
*radix_pointer != page) {
write_unlock_irq(&mapping->tree_lock);
- return 1;
+ return -EAGAIN;
}
/*
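With swap_page() no longer exported and migrate_page_remove_references() returning -EAGAIN instead of 1, callers can propagate the error code directly. A hedged sketch of the resulting caller pattern (the retry_list variable and the nr_refs value of 2 are illustrative only):

	rc = migrate_page_remove_references(newpage, page, 2);
	if (rc == -EAGAIN) {
		/* The page is still referenced or its mapping changed
		 * underneath us; put it on a list to retry later. */
		list_move(&page->lru, &retry_list);
		return rc;
	}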
diff --git a/mm/mmap.c b/mm/mmap.c
index e780d19aa21..e6ee12344b1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -121,14 +121,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+ * Leave out the reserved pages; they are not available for anonymous pages.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = (totalram_pages - hugetlb_total_pages())
@@ -150,7 +162,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
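A rough worked example of the heuristic (OVERCOMMIT_GUESS) branch after this change, with all numbers invented for illustration:

	/*
	 * nr_free_pages():                      n = 10000
	 * minus totalreserve_pages (1200):      n =  8800
	 * non-root caller keeps ~3% back:       n -= 8800 / 32 = 275  ->  8525
	 *
	 * The 8525 pages are added to the page-cache total computed earlier;
	 * only if that sum exceeds the pages being committed does the request
	 * succeed.  Every failure path now funnels through the common error
	 * label, which undoes the accounting via vm_unacct_memory().
	 */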
@@ -220,6 +232,17 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
if (brk < mm->end_code)
goto out;
+
+ /*
+ * Check against rlimit here. If this check is done later after the test
+ * of oldbrk with newbrk then it can escape the test and let the data
+ * segment grow beyond its set limit in the case where the limit is
+ * not page aligned. -Ram Gupta
+ */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk)
@@ -232,11 +255,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
goto out;
}
- /* Check against rlimit.. */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
- goto out;
-
/* Check against existing mmap mappings. */
if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
goto out;
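Why the rlimit check must precede the PAGE_ALIGN() comparison can be seen with a small example (hypothetical values, PAGE_SIZE = 4096):

	/*
	 * RLIMIT_DATA (not page aligned):  6000
	 * mm->start_data:                     0
	 * current mm->brk:                 5000  -> PAGE_ALIGN() = 8192
	 * requested brk:                   7000  -> PAGE_ALIGN() = 8192
	 *
	 * With the old ordering, the oldbrk == newbrk early exit was taken
	 * before the rlimit test ever ran, so mm->brk became 7000 even though
	 * that exceeds the 6000-byte limit.  Checking the unaligned brk value
	 * first closes that hole.
	 */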
diff --git a/mm/nommu.c b/mm/nommu.c
index db45efac17c..029fadac0fb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1147,14 +1147,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
* only call if we're about to fail.
*/
n = nr_free_pages();
+
+ /*
+ * Leave out the reserved pages; they are not available for anonymous pages.
+ */
+ if (n <= totalreserve_pages)
+ goto error;
+ else
+ n -= totalreserve_pages;
+
+ /*
+ * Leave the last 3% for root
+ */
if (!cap_sys_admin)
n -= n / 32;
free += n;
if (free > pages)
return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
+
+ goto error;
}
allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1175,7 +1187,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
*/
if (atomic_read(&vm_committed_space) < (long)allowed)
return 0;
-
+error:
vm_unacct_memory(pages);
return -ENOMEM;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6dcce3a4bbd..75d7f48b79b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -72,13 +72,12 @@ int dirty_background_ratio = 10;
int vm_dirty_ratio = 40;
/*
- * The interval between `kupdate'-style writebacks, in centiseconds
- * (hundredths of a second)
+ * The interval between `kupdate'-style writebacks, in jiffies
*/
int dirty_writeback_interval = 5 * HZ;
/*
- * The longest number of centiseconds for which data is allowed to remain dirty
+ * The longest number of jiffies for which data is allowed to remain dirty
*/
int dirty_expire_interval = 30 * HZ;
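Since both values are now documented in jiffies, the defaults work out as follows (HZ = 250 is assumed purely for the arithmetic):

	/*
	 * dirty_writeback_interval = 5 * HZ  = 1250 jiffies, about  5 seconds
	 * dirty_expire_interval    = 30 * HZ = 7500 jiffies, about 30 seconds
	 */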
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270..97d6827c7d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
@@ -151,7 +152,8 @@ static void bad_page(struct page *page)
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback );
+ 1 << PG_writeback |
+ 1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -236,12 +238,12 @@ static inline unsigned long page_order(struct page *page) {
static inline void set_page_order(struct page *page, int order) {
set_page_private(page, order);
- __SetPagePrivate(page);
+ __SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
- __ClearPagePrivate(page);
+ __ClearPageBuddy(page);
set_page_private(page, 0);
}
@@ -280,11 +282,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
*
+ * For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
@@ -293,10 +297,10 @@ static inline int page_is_buddy(struct page *page, int order)
return 0;
#endif
- if (PagePrivate(page) &&
- (page_order(page) == order) &&
- page_count(page) == 0)
+ if (PageBuddy(page) && page_order(page) == order) {
+ BUG_ON(page_count(page) != 0);
return 1;
+ }
return 0;
}
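In other words, while zone->lock is held, PG_buddy plus page_private() fully describe the head page of a free block. A hypothetical debugging helper (not part of this patch) illustrates the invariant:

	/* Caller must hold zone->lock; PG_buddy is only stable under it.
	 * Returns the order of the free block headed by @page, or -1 if
	 * @page is not currently in the buddy allocator. */
	static inline int free_block_order(struct page *page)
	{
		return PageBuddy(page) ? (int)page_private(page) : -1;
	}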
@@ -313,7 +317,7 @@ static inline int page_is_buddy(struct page *page, int order)
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
@@ -376,7 +380,8 @@ static inline int free_pages_check(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -524,7 +529,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
/*
@@ -2472,6 +2478,38 @@ void __init page_alloc_init(void)
}
/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+ struct pglist_data *pgdat;
+ unsigned long reserve_pages = 0;
+ int i, j;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+ unsigned long max = 0;
+
+ /* Find valid and maximum lowmem_reserve in the zone */
+ for (j = i; j < MAX_NR_ZONES; j++) {
+ if (zone->lowmem_reserve[j] > max)
+ max = zone->lowmem_reserve[j];
+ }
+
+ /* we treat pages_high as reserved pages. */
+ max += zone->pages_high;
+
+ if (max > zone->present_pages)
+ max = zone->present_pages;
+ reserve_pages += max;
+ }
+ }
+ totalreserve_pages = reserve_pages;
+}
+
+/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
* has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
}
}
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
@@ -2556,6 +2597,9 @@ void setup_per_zone_pages_min(void)
zone->pages_high = zone->pages_min + tmp / 2;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
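Taken together, the two hooks above keep totalreserve_pages in step with the per-zone watermarks and lowmem reserves. A worked example with made-up numbers for a single node:

	/*
	 * zone      present_pages   max lowmem_reserve   pages_high   contribution
	 * DMA              4096                    784           32            816
	 * NORMAL         225280                      0         1504           1504
	 *
	 * totalreserve_pages = 816 + 1504 = 2320 pages, which
	 * __vm_enough_memory() now subtracts from the free-page count before
	 * deciding whether an anonymous mapping may be committed.
	 */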
diff --git a/mm/slab.c b/mm/slab.c
index f055c142021..e6ef9bd5233 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -420,6 +420,7 @@ struct kmem_cache {
unsigned long max_freeable;
unsigned long node_allocs;
unsigned long node_frees;
+ unsigned long node_overflow;
atomic_t allochit;
atomic_t allocmiss;
atomic_t freehit;
@@ -465,6 +466,7 @@ struct kmem_cache {
#define STATS_INC_ERR(x) ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
+#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
#define STATS_SET_FREEABLE(x, i) \
do { \
if ((x)->max_freeable < i) \
@@ -484,6 +486,7 @@ struct kmem_cache {
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x) do { } while (0)
+#define STATS_INC_ACOVERFLOW(x) do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x) do { } while (0)
#define STATS_INC_ALLOCMISS(x) do { } while (0)
@@ -1453,7 +1456,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
int i;
flags |= cachep->gfpflags;
+#ifndef CONFIG_MMU
+ /* nommu uses slabs for process anonymous memory allocations, so
+ * requires __GFP_COMP to properly refcount higher order allocations
+ */
+ page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
+#else
page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+#endif
if (!page)
return NULL;
addr = page_address(page);
@@ -2318,13 +2328,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
/* Get the memory for a slab management obj. */
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags)
+ int colour_off, gfp_t local_flags,
+ int nodeid)
{
struct slab *slabp;
if (OFF_SLAB(cachep)) {
/* Slab management obj is off-slab. */
- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+ slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+ local_flags, nodeid);
if (!slabp)
return NULL;
} else {
@@ -2334,6 +2346,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
slabp->inuse = 0;
slabp->colouroff = colour_off;
slabp->s_mem = objp + colour_off;
+ slabp->nodeid = nodeid;
return slabp;
}
@@ -2519,7 +2532,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
goto failed;
/* Get slab management. */
- slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+ slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
if (!slabp)
goto opps1;
@@ -3080,9 +3093,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit))
+ if (unlikely(alien->avail == alien->limit)) {
+ STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep,
alien, nodeid);
+ }
alien->entry[alien->avail++] = objp;
spin_unlock(&alien->lock);
} else {
@@ -3760,7 +3775,7 @@ static void print_slabinfo_header(struct seq_file *m)
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
- "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+ "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
seq_putc(m, '\n');
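With STATS enabled, the globalstat portion of a /proc/slabinfo line therefore gains a ninth field. An illustrative line (all counts invented) might end with:

	 : globalstat  120340   4096   310   12    0   64   880   452   17

where the trailing 17 is the new <alienoverflow> count: the number of times a remote free found the alien array already full and had to drain it back to the objects' home node first.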
@@ -3874,11 +3889,12 @@ static int s_show(struct seq_file *m, void *p)
unsigned long max_freeable = cachep->max_freeable;
unsigned long node_allocs = cachep->node_allocs;
unsigned long node_frees = cachep->node_frees;
+ unsigned long overflows = cachep->node_overflow;
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
- %4lu %4lu %4lu %4lu", allocs, high, grown,
+ %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
reaped, errors, max_freeable, node_allocs,
- node_frees);
+ node_frees, overflows);
}
/* cpu stats */
{