aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c54
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/mmap.c5
-rw-r--r--mm/page_alloc.c14
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/slab.c13
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/vmalloc.c47
-rw-r--r--mm/vmscan.c63
-rw-r--r--mm/vmstat.c2
11 files changed, 126 insertions, 82 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 8558732e85c..7b84dc81434 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -467,25 +467,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
}
#ifdef CONFIG_NUMA
-struct page *page_cache_alloc(struct address_space *x)
+struct page *__page_cache_alloc(gfp_t gfp)
{
if (cpuset_do_page_mem_spread()) {
int n = cpuset_mem_spread_node();
- return alloc_pages_node(n, mapping_gfp_mask(x), 0);
+ return alloc_pages_node(n, gfp, 0);
}
- return alloc_pages(mapping_gfp_mask(x), 0);
+ return alloc_pages(gfp, 0);
}
-EXPORT_SYMBOL(page_cache_alloc);
-
-struct page *page_cache_alloc_cold(struct address_space *x)
-{
- if (cpuset_do_page_mem_spread()) {
- int n = cpuset_mem_spread_node();
- return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0);
- }
- return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
-}
-EXPORT_SYMBOL(page_cache_alloc_cold);
+EXPORT_SYMBOL(__page_cache_alloc);
#endif
static int __sleep_on_page_lock(void *word)
@@ -826,7 +816,6 @@ struct page *
grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
{
struct page *page = find_get_page(mapping, index);
- gfp_t gfp_mask;
if (page) {
if (!TestSetPageLocked(page))
@@ -834,9 +823,8 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
page_cache_release(page);
return NULL;
}
- gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS;
- page = alloc_pages(gfp_mask, 0);
- if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) {
+ page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
+ if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
page_cache_release(page);
page = NULL;
}
@@ -1884,11 +1872,10 @@ repeat:
* if suid or (sgid and xgrp)
* remove privs
*/
-int remove_suid(struct dentry *dentry)
+int should_remove_suid(struct dentry *dentry)
{
mode_t mode = dentry->d_inode->i_mode;
int kill = 0;
- int result = 0;
/* suid always must be killed */
if (unlikely(mode & S_ISUID))
@@ -1901,13 +1888,28 @@ int remove_suid(struct dentry *dentry)
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID;
- if (unlikely(kill && !capable(CAP_FSETID))) {
- struct iattr newattrs;
+ if (unlikely(kill && !capable(CAP_FSETID)))
+ return kill;
- newattrs.ia_valid = ATTR_FORCE | kill;
- result = notify_change(dentry, &newattrs);
- }
- return result;
+ return 0;
+}
+
+int __remove_suid(struct dentry *dentry, int kill)
+{
+ struct iattr newattrs;
+
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ return notify_change(dentry, &newattrs);
+}
+
+int remove_suid(struct dentry *dentry)
+{
+ int kill = should_remove_suid(dentry);
+
+ if (unlikely(kill))
+ return __remove_suid(dentry, kill);
+
+ return 0;
}
EXPORT_SYMBOL(remove_suid);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2dbec90dc3b..a088f593a80 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -478,6 +478,9 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
retry:
page = find_lock_page(mapping, idx);
if (!page) {
+ size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+ if (idx >= size)
+ goto out;
if (hugetlb_get_quota(mapping))
goto out;
page = alloc_huge_page(vma, address);
diff --git a/mm/migrate.c b/mm/migrate.c
index ba2453f9483..b4979d423d2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -952,7 +952,8 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
goto out;
pm[i].node = node;
- }
+ } else
+ pm[i].node = 0; /* anything to not match MAX_NUMNODES */
}
/* End marker */
pm[nr_pages].node = MAX_NUMNODES;
diff --git a/mm/mmap.c b/mm/mmap.c
index 497e502dfd6..7b40abd7cba 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1379,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
* Check if the given range is hugepage aligned, and
* can be made suitable for hugepages.
*/
- ret = prepare_hugepage_range(addr, len);
+ ret = prepare_hugepage_range(addr, len, pgoff);
} else {
/*
* Ensure that a normal request is not falling in a
@@ -1880,6 +1880,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if ((addr + len) > TASK_SIZE || (addr + len) < addr)
return -EINVAL;
+ if (is_hugepage_only_range(mm, addr, len))
+ return -EINVAL;
+
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
error = arch_mmap_check(addr, len, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ebd425c2e2a..aa6fcc7ca66 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -853,7 +853,7 @@ again:
pcp = &zone_pcp(zone, cpu)->pcp[cold];
local_irq_save(flags);
if (!pcp->count) {
- pcp->count += rmqueue_bulk(zone, 0,
+ pcp->count = rmqueue_bulk(zone, 0,
pcp->batch, &pcp->list);
if (unlikely(!pcp->count))
goto failed;
@@ -1689,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (!early_pfn_valid(pfn))
continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
@@ -2259,7 +2261,7 @@ unsigned long __init __absent_pages_in_range(int nid,
/* Account for ranges past physical memory on this node */
if (range_end_pfn > prev_end_pfn)
- hole_pages = range_end_pfn -
+ hole_pages += range_end_pfn -
max(range_start_pfn, prev_end_pfn);
return hole_pages;
@@ -2405,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
zone->zone_pgdat = pgdat;
zone->free_pages = 0;
- zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
+ zone->prev_priority = DEF_PRIORITY;
zone_pcp_init(zone);
INIT_LIST_HEAD(&zone->active_list);
@@ -2610,6 +2612,9 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid)
{
int i;
+ /* Regions in the early_node_map can be in any order */
+ sort_node_map();
+
/* Assuming a sorted map, the first range found has the starting pfn */
for_each_active_range_index_in_nid(i, nid)
return early_node_map[i].start_pfn;
@@ -2678,9 +2683,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
}
- /* Regions in the early_node_map can be in any order */
- sort_node_map();
-
/* Print out the zone ranges */
printk("Zone PFN ranges:\n");
for (i = 0; i < MAX_NR_ZONES; i++)
diff --git a/mm/readahead.c b/mm/readahead.c
index 1ba736ac036..23cb61a01c6 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -173,6 +173,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
if (mapping->a_ops->readpages) {
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+ /* Clean up the remaining pages */
+ put_pages_list(pages);
goto out;
}
diff --git a/mm/slab.c b/mm/slab.c
index 266449d604b..3c4a7e34edd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,7 +883,7 @@ static void init_reap_node(int cpu)
if (node == MAX_NUMNODES)
node = first_node(node_online_map);
- __get_cpu_var(reap_node) = node;
+ per_cpu(reap_node, cpu) = node;
}
static void next_reap_node(void)
@@ -3152,12 +3152,15 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zone **z;
void *obj = NULL;
- for (z = zonelist->zones; *z && !obj; z++)
+ for (z = zonelist->zones; *z && !obj; z++) {
+ int nid = zone_to_nid(*z);
+
if (zone_idx(*z) <= ZONE_NORMAL &&
- cpuset_zone_allowed(*z, flags))
+ cpuset_zone_allowed(*z, flags) &&
+ cache->nodelists[nid])
obj = __cache_alloc_node(cache,
- flags | __GFP_THISNODE,
- zone_to_nid(*z));
+ flags | __GFP_THISNODE, nid);
+ }
return obj;
}
diff --git a/mm/sparse.c b/mm/sparse.c
index 86c52ab8087..b3c82ba3001 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -211,7 +211,7 @@ static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
struct page *page, *ret;
unsigned long memmap_size = sizeof(struct page) * nr_pages;
- page = alloc_pages(GFP_KERNEL, get_order(memmap_size));
+ page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
if (page)
goto got_map_page;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1133dd3aafc..86897ee792d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -160,13 +160,15 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
return err;
}
-struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end, int node)
+static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
+ unsigned long start, unsigned long end,
+ int node, gfp_t gfp_mask)
{
struct vm_struct **p, *tmp, *area;
unsigned long align = 1;
unsigned long addr;
+ BUG_ON(in_interrupt());
if (flags & VM_IOREMAP) {
int bit = fls(size);
@@ -179,16 +181,13 @@ struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
}
addr = ALIGN(start, align);
size = PAGE_ALIGN(size);
+ if (unlikely(!size))
+ return NULL;
- area = kmalloc_node(sizeof(*area), GFP_KERNEL, node);
+ area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
if (unlikely(!area))
return NULL;
- if (unlikely(!size)) {
- kfree (area);
- return NULL;
- }
-
/*
* We always allocate a guard page.
*/
@@ -236,7 +235,7 @@ out:
struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end)
{
- return __get_vm_area_node(size, flags, start, end, -1);
+ return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
}
/**
@@ -253,9 +252,11 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
}
-struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node)
+struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
+ int node, gfp_t gfp_mask)
{
- return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
+ return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
+ gfp_mask);
}
/* Caller must hold vmlist_lock */
@@ -487,7 +488,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
if (!size || (size >> PAGE_SHIFT) > num_physpages)
return NULL;
- area = get_vm_area_node(size, VM_ALLOC, node);
+ area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
if (!area)
return NULL;
@@ -528,11 +529,12 @@ void *vmalloc_user(unsigned long size)
void *ret;
ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
- write_lock(&vmlist_lock);
- area = __find_vm_area(ret);
- area->flags |= VM_USERMAP;
- write_unlock(&vmlist_lock);
-
+ if (ret) {
+ write_lock(&vmlist_lock);
+ area = __find_vm_area(ret);
+ area->flags |= VM_USERMAP;
+ write_unlock(&vmlist_lock);
+ }
return ret;
}
EXPORT_SYMBOL(vmalloc_user);
@@ -601,11 +603,12 @@ void *vmalloc_32_user(unsigned long size)
void *ret;
ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
- write_lock(&vmlist_lock);
- area = __find_vm_area(ret);
- area->flags |= VM_USERMAP;
- write_unlock(&vmlist_lock);
-
+ if (ret) {
+ write_lock(&vmlist_lock);
+ area = __find_vm_area(ret);
+ area->flags |= VM_USERMAP;
+ write_unlock(&vmlist_lock);
+ }
return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f05527bf792..518540a4a2a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -723,6 +723,20 @@ done:
return nr_reclaimed;
}
+/*
+ * We are about to scan this zone at a certain priority level. If that priority
+ * level is smaller (ie: more urgent) than the previous priority, then note
+ * that priority level within the zone. This is done so that when the next
+ * process comes in to scan this zone, it will immediately start out at this
+ * priority level rather than having to build up its own scanning priority.
+ * Here, this priority affects only the reclaim-mapped threshold.
+ */
+static inline void note_zone_scanning_priority(struct zone *zone, int priority)
+{
+ if (priority < zone->prev_priority)
+ zone->prev_priority = priority;
+}
+
static inline int zone_is_near_oom(struct zone *zone)
{
return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
@@ -746,7 +760,7 @@ static inline int zone_is_near_oom(struct zone *zone)
* But we had to alter page->flags anyway.
*/
static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
- struct scan_control *sc)
+ struct scan_control *sc, int priority)
{
unsigned long pgmoved;
int pgdeactivate = 0;
@@ -770,7 +784,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
* `distress' is a measure of how much trouble we're having
* reclaiming pages. 0 -> no problems. 100 -> great trouble.
*/
- distress = 100 >> zone->prev_priority;
+ distress = 100 >> min(zone->prev_priority, priority);
/*
* The point of this algorithm is to decide when to start
@@ -922,7 +936,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
nr_to_scan = min(nr_active,
(unsigned long)sc->swap_cluster_max);
nr_active -= nr_to_scan;
- shrink_active_list(nr_to_scan, zone, sc);
+ shrink_active_list(nr_to_scan, zone, sc, priority);
}
if (nr_inactive) {
@@ -972,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
continue;
- zone->temp_priority = priority;
- if (zone->prev_priority > priority)
- zone->prev_priority = priority;
+ note_zone_scanning_priority(zone, priority);
if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
@@ -1024,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
continue;
- zone->temp_priority = DEF_PRIORITY;
lru_pages += zone->nr_active + zone->nr_inactive;
}
@@ -1065,13 +1076,22 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
if (!sc.all_unreclaimable)
ret = 1;
out:
+ /*
+ * Now that we've scanned all the zones at this priority level, note
+ * that level within the zone so that the next thread which performs
+ * scanning of this zone will immediately start out at this priority
+ * level. This affects only the decision whether or not to bring
+ * mapped pages onto the inactive list.
+ */
+ if (priority < 0)
+ priority = 0;
for (i = 0; zones[i] != 0; i++) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
continue;
- zone->prev_priority = zone->temp_priority;
+ zone->prev_priority = priority;
}
return ret;
}
@@ -1111,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
.swap_cluster_max = SWAP_CLUSTER_MAX,
.swappiness = vm_swappiness,
};
+ /*
+ * temp_priority is used to remember the scanning priority at which
+ * this zone was successfully refilled to free_pages == pages_high.
+ */
+ int temp_priority[MAX_NR_ZONES];
loop_again:
total_scanned = 0;
@@ -1118,11 +1143,8 @@ loop_again:
sc.may_writepage = !laptop_mode;
count_vm_event(PAGEOUTRUN);
- for (i = 0; i < pgdat->nr_zones; i++) {
- struct zone *zone = pgdat->node_zones + i;
-
- zone->temp_priority = DEF_PRIORITY;
- }
+ for (i = 0; i < pgdat->nr_zones; i++)
+ temp_priority[i] = DEF_PRIORITY;
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
@@ -1183,10 +1205,9 @@ scan:
if (!zone_watermark_ok(zone, order, zone->pages_high,
end_zone, 0))
all_zones_ok = 0;
- zone->temp_priority = priority;
- if (zone->prev_priority > priority)
- zone->prev_priority = priority;
+ temp_priority[i] = priority;
sc.nr_scanned = 0;
+ note_zone_scanning_priority(zone, priority);
nr_reclaimed += shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -1226,10 +1247,15 @@ scan:
break;
}
out:
+ /*
+ * Note within each zone the priority level at which this zone was
+ * brought into a happy state. So that the next thread which scans this
+ * zone will start out at that priority level.
+ */
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
- zone->prev_priority = zone->temp_priority;
+ zone->prev_priority = temp_priority[i];
}
if (!all_zones_ok) {
cond_resched();
@@ -1358,7 +1384,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
if (zone->nr_scan_active >= nr_pages || pass > 3) {
zone->nr_scan_active = 0;
nr_to_scan = min(nr_pages, zone->nr_active);
- shrink_active_list(nr_to_scan, zone, sc);
+ shrink_active_list(nr_to_scan, zone, sc, prio);
}
}
@@ -1614,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
*/
priority = ZONE_RECLAIM_PRIORITY;
do {
+ note_zone_scanning_priority(zone, priority);
nr_reclaimed += shrink_zone(priority, zone, &sc);
priority--;
} while (priority >= 0 && nr_reclaimed < nr_pages);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 45b124e012f..8614e8f6743 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -587,11 +587,9 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
seq_printf(m,
"\n all_unreclaimable: %u"
"\n prev_priority: %i"
- "\n temp_priority: %i"
"\n start_pfn: %lu",
zone->all_unreclaimable,
zone->prev_priority,
- zone->temp_priority,
zone->zone_start_pfn);
spin_unlock_irqrestore(&zone->lock, flags);
seq_putc(m, '\n');