From f481891fdc49d3d1b8a9674a1825d183069a805f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Nov 2008 15:36:30 -0800 Subject: cpuset: update top cpuset's mems after adding a node After adding a node into the machine, top cpuset's mems isn't updated. By reviewing the code, we found that the update function cpuset_track_online_nodes() was invoked after node_states[N_ONLINE] changes. It is wrong because N_ONLINE just means node has pgdat, and if node has/added memory, we use N_HIGH_MEMORY. So, We should invoke the update function after node_states[N_HIGH_MEMORY] changes, just like its commit says. This patch fixes it. And we use notifier of memory hotplug instead of direct calling of cpuset_track_online_nodes(). Signed-off-by: Miao Xie Acked-by: Yasunori Goto Cc: David Rientjes Cc: Paul Menage Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'mm') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6837a101437..b5b2b15085a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -498,8 +497,6 @@ int add_memory(int nid, u64 start, u64 size) /* we online node here. we can't roll back from here. */ node_set_online(nid); - cpuset_track_online_nodes(); - if (new_pgdat) { ret = register_one_node(nid); /* -- cgit v1.2.3 From f011c2dae6cffc50ef67d9bd937b488ba5db8913 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 19 Nov 2008 15:36:32 -0800 Subject: mm: vmalloc allocator off by one Fix off by one bug in the KVA allocator that can leave gaps in the address space. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ba6b0f5f7fa..46aab4dbf61 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -362,7 +362,7 @@ retry: goto found; } - while (addr + size >= first->va_start && addr + size <= vend) { + while (addr + size > first->va_start && addr + size <= vend) { addr = ALIGN(first->va_end + PAGE_SIZE, align); n = rb_next(&first->rb_node); -- cgit v1.2.3 From 496850e5f5a372029ceb2b35c811770a9bb073b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 19 Nov 2008 15:36:33 -0800 Subject: mm: vmalloc failure flush fix An initial vmalloc failure should start off a synchronous flush of lazy areas, in case someone is in progress flushing them already, which could cause us to return an allocation failure even if there is plenty of KVA free. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 46aab4dbf61..04f5e320e74 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -521,6 +521,17 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, spin_unlock(&purge_lock); } +/* + * Kick off a purge of the outstanding lazy areas. Don't bother if somebody + * is already purging. + */ +static void try_purge_vmap_area_lazy(void) +{ + unsigned long start = ULONG_MAX, end = 0; + + __purge_vmap_area_lazy(&start, &end, 0, 0); +} + /* * Kick off a purge of the outstanding lazy areas. */ @@ -528,7 +539,7 @@ static void purge_vmap_area_lazy(void) { unsigned long start = ULONG_MAX, end = 0; - __purge_vmap_area_lazy(&start, &end, 0, 0); + __purge_vmap_area_lazy(&start, &end, 1, 0); } /* @@ -539,7 +550,7 @@ static void free_unmap_vmap_area(struct vmap_area *va) va->flags |= VM_LAZY_FREE; atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) - purge_vmap_area_lazy(); + try_purge_vmap_area_lazy(); } static struct vmap_area *find_vmap_area(unsigned long addr) -- cgit v1.2.3 From 0ae15132a4f5c758a6ffcde74495641dc3f62ba1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 19 Nov 2008 15:36:33 -0800 Subject: mm: vmalloc search restart fix Current vmalloc restart search for a free area in case we can't find one. The reason is there are areas which are lazily freed, and could be possibly freed now. However, current implementation start searching the tree from the last failing address, which is pretty much by definition at the end of address space. So, we fail. The proposal of this patch is to restart the search from the beginning of the requested vstart address. This fixes the regression in running KVM virtual machines for me, described in http://lkml.org/lkml/2008/10/28/349, caused by commit db64fe02258f1507e13fe5212a989922323685ce. Signed-off-by: Glauber Costa Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 04f5e320e74..30f826d484f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -324,14 +324,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, BUG_ON(size & ~PAGE_MASK); - addr = ALIGN(vstart, align); - va = kmalloc_node(sizeof(struct vmap_area), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); retry: + addr = ALIGN(vstart, align); + spin_lock(&vmap_area_lock); /* XXX: could have a last_hole cache */ n = vmap_area_root.rb_node; -- cgit v1.2.3 From bda8550deed96687f29992d711a88ea21cff4d26 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 19 Nov 2008 15:36:36 -0800 Subject: migration: fix writepage error Page migration's writeout() has got understandably confused by the nasty AOP_WRITEPAGE_ACTIVATE case: as in normal success, a writepage() error has unlocked the page, so writeout() then needs to relock it. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index 385db89f0c3..1e0d6b237f4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page) remove_migration_ptes(page, page); rc = mapping->a_ops->writepage(page, &wbc); - if (rc < 0) - /* I/O Error writing */ - return -EIO; if (rc != AOP_WRITEPAGE_ACTIVATE) /* unlocked. Relock */ lock_page(page); - return -EAGAIN; + return (rc < 0) ? -EIO : -EAGAIN; } /* -- cgit v1.2.3 From 63eb6b93ce725e4c5f38fc85dd703d49465b03cb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 19 Nov 2008 15:36:37 -0800 Subject: vmscan: let GFP_NOFS go to swap again In the past, GFP_NOFS (but of course not GFP_NOIO) was allowed to reclaim by writing to swap. That got partially broken in 2.6.23, when may_enter_fs initialization was moved up before the allocation of swap, so its PageSwapCache test was failing the first time around, Fix it by setting may_enter_fs when add_to_swap() succeeds with __GFP_IO. In fact, check __GFP_IO before calling add_to_swap(): allocating swap we're not ready to use just increases disk seeking. Signed-off-by: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index c141b3e7807..f83a7ed5c6c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { + if (!(sc->gfp_mask & __GFP_IO)) + goto keep_locked; switch (try_to_munlock(page)) { case SWAP_FAIL: /* shouldn't happen */ case SWAP_AGAIN: @@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, } if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; + may_enter_fs = 1; } #endif /* CONFIG_SWAP */ -- cgit v1.2.3 From 00d8089c54867053a5aae062b765f257ca419e27 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 19 Nov 2008 15:36:44 -0800 Subject: vmscan: fix get_scan_ratio() comment Fix the old comment on the scan ratio calculations. Signed-off-by: Rik van Riel Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index f83a7ed5c6c..7ea1440b53d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1389,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, file_prio = 200 - sc->swappiness; /* - * anon recent_rotated[0] - * %anon = 100 * ----------- / ----------------- * IO cost - * anon + file rotate_sum + * The amount of pressure on anon vs file pages is inversely + * proportional to the fraction of recently scanned pages on + * each list that were recently referenced and in active use. */ ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); ap /= zone->recent_rotated[0] + 1; -- cgit v1.2.3 From 31168481c32c8a485e1003af9433124dede57f8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:33:24 +0000 Subject: meminit section warnings Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 9 +++++---- mm/page_cgroup.c | 13 +++++++------ mm/sparse.c | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'mm') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b5b2b15085a..b1737118546 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -189,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, pgdat->node_start_pfn; } -static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) { struct pglist_data *pgdat = zone->zone_pgdat; int nr_pages = PAGES_PER_SECTION; @@ -216,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) return 0; } -static int __add_section(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn) { int nr_pages = PAGES_PER_SECTION; int ret; @@ -273,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms) * call this function after deciding the zone to which to * add the new pages. */ -int __add_pages(struct zone *zone, unsigned long phys_start_pfn, +int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long nr_pages) { unsigned long i; @@ -470,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat) } -int add_memory(int nid, u64 start, u64 size) +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +int __ref add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat = NULL; int new_pgdat = 0; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 1223d927904..436c00229e7 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -21,7 +21,7 @@ static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { pgdat->node_page_cgroup = NULL; } @@ -97,7 +97,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } -int __meminit init_section_page_cgroup(unsigned long pfn) +/* __alloc_bootmem...() is protected by !slab_available() */ +int __init_refok init_section_page_cgroup(unsigned long pfn) { struct mem_section *section; struct page_cgroup *base, *pc; @@ -158,7 +159,7 @@ void __free_page_cgroup(unsigned long pfn) } } -int online_page_cgroup(unsigned long start_pfn, +int __meminit online_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { @@ -183,7 +184,7 @@ int online_page_cgroup(unsigned long start_pfn, return -ENOMEM; } -int offline_page_cgroup(unsigned long start_pfn, +int __meminit offline_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; @@ -197,7 +198,7 @@ int offline_page_cgroup(unsigned long start_pfn, } -static int page_cgroup_callback(struct notifier_block *self, +static int __meminit page_cgroup_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; @@ -248,7 +249,7 @@ void __init page_cgroup_init(void) " want\n"); } -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { return; } diff --git a/mm/sparse.c b/mm/sparse.c index 39db301b920..083f5b63e7a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, +int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages) { unsigned long section_nr = pfn_to_section_nr(start_pfn); -- cgit v1.2.3 From 2a1dc509747fdcfdf3a2df818a14908aed86c3d4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 1 Dec 2008 03:00:35 +0100 Subject: vmscan: protect zone rotation stats by lru lock The zone's rotation statistics must not be accessed without the corresponding LRU lock held. Fix an unprotected write in shrink_active_list(). Acked-by: Rik van Riel Reviewed-by: KOSAKI Motohiro Signed-off-by: Johannes Weiner Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index 7ea1440b53d..62e7f62fb55 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1248,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, list_add(&page->lru, &l_inactive); } + spin_lock_irq(&zone->lru_lock); /* * Count referenced pages from currently used mappings as * rotated, even though they are moved to the inactive list. @@ -1263,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, pgmoved = 0; lru = LRU_BASE + file * LRU_FILE; - spin_lock_irq(&zone->lru_lock); while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); -- cgit v1.2.3 From b29acbdcf877009af3f1fc0750bcac314c51e055 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Mon, 1 Dec 2008 13:13:47 -0800 Subject: mm: vmalloc fix lazy unmapping cache aliasing Jim Radford has reported that the vmap subsystem rewrite was sometimes causing his VIVT ARM system to behave strangely (seemed like going into infinite loops trying to fault in pages to userspace). We determined that the problem was most likely due to a cache aliasing issue. flush_cache_vunmap was only being called at the moment the page tables were to be taken down, however with lazy unmapping, this can happen after the page has subsequently been freed and allocated for something else. The dangling alias may still have dirty data attached to it. The fix for this problem is to do the cache flushing when the caller has called vunmap -- it would be a bug for them to write anything else to the mapping at that point. That appeared to solve Jim's problems. Reported-by: Jim Radford Signed-off-by: Nick Piggin Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 30f826d484f..f3f6e075856 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -77,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) BUG_ON(addr >= end); pgd = pgd_offset_k(addr); - flush_cache_vunmap(addr, end); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -543,9 +542,10 @@ static void purge_vmap_area_lazy(void) } /* - * Free and unmap a vmap area + * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been + * called for the correct range previously. */ -static void free_unmap_vmap_area(struct vmap_area *va) +static void free_unmap_vmap_area_noflush(struct vmap_area *va) { va->flags |= VM_LAZY_FREE; atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); @@ -553,6 +553,15 @@ static void free_unmap_vmap_area(struct vmap_area *va) try_purge_vmap_area_lazy(); } +/* + * Free and unmap a vmap area + */ +static void free_unmap_vmap_area(struct vmap_area *va) +{ + flush_cache_vunmap(va->va_start, va->va_end); + free_unmap_vmap_area_noflush(va); +} + static struct vmap_area *find_vmap_area(unsigned long addr) { struct vmap_area *va; @@ -734,7 +743,7 @@ static void free_vmap_block(struct vmap_block *vb) spin_unlock(&vmap_block_tree_lock); BUG_ON(tmp != vb); - free_unmap_vmap_area(vb->va); + free_unmap_vmap_area_noflush(vb->va); call_rcu(&vb->rcu_head, rcu_free_vb); } @@ -796,6 +805,9 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(size & ~PAGE_MASK); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); + + flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); + order = get_order(size); offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); -- cgit v1.2.3 From dc19f9db38295f811d9041bd89b113beccbd763a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 1 Dec 2008 13:13:48 -0800 Subject: memcg: memory hotplug fix for notifier callback Fixes for memcg/memory hotplug. While memory hotplug allocate/free memmap, page_cgroup doesn't free page_cgroup at OFFLINE when page_cgroup is allocated via bootomem. (Because freeing bootmem requires special care.) Then, if page_cgroup is allocated by bootmem and memmap is freed/allocated by memory hotplug, page_cgroup->page == page is no longer true. But current MEM_ONLINE handler doesn't check it and update page_cgroup->page if it's not necessary to allocate page_cgroup. (This was not found because memmap is not freed if SPARSEMEM_VMEMMAP is y.) And I noticed that MEM_ONLINE can be called against "part of section". So, freeing page_cgroup at CANCEL_ONLINE will cause trouble. (freeing used page_cgroup) Don't rollback at CANCEL. One more, current memory hotplug notifier is stopped by slub because it sets NOTIFY_STOP_MASK to return vaule. So, page_cgroup's callback never be called. (low priority than slub now.) I think this slub's behavior is not intentional(BUG). and fixes it. Another way to be considered about page_cgroup allocation: - free page_cgroup at OFFLINE even if it's from bootmem and remove specieal handler. But it requires more changes. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12041 Signed-off-by: KAMEZAWA Hiruyoki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Tested-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 43 +++++++++++++++++++++++++++++-------------- mm/slub.c | 6 ++++-- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'mm') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 436c00229e7..0b3cbf090a6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -107,19 +107,29 @@ int __init_refok init_section_page_cgroup(unsigned long pfn) section = __pfn_to_section(pfn); - if (section->page_cgroup) - return 0; - - nid = page_to_nid(pfn_to_page(pfn)); - - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, GFP_KERNEL, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, + if (!section->page_cgroup) { + nid = page_to_nid(pfn_to_page(pfn)); + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; + if (slab_is_available()) { + base = kmalloc_node(table_size, GFP_KERNEL, nid); + if (!base) + base = vmalloc_node(table_size, nid); + } else { + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + } + } else { + /* + * We don't have to allocate page_cgroup again, but + * address of memmap may be changed. So, we have to initialize + * again. + */ + base = section->page_cgroup + pfn; + table_size = 0; + /* check address of memmap is changed or not. */ + if (base->page == pfn_to_page(pfn)) + return 0; } if (!base) { @@ -208,18 +218,23 @@ static int __meminit page_cgroup_callback(struct notifier_block *self, ret = online_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; - case MEM_CANCEL_ONLINE: case MEM_OFFLINE: offline_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; + case MEM_CANCEL_ONLINE: case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } - ret = notifier_from_errno(ret); + + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; + return ret; } diff --git a/mm/slub.c b/mm/slub.c index 7ad489af956..749588a50a5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self, case MEM_CANCEL_OFFLINE: break; } - - ret = notifier_from_errno(ret); + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; return ret; } -- cgit v1.2.3 From f1d0b063d993527754f062c589b73f125024d216 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 2 Dec 2008 10:31:50 -0800 Subject: bdi: register sysfs bdi device only once per queue Devices which share the same queue, like floppies and mtd devices, get registered multiple times in the bdi interface, but bdi accounts only the last registered device of the devices sharing one queue. On remove, all earlier registered devices leak, stay around in sysfs, and cause "duplicate filename" errors if the devices are re-created. This prevents the creation of multiple bdi interfaces per queue, and the bdi device will carry the dev_t name of the block device which is the first one registered, of the pool of devices using the same queue. [akpm@linux-foundation.org: add a WARN_ON so we know which drivers are misbehaving] Tested-by: Peter Korsgaard Acked-by: Peter Zijlstra Signed-off-by: Kay Sievers Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mm') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f2e574dbc30..2a56124dbc2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -176,6 +176,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, int ret = 0; struct device *dev; + if (WARN_ON(bdi->dev)) + goto exit; + va_start(args, fmt); dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); va_end(args); -- cgit v1.2.3 From 9ff473b9a72942c5ac0ad35607cae28d8d59ed7a Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 2 Dec 2008 10:31:52 -0800 Subject: vmscan: evict streaming IO first Count the insertion of new pages in the statistics used to drive the pageout scanning code. This should help the kernel quickly evict streaming file IO. We count on the fact that new file pages start on the inactive file LRU and new anonymous pages start on the active anon list. This means streaming file IO will increment the recent scanned file statistic, while leaving the recent rotated file statistic alone, driving pageout scanning to the file LRUs. Pageout activity does its own list manipulation. Signed-off-by: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Tested-by: Gene Heskett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/swap.c b/mm/swap.c index 2152e48a7b8..2881987603e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -445,6 +445,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct zone *pagezone = page_zone(page); + int file; if (pagezone != zone) { if (zone) @@ -456,8 +457,12 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - if (is_active_lru(lru)) + file = is_file_lru(lru); + zone->recent_scanned[file]++; + if (is_active_lru(lru)) { SetPageActive(page); + zone->recent_rotated[file]++; + } add_page_to_lru_list(zone, page, lru); } if (zone) -- cgit v1.2.3