From b69408e88bd86b98feb7b9a38fd865e1ddb29827 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 18 Oct 2008 20:26:14 -0700 Subject: vmscan: Use an indexed array for LRU variables Currently we are defining explicit variables for the inactive and active list. An indexed array can be more generic and avoid repeating similar code in several places in the reclaim code. We are saving a few bytes in terms of code size: Before: text data bss dec hex filename 4097753 573120 4092484 8763357 85b7dd vmlinux After: text data bss dec hex filename 4097729 573120 4092484 8763333 85b7c5 vmlinux Having an easy way to add new lru lists may ease future work on the reclaim code. Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: Christoph Lameter Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fdf3967e139..a6ac0d491fe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -69,10 +69,8 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority); -extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, - struct zone *zone, int priority); -extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, - struct zone *zone, int priority); +extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, + int priority, enum lru_list lru); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void page_reset_bad_cgroup(struct page *page) @@ -159,14 +157,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, { } -static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, - struct zone *zone, int priority) -{ - return 0; -} - -static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, - struct zone *zone, int priority) +static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, + struct zone *zone, int priority, + enum lru_list lru) { return 0; } -- cgit v1.2.3 From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:32 -0700 Subject: vmscan: split LRU lists into anon & file sets Split the LRU lists in two, one set for pages that are backed by real file systems ("file") and one for pages that are backed by memory and swap ("anon"). The latter includes tmpfs. The advantage of doing this is that the VM will not have to scan over lots of anonymous pages (which we generally do not want to swap out), just to find the page cache pages that it should evict. This patch has the infrastructure and a basic policy to balance how much we scan the anon lists and how much we scan the file lists. The big policy changes are in separate patches. [lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset] [kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru] [kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page] [hugh@veritas.com: memcg swapbacked pages active] [hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED] [akpm@linux-foundation.org: fix /proc/vmstat units] [nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration] [kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo] [kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()] Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a6ac0d491fe..8d8f05c1515 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, unsigned long *scanned, int order, int mode, struct zone *z, struct mem_cgroup *mem_cont, - int active); + int active, int file); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); -- cgit v1.2.3 From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:39 -0700 Subject: Unevictable LRU Infrastructure When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour. Infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan. Kosaki Motohiro added the support for the memory controller unevictable lru list. Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on. The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU. A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path. To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list. [akpm@linux-foundation.org: fix fallout from out-of-order merge] [riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build] [nishimura@mxp.nes.nec.co.jp: remove redundant mapping check] [kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework] [kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c] [kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure] [kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch] [kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Debugged-by: Benjamin Kidwell Signed-off-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8d8f05c1515..ee1b2fcb441 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern void mem_cgroup_move_lists(struct page *page, bool active); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, -- cgit v1.2.3 From 52d4b9ac0b985168009c2a57098324e67bae171f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Sat, 18 Oct 2008 20:28:16 -0700 Subject: memcg: allocate all page_cgroup at boot Allocate all page_cgroup at boot and remove page_cgroup poitner from struct page. This patch adds an interface as struct page_cgroup *lookup_page_cgroup(struct page*) All FLATMEM/DISCONTIGMEM/SPARSEMEM and MEMORY_HOTPLUG is supported. Remove page_cgroup pointer reduces the amount of memory by - 4 bytes per PAGE_SIZE. - 8 bytes per PAGE_SIZE if memory controller is disabled. (even if configured.) On usual 8GB x86-32 server, this saves 8MB of NORMAL_ZONE memory. On my x86-64 server with 48GB of memory, this saves 96MB of memory. I think this reduction makes sense. By pre-allocation, kmalloc/kfree in charge/uncharge are removed. This means - we're not necessary to be afraid of kmalloc faiulre. (this can happen because of gfp_mask type.) - we can avoid calling kmalloc/kfree. - we can avoid allocating tons of small objects which can be fragmented. - we can know what amount of memory will be used for this extra-lru handling. I added printk message as "allocated %ld bytes of page_cgroup" "please try cgroup_disable=memory option if you don't want" maybe enough informative for users. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ee1b2fcb441..1fbe14d3952 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,6 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) - -extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, @@ -72,16 +69,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline void page_reset_bad_cgroup(struct page *page) -{ -} - -static inline struct page_cgroup *page_get_page_cgroup(struct page *page) -{ - return NULL; -} +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { -- cgit v1.2.3