From b69408e88bd86b98feb7b9a38fd865e1ddb29827 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Sat, 18 Oct 2008 20:26:14 -0700
Subject: vmscan: Use an indexed array for LRU variables

Currently we are defining explicit variables for the inactive and active
list.  An indexed array can be more generic and avoid repeating similar
code in several places in the reclaim code.

We are saving a few bytes in terms of code size:

Before:
   text    data     bss     dec     hex filename
4097753  573120 4092484 8763357  85b7dd vmlinux

After:
   text    data     bss     dec     hex filename
4097729  573120 4092484 8763333  85b7c5 vmlinux

Having an easy way to add new lru lists may ease future work on the
reclaim code.

Signed-off-by: Rik van Riel
Signed-off-by: Lee Schermerhorn
Signed-off-by: Christoph Lameter
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 49 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 895bc4e9303..2704729777e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,40 +1,67 @@
+static inline void
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_add(&page->lru, &zone->lru[l].list);
+	__inc_zone_state(zone, NR_LRU_BASE + l);
+}
+
+static inline void
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	list_del(&page->lru);
+	__dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
 static inline void
 add_page_to_active_list(struct zone *zone, struct page *page)
 {
-	list_add(&page->lru, &zone->active_list);
-	__inc_zone_state(zone, NR_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE);
 }
 
 static inline void
 add_page_to_inactive_list(struct zone *zone, struct page *page)
 {
-	list_add(&page->lru, &zone->inactive_list);
-	__inc_zone_state(zone, NR_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE);
 }
 
 static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_ACTIVE);
+	del_page_from_lru_list(zone, page, LRU_ACTIVE);
 }
 
 static inline void
 del_page_from_inactive_list(struct zone *zone, struct page *page)
 {
-	list_del(&page->lru);
-	__dec_zone_state(zone, NR_INACTIVE);
+	del_page_from_lru_list(zone, page, LRU_INACTIVE);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
+	enum lru_list l = LRU_INACTIVE;
+
 	list_del(&page->lru);
 	if (PageActive(page)) {
 		__ClearPageActive(page);
-		__dec_zone_state(zone, NR_ACTIVE);
-	} else {
-		__dec_zone_state(zone, NR_INACTIVE);
+		l = LRU_ACTIVE;
 	}
+	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+	enum lru_list lru = LRU_BASE;
+
+	if (PageActive(page))
+		lru += LRU_ACTIVE;
+
+	return lru;
+}
--
cgit v1.2.3


From b2e185384f534781fd22f5ce170b2ad26f97df70 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 18 Oct 2008 20:26:30 -0700
Subject: define page_file_cache() function

Define page_file_cache() function to answer the question:
is page backed by a file?

Originally part of Rik van Riel's split-lru patch.  Extracted to make
available for other, independent reclaim patches.

Moved inline function to linux/mm_inline.h where it will be needed by
subsequent "split LRU" and "noreclaim" patches.

Unfortunately this needs to use a page flag, since the PG_swapbacked
state needs to be preserved all the way to the point where the page is
last removed from the LRU.  Trying to derive the status from other info
in the page resulted in wrong VM statistics in earlier split VM
patchsets.

The total number of page flags in use on a 32 bit machine after this
patch is 19.

[akpm@linux-foundation.org: fix up out-of-order merge fallout]
[hugh@veritas.com: splitlru: shmem_getpage SetPageSwapBacked sooner]
Signed-off-by: Rik van Riel
Signed-off-by: Lee Schermerhorn
Signed-off-by: MinChan Kim
Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2704729777e..96e970485b6 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,3 +1,28 @@
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
+
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns !0 if @page is page cache page backed by a regular filesystem,
+ * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
+{
+	if (PageSwapBacked(page))
+		return 0;
+
+	/* The page is page cache backed by a normal filesystem. */
+	return 1;
+}
+
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
@@ -65,3 +90,5 @@ static inline enum lru_list page_lru(struct page *page)
 
 	return lru;
 }
+
+#endif
--
cgit v1.2.3


From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 18 Oct 2008 20:26:32 -0700
Subject: vmscan: split LRU lists into anon & file sets

Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel
Signed-off-by: Lee Schermerhorn
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Hugh Dickins
Signed-off-by: Daisuke Nishimura
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 50 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 12 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 96e970485b6..2eb599465d5 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
  *
- * Returns !0 if @page is page cache page backed by a regular filesystem,
+ * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
  * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
  * Used by functions that manipulate the LRU lists, to sort a page
  * onto the right LRU list.
@@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
 		return 0;
 
 	/* The page is page cache backed by a normal filesystem. */
-	return 1;
+	return LRU_FILE;
 }
 
 static inline void
@@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 }
 
 static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
 {
-	add_page_to_lru_list(zone, page, LRU_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
 }
 
 static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_anon_list(struct zone *zone, struct page *page)
 {
-	add_page_to_lru_list(zone, page, LRU_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
 }
 
 static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_file_list(struct zone *zone, struct page *page)
 {
-	del_page_from_lru_list(zone, page, LRU_ACTIVE);
+	add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
 }
 
 static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_file_list(struct zone *zone, struct page *page)
 {
-	del_page_from_lru_list(zone, page, LRU_INACTIVE);
+	add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
+}
+
+static inline void
+del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
+}
+
+static inline void
+del_page_from_active_anon_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
+}
+
+static inline void
+del_page_from_inactive_file_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
+}
+
+static inline void
+del_page_from_active_file_list(struct zone *zone, struct page *page)
+{
+	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l = LRU_INACTIVE;
+	enum lru_list l = LRU_BASE;
 
 	list_del(&page->lru);
 	if (PageActive(page)) {
 		__ClearPageActive(page);
-		l = LRU_ACTIVE;
+		l += LRU_ACTIVE;
 	}
+	l += page_is_file_cache(page);
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
@@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)
 
 	if (PageActive(page))
 		lru += LRU_ACTIVE;
+	lru += page_is_file_cache(page);
 
 	return lru;
 }
--
cgit v1.2.3


From 556adecba110bf5f1db6c6b56416cfab5bcab698 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 18 Oct 2008 20:26:34 -0700
Subject: vmscan: second chance replacement for anonymous pages

We avoid evicting and scanning anonymous pages for the most part, but
under some workloads we can end up with most of memory filled with
anonymous pages.  At that point, we suddenly need to clear the referenced
bits on all of memory, which can take ages on very large memory systems.

We can reduce the maximum number of pages that need to be scanned by not
taking the referenced state into account when deactivating an anonymous
page.  After all, every anonymous page starts out referenced, so why
check?

If an anonymous page gets referenced again before it reaches the end of
the inactive list, we move it back to the active list.

To keep the maximum amount of necessary work reasonable, we scale the
active to inactive ratio with the size of memory, using the formula
active:inactive ratio = sqrt(memory in GB * 10).

Kswapd CPU use now seems to scale by the amount of pageout bandwidth,
instead of by the amount of memory present in the system.

[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel
Signed-off-by: KOSAKI Motohiro
Signed-off-by: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2eb599465d5..f451fedd1e7 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -117,4 +117,23 @@ static inline enum lru_list page_lru(struct page *page)
 	return lru;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
 #endif
--
cgit v1.2.3


From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Sat, 18 Oct 2008 20:26:39 -0700
Subject: Unevictable LRU Infrastructure

When the system contains lots of mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend lots of time scanning over these
pages.  Worse still, the presence of lots of unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.

Infrastructure to manage pages excluded from reclaim--i.e., hidden from
vmscan.  Based on a patch by Larry Woodman of Red Hat.

Reworked to maintain "unevictable" pages on a separate per-zone LRU list,
to "hide" them from vmscan.

Kosaki Motohiro added the support for the memory controller unevictable
lru list.

Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.

The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable.  Subsequent patches will add the various
!evictable tests.  We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.

To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference.  If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list.  This way, we avoid "stranding" evictable pages on the
unevictable list.

[akpm@linux-foundation.org: fix fallout from out-of-order merge]
[riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[nishimura@mxp.nes.nec.co.jp: remove redundant mapping check]
[kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c]
[kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure]
[kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Signed-off-by: KOSAKI Motohiro
Debugged-by: Benjamin Kidwell
Signed-off-by: Daisuke Nishimura
Signed-off-by: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f451fedd1e7..67d7697fd01 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page)
 	enum lru_list l = LRU_BASE;
 
 	list_del(&page->lru);
-	if (PageActive(page)) {
-		__ClearPageActive(page);
-		l += LRU_ACTIVE;
+	if (PageUnevictable(page)) {
+		__ClearPageUnevictable(page);
+		l = LRU_UNEVICTABLE;
+	} else {
+		if (PageActive(page)) {
+			__ClearPageActive(page);
+			l += LRU_ACTIVE;
+		}
+		l += page_is_file_cache(page);
 	}
-	l += page_is_file_cache(page);
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
@@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page)
 {
 	enum lru_list lru = LRU_BASE;
 
-	if (PageActive(page))
-		lru += LRU_ACTIVE;
-	lru += page_is_file_cache(page);
+	if (PageUnevictable(page))
+		lru = LRU_UNEVICTABLE;
+	else {
+		if (PageActive(page))
+			lru += LRU_ACTIVE;
+		lru += page_is_file_cache(page);
+	}
 
 	return lru;
 }
--
cgit v1.2.3


From 902d2e8ae0de29f483840ba1134af27343b9564d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Sat, 18 Oct 2008 20:26:54 -0700
Subject: vmscan: kill unused lru functions

Several LRU manipulation functions
are not used now.  So they can be removed.

Signed-off-by: KOSAKI Motohiro
Acked-by: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm_inline.h | 48 -----------------------------------------------
 1 file changed, 48 deletions(-)

(limited to 'include/linux/mm_inline.h')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 67d7697fd01..c948350c378 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -37,54 +37,6 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
-static inline void
-add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
-}
-
-static inline void
-add_page_to_active_anon_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
-}
-
-static inline void
-add_page_to_inactive_file_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
-static inline void
-add_page_to_active_file_list(struct zone *zone, struct page *page)
-{
-	add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
-}
-
-static inline void
-del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
-}
-
-static inline void
-del_page_from_active_anon_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
-}
-
-static inline void
-del_page_from_inactive_file_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
-static inline void
-del_page_from_active_file_list(struct zone *zone, struct page *page)
-{
-	del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
-}
-
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
--
cgit v1.2.3
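
Two ideas run through the series above: folding a page's active/file/unevictable
state into a single LRU array index (page_lru), and sizing the inactive anon list
with active:inactive ratio = sqrt(memory in GB * 10) (inactive_anon_is_low).  The
following standalone sketch shows both in isolation.  It is not kernel code and not
part of any commit above: the toy_page/toy_* names and the main() driver are invented
stand-ins; only the enum layout and the formula follow the diffs and commit text.

/* Build: cc toy_lru.c -o toy_lru -lm */
#include <math.h>
#include <stdio.h>

enum lru_base { LRU_BASE = 0, LRU_ACTIVE = 1, LRU_FILE = 2 };

enum lru_list {
	LRU_INACTIVE_ANON = LRU_BASE,
	LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,
	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
	LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,
	LRU_UNEVICTABLE,
	NR_LRU_LISTS
};

/* Stand-in for struct page: just the three bits page_lru() looks at. */
struct toy_page { int active, file, unevictable; };

/* Same arithmetic as the final page_lru() in mm_inline.h, on the toy struct. */
static enum lru_list toy_page_lru(const struct toy_page *page)
{
	enum lru_list lru = LRU_BASE;

	if (page->unevictable)
		return LRU_UNEVICTABLE;
	if (page->active)
		lru += LRU_ACTIVE;
	if (page->file)
		lru += LRU_FILE;
	return lru;
}

/* active:inactive anon ratio = sqrt(memory in GB * 10), per the commit text. */
static unsigned long toy_inactive_ratio(unsigned long zone_gb)
{
	unsigned long ratio = (unsigned long)sqrt((double)zone_gb * 10.0);

	return ratio ? ratio : 1;
}

int main(void)
{
	struct toy_page p = { .active = 1, .file = 1, .unevictable = 0 };

	/* An active file-backed page indexes the LRU_ACTIVE_FILE list (3). */
	printf("active file page -> lru index %d (LRU_ACTIVE_FILE = %d)\n",
	       toy_page_lru(&p), LRU_ACTIVE_FILE);

	/* A 64 GB zone gets inactive_ratio 25: keep roughly 1/26 of anon inactive. */
	printf("64 GB zone -> inactive_ratio %lu\n", toy_inactive_ratio(64));
	return 0;
}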