From b69408e88bd86b98feb7b9a38fd865e1ddb29827 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Sat, 18 Oct 2008 20:26:14 -0700
Subject: vmscan: Use an indexed array for LRU variables

Currently we are defining explicit variables for the inactive and active
list.  An indexed array can be more generic and avoid repeating similar
code in several places in the reclaim code.

We are saving a few bytes in terms of code size:

 Before:
    text    data     bss     dec     hex filename
 4097753  573120 4092484 8763357  85b7dd vmlinux

 After:
    text    data     bss     dec     hex filename
 4097729  573120 4092484 8763333  85b7c5 vmlinux

Having an easy way to add new lru lists may ease future work on the
reclaim code.

Signed-off-by: Rik van Riel
Signed-off-by: Lee Schermerhorn
Signed-off-by: Christoph Lameter
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index d7826af2fb0..52c0335c1b7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -696,7 +696,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
     zone->pages_low,
     zone->pages_high,
     zone->pages_scanned,
-    zone->nr_scan_active, zone->nr_scan_inactive,
+    zone->lru[LRU_ACTIVE].nr_scan,
+    zone->lru[LRU_INACTIVE].nr_scan,
     zone->spanned_pages,
     zone->present_pages);
--
cgit v1.2.3
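The shape of the change is easier to see outside the kernel tree.  The
sketch below is a stand-alone user-space illustration of the indexed-array
idea, not kernel code: apart from the LRU_ACTIVE/LRU_INACTIVE indices and
the nr_scan field visible in the hunk above, every name in it is invented
for the example.

/*
 * Illustrative only: per-list reclaim state kept in one array indexed
 * by an enum, instead of one named field per list.
 */
#include <stdio.h>

enum lru_list {
    LRU_INACTIVE,
    LRU_ACTIVE,
    NR_LRU_LISTS
};

struct lru {
    unsigned long nr_scan;      /* pages scanned on this list */
};

struct zone_sketch {
    struct lru lru[NR_LRU_LISTS];
};

int main(void)
{
    struct zone_sketch zone = {
        .lru = { { .nr_scan = 12 }, { .nr_scan = 34 } },
    };
    enum lru_list l;

    /* one loop replaces per-list copies of near-identical code */
    for (l = 0; l < NR_LRU_LISTS; l++)
        printf("lru %d scanned %lu\n", (int)l, zone.lru[l].nr_scan);

    return 0;
}

Once the lists are indexed this way, adding a list is a new enum entry and
a larger loop bound rather than another copy of the same code, which is
what the following patches build on.
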
From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 18 Oct 2008 20:26:32 -0700
Subject: vmscan: split LRU lists into anon & file sets

Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon").  The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists.  The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel
Signed-off-by: Lee Schermerhorn
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Hugh Dickins
Signed-off-by: Daisuke Nishimura
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 52c0335c1b7..27400b7da7c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -619,8 +619,10 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
     /* Zoned VM counters */
     "nr_free_pages",
-    "nr_inactive",
-    "nr_active",
+    "nr_inactive_anon",
+    "nr_active_anon",
+    "nr_inactive_file",
+    "nr_active_file",
     "nr_anon_pages",
     "nr_mapped",
     "nr_file_pages",
@@ -688,7 +690,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
     "\n min %lu"
     "\n low %lu"
     "\n high %lu"
-    "\n scanned %lu (a: %lu i: %lu)"
+    "\n scanned %lu (aa: %lu ia: %lu af: %lu if: %lu)"
     "\n spanned %lu"
     "\n present %lu",
     zone_page_state(zone, NR_FREE_PAGES),
@@ -696,8 +698,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
     zone->pages_low,
     zone->pages_high,
     zone->pages_scanned,
-    zone->lru[LRU_ACTIVE].nr_scan,
-    zone->lru[LRU_INACTIVE].nr_scan,
+    zone->lru[LRU_ACTIVE_ANON].nr_scan,
+    zone->lru[LRU_INACTIVE_ANON].nr_scan,
+    zone->lru[LRU_ACTIVE_FILE].nr_scan,
+    zone->lru[LRU_INACTIVE_FILE].nr_scan,
     zone->spanned_pages,
     zone->present_pages);
--
cgit v1.2.3
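After the split there are four lists, and the reclaim code repeatedly
needs the answers to two independent questions about a list index: is it
active, and is it file-backed?  The stand-alone sketch below shows one way
such an index can be encoded so that each question is a single bit test;
the bit layout and the helper names are assumptions made for illustration,
only the four list names themselves appear in the diff above.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative encoding: bit 0 = active, bit 1 = file-backed. */
enum lru_list {
    LRU_INACTIVE_ANON = 0,
    LRU_ACTIVE_ANON   = 1,
    LRU_INACTIVE_FILE = 2,
    LRU_ACTIVE_FILE   = 3,
    NR_LRU_LISTS
};

static bool is_active_lru(enum lru_list l)
{
    return l & 1;   /* low bit distinguishes active from inactive */
}

static bool is_file_lru(enum lru_list l)
{
    return l & 2;   /* next bit distinguishes file-backed from anon */
}

int main(void)
{
    enum lru_list l;

    for (l = 0; l < NR_LRU_LISTS; l++)
        printf("list %d: %s %s\n", (int)l,
               is_active_lru(l) ? "active" : "inactive",
               is_file_lru(l) ? "file" : "anon");
    return 0;
}

With an encoding like this, the scan-balancing policy mentioned in the
changelog can treat "the two anon lists" or "the two file lists" as a pair
selected by one bit instead of special-casing every list by name.
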
From 556adecba110bf5f1db6c6b56416cfab5bcab698 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 18 Oct 2008 20:26:34 -0700
Subject: vmscan: second chance replacement for anonymous pages

We avoid evicting and scanning anonymous pages for the most part, but
under some workloads we can end up with most of memory filled with
anonymous pages.  At that point, we suddenly need to clear the referenced
bits on all of memory, which can take ages on very large memory systems.

We can reduce the maximum number of pages that need to be scanned by not
taking the referenced state into account when deactivating an anonymous
page.  After all, every anonymous page starts out referenced, so why
check?

If an anonymous page gets referenced again before it reaches the end of
the inactive list, we move it back to the active list.

To keep the maximum amount of necessary work reasonable, we scale the
active to inactive ratio with the size of memory, using the formula
active:inactive ratio = sqrt(memory in GB * 10).

Kswapd CPU use now seems to scale by the amount of pageout bandwidth,
instead of by the amount of memory present in the system.

[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel
Signed-off-by: KOSAKI Motohiro
Signed-off-by: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 27400b7da7c..4380b0dba6d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -738,10 +738,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
     seq_printf(m,
     "\n all_unreclaimable: %u"
     "\n prev_priority: %i"
-    "\n start_pfn: %lu",
+    "\n start_pfn: %lu"
+    "\n inactive_ratio: %u",
     zone_is_all_unreclaimable(zone),
     zone->prev_priority,
-    zone->zone_start_pfn);
+    zone->zone_start_pfn,
+    zone->inactive_ratio);
     seq_putc(m, '\n');
 }
--
cgit v1.2.3
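The formula quoted above is easy to work through numerically.  The sketch
below is illustrative user-space code rather than the kernel
implementation; treating the result as an integer and clamping it to a
minimum of 1 are assumptions made for the example (build with -lm).

#include <math.h>
#include <stdio.h>

/* active:inactive ratio = sqrt(memory in GB * 10), per the changelog */
static unsigned int inactive_ratio(unsigned long gigabytes)
{
    unsigned int ratio = (unsigned int)sqrt((double)gigabytes * 10.0);

    return ratio ? ratio : 1;   /* assumed floor so the ratio never hits 0 */
}

int main(void)
{
    static const unsigned long sizes_gb[] = { 1, 4, 64, 1024 };

    for (int i = 0; i < 4; i++)
        printf("%4lu GB -> active:inactive = %u:1\n",
               sizes_gb[i], inactive_ratio(sizes_gb[i]));
    /* prints 3:1, 6:1, 25:1 and 101:1 respectively */
    return 0;
}

So a 1 GB machine keeps roughly a quarter of its anonymous pages on the
inactive list, while a 1 TB machine keeps about one percent there, which
bounds the amount of second-chance scanning the kernel has to do.  The
resulting per-zone value is what the hunk above exports as inactive_ratio
in the zoneinfo output.
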
From bbfd28eee9fbd73e780b19beb3dc562befbb94fa Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Sat, 18 Oct 2008 20:26:40 -0700
Subject: unevictable lru: add event counting with statistics

Fix to unevictable-lru-page-statistics.patch

Add unevictable lru infrastructure vm events to the statistics patch.
Rename the "NORECL_" and "noreclaim_" symbols and text strings to
"UNEVICTABLE_" and "unevictable_", respectively.

Currently, both the infrastructure and the mlocked pages event are added
by a single patch later in the series.  This makes it difficult to add or
rework the incremental patches.  The events actually "belong" with the
stats, so pull them up to here.

Also, restore the event counting to putback_lru_page().  This was removed
from a previous patch in the series where it was "misplaced".  The actual
events weren't defined that early.

Signed-off-by: Lee Schermerhorn
Cc: Rik van Riel
Reviewed-by: KOSAKI Motohiro
Cc: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4380b0dba6d..6cb08cdd4f0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -677,6 +677,11 @@ static const char * const vmstat_text[] = {
     "htlb_buddy_alloc_success",
     "htlb_buddy_alloc_fail",
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+    "unevictable_pgs_culled",
+    "unevictable_pgs_scanned",
+    "unevictable_pgs_rescued",
+#endif
 #endif
 };
--
cgit v1.2.3

From 7b854121eb3e5ba0241882ff939e2c485228c9c5 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn
Date: Sat, 18 Oct 2008 20:26:40 -0700
Subject: Unevictable LRU Page Statistics

Report unevictable pages per zone and system wide.

Kosaki Motohiro added support for memory controller unevictable
statistics.

[riel@redhat.com: fix printk in show_free_areas()]
[akpm@linux-foundation.org: fix units in /proc/vmstats]
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Signed-off-by: KOSAKI Motohiro
Debugged-by: Hiroshi Shimamoto
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6cb08cdd4f0..6db2f631931 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -623,6 +623,9 @@ static const char * const vmstat_text[] = {
     "nr_active_anon",
     "nr_inactive_file",
     "nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+    "nr_unevictable",
+#endif
     "nr_anon_pages",
     "nr_mapped",
     "nr_file_pages",
--
cgit v1.2.3

From 5344b7e648980cc2ca613ec03a56a8222ff48820 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sat, 18 Oct 2008 20:26:51 -0700
Subject: vmstat: mlocked pages statistics

Add NR_MLOCK zone page state, which provides a (conservative) count of
mlocked pages (actually, the number of mlocked pages moved off the LRU).

Reworked by lts to fit in with the modified mlock page support in the
Reclaim Scalability series.

[kosaki.motohiro@jp.fujitsu.com: fix incorrect Mlocked field of /proc/meminfo]
[lee.schermerhorn@hp.com: mlocked-pages: add event counting with statistics]
Signed-off-by: Nick Piggin
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6db2f631931..9e28abc0a0b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -625,6 +625,7 @@ static const char * const vmstat_text[] = {
     "nr_active_file",
 #ifdef CONFIG_UNEVICTABLE_LRU
     "nr_unevictable",
+    "nr_mlock",
 #endif
     "nr_anon_pages",
     "nr_mapped",
@@ -684,6 +685,10 @@ static const char * const vmstat_text[] = {
     "unevictable_pgs_culled",
     "unevictable_pgs_scanned",
     "unevictable_pgs_rescued",
+    "unevictable_pgs_mlocked",
+    "unevictable_pgs_munlocked",
+    "unevictable_pgs_cleared",
+    "unevictable_pgs_stranded",
 #endif
 #endif
 };
--
cgit v1.2.3
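Taken together, the three patches above add two zone page counters
(nr_unevictable and nr_mlock) and seven unevictable_pgs_* event counters,
all of which show up as lines in /proc/vmstat.  The stand-alone user-space
sketch below pulls just those fields out of the file; it assumes a kernel
built with CONFIG_UNEVICTABLE_LRU, on anything else the fields are simply
absent and nothing is printed.

#include <stdio.h>
#include <string.h>

/* Print the unevictable/mlock counters introduced above, if present. */
int main(void)
{
    static const char *fields[] = {
        "nr_unevictable", "nr_mlock",
        "unevictable_pgs_culled", "unevictable_pgs_scanned",
        "unevictable_pgs_rescued", "unevictable_pgs_mlocked",
        "unevictable_pgs_munlocked", "unevictable_pgs_cleared",
        "unevictable_pgs_stranded",
    };
    char name[64];
    unsigned long long val;
    FILE *f = fopen("/proc/vmstat", "r");

    if (!f) {
        perror("/proc/vmstat");
        return 1;
    }
    while (fscanf(f, "%63s %llu", name, &val) == 2) {
        for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
            if (strcmp(name, fields[i]) == 0)
                printf("%-28s %llu\n", name, val);
    }
    fclose(f);
    return 0;
}

The values in /proc/vmstat are raw page counts, unlike the kB figures in
/proc/meminfo, which is what the "fix /proc/vmstat units" fixups folded
into these patches are about.
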
Signed-off-by: Lee Schermerhorn
Signed-off-by: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9e28abc0a0b..9343227c5c6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -689,6 +689,7 @@ static const char * const vmstat_text[] = {
     "unevictable_pgs_munlocked",
     "unevictable_pgs_cleared",
     "unevictable_pgs_stranded",
+    "unevictable_pgs_mlockfreed",
 #endif
 #endif
 };
--
cgit v1.2.3

From 8f32f7e5ac2ed11b0659b6b55af926f3d58ffd9d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Mon, 6 Oct 2008 04:13:52 +0400
Subject: proc: move /proc/buddyinfo boilerplate to mm/vmstat.c

Signed-off-by: Alexey Dobriyan
---
 mm/vmstat.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9343227c5c6..f45d7245a28 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -8,7 +8,7 @@
  *  Copyright (C) 2006 Silicon Graphics, Inc.,
  *  Christoph Lameter
  */
-
+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -384,7 +384,7 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z)
 #endif

 #ifdef CONFIG_PROC_FS
-
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>

 static char * const migratetype_names[MIGRATE_TYPES] = {
@@ -581,13 +581,25 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
     return 0;
 }

-const struct seq_operations fragmentation_op = {
+static const struct seq_operations fragmentation_op = {
     .start = frag_start,
     .next = frag_next,
     .stop = frag_stop,
     .show = frag_show,
 };

+static int fragmentation_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &fragmentation_op);
+}
+
+static const struct file_operations fragmentation_file_operations = {
+    .open = fragmentation_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = seq_release,
+};
+
 const struct seq_operations pagetypeinfo_op = {
     .start = frag_start,
     .next = frag_next,
@@ -898,9 +910,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 static struct notifier_block __cpuinitdata vmstat_notifier =
     { &vmstat_cpuup_callback, NULL, 0 };
+#endif

 static int __init setup_vmstat(void)
 {
+#ifdef CONFIG_SMP
     int cpu;

     refresh_zone_stat_thresholds();
@@ -908,7 +922,10 @@ static int __init setup_vmstat(void)
     for_each_online_cpu(cpu)
         start_cpu_timer(cpu);
+#endif
+#ifdef CONFIG_PROC_FS
+    proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
+#endif
     return 0;
 }
 module_init(setup_vmstat)
-#endif
--
cgit v1.2.3
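The boilerplate being consolidated here follows a fixed three-step recipe:
a seq_operations iterator, a file_operations wrapper whose open() calls
seq_open(), and a proc_create() call at init time.  The module below is a
minimal, self-contained sketch of that same recipe, not code from this
series: the demo_* names and the /proc/demo_stat path are invented, and it
targets the proc_create(..., struct file_operations *) interface used in
these patches (recent kernels pass a struct proc_ops instead).  The three
follow-up patches below apply the identical pattern to /proc/pagetypeinfo,
/proc/vmstat and /proc/zoneinfo.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Step 1: a seq_operations iterator; this one yields a single record. */
static void *demo_start(struct seq_file *m, loff_t *pos)
{
    return *pos == 0 ? SEQ_START_TOKEN : NULL;
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
    (*pos)++;
    return NULL;            /* nothing after the single record */
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
    seq_printf(m, "hello from /proc/demo_stat\n");
    return 0;
}

static const struct seq_operations demo_op = {
    .start = demo_start,
    .next  = demo_next,
    .stop  = demo_stop,
    .show  = demo_show,
};

/* Step 2: wrap the iterator in file_operations via seq_open(). */
static int demo_open(struct inode *inode, struct file *file)
{
    return seq_open(file, &demo_op);
}

static const struct file_operations demo_file_operations = {
    .open    = demo_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = seq_release,
};

/* Step 3: register the file at init time, drop it again on unload. */
static int __init demo_init(void)
{
    proc_create("demo_stat", S_IRUGO, NULL, &demo_file_operations);
    return 0;
}

static void __exit demo_exit(void)
{
    remove_proc_entry("demo_stat", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Making the seq_operations static and doing the registration locally is
what allows the matching declarations and registration calls to move out
of the generic proc code, which is the point of the "move ... boilerplate"
subjects even though the diffs shown here are limited to mm/vmstat.c.
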
From 74e2e8e8ce7b3c0f878a349f9fa6cf2831548eef Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Mon, 6 Oct 2008 04:15:36 +0400
Subject: proc: move /proc/pagetypeinfo boilerplate to mm/vmstat.c

Signed-off-by: Alexey Dobriyan
---
 mm/vmstat.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index f45d7245a28..d624d251946 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -600,13 +600,25 @@ static const struct file_operations fragmentation_file_operations = {
     .release = seq_release,
 };

-const struct seq_operations pagetypeinfo_op = {
+static const struct seq_operations pagetypeinfo_op = {
     .start = frag_start,
     .next = frag_next,
     .stop = frag_stop,
     .show = pagetypeinfo_show,
 };

+static int pagetypeinfo_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &pagetypeinfo_op);
+}
+
+static const struct file_operations pagetypeinfo_file_ops = {
+    .open = pagetypeinfo_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = seq_release,
+};
+
 #ifdef CONFIG_ZONE_DMA
 #define TEXT_FOR_DMA(xx) xx "_dma",
 #else
@@ -925,6 +937,7 @@ static int __init setup_vmstat(void)
 #endif
 #ifdef CONFIG_PROC_FS
     proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
+    proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
 #endif
     return 0;
 }
--
cgit v1.2.3

From b6aa44ab698c7df9d951d3eb45c4fcb8ba68fb25 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Mon, 6 Oct 2008 04:17:48 +0400
Subject: proc: move /proc/vmstat boilerplate to mm/vmstat.c

Signed-off-by: Alexey Dobriyan
Acked-by: Christoph Lameter
---
 mm/vmstat.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index d624d251946..7e1854b8186 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -858,13 +858,24 @@ static void vmstat_stop(struct seq_file *m, void *arg)
     m->private = NULL;
 }

-const struct seq_operations vmstat_op = {
+static const struct seq_operations vmstat_op = {
     .start = vmstat_start,
     .next = vmstat_next,
     .stop = vmstat_stop,
     .show = vmstat_show,
 };

+static int vmstat_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &vmstat_op);
+}
+
+static const struct file_operations proc_vmstat_file_operations = {
+    .open = vmstat_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = seq_release,
+};
 #endif /* CONFIG_PROC_FS */

 #ifdef CONFIG_SMP
@@ -938,6 +949,7 @@ static int __init setup_vmstat(void)
 #ifdef CONFIG_PROC_FS
     proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
     proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
+    proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
 #endif
     return 0;
 }
--
cgit v1.2.3

From 5c9fe6281b75832e8d2555ec8700ea763d9a865e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Mon, 6 Oct 2008 04:19:42 +0400
Subject: proc: move /proc/zoneinfo boilerplate to mm/vmstat.c

Signed-off-by: Alexey Dobriyan
Acked-by: Christoph Lameter
---
 mm/vmstat.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'mm/vmstat.c')

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7e1854b8186..c3ccfda23ad 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -795,7 +795,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
     return 0;
 }

-const struct seq_operations zoneinfo_op = {
+static const struct seq_operations zoneinfo_op = {
     .start = frag_start, /* iterate over all zones. The same as in
                           * fragmentation. */
     .next = frag_next,
@@ -803,6 +803,18 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
     .show = zoneinfo_show,
 };

+static int zoneinfo_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &zoneinfo_op);
+}
+
+static const struct file_operations proc_zoneinfo_file_operations = {
+    .open = zoneinfo_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = seq_release,
+};
+
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
     unsigned long *v;
@@ -950,6 +962,7 @@ static int __init setup_vmstat(void)
     proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
     proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
     proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
+    proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
 #endif
     return 0;
 }
--
cgit v1.2.3