From aa6f5ffbdba45aa8e19e5048648fc6c7b25376d3 Mon Sep 17 00:00:00 2001 From: merge Date: Thu, 22 Jan 2009 13:55:32 +0000 Subject: MERGE-via-pending-tracking-hist-MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 pending-tracking-hist top was MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 / fdf777a63bcb59e0dfd78bfe2c6242e01f6d4eb9 ... parent commitmessage: From: merge MERGE-via-stable-tracking-hist-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 stable-tracking-hist top was MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 / 90463bfd2d5a3c8b52f6e6d71024a00e052b0ced ... parent commitmessage: From: merge MERGE-via-mokopatches-tracking-hist-fix-stray-endmenu-patch mokopatches-tracking-hist top was fix-stray-endmenu-patch / 3630e0be570de8057e7f8d2fe501ed353cdf34e6 ... parent commitmessage: From: Andy Green fix-stray-endmenu.patch Signed-off-by: Andy Green --- mm/page_cgroup.c | 270 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 242 insertions(+), 28 deletions(-) (limited to 'mm/page_cgroup.c') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59d797dc5a..7006a11350c 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -8,6 +8,7 @@ #include #include #include +#include static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) @@ -15,13 +16,14 @@ __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) pc->flags = 0; pc->mem_cgroup = NULL; pc->page = pfn_to_page(pfn); + INIT_LIST_HEAD(&pc->lru); } static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { pgdat->node_page_cgroup = NULL; } @@ -49,6 +51,9 @@ static int __init alloc_node_page_cgroup(int nid) start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; + if (!nr_pages) + return 0; + table_size = sizeof(struct page_cgroup) * nr_pages; base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), @@ -69,7 +74,7 @@ void __init page_cgroup_init(void) int nid, fail; - if (mem_cgroup_subsys.disabled) + if (mem_cgroup_disabled()) return; for_each_online_node(nid) { @@ -97,28 +102,37 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } -int __meminit init_section_page_cgroup(unsigned long pfn) +/* __alloc_bootmem...() is protected by !slab_available() */ +static int __init_refok init_section_page_cgroup(unsigned long pfn) { - struct mem_section *section; + struct mem_section *section = __pfn_to_section(pfn); struct page_cgroup *base, *pc; unsigned long table_size; int nid, index; - section = __pfn_to_section(pfn); - - if (section->page_cgroup) - return 0; - - nid = page_to_nid(pfn_to_page(pfn)); - - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - if (slab_is_available()) { - base = kmalloc_node(table_size, GFP_KERNEL, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, + if (!section->page_cgroup) { + nid = page_to_nid(pfn_to_page(pfn)); + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; + if (slab_is_available()) { + base = kmalloc_node(table_size, GFP_KERNEL, nid); + if (!base) + base = vmalloc_node(table_size, nid); + } else { + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + } + } else { + /* + * We don't have to allocate page_cgroup again, but + * address of memmap may be changed. So, we have to initialize + * again. + */ + base = section->page_cgroup + pfn; + table_size = 0; + /* check address of memmap is changed or not. */ + if (base->page == pfn_to_page(pfn)) + return 0; } if (!base) { @@ -131,7 +145,6 @@ int __meminit init_section_page_cgroup(unsigned long pfn) __init_page_cgroup(pc, pfn + index); } - section = __pfn_to_section(pfn); section->page_cgroup = base - pfn; total_usage += table_size; return 0; @@ -158,14 +171,14 @@ void __free_page_cgroup(unsigned long pfn) } } -int online_page_cgroup(unsigned long start_pfn, +int __meminit online_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; int fail = 0; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { @@ -183,12 +196,12 @@ int online_page_cgroup(unsigned long start_pfn, return -ENOMEM; } -int offline_page_cgroup(unsigned long start_pfn, +int __meminit offline_page_cgroup(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) @@ -197,7 +210,7 @@ int offline_page_cgroup(unsigned long start_pfn, } -static int page_cgroup_callback(struct notifier_block *self, +static int __meminit page_cgroup_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; @@ -207,18 +220,23 @@ static int page_cgroup_callback(struct notifier_block *self, ret = online_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; - case MEM_CANCEL_ONLINE: case MEM_OFFLINE: offline_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; + case MEM_CANCEL_ONLINE: case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } - ret = notifier_from_errno(ret); + + if (ret) + ret = notifier_from_errno(ret); + else + ret = NOTIFY_OK; + return ret; } @@ -229,7 +247,7 @@ void __init page_cgroup_init(void) unsigned long pfn; int fail = 0; - if (mem_cgroup_subsys.disabled) + if (mem_cgroup_disabled()) return; for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { @@ -248,9 +266,205 @@ void __init page_cgroup_init(void) " want\n"); } -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { return; } #endif + + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP + +static DEFINE_MUTEX(swap_cgroup_mutex); +struct swap_cgroup_ctrl { + struct page **map; + unsigned long length; +}; + +struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; + +/* + * This 8bytes seems big..maybe we can reduce this when we can use "id" for + * cgroup rather than pointer. + */ +struct swap_cgroup { + struct mem_cgroup *val; +}; +#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) +#define SC_POS_MASK (SC_PER_PAGE - 1) + +/* + * SwapCgroup implements "lookup" and "exchange" operations. + * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge + * against SwapCache. At swap_free(), this is accessed directly from swap. + * + * This means, + * - we have no race in "exchange" when we're accessed via SwapCache because + * SwapCache(and its swp_entry) is under lock. + * - When called via swap_free(), there is no user of this entry and no race. + * Then, we don't need lock around "exchange". + * + * TODO: we can push these buffers out to HIGHMEM. + */ + +/* + * allocate buffer for swap_cgroup. + */ +static int swap_cgroup_prepare(int type) +{ + struct page *page; + struct swap_cgroup_ctrl *ctrl; + unsigned long idx, max; + + if (!do_swap_account) + return 0; + ctrl = &swap_cgroup_ctrl[type]; + + for (idx = 0; idx < ctrl->length; idx++) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto not_enough_page; + ctrl->map[idx] = page; + } + return 0; +not_enough_page: + max = idx; + for (idx = 0; idx < max; idx++) + __free_page(ctrl->map[idx]); + + return -ENOMEM; +} + +/** + * swap_cgroup_record - record mem_cgroup for this swp_entry. + * @ent: swap entry to be recorded into + * @mem: mem_cgroup to be recorded + * + * Returns old value at success, NULL at failure. + * (Of course, old value can be NULL.) + */ +struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) +{ + int type = swp_type(ent); + unsigned long offset = swp_offset(ent); + unsigned long idx = offset / SC_PER_PAGE; + unsigned long pos = offset & SC_POS_MASK; + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + struct swap_cgroup *sc; + struct mem_cgroup *old; + + if (!do_swap_account) + return NULL; + + ctrl = &swap_cgroup_ctrl[type]; + + mappage = ctrl->map[idx]; + sc = page_address(mappage); + sc += pos; + old = sc->val; + sc->val = mem; + + return old; +} + +/** + * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry + * @ent: swap entry to be looked up. + * + * Returns pointer to mem_cgroup at success. NULL at failure. + */ +struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) +{ + int type = swp_type(ent); + unsigned long offset = swp_offset(ent); + unsigned long idx = offset / SC_PER_PAGE; + unsigned long pos = offset & SC_POS_MASK; + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + struct swap_cgroup *sc; + struct mem_cgroup *ret; + + if (!do_swap_account) + return NULL; + + ctrl = &swap_cgroup_ctrl[type]; + mappage = ctrl->map[idx]; + sc = page_address(mappage); + sc += pos; + ret = sc->val; + return ret; +} + +int swap_cgroup_swapon(int type, unsigned long max_pages) +{ + void *array; + unsigned long array_size; + unsigned long length; + struct swap_cgroup_ctrl *ctrl; + + if (!do_swap_account) + return 0; + + length = ((max_pages/SC_PER_PAGE) + 1); + array_size = length * sizeof(void *); + + array = vmalloc(array_size); + if (!array) + goto nomem; + + memset(array, 0, array_size); + ctrl = &swap_cgroup_ctrl[type]; + mutex_lock(&swap_cgroup_mutex); + ctrl->length = length; + ctrl->map = array; + if (swap_cgroup_prepare(type)) { + /* memory shortage */ + ctrl->map = NULL; + ctrl->length = 0; + vfree(array); + mutex_unlock(&swap_cgroup_mutex); + goto nomem; + } + mutex_unlock(&swap_cgroup_mutex); + + printk(KERN_INFO + "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" + " and %ld bytes to hold mem_cgroup pointers on swap\n", + array_size, length * PAGE_SIZE); + printk(KERN_INFO + "swap_cgroup can be disabled by noswapaccount boot option.\n"); + + return 0; +nomem: + printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); + printk(KERN_INFO + "swap_cgroup can be disabled by noswapaccount boot option\n"); + return -ENOMEM; +} + +void swap_cgroup_swapoff(int type) +{ + int i; + struct swap_cgroup_ctrl *ctrl; + + if (!do_swap_account) + return; + + mutex_lock(&swap_cgroup_mutex); + ctrl = &swap_cgroup_ctrl[type]; + if (ctrl->map) { + for (i = 0; i < ctrl->length; i++) { + struct page *page = ctrl->map[i]; + if (page) + __free_page(page); + } + vfree(ctrl->map); + ctrl->map = NULL; + ctrl->length = 0; + } + mutex_unlock(&swap_cgroup_mutex); +} + +#endif -- cgit v1.2.3