diff options
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 132 |
1 files changed, 99 insertions, 33 deletions
diff --git a/mm/slab.c b/mm/slab.c index d66c2b0d971..d0bd7f07ab0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char * dump_stack(); } +#ifdef CONFIG_NUMA +/* + * Special reaping functions for NUMA systems called from cache_reap(). + * These take care of doing round robin flushing of alien caches (containing + * objects freed on different nodes from which they were allocated) and the + * flushing of remote pcps by calling drain_node_pages. + */ +static DEFINE_PER_CPU(unsigned long, reap_node); + +static void init_reap_node(int cpu) +{ + int node; + + node = next_node(cpu_to_node(cpu), node_online_map); + if (node == MAX_NUMNODES) + node = 0; + + __get_cpu_var(reap_node) = node; +} + +static void next_reap_node(void) +{ + int node = __get_cpu_var(reap_node); + + /* + * Also drain per cpu pages on remote zones + */ + if (node != numa_node_id()) + drain_node_pages(node); + + node = next_node(node, node_online_map); + if (unlikely(node >= MAX_NUMNODES)) + node = first_node(node_online_map); + __get_cpu_var(reap_node) = node; +} + +#else +#define init_reap_node(cpu) do { } while (0) +#define next_reap_node(void) do { } while (0) +#endif + /* * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz * via the workqueue/eventd. @@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu) * at that time. */ if (keventd_up() && reap_work->func == NULL) { + init_reap_node(cpu); INIT_WORK(reap_work, cache_reap, NULL); schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); } @@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep, } } +/* + * Called from cache_reap() to regularly drain alien caches round robin. + */ +static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +{ + int node = __get_cpu_var(reap_node); + + if (l3->alien) { + struct array_cache *ac = l3->alien[node]; + if (ac && ac->avail) { + spin_lock_irq(&ac->lock); + __drain_alien_cache(cachep, ac, node); + spin_unlock_irq(&ac->lock); + } + } +} + static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) { int i = 0; @@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al #else #define drain_alien_cache(cachep, alien) do { } while (0) +#define reap_alien(cachep, l3) do { } while (0) static inline struct array_cache **alloc_alien_cache(int node, int limit) { @@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void) struct cache_sizes *sizes; struct cache_names *names; int i; + int order; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); @@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void) cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); - cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0, - &left_over, &cache_cache.num); + for (order = 0; order < MAX_ORDER; order++) { + cache_estimate(order, cache_cache.buffer_size, + cache_line_size(), 0, &left_over, &cache_cache.num); + if (cache_cache.num) + break; + } if (!cache_cache.num) BUG(); - + cache_cache.gfporder = order; cache_cache.colour = left_over / cache_cache.colour_off; cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size()); @@ -1628,36 +1693,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, size_t align, unsigned long flags) { size_t left_over = 0; + int gfporder; - for (;; cachep->gfporder++) { + for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { unsigned int num; size_t remainder; - if (cachep->gfporder > MAX_GFP_ORDER) { - cachep->num = 0; - break; - } - - cache_estimate(cachep->gfporder, size, align, flags, - &remainder, &num); + cache_estimate(gfporder, size, align, flags, &remainder, &num); if (!num) continue; + /* More than offslab_limit objects will cause problems */ - if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) + if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) break; + /* Found something acceptable - save it away */ cachep->num = num; + cachep->gfporder = gfporder; left_over = remainder; /* + * A VFS-reclaimable slab tends to have most allocations + * as GFP_NOFS and we really don't want to have to be allocating + * higher-order pages when we are unable to shrink dcache. + */ + if (flags & SLAB_RECLAIM_ACCOUNT) + break; + + /* * Large number of objects is good, but very large slabs are * currently bad for the gfp()s. */ - if (cachep->gfporder >= slab_break_gfp_order) + if (gfporder >= slab_break_gfp_order) break; - if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder)) - /* Acceptable internal fragmentation */ + /* + * Acceptable internal fragmentation? + */ + if ((left_over * 8) <= (PAGE_SIZE << gfporder)) break; } return left_over; @@ -1717,6 +1790,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, BUG(); } + /* + * Prevent CPUs from coming and going. + * lock_cpu_hotplug() nests outside cache_chain_mutex + */ + lock_cpu_hotplug(); + mutex_lock(&cache_chain_mutex); list_for_each(p, &cache_chain) { @@ -1863,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, size = ALIGN(size, align); - if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) { - /* - * A VFS-reclaimable slab tends to have most allocations - * as GFP_NOFS and we really don't want to have to be allocating - * higher-order pages when we are unable to shrink dcache. - */ - cachep->gfporder = 0; - cache_estimate(cachep->gfporder, size, align, flags, - &left_over, &cachep->num); - } else - left_over = calculate_slab_order(cachep, size, align, flags); + left_over = calculate_slab_order(cachep, size, align, flags); if (!cachep->num) { printk("kmem_cache_create: couldn't create cache %s.\n", name); @@ -1918,8 +1987,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->dtor = dtor; cachep->name = name; - /* Don't let CPUs to come and go */ - lock_cpu_hotplug(); if (g_cpucache_up == FULL) { enable_cpucache(cachep); @@ -1978,12 +2045,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); - unlock_cpu_hotplug(); oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); + unlock_cpu_hotplug(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2550,7 +2617,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", cachep->name, cachep->num, slabp, slabp->inuse); for (i = 0; - i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); + i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); i++) { if ((i % 16) == 0) printk("\n%03x:", i); @@ -3490,8 +3557,7 @@ static void cache_reap(void *unused) check_irq_on(); l3 = searchp->nodelists[numa_node_id()]; - if (l3->alien) - drain_alien_cache(searchp, l3->alien); + reap_alien(searchp, l3); spin_lock_irq(&l3->list_lock); drain_array_locked(searchp, cpu_cache_get(searchp), 0, @@ -3541,7 +3607,7 @@ static void cache_reap(void *unused) } check_irq_on(); mutex_unlock(&cache_chain_mutex); - drain_remote_pages(); + next_reap_node(); /* Setup the next iteration */ schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); } |