From 54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 28 Apr 2008 02:12:16 -0700 Subject: mm: use two zonelist that are filtered by GFP mask Currently a node has two sets of zonelists, one for each zone type in the system and a second set for GFP_THISNODE allocations. Based on the zones allowed by a gfp mask, one of these zonelists is selected. All of these zonelists consume memory and occupy cache lines. This patch replaces the multiple zonelists per-node with two zonelists. The first contains all populated zones in the system, ordered by distance, for fallback allocations when the target/preferred node has no free pages. The second contains all populated zones in the node suitable for GFP_THISNODE allocations. An iterator macro is introduced called for_each_zone_zonelist() that interates through each zone allowed by the GFP flags in the selected zonelist. Signed-off-by: Mel Gorman Acked-by: Christoph Lameter Signed-off-by: Lee Schermerhorn Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Christoph Lameter Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 13 ++++++++-- include/linux/mmzone.h | 65 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 54 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e865d51f1c7..e1c6064cb6c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags) * virtual kernel addresses to the allocated page(s). */ +static inline int gfp_zonelist(gfp_t flags) +{ + if (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) + return 1; + + return 0; +} + /* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. - * There are many zonelists per node, two for each active zone. + * There are two zonelists per node, one for all zones with memory and + * one containing just zones from the node the zonelist belongs to. * * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets * optimized to &contig_page_data at compile-time. */ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) { - return NODE_DATA(nid)->node_zonelists + gfp_zone(flags); + return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); } #ifndef HAVE_ARCH_FREE_PAGE diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 451eaa13bc2..d5c33a0b89e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) * The NUMA zonelists are doubled becausse we need zonelists that restrict the * allocations to a single node for GFP_THISNODE. * - * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback - * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) + * [0] : Zonelist with fallback + * [1] : No fallback (GFP_THISNODE) */ -#define MAX_ZONELISTS (2 * MAX_NR_ZONES) +#define MAX_ZONELISTS 2 /* @@ -464,7 +464,7 @@ struct zonelist_cache { unsigned long last_full_zap; /* when last zap'd (jiffies) */ }; #else -#define MAX_ZONELISTS MAX_NR_ZONES +#define MAX_ZONELISTS 1 struct zonelist_cache; #endif @@ -486,24 +486,6 @@ struct zonelist { #endif }; -#ifdef CONFIG_NUMA -/* - * Only custom zonelists like MPOL_BIND need to be filtered as part of - * policies. As described in the comment for struct zonelist_cache, these - * zonelists will not have a zlcache so zlcache_ptr will not be set. Use - * that to determine if the zonelists needs to be filtered or not. - */ -static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) -{ - return !zonelist->zlcache_ptr; -} -#else -static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) -{ - return 0; -} -#endif /* CONFIG_NUMA */ - #ifdef CONFIG_ARCH_POPULATES_NODE_MAP struct node_active_region { unsigned long start_pfn; @@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) +/* Returns the first zone at or below highest_zoneidx in a zonelist */ +static inline struct zone **first_zones_zonelist(struct zonelist *zonelist, + enum zone_type highest_zoneidx) +{ + struct zone **z; + + /* Find the first suitable zone to use for the allocation */ + z = zonelist->zones; + while (*z && zone_idx(*z) > highest_zoneidx) + z++; + + return z; +} + +/* Returns the next zone at or below highest_zoneidx in a zonelist */ +static inline struct zone **next_zones_zonelist(struct zone **z, + enum zone_type highest_zoneidx) +{ + /* Find the next suitable zone to use for the allocation */ + while (*z && zone_idx(*z) > highest_zoneidx) + z++; + + return z; +} + +/** + * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index + * @zone - The current zone in the iterator + * @z - The current pointer within zonelist->zones being iterated + * @zlist - The zonelist being iterated + * @highidx - The zone index of the highest zone to return + * + * This iterator iterates though all zones at or below a given zone index. + */ +#define for_each_zone_zonelist(zone, z, zlist, highidx) \ + for (z = first_zones_zonelist(zlist, highidx), zone = *z++; \ + zone; \ + z = next_zones_zonelist(z, highidx), zone = *z++) + #ifdef CONFIG_SPARSEMEM #include #endif -- cgit v1.2.3