aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-05 21:49:35 +0100
committerIngo Molnar <mingo@elte.hu>2009-03-05 21:49:35 +0100
commit28e93a005b65cc5b4f569642e9c7903618ea5fe1 (patch)
tree3c98b8dc457b05b290ad640c413e453a264739bf /arch/x86/mm
parentcaab36b593b44c97e3c7707c6a8054b320f8d622 (diff)
parented26dbe5ae045e5bf95c6dc27497397a3fde52e1 (diff)
Merge branch 'x86/mm' into x86/core
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/highmem_32.c9
-rw-r--r--arch/x86/mm/init.c344
-rw-r--r--arch/x86/mm/init_32.c255
-rw-r--r--arch/x86/mm/init_64.c272
-rw-r--r--arch/x86/mm/ioremap.c14
-rw-r--r--arch/x86/mm/numa_32.c5
6 files changed, 418 insertions, 481 deletions
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 00f127c80b0..d11745334a6 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
-#ifdef CONFIG_NUMA
void __init set_highmem_pages_init(void)
{
struct zone *zone;
@@ -182,11 +181,3 @@ void __init set_highmem_pages_init(void)
}
totalram_pages += totalhigh_pages;
}
-#else
-void __init set_highmem_pages_init(void)
-{
- add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
-
- totalram_pages += totalhigh_pages;
-}
-#endif /* CONFIG_NUMA */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d..6d63e3d1253 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@
+#include <linux/ioport.h>
#include <linux/swap.h>
+
#include <asm/cacheflush.h>
+#include <asm/e820.h>
+#include <asm/init.h>
#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/sections.h>
#include <asm/system.h>
+#include <asm/tlbflush.h>
+
+unsigned long __initdata e820_table_start;
+unsigned long __meminitdata e820_table_end;
+unsigned long __meminitdata e820_table_top;
+
+int after_bootmem;
+
+int direct_gbpages
+#ifdef CONFIG_DIRECT_GBPAGES
+ = 1
+#endif
+;
+
+static void __init find_early_table_space(unsigned long end, int use_pse,
+ int use_gbpages)
+{
+ unsigned long puds, pmds, ptes, tables, start;
+
+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+
+ if (use_gbpages) {
+ unsigned long extra;
+
+ extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
+ pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+ } else
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+
+ if (use_pse) {
+ unsigned long extra;
+
+ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+#ifdef CONFIG_X86_32
+ extra += PMD_SIZE;
+#endif
+ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ } else
+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
+
+#ifdef CONFIG_X86_32
+ /* for fixmap */
+ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+#endif
+
+ /*
+ * RED-PEN putting page tables only on node 0 could
+ * cause a hotspot and fill up ZONE_DMA. The page tables
+ * need roughly 0.5KB per GB.
+ */
+#ifdef CONFIG_X86_32
+ start = 0x7000;
+ e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+ tables, PAGE_SIZE);
+#else /* CONFIG_X86_64 */
+ start = 0x8000;
+ e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
+#endif
+ if (e820_table_start == -1UL)
+ panic("Cannot find space for the kernel page tables");
+
+ e820_table_start >>= PAGE_SHIFT;
+ e820_table_end = e820_table_start;
+ e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+
+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
+ end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+}
+
+struct map_range {
+ unsigned long start;
+ unsigned long end;
+ unsigned page_size_mask;
+};
+
+#ifdef CONFIG_X86_32
+#define NR_RANGE_MR 3
+#else /* CONFIG_X86_64 */
+#define NR_RANGE_MR 5
+#endif
+
+static int save_mr(struct map_range *mr, int nr_range,
+ unsigned long start_pfn, unsigned long end_pfn,
+ unsigned long page_size_mask)
+{
+ if (start_pfn < end_pfn) {
+ if (nr_range >= NR_RANGE_MR)
+ panic("run out of range for init_memory_mapping\n");
+ mr[nr_range].start = start_pfn<<PAGE_SHIFT;
+ mr[nr_range].end = end_pfn<<PAGE_SHIFT;
+ mr[nr_range].page_size_mask = page_size_mask;
+ nr_range++;
+ }
+
+ return nr_range;
+}
+
+#ifdef CONFIG_X86_64
+static void __init init_gbpages(void)
+{
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+ unsigned long end)
+{
+ unsigned long page_size_mask = 0;
+ unsigned long start_pfn, end_pfn;
+ unsigned long pos;
+ unsigned long ret;
+
+ struct map_range mr[NR_RANGE_MR];
+ int nr_range, i;
+ int use_pse, use_gbpages;
+
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
+
+ if (!after_bootmem)
+ init_gbpages();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * This will simplify cpa(), which otherwise needs to support splitting
+ * large pages into small in interrupt context, etc.
+ */
+ use_pse = use_gbpages = 0;
+#else
+ use_pse = cpu_has_pse;
+ use_gbpages = direct_gbpages;
+#endif
+
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_PAE
+ set_nx();
+ if (nx_enabled)
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#endif
+
+ /* Enable PSE if available */
+ if (cpu_has_pse)
+ set_in_cr4(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ }
+#endif
+
+ if (use_gbpages)
+ page_size_mask |= 1 << PG_LEVEL_1G;
+ if (use_pse)
+ page_size_mask |= 1 << PG_LEVEL_2M;
+
+ memset(mr, 0, sizeof(mr));
+ nr_range = 0;
+
+ /* head if not big page alignment ? */
+ start_pfn = start >> PAGE_SHIFT;
+ pos = start_pfn << PAGE_SHIFT;
+#ifdef CONFIG_X86_32
+ /*
+ * Don't use a large page for the first 2/4MB of memory
+ * because there are often fixed size MTRRs in there
+ * and overlapping MTRRs into large pages can cause
+ * slowdowns.
+ */
+ if (pos == 0)
+ end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
+ else
+ end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#else /* CONFIG_X86_64 */
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#endif
+ if (end_pfn > (end >> PAGE_SHIFT))
+ end_pfn = end >> PAGE_SHIFT;
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+ /* big page (2M) range */
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+#ifdef CONFIG_X86_32
+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+#else /* CONFIG_X86_64 */
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+#endif
+
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+#ifdef CONFIG_X86_64
+ /* big page (1G) range */
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask &
+ ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+
+ /* tail is not big page (1G) alignment */
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
+#endif
+
+ /* tail is not big page (2M) alignment */
+ start_pfn = pos>>PAGE_SHIFT;
+ end_pfn = end>>PAGE_SHIFT;
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+
+ /* try to merge same page size and continuous */
+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+ unsigned long old_start;
+ if (mr[i].end != mr[i+1].start ||
+ mr[i].page_size_mask != mr[i+1].page_size_mask)
+ continue;
+ /* move it */
+ old_start = mr[i].start;
+ memmove(&mr[i], &mr[i+1],
+ (nr_range - 1 - i) * sizeof(struct map_range));
+ mr[i--].start = old_start;
+ nr_range--;
+ }
+
+ for (i = 0; i < nr_range; i++)
+ printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+ mr[i].start, mr[i].end,
+ (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+ (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+
+ /*
+ * Find space for the kernel direct mapping tables.
+ *
+ * Later we should allocate these tables in the local node of the
+ * memory mapped. Unfortunately this is done currently before the
+ * nodes are discovered.
+ */
+ if (!after_bootmem)
+ find_early_table_space(end, use_pse, use_gbpages);
+
+#ifdef CONFIG_X86_32
+ for (i = 0; i < nr_range; i++)
+ kernel_physical_mapping_init(mr[i].start, mr[i].end,
+ mr[i].page_size_mask);
+ ret = end;
+#else /* CONFIG_X86_64 */
+ for (i = 0; i < nr_range; i++)
+ ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
+ mr[i].page_size_mask);
+#endif
+
+#ifdef CONFIG_X86_32
+ early_ioremap_page_table_range_init();
+
+ load_cr3(swapper_pg_dir);
+#endif
+
+#ifdef CONFIG_X86_64
+ if (!after_bootmem)
+ mmu_cr4_features = read_cr4();
+#endif
+ __flush_tlb_all();
+
+ if (!after_bootmem && e820_table_end > e820_table_start)
+ reserve_early(e820_table_start << PAGE_SHIFT,
+ e820_table_end << PAGE_SHIFT, "PGTABLE");
+
+ if (!after_bootmem)
+ early_memtest(start, end);
+
+ return ret >> PAGE_SHIFT;
+}
+
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+ if (pagenr <= 256)
+ return 1;
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ return 0;
+ if (!page_is_ram(pagenr))
+ return 1;
+ return 0;
+}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
@@ -47,3 +384,10 @@ void free_initmem(void)
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ free_init_pages("initrd memory", start, end);
+}
+#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb..2966c6b8d30 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
+#include <asm/init.h>
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
static noinline int do_test_wp_bit(void);
-
-static unsigned long __initdata table_start;
-static unsigned long __meminitdata table_end;
-static unsigned long __meminitdata table_top;
-
-static int __initdata after_init_bootmem;
+bool __read_mostly __vmalloc_start_set = false;
static __init void *alloc_low_page(void)
{
- unsigned long pfn = table_end++;
+ unsigned long pfn = e820_table_end++;
void *adr;
- if (pfn >= table_top)
+ if (pfn >= e820_table_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
- if (after_init_bootmem)
+ if (after_bootmem)
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
else
pmd_table = (pmd_t *)alloc_low_page();
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = NULL;
- if (after_init_bootmem) {
+ if (after_bootmem) {
#ifdef CONFIG_DEBUG_PAGEALLOC
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
#endif
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < table_start
- || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
+ || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
pte_t *newpte;
int i;
- BUG_ON(after_init_bootmem);
+ BUG_ON(after_bootmem);
newpte = alloc_low_page();
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(newpte + i, pte[i]);
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
* of max_low_pfn pages, by creating page tables starting from address
* PAGE_OFFSET:
*/
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
- unsigned long start_pfn,
- unsigned long end_pfn,
- int use_pse)
+unsigned long __init
+kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask)
{
+ int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+ unsigned long start_pfn, end_pfn;
+ pgd_t *pgd_base = swapper_pg_dir;
int pgd_idx, pmd_idx, pte_ofs;
unsigned long pfn;
pgd_t *pgd;
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
unsigned pages_2m, pages_4k;
int mapping_iter;
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = end >> PAGE_SHIFT;
+
/*
* First iteration will setup identity mapping using large/small pages
* based on use_pse, with other attributes same as set by
@@ -369,26 +371,6 @@ repeat:
mapping_iter = 2;
goto repeat;
}
-}
-
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
- * is valid. The argument is a physical page number.
- *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
- */
-int devmem_is_allowed(unsigned long pagenr)
-{
- if (pagenr <= 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
- return 0;
- if (!page_is_ram(pagenr))
- return 1;
return 0;
}
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
* be partially populated, and so it avoids stomping on any existing
* mappings.
*/
-static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
+void __init early_ioremap_page_table_range_init(void)
{
+ pgd_t *pgd_base = swapper_pg_dir;
unsigned long vaddr, end;
/*
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str)
}
early_param("noexec", noexec_setup);
-static void __init set_nx(void)
+void __init set_nx(void)
{
unsigned int v[4], l, h;
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
+ __vmalloc_start_set = true;
+
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(max_low_pfn));
@@ -814,176 +799,61 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+static unsigned long __init setup_node_bootmem(int nodeid,
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long bootmap)
+{
+ unsigned long bootmap_size;
+
+ if (start_pfn > max_low_pfn)
+ return bootmap;
+ if (end_pfn > max_low_pfn)
+ end_pfn = max_low_pfn;
+
+ /* don't touch min_low_pfn */
+ bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
+ bootmap >> PAGE_SHIFT,
+ start_pfn, end_pfn);
+ printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
+ nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
+ nodeid, bootmap, bootmap + bootmap_size);
+ free_bootmem_with_active_regions(nodeid, end_pfn);
+ early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+
+ return bootmap + bootmap_size;
+}
+
void __init setup_bootmem_allocator(void)
{
- int i;
+ int nodeid;
unsigned long bootmap_size, bootmap;
/*
* Initialize the boot-time allocator (with low memory only):
*/
bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
- max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
+ bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
- min_low_pfn, max_low_pfn);
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
- printk(KERN_INFO " low ram: %08lx - %08lx\n",
- min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
- printk(KERN_INFO " bootmap %08lx - %08lx\n",
- bootmap, bootmap + bootmap_size);
- for_each_online_node(i)
- free_bootmem_with_active_regions(i, max_low_pfn);
- early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-
- after_init_bootmem = 1;
-}
-
-static void __init find_early_table_space(unsigned long end, int use_pse)
-{
- unsigned long puds, pmds, ptes, tables, start;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
-
- if (use_pse) {
- unsigned long extra;
-
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
- extra += PMD_SIZE;
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
- tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
-
- /* for fixmap */
- tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
- start = 0x7000;
- table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
- table_top = table_start + (tables>>PAGE_SHIFT);
-
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT,
- (table_start << PAGE_SHIFT) + tables);
-}
-
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
-{
- pgd_t *pgd_base = swapper_pg_dir;
- unsigned long start_pfn, end_pfn;
- unsigned long big_page_start;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- int use_pse = 0;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ for_each_online_node(nodeid)
+ bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid],
+ node_end_pfn[nodeid], bootmap);
#else
- int use_pse = cpu_has_pse;
-#endif
-
- /*
- * Find space for the kernel direct mapping tables.
- */
- if (!after_init_bootmem)
- find_early_table_space(end, use_pse);
-
-#ifdef CONFIG_X86_PAE
- set_nx();
- if (nx_enabled)
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+ bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap);
#endif
- /* Enable PSE if available */
- if (cpu_has_pse)
- set_in_cr4(X86_CR4_PSE);
-
- /* Enable PGE if available */
- if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- }
-
- /*
- * Don't use a large page for the first 2/4MB of memory
- * because there are often fixed size MTRRs in there
- * and overlapping MTRRs into large pages can cause
- * slowdowns.
- */
- big_page_start = PMD_SIZE;
-
- if (start < big_page_start) {
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
- } else {
- /* head is not big page alignment ? */
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- }
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
-
- /* big page range */
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < (big_page_start >> PAGE_SHIFT))
- start_pfn = big_page_start >> PAGE_SHIFT;
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
- use_pse);
-
- /* tail is not big page alignment ? */
- start_pfn = end_pfn;
- if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
- end_pfn = end >> PAGE_SHIFT;
- if (start_pfn < end_pfn)
- kernel_physical_mapping_init(pgd_base, start_pfn,
- end_pfn, 0);
- }
-
- early_ioremap_page_table_range_init(pgd_base);
-
- load_cr3(swapper_pg_dir);
-
- __flush_tlb_all();
-
- if (!after_init_bootmem)
- reserve_early(table_start << PAGE_SHIFT,
- table_end << PAGE_SHIFT, "PGTABLE");
-
- if (!after_init_bootmem)
- early_memtest(start, end);
-
- return end >> PAGE_SHIFT;
+ after_bootmem = 1;
}
-
/*
* paging_init() sets up the page tables - note that the first 8MB are
* already mapped by head.S.
@@ -1217,13 +1087,6 @@ void mark_rodata_ro(void)
}
#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df..8a853bc3b28 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
#include <asm/kdebug.h>
#include <asm/numa.h>
#include <asm/cacheflush.h>
+#include <asm/init.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
- = 1
-#endif
-;
-
static int __init parse_direct_gbpages_off(char *arg)
{
direct_gbpages = 0;
@@ -87,8 +82,6 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time.
*/
-int after_bootmem;
-
pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void)
}
}
-static unsigned long __initdata table_start;
-static unsigned long __meminitdata table_end;
-static unsigned long __meminitdata table_top;
-
static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long pfn = table_end++;
+ unsigned long pfn = e820_table_end++;
void *adr;
if (after_bootmem) {
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
- if (pfn >= table_top)
+ if (pfn >= e820_table_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
return phys_pud_init(pud, addr, end, page_size_mask);
}
-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
-{
- unsigned long puds, pmds, ptes, tables, start;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
- if (use_gbpages) {
- unsigned long extra;
- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
-
- if (use_pse) {
- unsigned long extra;
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
- tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
-
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
- start = 0x8000;
- table_start = find_e820_area(start, end, tables, PAGE_SIZE);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
- table_top = table_start + (tables >> PAGE_SHIFT);
-
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
-}
-
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-
-static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask)
+unsigned long __init
+kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask)
{
unsigned long next, last_map_addr = end;
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
return last_map_addr;
}
-struct map_range {
- unsigned long start;
- unsigned long end;
- unsigned page_size_mask;
-};
-
-#define NR_RANGE_MR 5
-
-static int save_mr(struct map_range *mr, int nr_range,
- unsigned long start_pfn, unsigned long end_pfn,
- unsigned long page_size_mask)
-{
-
- if (start_pfn < end_pfn) {
- if (nr_range >= NR_RANGE_MR)
- panic("run out of range for init_memory_mapping\n");
- mr[nr_range].start = start_pfn<<PAGE_SHIFT;
- mr[nr_range].end = end_pfn<<PAGE_SHIFT;
- mr[nr_range].page_size_mask = page_size_mask;
- nr_range++;
- }
-
- return nr_range;
-}
-
-/*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
- */
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
-{
- unsigned long last_map_addr = 0;
- unsigned long page_size_mask = 0;
- unsigned long start_pfn, end_pfn;
- unsigned long pos;
-
- struct map_range mr[NR_RANGE_MR];
- int nr_range, i;
- int use_pse, use_gbpages;
-
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
-
- /*
- * Find space for the kernel direct mapping tables.
- *
- * Later we should allocate these tables in the local node of the
- * memory mapped. Unfortunately this is done currently before the
- * nodes are discovered.
- */
- if (!after_bootmem)
- init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- use_pse = use_gbpages = 0;
-#else
- use_pse = cpu_has_pse;
- use_gbpages = direct_gbpages;
-#endif
-
- if (use_gbpages)
- page_size_mask |= 1 << PG_LEVEL_1G;
- if (use_pse)
- page_size_mask |= 1 << PG_LEVEL_2M;
-
- memset(mr, 0, sizeof(mr));
- nr_range = 0;
-
- /* head if not big page alignment ?*/
- start_pfn = start >> PAGE_SHIFT;
- pos = start_pfn << PAGE_SHIFT;
- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- if (end_pfn > (end >> PAGE_SHIFT))
- end_pfn = end >> PAGE_SHIFT;
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* big page (2M) range*/
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* big page (1G) range */
- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask &
- ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* tail is not big page (1G) alignment */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- if (start_pfn < end_pfn) {
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
- }
-
- /* tail is not big page (2M) alignment */
- start_pfn = pos>>PAGE_SHIFT;
- end_pfn = end>>PAGE_SHIFT;
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
-
- /* try to merge same page size and continuous */
- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
- unsigned long old_start;
- if (mr[i].end != mr[i+1].start ||
- mr[i].page_size_mask != mr[i+1].page_size_mask)
- continue;
- /* move it */
- old_start = mr[i].start;
- memmove(&mr[i], &mr[i+1],
- (nr_range - 1 - i) * sizeof (struct map_range));
- mr[i--].start = old_start;
- nr_range--;
- }
-
- for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
- mr[i].start, mr[i].end,
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
-
- if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
-
- for (i = 0; i < nr_range; i++)
- last_map_addr = kernel_physical_mapping_init(
- mr[i].start, mr[i].end,
- mr[i].page_size_mask);
-
- if (!after_bootmem)
- mmu_cr4_features = read_cr4();
- __flush_tlb_all();
-
- if (!after_bootmem && table_end > table_start)
- reserve_early(table_start << PAGE_SHIFT,
- table_end << PAGE_SHIFT, "PGTABLE");
-
- printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
- last_map_addr, end);
-
- if (!after_bootmem)
- early_memtest(start, end);
-
- return last_map_addr >> PAGE_SHIFT;
-}
-
#ifndef CONFIG_NUMA
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif /* CONFIG_MEMORY_HOTPLUG */
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
- * is valid. The argument is a physical page number.
- *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains bios code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
- */
-int devmem_is_allowed(unsigned long pagenr)
-{
- if (pagenr <= 256)
- return 1;
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
- return 0;
- if (!page_is_ram(pagenr))
- return 1;
- return 0;
-}
-
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
kcore_modules, kcore_vsyscall;
@@ -1019,13 +768,6 @@ void mark_rodata_ro(void)
#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_init_pages("initrd memory", start, end);
-}
-#endif
-
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648..62773abdf08 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -38,8 +38,7 @@ unsigned long __phys_addr(unsigned long x)
} else {
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
x -= PAGE_OFFSET;
- VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
- !phys_addr_valid(x));
+ VIRTUAL_BUG_ON(!phys_addr_valid(x));
}
return x;
}
@@ -56,10 +55,8 @@ bool __virt_addr_valid(unsigned long x)
if (x < PAGE_OFFSET)
return false;
x -= PAGE_OFFSET;
- if (system_state == SYSTEM_BOOTING ?
- x > MAXMEM : !phys_addr_valid(x)) {
+ if (!phys_addr_valid(x))
return false;
- }
}
return pfn_valid(x >> PAGE_SHIFT);
@@ -76,10 +73,9 @@ static inline int phys_addr_valid(unsigned long addr)
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
- /* VMALLOC_* aren't constants; not available at the boot time */
+ /* VMALLOC_* aren't constants */
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING &&
- is_vmalloc_addr((void *) x));
+ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
@@ -89,7 +85,7 @@ bool __virt_addr_valid(unsigned long x)
{
if (x < PAGE_OFFSET)
return false;
- if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x))
+ if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
return false;
return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 451fe95a035..3daefa04ace 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn,
for_each_online_node(nid)
propagate_e820_map_node(nid);
- for_each_online_node(nid)
+ for_each_online_node(nid) {
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+ }
- NODE_DATA(0)->bdata = &bootmem_node_data[0];
setup_bootmem_allocator();
}