diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/discontig_32.c | 54 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 25 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 21 | ||||
-rw-r--r-- | arch/x86/mm/k8topology_64.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 41 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 392 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 30 |
11 files changed, 262 insertions, 362 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf98368..8b4eac0ca07 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -156,7 +156,7 @@ static void __init propagate_e820_map_node(int nid) */ static void __init allocate_pgdat(int nid) { - if (nid && node_has_online_mem(nid)) + if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); @@ -164,16 +164,13 @@ static void __init allocate_pgdat(int nid) } } -#ifdef CONFIG_DISCONTIGMEM /* - * In the discontig memory model, a portion of the kernel virtual area (KVA) - * is reserved and portions of nodes are mapped using it. This is to allow - * node-local memory to be allocated for structures that would normally require - * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers - * should be prepared to allocate from the bootmem allocator instead. This KVA - * mechanism is incompatible with SPARSEMEM as it makes assumptions about the - * layout of memory that are broken if alloc_remap() succeeds for some of the - * map and fails for others + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; @@ -290,25 +287,6 @@ static void init_remap_allocator(int nid) (ulong) pfn_to_kaddr(highstart_pfn + node_remap_offset[nid] + node_remap_size[nid])); } -#else -void *alloc_remap(int nid, unsigned long size) -{ - return NULL; -} - -static unsigned long calculate_numa_remap_pages(void) -{ - return 0; -} - -static void init_remap_allocator(int nid) -{ -} - -void __init remap_numa_kva(void) -{ -} -#endif /* CONFIG_DISCONTIGMEM */ extern void setup_bootmem_allocator(void); unsigned long __init setup_memory(void) @@ -476,3 +454,21 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif + +#if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT) +/* + * Dummy on 32-bit, for now: + */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ +} + +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ +} + +void __init acpi_numa_arch_fixup(void) +{ +} +#endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2c24bea92c6..0bb0caed897 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { 0xffff810000000000UL, "Low Kernel Mapping" }, + { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, { __START_KERNEL_map, "High Kernel Mapping" }, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75..578b7681955 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ -#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { -#else - if (!user_mode(regs)) { -#endif preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -396,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "NULL pointer dereference"); else printk(KERN_CONT "paging request"); -#ifdef CONFIG_X86_32 - printk(KERN_CONT " at %08lx\n", address); -#else - printk(KERN_CONT " at %016lx\n", address); -#endif + printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_ALERT "IP:"); printk_address(regs->ip, 1); dump_pagetable(address); @@ -497,6 +489,11 @@ static int vmalloc_fault(unsigned long address) unsigned long pgd_paddr; pmd_t *pmd_k; pte_t *pte_k; + + /* Make sure we are in vmalloc area */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + /* * Synchronize this task's top level page-table * with the 'reference' page table. @@ -795,14 +792,10 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( -#ifdef CONFIG_X86_32 - "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", -#else - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", -#endif + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, regs->ip, - regs->sp, error_code); + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b..d71be0eb013 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pgd_t *pgd; pmd_t *pmd; pte_t *pte; + unsigned pages_2m = 0, pages_4k = 0; pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; @@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; + pages_2m++; set_pmd(pmd, pfn_pmd(pfn, prot)); pfn += PTRS_PER_PTE; @@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) if (is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; + pages_4k++; set_pte(pte, pfn_pte(pfn, prot)); } max_pfn_mapped = pfn; } } + update_page_count(PG_LEVEL_2M, pages_2m); + update_page_count(PG_LEVEL_4K, pages_4k); } static inline int page_kills_ppro(unsigned long pagenr) @@ -571,17 +576,6 @@ void __init mem_init(void) #endif bad_ppro = ppro_with_ram_bug(); -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } -#endif /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -614,7 +608,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM @@ -655,7 +648,6 @@ void __init mem_init(void) #endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b0f81..48623ae628f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/smp.h> #include <linux/init.h> +#include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> @@ -135,7 +136,7 @@ static __init void *spp_getpage(void) return ptr; } -static void +static __init void set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { pgd_t *pgd; @@ -206,7 +207,7 @@ void __init cleanup_highmap(void) pmd_t *last_pmd = pmd + PTRS_PER_PMD; for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) continue; if (vaddr < (unsigned long) _text || vaddr > end) set_pmd(pmd, __pmd(0)); @@ -214,7 +215,7 @@ void __init cleanup_highmap(void) } /* NOTE: this is meant to be run only at boot */ -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) +void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) { unsigned long address = __fix_to_virt(idx); @@ -312,6 +313,8 @@ __meminit void early_iounmap(void *addr, unsigned long size) static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { + unsigned long pages = 0; + int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { @@ -328,9 +331,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) if (pmd_val(*pmd)) continue; + pages++; set_pte((pte_t *)pmd, pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); } + update_page_count(PG_LEVEL_2M, pages); return address; } @@ -350,6 +355,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) { + unsigned long pages = 0; unsigned long last_map_addr = end; int i = pud_index(addr); @@ -374,6 +380,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) } if (direct_gbpages) { + pages++; set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; @@ -390,6 +397,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) unmap_low_page(pmd); } __flush_tlb_all(); + update_page_count(PG_LEVEL_1G, pages); return last_map_addr >> PAGE_SHIFT; } @@ -431,7 +439,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST_BOOTPARAM +#ifdef CONFIG_MEMTEST static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) @@ -493,7 +501,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, } -static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; +/* default is disabled */ +static int memtest_pattern __initdata; static int __init parse_memtest(char *arg) { @@ -506,7 +515,7 @@ early_param("memtest", parse_memtest); static void __init early_memtest(unsigned long start, unsigned long end) { - unsigned long t_start, t_size; + u64 t_start, t_size; unsigned pattern; if (!memtest_pattern) @@ -525,8 +534,9 @@ static void __init early_memtest(unsigned long start, unsigned long end) if (t_start + t_size > end) t_size = end - t_start; - printk(KERN_CONT "\n %016lx - %016lx pattern %d", - t_start, t_start + t_size, pattern); + printk(KERN_CONT "\n %016llx - %016llx pattern %d", + (unsigned long long)t_start, + (unsigned long long)t_start + t_size, pattern); memtest(t_start, t_size, pattern); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031..416ea415f5c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't remap the low PCI/ISA area, it's always mapped.. */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + if (is_ISA_range(phys_addr, last_addr)) return (__force void __iomem *)phys_to_virt(phys_addr); /* @@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; + * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. @@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache); */ void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) { - if (pat_wc_enabled) + if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, __builtin_return_address(0)); else @@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr) * vm_area and by simply returning an address into the kernel mapping * of ISA space. So handle that here. */ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) + if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && + (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) return; addr = (volatile void __iomem *) @@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr) cpa takes care of the direct mappings. */ read_lock(&vmlist_lock); for (p = vmlist; p; p = p->next) { - if (p->addr == addr) + if (p->addr == (void __force *)addr) break; } read_unlock(&vmlist_lock); @@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr) free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ - o = remove_vm_area((void *)addr); + o = remove_vm_area((void __force *)addr); BUG_ON(p != o || o == NULL); kfree(p); } @@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void *)ioremap(start, PAGE_SIZE); + addr = (void __force *)ioremap(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); @@ -593,10 +593,11 @@ void __init early_iounmap(void *addr, unsigned long size) unsigned long offset; unsigned int nrpages; enum fixed_addresses idx; - unsigned int nesting; + int nesting; nesting = --early_ioremap_nested; - WARN_ON(nesting < 0); + if (WARN_ON(nesting < 0)) + return; if (early_ioremap_debug) { printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e47784..0ea66b532c3 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -22,6 +22,7 @@ #include <asm/numa.h> #include <asm/mpspec.h> #include <asm/apic.h> +#include <asm/k8.h> static __init int find_northbridge(void) { @@ -73,17 +74,12 @@ static __init void early_get_boot_cpu_id(void) int __init k8_scan_nodes(unsigned long start, unsigned long end) { + unsigned numnodes, cores, bits, apicid_base; unsigned long prevbase; struct bootnode nodes[8]; - int nodeid, i, nb; unsigned char nodeids[8]; - int found = 0; - u32 reg; - unsigned numnodes; - unsigned cores; - unsigned bits; - int j; - unsigned apicid_base; + int i, j, nb, found = 0; + u32 nodeid, reg; if (!early_pci_allowed()) return -1; @@ -105,7 +101,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base, limit; - u32 nodeid; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37..afd40054d15 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -34,6 +34,41 @@ struct cpa_data { unsigned force_split : 1; }; +#ifdef CONFIG_PROC_FS +static unsigned long direct_pages_count[PG_LEVEL_NUM]; + +void update_page_count(int level, unsigned long pages) +{ + unsigned long flags; + + /* Protect against CPA */ + spin_lock_irqsave(&pgd_lock, flags); + direct_pages_count[level] += pages; + spin_unlock_irqrestore(&pgd_lock, flags); +} + +static void split_page_count(int level) +{ + direct_pages_count[level]--; + direct_pages_count[level - 1] += PTRS_PER_PTE; +} + +int arch_report_meminfo(char *page) +{ + int n = sprintf(page, "DirectMap4k: %8lu\n" + "DirectMap2M: %8lu\n", + direct_pages_count[PG_LEVEL_4K], + direct_pages_count[PG_LEVEL_2M]); +#ifdef CONFIG_X86_64 + n += sprintf(page + n, "DirectMap1G: %8lu\n", + direct_pages_count[PG_LEVEL_1G]); +#endif + return n; +} +#else +static inline void split_page_count(int level) { } +#endif + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -500,6 +535,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + if (address >= (unsigned long)__va(0) && + address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) + split_page_count(level); + /* * Install the new, split up pagetable. Important details here: * @@ -805,7 +844,7 @@ int _set_memory_wc(unsigned long addr, int numpages) int set_memory_wc(unsigned long addr, int numpages) { - if (!pat_wc_enabled) + if (!pat_enabled) return set_memory_uc(addr, numpages); if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index de3a9981245..a885a1019b8 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -26,15 +26,15 @@ #include <asm/io.h> #ifdef CONFIG_X86_PAT -int __read_mostly pat_wc_enabled = 1; +int __read_mostly pat_enabled = 1; void __cpuinit pat_disable(char *reason) { - pat_wc_enabled = 0; + pat_enabled = 0; printk(KERN_INFO "%s\n", reason); } -static int nopat(char *str) +static int __init nopat(char *str) { pat_disable("PAT support disabled."); return 0; @@ -42,6 +42,19 @@ static int nopat(char *str) early_param("nopat", nopat); #endif + +static int debug_enable; +static int __init pat_debug_setup(char *str) +{ + debug_enable = 1; + return 0; +} +__setup("debugpat", pat_debug_setup); + +#define dprintk(fmt, arg...) \ + do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + + static u64 __read_mostly boot_pat_state; enum { @@ -53,24 +66,25 @@ enum { PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ }; -#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) +#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) void pat_init(void) { u64 pat; - if (!pat_wc_enabled) + if (!pat_enabled) return; /* Paranoia check. */ - if (!cpu_has_pat) { - printk(KERN_ERR "PAT enabled, but CPU feature cleared\n"); + if (!cpu_has_pat && boot_pat_state) { /* - * Panic if this happens on the secondary CPU, and we + * If this happens we are on a secondary CPU, but * switched to PAT on the boot CPU. We have no way to * undo PAT. - */ - BUG_ON(boot_pat_state); + */ + printk(KERN_ERR "PAT enabled, " + "but not supported by secondary CPU\n"); + BUG(); } /* Set PWT to Write-Combining. All other bits stay the same */ @@ -86,8 +100,8 @@ void pat_init(void) * 011 UC _PAGE_CACHE_UC * PAT bit unused */ - pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | - PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); /* Boot CPU check */ if (!boot_pat_state) @@ -103,11 +117,11 @@ void pat_init(void) static char *cattr_name(unsigned long flags) { switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - default: return "broken"; + case _PAGE_CACHE_UC: return "uncached"; + case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; + case _PAGE_CACHE_WB: return "write-back"; + case _PAGE_CACHE_WC: return "write-combining"; + default: return "broken"; } } @@ -145,46 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ * The intersection is based on "Effective Memory Type" tables in IA-32 * SDM vol 3a */ -static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, - unsigned long *ret_prot) +static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) { - unsigned long pat_type; - u8 mtrr_type; - - mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == 0xFF) { /* MTRR not enabled */ - *ret_prot = prot; - return 0; - } - if (mtrr_type == 0xFE) { /* MTRR match error */ - *ret_prot = _PAGE_CACHE_UC; - return -1; - } - if (mtrr_type != MTRR_TYPE_UNCACHABLE && - mtrr_type != MTRR_TYPE_WRBACK && - mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */ - *ret_prot = _PAGE_CACHE_UC; - return -1; + /* + * Look for MTRR hint to get the effective type in case where PAT + * request is for WB. + */ + if (req_type == _PAGE_CACHE_WB) { + u8 mtrr_type; + + mtrr_type = mtrr_type_lookup(start, end); + if (mtrr_type == MTRR_TYPE_UNCACHABLE) + return _PAGE_CACHE_UC; + if (mtrr_type == MTRR_TYPE_WRCOMB) + return _PAGE_CACHE_WC; } - pat_type = prot & _PAGE_CACHE_MASK; - prot &= (~_PAGE_CACHE_MASK); - - /* Currently doing intersection by hand. Optimize it later. */ - if (pat_type == _PAGE_CACHE_WC) { - *ret_prot = prot | _PAGE_CACHE_WC; - } else if (pat_type == _PAGE_CACHE_UC_MINUS) { - *ret_prot = prot | _PAGE_CACHE_UC_MINUS; - } else if (pat_type == _PAGE_CACHE_UC || - mtrr_type == MTRR_TYPE_UNCACHABLE) { - *ret_prot = prot | _PAGE_CACHE_UC; - } else if (mtrr_type == MTRR_TYPE_WRCOMB) { - *ret_prot = prot | _PAGE_CACHE_WC; - } else { - *ret_prot = prot | _PAGE_CACHE_WB; + return req_type; +} + +static int chk_conflict(struct memtype *new, struct memtype *entry, + unsigned long *type) +{ + if (new->type != entry->type) { + if (type) { + new->type = entry->type; + *type = entry->type; + } else + goto conflict; } + /* check overlaps with more than one entry in the list */ + list_for_each_entry_continue(entry, &memtype_list, nd) { + if (new->end <= entry->start) + break; + else if (new->type != entry->type) + goto conflict; + } return 0; + + conflict: + printk(KERN_INFO "%s:%d conflicting memory types " + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, + new->end, cattr_name(new->type), cattr_name(entry->type)); + return -EBUSY; } /* @@ -197,251 +215,134 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, * req_type will have a special case value '-1', when requester want to inherit * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. * - * If ret_type is NULL, function will return an error if it cannot reserve the - * region with req_type. If ret_type is non-null, function will return - * available type in ret_type in case of no error. In case of any error + * If new_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If new_type is non-NULL, function will return + * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ int reserve_memtype(u64 start, u64 end, unsigned long req_type, - unsigned long *ret_type) + unsigned long *new_type) { - struct memtype *new_entry = NULL; - struct memtype *parse; + struct memtype *new, *entry; unsigned long actual_type; + struct list_head *where; int err = 0; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + BUG_ON(start >= end); /* end is exclusive */ + + if (!pat_enabled) { /* This is identical to page table setting without PAT */ - if (ret_type) { - if (req_type == -1) { - *ret_type = _PAGE_CACHE_WB; - } else { - *ret_type = req_type; - } + if (new_type) { + if (req_type == -1) + *new_type = _PAGE_CACHE_WB; + else + *new_type = req_type & _PAGE_CACHE_MASK; } return 0; } /* Low ISA region is always mapped WB in page table. No need to track */ - if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { - if (ret_type) - *ret_type = _PAGE_CACHE_WB; - + if (is_ISA_range(start, end - 1)) { + if (new_type) + *new_type = _PAGE_CACHE_WB; return 0; } if (req_type == -1) { /* - * Special case where caller wants to inherit from mtrr or - * existing pat mapping, defaulting to UC_MINUS in case of - * no match. + * Call mtrr_lookup to get the type hint. This is an + * optimization for /dev/mem mmap'ers into WB memory (BIOS + * tools and ACPI tools). Use WB request for WB memory and use + * UC_MINUS otherwise. */ u8 mtrr_type = mtrr_type_lookup(start, end); - if (mtrr_type == 0xFE) { /* MTRR match error */ - err = -1; - } - if (mtrr_type == MTRR_TYPE_WRBACK) { - req_type = _PAGE_CACHE_WB; + if (mtrr_type == MTRR_TYPE_WRBACK) actual_type = _PAGE_CACHE_WB; - } else { - req_type = _PAGE_CACHE_UC_MINUS; + else actual_type = _PAGE_CACHE_UC_MINUS; - } - } else { - req_type &= _PAGE_CACHE_MASK; - err = pat_x_mtrr_type(start, end, req_type, &actual_type); - } + } else + actual_type = pat_x_mtrr_type(start, end, + req_type & _PAGE_CACHE_MASK); - if (err) { - if (ret_type) - *ret_type = actual_type; - - return -EINVAL; - } - - new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new_entry) + new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!new) return -ENOMEM; - new_entry->start = start; - new_entry->end = end; - new_entry->type = actual_type; + new->start = start; + new->end = end; + new->type = actual_type; - if (ret_type) - *ret_type = actual_type; + if (new_type) + *new_type = actual_type; spin_lock(&memtype_lock); /* Search for existing mapping that overlaps the current range */ - list_for_each_entry(parse, &memtype_list, nd) { - struct memtype *saved_ptr; - - if (parse->start >= end) { - pr_debug("New Entry\n"); - list_add(&new_entry->nd, parse->nd.prev); - new_entry = NULL; + where = NULL; + list_for_each_entry(entry, &memtype_list, nd) { + if (end <= entry->start) { + where = entry->nd.prev; break; - } - - if (start <= parse->start && end >= parse->start) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } + } else if (start <= entry->start) { /* end > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = entry->nd.prev; } - - if (err) { - break; - } - - pr_debug("Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, saved_ptr->nd.prev); - new_entry = NULL; break; - } - - if (start < parse->end) { - if (actual_type != parse->type && ret_type) { - actual_type = parse->type; - *ret_type = actual_type; - new_entry->type = actual_type; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; + } else if (start < entry->end) { /* start > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = &entry->nd; } - - saved_ptr = parse; - /* - * Check to see whether the request overlaps more - * than one entry in the list - */ - list_for_each_entry_continue(parse, &memtype_list, nd) { - if (end <= parse->start) { - break; - } - - if (actual_type != parse->type) { - printk( - KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, - start, end, - cattr_name(actual_type), - cattr_name(parse->type)); - err = -EBUSY; - break; - } - } - - if (err) { - break; - } - - pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", - saved_ptr->start, saved_ptr->end); - /* No conflict. Go ahead and add this new entry */ - list_add(&new_entry->nd, &saved_ptr->nd); - new_entry = NULL; break; } } if (err) { - printk(KERN_INFO - "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(new_entry->type), - cattr_name(req_type)); - kfree(new_entry); + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " + "track %s, req %s\n", + start, end, cattr_name(new->type), cattr_name(req_type)); + kfree(new); spin_unlock(&memtype_lock); return err; } - if (new_entry) { - /* No conflict. Not yet added to the list. Add to the tail */ - list_add_tail(&new_entry->nd, &memtype_list); - pr_debug("New Entry\n"); - } - - if (ret_type) { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type), cattr_name(*ret_type)); - } else { - pr_debug( - "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", - start, end, cattr_name(actual_type), - cattr_name(req_type)); - } + if (where) + list_add(&new->nd, where); + else + list_add_tail(&new->nd, &memtype_list); spin_unlock(&memtype_lock); + + dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", + start, end, cattr_name(new->type), cattr_name(req_type), + new_type ? cattr_name(*new_type) : "-"); + return err; } int free_memtype(u64 start, u64 end) { - struct memtype *ml; + struct memtype *entry; int err = -EINVAL; - /* Only track when pat_wc_enabled */ - if (!pat_wc_enabled) { + if (!pat_enabled) return 0; - } /* Low ISA region is always mapped WB. No need to track */ - if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { + if (is_ISA_range(start, end - 1)) return 0; - } spin_lock(&memtype_lock); - list_for_each_entry(ml, &memtype_list, nd) { - if (ml->start == start && ml->end == end) { - list_del(&ml->nd); - kfree(ml); + list_for_each_entry(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + list_del(&entry->nd); + kfree(entry); err = 0; break; } @@ -453,7 +354,7 @@ int free_memtype(u64 start, u64 end) current->comm, current->pid, start, end); } - pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } @@ -522,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_wc_enabled && - ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && - (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + if (!pat_enabled && + !(boot_cpu_has(X86_FEATURE_MTRR) || + boot_cpu_has(X86_FEATURE_K6_MTRR) || + boot_cpu_has(X86_FEATURE_CYRIX_ARR) || + boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { flags = _PAGE_CACHE_UC; } #endif @@ -549,7 +450,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, return 0; if (pfn <= max_pfn_mapped && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -587,4 +488,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } - diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 50159764f69..ee1d6d39edd 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - &ptep->pte); + (unsigned long *) &ptep->pte); if (ret) pte_update(vma->vm_mm, addr, ptep); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 3890234e5b2..391d5103587 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -97,36 +97,9 @@ static __init inline int srat_disabled(void) return numa_off || acpi_numa < 0; } -/* - * A lot of BIOS fill in 10 (= no distance) everywhere. This messes - * up the NUMA heuristics which wants the local node to have a smaller - * distance than the others. - * Do some quick checks here and only use the SLIT if it passes. - */ -static __init int slit_valid(struct acpi_table_slit *slit) -{ - int i, j; - int d = slit->locality_count; - for (i = 0; i < d; i++) { - for (j = 0; j < d; j++) { - u8 val = slit->entry[d*i + j]; - if (i == j) { - if (val != LOCAL_DISTANCE) - return 0; - } else if (val <= LOCAL_DISTANCE) - return 0; - } - } - return 1; -} - /* Callback for SLIT parsing */ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) { - if (!slit_valid(slit)) { - printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); - return; - } acpi_slit = slit; } @@ -522,6 +495,7 @@ int __node_distance(int a, int b) EXPORT_SYMBOL(__node_distance); +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start) { int i, ret = 0; @@ -533,4 +507,4 @@ int memory_add_physaddr_to_nid(u64 start) return ret; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - +#endif |