aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/discontig_32.c54
-rw-r--r--arch/x86/mm/dump_pagetables.c2
-rw-r--r--arch/x86/mm/fault.c25
-rw-r--r--arch/x86/mm/init_32.c18
-rw-r--r--arch/x86/mm/init_64.c26
-rw-r--r--arch/x86/mm/ioremap.c21
-rw-r--r--arch/x86/mm/k8topology_64.c13
-rw-r--r--arch/x86/mm/pageattr.c41
-rw-r--r--arch/x86/mm/pat.c392
-rw-r--r--arch/x86/mm/pgtable.c2
-rw-r--r--arch/x86/mm/srat_64.c30
11 files changed, 262 insertions, 362 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 914ccf98368..8b4eac0ca07 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -156,7 +156,7 @@ static void __init propagate_e820_map_node(int nid)
*/
static void __init allocate_pgdat(int nid)
{
- if (nid && node_has_online_mem(nid))
+ if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid])
NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
else {
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
@@ -164,16 +164,13 @@ static void __init allocate_pgdat(int nid)
}
}
-#ifdef CONFIG_DISCONTIGMEM
/*
- * In the discontig memory model, a portion of the kernel virtual area (KVA)
- * is reserved and portions of nodes are mapped using it. This is to allow
- * node-local memory to be allocated for structures that would normally require
- * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers
- * should be prepared to allocate from the bootmem allocator instead. This KVA
- * mechanism is incompatible with SPARSEMEM as it makes assumptions about the
- * layout of memory that are broken if alloc_remap() succeeds for some of the
- * map and fails for others
+ * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
+ * virtual address space (KVA) is reserved and portions of nodes are mapped
+ * using it. This is to allow node-local memory to be allocated for
+ * structures that would normally require ZONE_NORMAL. The memory is
+ * allocated with alloc_remap() and callers should be prepared to allocate
+ * from the bootmem allocator instead.
*/
static unsigned long node_remap_start_pfn[MAX_NUMNODES];
static void *node_remap_end_vaddr[MAX_NUMNODES];
@@ -290,25 +287,6 @@ static void init_remap_allocator(int nid)
(ulong) pfn_to_kaddr(highstart_pfn
+ node_remap_offset[nid] + node_remap_size[nid]));
}
-#else
-void *alloc_remap(int nid, unsigned long size)
-{
- return NULL;
-}
-
-static unsigned long calculate_numa_remap_pages(void)
-{
- return 0;
-}
-
-static void init_remap_allocator(int nid)
-{
-}
-
-void __init remap_numa_kva(void)
-{
-}
-#endif /* CONFIG_DISCONTIGMEM */
extern void setup_bootmem_allocator(void);
unsigned long __init setup_memory(void)
@@ -476,3 +454,21 @@ int memory_add_physaddr_to_nid(u64 addr)
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
+
+#if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT)
+/*
+ * Dummy on 32-bit, for now:
+ */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+}
+
+void __init
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
+{
+}
+
+void __init acpi_numa_arch_fixup(void)
+{
+}
+#endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2c24bea92c6..0bb0caed897 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = {
{ 0, "User Space" },
#ifdef CONFIG_X86_64
{ 0x8000000000000000UL, "Kernel Space" },
- { 0xffff810000000000UL, "Low Kernel Mapping" },
+ { PAGE_OFFSET, "Low Kernel Mapping" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMEMMAP_START, "Vmemmap" },
{ __START_KERNEL_map, "High Kernel Mapping" },
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd7e1798c75..578b7681955 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
-#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
-#else
- if (!user_mode(regs)) {
-#endif
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -396,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "NULL pointer dereference");
else
printk(KERN_CONT "paging request");
-#ifdef CONFIG_X86_32
- printk(KERN_CONT " at %08lx\n", address);
-#else
- printk(KERN_CONT " at %016lx\n", address);
-#endif
+ printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
printk_address(regs->ip, 1);
dump_pagetable(address);
@@ -497,6 +489,11 @@ static int vmalloc_fault(unsigned long address)
unsigned long pgd_paddr;
pmd_t *pmd_k;
pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
@@ -795,14 +792,10 @@ bad_area_nosemaphore:
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
printk(
-#ifdef CONFIG_X86_32
- "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
-#else
- "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
-#endif
+ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address, regs->ip,
- regs->sp, error_code);
+ tsk->comm, task_pid_nr(tsk), address,
+ (void *) regs->ip, (void *) regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b..d71be0eb013 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
+ unsigned pages_2m = 0, pages_4k = 0;
pgd_idx = pgd_index(PAGE_OFFSET);
pgd = pgd_base + pgd_idx;
@@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
is_kernel_text(addr2))
prot = PAGE_KERNEL_LARGE_EXEC;
+ pages_2m++;
set_pmd(pmd, pfn_pmd(pfn, prot));
pfn += PTRS_PER_PTE;
@@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
if (is_kernel_text(addr))
prot = PAGE_KERNEL_EXEC;
+ pages_4k++;
set_pte(pte, pfn_pte(pfn, prot));
}
max_pfn_mapped = pfn;
}
}
+ update_page_count(PG_LEVEL_2M, pages_2m);
+ update_page_count(PG_LEVEL_4K, pages_4k);
}
static inline int page_kills_ppro(unsigned long pagenr)
@@ -571,17 +576,6 @@ void __init mem_init(void)
#endif
bad_ppro = ppro_with_ram_bug();
-#ifdef CONFIG_HIGHMEM
- /* check that fixmap and pkmap do not overlap */
- if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
- printk(KERN_ERR
- "fixmap and kmap areas overlap - this will crash\n");
- printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
- PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
- FIXADDR_START);
- BUG();
- }
-#endif
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
@@ -614,7 +608,6 @@ void __init mem_init(void)
(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
);
-#if 1 /* double-sanity-check paranoia */
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
@@ -655,7 +648,6 @@ void __init mem_init(void)
#endif
BUG_ON(VMALLOC_START > VMALLOC_END);
BUG_ON((unsigned long)high_memory > VMALLOC_START);
-#endif /* double-sanity-check paranoia */
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 32ba13b0f81..48623ae628f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -18,6 +18,7 @@
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
+#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
@@ -135,7 +136,7 @@ static __init void *spp_getpage(void)
return ptr;
}
-static void
+static __init void
set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
{
pgd_t *pgd;
@@ -206,7 +207,7 @@ void __init cleanup_highmap(void)
pmd_t *last_pmd = pmd + PTRS_PER_PMD;
for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
- if (!pmd_present(*pmd))
+ if (pmd_none(*pmd))
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
set_pmd(pmd, __pmd(0));
@@ -214,7 +215,7 @@ void __init cleanup_highmap(void)
}
/* NOTE: this is meant to be run only at boot */
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
@@ -312,6 +313,8 @@ __meminit void early_iounmap(void *addr, unsigned long size)
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
+ unsigned long pages = 0;
+
int i = pmd_index(address);
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
@@ -328,9 +331,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
if (pmd_val(*pmd))
continue;
+ pages++;
set_pte((pte_t *)pmd,
pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
+ update_page_count(PG_LEVEL_2M, pages);
return address;
}
@@ -350,6 +355,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
+ unsigned long pages = 0;
unsigned long last_map_addr = end;
int i = pud_index(addr);
@@ -374,6 +380,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
}
if (direct_gbpages) {
+ pages++;
set_pte((pte_t *)pud,
pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
@@ -390,6 +397,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
unmap_low_page(pmd);
}
__flush_tlb_all();
+ update_page_count(PG_LEVEL_1G, pages);
return last_map_addr >> PAGE_SHIFT;
}
@@ -431,7 +439,7 @@ static void __init init_gbpages(void)
direct_gbpages = 0;
}
-#ifdef CONFIG_MEMTEST_BOOTPARAM
+#ifdef CONFIG_MEMTEST
static void __init memtest(unsigned long start_phys, unsigned long size,
unsigned pattern)
@@ -493,7 +501,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
}
-static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
+/* default is disabled */
+static int memtest_pattern __initdata;
static int __init parse_memtest(char *arg)
{
@@ -506,7 +515,7 @@ early_param("memtest", parse_memtest);
static void __init early_memtest(unsigned long start, unsigned long end)
{
- unsigned long t_start, t_size;
+ u64 t_start, t_size;
unsigned pattern;
if (!memtest_pattern)
@@ -525,8 +534,9 @@ static void __init early_memtest(unsigned long start, unsigned long end)
if (t_start + t_size > end)
t_size = end - t_start;
- printk(KERN_CONT "\n %016lx - %016lx pattern %d",
- t_start, t_start + t_size, pattern);
+ printk(KERN_CONT "\n %016llx - %016llx pattern %d",
+ (unsigned long long)t_start,
+ (unsigned long long)t_start + t_size, pattern);
memtest(t_start, t_size, pattern);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 71bb3159031..416ea415f5c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
/*
* Don't remap the low PCI/ISA area, it's always mapped..
*/
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
+ if (is_ISA_range(phys_addr, last_addr))
return (__force void __iomem *)phys_to_virt(phys_addr);
/*
@@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
/*
* Ideally, this should be:
- * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+ * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
*
* Till we fix all X drivers to use ioremap_wc(), we will use
* UC MINUS.
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache);
*/
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
- if (pat_wc_enabled)
+ if (pat_enabled)
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
__builtin_return_address(0));
else
@@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr)
* vm_area and by simply returning an address into the kernel mapping
* of ISA space. So handle that here.
*/
- if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
- addr < phys_to_virt(ISA_END_ADDRESS))
+ if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
+ (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
return;
addr = (volatile void __iomem *)
@@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr)
cpa takes care of the direct mappings. */
read_lock(&vmlist_lock);
for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
+ if (p->addr == (void __force *)addr)
break;
}
read_unlock(&vmlist_lock);
@@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr)
free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */
- o = remove_vm_area((void *)addr);
+ o = remove_vm_area((void __force *)addr);
BUG_ON(p != o || o == NULL);
kfree(p);
}
@@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void *)ioremap(start, PAGE_SIZE);
+ addr = (void __force *)ioremap(start, PAGE_SIZE);
if (addr)
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
@@ -593,10 +593,11 @@ void __init early_iounmap(void *addr, unsigned long size)
unsigned long offset;
unsigned int nrpages;
enum fixed_addresses idx;
- unsigned int nesting;
+ int nesting;
nesting = --early_ioremap_nested;
- WARN_ON(nesting < 0);
+ if (WARN_ON(nesting < 0))
+ return;
if (early_ioremap_debug) {
printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 1f476e47784..0ea66b532c3 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -22,6 +22,7 @@
#include <asm/numa.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
+#include <asm/k8.h>
static __init int find_northbridge(void)
{
@@ -73,17 +74,12 @@ static __init void early_get_boot_cpu_id(void)
int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
+ unsigned numnodes, cores, bits, apicid_base;
unsigned long prevbase;
struct bootnode nodes[8];
- int nodeid, i, nb;
unsigned char nodeids[8];
- int found = 0;
- u32 reg;
- unsigned numnodes;
- unsigned cores;
- unsigned bits;
- int j;
- unsigned apicid_base;
+ int i, j, nb, found = 0;
+ u32 nodeid, reg;
if (!early_pci_allowed())
return -1;
@@ -105,7 +101,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base, limit;
- u32 nodeid;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37..afd40054d15 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -34,6 +34,41 @@ struct cpa_data {
unsigned force_split : 1;
};
+#ifdef CONFIG_PROC_FS
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+ unsigned long flags;
+
+ /* Protect against CPA */
+ spin_lock_irqsave(&pgd_lock, flags);
+ direct_pages_count[level] += pages;
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void split_page_count(int level)
+{
+ direct_pages_count[level]--;
+ direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+int arch_report_meminfo(char *page)
+{
+ int n = sprintf(page, "DirectMap4k: %8lu\n"
+ "DirectMap2M: %8lu\n",
+ direct_pages_count[PG_LEVEL_4K],
+ direct_pages_count[PG_LEVEL_2M]);
+#ifdef CONFIG_X86_64
+ n += sprintf(page + n, "DirectMap1G: %8lu\n",
+ direct_pages_count[PG_LEVEL_1G]);
+#endif
+ return n;
+}
+#else
+static inline void split_page_count(int level) { }
+#endif
+
#ifdef CONFIG_X86_64
static inline unsigned long highmap_start_pfn(void)
@@ -500,6 +535,10 @@ static int split_large_page(pte_t *kpte, unsigned long address)
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
+ if (address >= (unsigned long)__va(0) &&
+ address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
+ split_page_count(level);
+
/*
* Install the new, split up pagetable. Important details here:
*
@@ -805,7 +844,7 @@ int _set_memory_wc(unsigned long addr, int numpages)
int set_memory_wc(unsigned long addr, int numpages)
{
- if (!pat_wc_enabled)
+ if (!pat_enabled)
return set_memory_uc(addr, numpages);
if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index de3a9981245..a885a1019b8 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -26,15 +26,15 @@
#include <asm/io.h>
#ifdef CONFIG_X86_PAT
-int __read_mostly pat_wc_enabled = 1;
+int __read_mostly pat_enabled = 1;
void __cpuinit pat_disable(char *reason)
{
- pat_wc_enabled = 0;
+ pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
}
-static int nopat(char *str)
+static int __init nopat(char *str)
{
pat_disable("PAT support disabled.");
return 0;
@@ -42,6 +42,19 @@ static int nopat(char *str)
early_param("nopat", nopat);
#endif
+
+static int debug_enable;
+static int __init pat_debug_setup(char *str)
+{
+ debug_enable = 1;
+ return 0;
+}
+__setup("debugpat", pat_debug_setup);
+
+#define dprintk(fmt, arg...) \
+ do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
+
+
static u64 __read_mostly boot_pat_state;
enum {
@@ -53,24 +66,25 @@ enum {
PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */
};
-#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
+#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
void pat_init(void)
{
u64 pat;
- if (!pat_wc_enabled)
+ if (!pat_enabled)
return;
/* Paranoia check. */
- if (!cpu_has_pat) {
- printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
+ if (!cpu_has_pat && boot_pat_state) {
/*
- * Panic if this happens on the secondary CPU, and we
+ * If this happens we are on a secondary CPU, but
* switched to PAT on the boot CPU. We have no way to
* undo PAT.
- */
- BUG_ON(boot_pat_state);
+ */
+ printk(KERN_ERR "PAT enabled, "
+ "but not supported by secondary CPU\n");
+ BUG();
}
/* Set PWT to Write-Combining. All other bits stay the same */
@@ -86,8 +100,8 @@ void pat_init(void)
* 011 UC _PAGE_CACHE_UC
* PAT bit unused
*/
- pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
- PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
/* Boot CPU check */
if (!boot_pat_state)
@@ -103,11 +117,11 @@ void pat_init(void)
static char *cattr_name(unsigned long flags)
{
switch (flags & _PAGE_CACHE_MASK) {
- case _PAGE_CACHE_UC: return "uncached";
- case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
- case _PAGE_CACHE_WB: return "write-back";
- case _PAGE_CACHE_WC: return "write-combining";
- default: return "broken";
+ case _PAGE_CACHE_UC: return "uncached";
+ case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
+ case _PAGE_CACHE_WB: return "write-back";
+ case _PAGE_CACHE_WC: return "write-combining";
+ default: return "broken";
}
}
@@ -145,46 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
* The intersection is based on "Effective Memory Type" tables in IA-32
* SDM vol 3a
*/
-static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
- unsigned long *ret_prot)
+static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
{
- unsigned long pat_type;
- u8 mtrr_type;
-
- mtrr_type = mtrr_type_lookup(start, end);
- if (mtrr_type == 0xFF) { /* MTRR not enabled */
- *ret_prot = prot;
- return 0;
- }
- if (mtrr_type == 0xFE) { /* MTRR match error */
- *ret_prot = _PAGE_CACHE_UC;
- return -1;
- }
- if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
- mtrr_type != MTRR_TYPE_WRBACK &&
- mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */
- *ret_prot = _PAGE_CACHE_UC;
- return -1;
+ /*
+ * Look for MTRR hint to get the effective type in case where PAT
+ * request is for WB.
+ */
+ if (req_type == _PAGE_CACHE_WB) {
+ u8 mtrr_type;
+
+ mtrr_type = mtrr_type_lookup(start, end);
+ if (mtrr_type == MTRR_TYPE_UNCACHABLE)
+ return _PAGE_CACHE_UC;
+ if (mtrr_type == MTRR_TYPE_WRCOMB)
+ return _PAGE_CACHE_WC;
}
- pat_type = prot & _PAGE_CACHE_MASK;
- prot &= (~_PAGE_CACHE_MASK);
-
- /* Currently doing intersection by hand. Optimize it later. */
- if (pat_type == _PAGE_CACHE_WC) {
- *ret_prot = prot | _PAGE_CACHE_WC;
- } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
- *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
- } else if (pat_type == _PAGE_CACHE_UC ||
- mtrr_type == MTRR_TYPE_UNCACHABLE) {
- *ret_prot = prot | _PAGE_CACHE_UC;
- } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
- *ret_prot = prot | _PAGE_CACHE_WC;
- } else {
- *ret_prot = prot | _PAGE_CACHE_WB;
+ return req_type;
+}
+
+static int chk_conflict(struct memtype *new, struct memtype *entry,
+ unsigned long *type)
+{
+ if (new->type != entry->type) {
+ if (type) {
+ new->type = entry->type;
+ *type = entry->type;
+ } else
+ goto conflict;
}
+ /* check overlaps with more than one entry in the list */
+ list_for_each_entry_continue(entry, &memtype_list, nd) {
+ if (new->end <= entry->start)
+ break;
+ else if (new->type != entry->type)
+ goto conflict;
+ }
return 0;
+
+ conflict:
+ printk(KERN_INFO "%s:%d conflicting memory types "
+ "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
+ new->end, cattr_name(new->type), cattr_name(entry->type));
+ return -EBUSY;
}
/*
@@ -197,251 +215,134 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
* req_type will have a special case value '-1', when requester want to inherit
* the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
*
- * If ret_type is NULL, function will return an error if it cannot reserve the
- * region with req_type. If ret_type is non-null, function will return
- * available type in ret_type in case of no error. In case of any error
+ * If new_type is NULL, function will return an error if it cannot reserve the
+ * region with req_type. If new_type is non-NULL, function will return
+ * available type in new_type in case of no error. In case of any error
* it will return a negative return value.
*/
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
- unsigned long *ret_type)
+ unsigned long *new_type)
{
- struct memtype *new_entry = NULL;
- struct memtype *parse;
+ struct memtype *new, *entry;
unsigned long actual_type;
+ struct list_head *where;
int err = 0;
- /* Only track when pat_wc_enabled */
- if (!pat_wc_enabled) {
+ BUG_ON(start >= end); /* end is exclusive */
+
+ if (!pat_enabled) {
/* This is identical to page table setting without PAT */
- if (ret_type) {
- if (req_type == -1) {
- *ret_type = _PAGE_CACHE_WB;
- } else {
- *ret_type = req_type;
- }
+ if (new_type) {
+ if (req_type == -1)
+ *new_type = _PAGE_CACHE_WB;
+ else
+ *new_type = req_type & _PAGE_CACHE_MASK;
}
return 0;
}
/* Low ISA region is always mapped WB in page table. No need to track */
- if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
- if (ret_type)
- *ret_type = _PAGE_CACHE_WB;
-
+ if (is_ISA_range(start, end - 1)) {
+ if (new_type)
+ *new_type = _PAGE_CACHE_WB;
return 0;
}
if (req_type == -1) {
/*
- * Special case where caller wants to inherit from mtrr or
- * existing pat mapping, defaulting to UC_MINUS in case of
- * no match.
+ * Call mtrr_lookup to get the type hint. This is an
+ * optimization for /dev/mem mmap'ers into WB memory (BIOS
+ * tools and ACPI tools). Use WB request for WB memory and use
+ * UC_MINUS otherwise.
*/
u8 mtrr_type = mtrr_type_lookup(start, end);
- if (mtrr_type == 0xFE) { /* MTRR match error */
- err = -1;
- }
- if (mtrr_type == MTRR_TYPE_WRBACK) {
- req_type = _PAGE_CACHE_WB;
+ if (mtrr_type == MTRR_TYPE_WRBACK)
actual_type = _PAGE_CACHE_WB;
- } else {
- req_type = _PAGE_CACHE_UC_MINUS;
+ else
actual_type = _PAGE_CACHE_UC_MINUS;
- }
- } else {
- req_type &= _PAGE_CACHE_MASK;
- err = pat_x_mtrr_type(start, end, req_type, &actual_type);
- }
+ } else
+ actual_type = pat_x_mtrr_type(start, end,
+ req_type & _PAGE_CACHE_MASK);
- if (err) {
- if (ret_type)
- *ret_type = actual_type;
-
- return -EINVAL;
- }
-
- new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!new_entry)
+ new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!new)
return -ENOMEM;
- new_entry->start = start;
- new_entry->end = end;
- new_entry->type = actual_type;
+ new->start = start;
+ new->end = end;
+ new->type = actual_type;
- if (ret_type)
- *ret_type = actual_type;
+ if (new_type)
+ *new_type = actual_type;
spin_lock(&memtype_lock);
/* Search for existing mapping that overlaps the current range */
- list_for_each_entry(parse, &memtype_list, nd) {
- struct memtype *saved_ptr;
-
- if (parse->start >= end) {
- pr_debug("New Entry\n");
- list_add(&new_entry->nd, parse->nd.prev);
- new_entry = NULL;
+ where = NULL;
+ list_for_each_entry(entry, &memtype_list, nd) {
+ if (end <= entry->start) {
+ where = entry->nd.prev;
break;
- }
-
- if (start <= parse->start && end >= parse->start) {
- if (actual_type != parse->type && ret_type) {
- actual_type = parse->type;
- *ret_type = actual_type;
- new_entry->type = actual_type;
- }
-
- if (actual_type != parse->type) {
- printk(
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid,
- start, end,
- cattr_name(actual_type),
- cattr_name(parse->type));
- err = -EBUSY;
- break;
- }
-
- saved_ptr = parse;
- /*
- * Check to see whether the request overlaps more
- * than one entry in the list
- */
- list_for_each_entry_continue(parse, &memtype_list, nd) {
- if (end <= parse->start) {
- break;
- }
-
- if (actual_type != parse->type) {
- printk(
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid,
- start, end,
- cattr_name(actual_type),
- cattr_name(parse->type));
- err = -EBUSY;
- break;
- }
+ } else if (start <= entry->start) { /* end > entry->start */
+ err = chk_conflict(new, entry, new_type);
+ if (!err) {
+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
+ entry->start, entry->end);
+ where = entry->nd.prev;
}
-
- if (err) {
- break;
- }
-
- pr_debug("Overlap at 0x%Lx-0x%Lx\n",
- saved_ptr->start, saved_ptr->end);
- /* No conflict. Go ahead and add this new entry */
- list_add(&new_entry->nd, saved_ptr->nd.prev);
- new_entry = NULL;
break;
- }
-
- if (start < parse->end) {
- if (actual_type != parse->type && ret_type) {
- actual_type = parse->type;
- *ret_type = actual_type;
- new_entry->type = actual_type;
- }
-
- if (actual_type != parse->type) {
- printk(
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid,
- start, end,
- cattr_name(actual_type),
- cattr_name(parse->type));
- err = -EBUSY;
- break;
+ } else if (start < entry->end) { /* start > entry->start */
+ err = chk_conflict(new, entry, new_type);
+ if (!err) {
+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
+ entry->start, entry->end);
+ where = &entry->nd;
}
-
- saved_ptr = parse;
- /*
- * Check to see whether the request overlaps more
- * than one entry in the list
- */
- list_for_each_entry_continue(parse, &memtype_list, nd) {
- if (end <= parse->start) {
- break;
- }
-
- if (actual_type != parse->type) {
- printk(
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid,
- start, end,
- cattr_name(actual_type),
- cattr_name(parse->type));
- err = -EBUSY;
- break;
- }
- }
-
- if (err) {
- break;
- }
-
- pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
- saved_ptr->start, saved_ptr->end);
- /* No conflict. Go ahead and add this new entry */
- list_add(&new_entry->nd, &saved_ptr->nd);
- new_entry = NULL;
break;
}
}
if (err) {
- printk(KERN_INFO
- "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
- start, end, cattr_name(new_entry->type),
- cattr_name(req_type));
- kfree(new_entry);
+ printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
+ "track %s, req %s\n",
+ start, end, cattr_name(new->type), cattr_name(req_type));
+ kfree(new);
spin_unlock(&memtype_lock);
return err;
}
- if (new_entry) {
- /* No conflict. Not yet added to the list. Add to the tail */
- list_add_tail(&new_entry->nd, &memtype_list);
- pr_debug("New Entry\n");
- }
-
- if (ret_type) {
- pr_debug(
- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
- start, end, cattr_name(actual_type),
- cattr_name(req_type), cattr_name(*ret_type));
- } else {
- pr_debug(
- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
- start, end, cattr_name(actual_type),
- cattr_name(req_type));
- }
+ if (where)
+ list_add(&new->nd, where);
+ else
+ list_add_tail(&new->nd, &memtype_list);
spin_unlock(&memtype_lock);
+
+ dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
+ start, end, cattr_name(new->type), cattr_name(req_type),
+ new_type ? cattr_name(*new_type) : "-");
+
return err;
}
int free_memtype(u64 start, u64 end)
{
- struct memtype *ml;
+ struct memtype *entry;
int err = -EINVAL;
- /* Only track when pat_wc_enabled */
- if (!pat_wc_enabled) {
+ if (!pat_enabled)
return 0;
- }
/* Low ISA region is always mapped WB. No need to track */
- if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
+ if (is_ISA_range(start, end - 1))
return 0;
- }
spin_lock(&memtype_lock);
- list_for_each_entry(ml, &memtype_list, nd) {
- if (ml->start == start && ml->end == end) {
- list_del(&ml->nd);
- kfree(ml);
+ list_for_each_entry(entry, &memtype_list, nd) {
+ if (entry->start == start && entry->end == end) {
+ list_del(&entry->nd);
+ kfree(entry);
err = 0;
break;
}
@@ -453,7 +354,7 @@ int free_memtype(u64 start, u64 end)
current->comm, current->pid, start, end);
}
- pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
+ dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
return err;
}
@@ -522,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
* caching for the high addresses through the KEN pin, but
* we maintain the tradition of paranoia in this code.
*/
- if (!pat_wc_enabled &&
- ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
- test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
- test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
- test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+ if (!pat_enabled &&
+ !(boot_cpu_has(X86_FEATURE_MTRR) ||
+ boot_cpu_has(X86_FEATURE_K6_MTRR) ||
+ boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
+ boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
+ (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
flags = _PAGE_CACHE_UC;
}
#endif
@@ -549,7 +450,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
return 0;
if (pfn <= max_pfn_mapped &&
- ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+ ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
free_memtype(offset, offset + size);
printk(KERN_INFO
"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -587,4 +488,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
free_memtype(addr, addr + size);
}
-
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 50159764f69..ee1d6d39edd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
if (pte_young(*ptep))
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
- &ptep->pte);
+ (unsigned long *) &ptep->pte);
if (ret)
pte_update(vma->vm_mm, addr, ptep);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 3890234e5b2..391d5103587 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -97,36 +97,9 @@ static __init inline int srat_disabled(void)
return numa_off || acpi_numa < 0;
}
-/*
- * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
- * up the NUMA heuristics which wants the local node to have a smaller
- * distance than the others.
- * Do some quick checks here and only use the SLIT if it passes.
- */
-static __init int slit_valid(struct acpi_table_slit *slit)
-{
- int i, j;
- int d = slit->locality_count;
- for (i = 0; i < d; i++) {
- for (j = 0; j < d; j++) {
- u8 val = slit->entry[d*i + j];
- if (i == j) {
- if (val != LOCAL_DISTANCE)
- return 0;
- } else if (val <= LOCAL_DISTANCE)
- return 0;
- }
- }
- return 1;
-}
-
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
- if (!slit_valid(slit)) {
- printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
- return;
- }
acpi_slit = slit;
}
@@ -522,6 +495,7 @@ int __node_distance(int a, int b)
EXPORT_SYMBOL(__node_distance);
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
int i, ret = 0;
@@ -533,4 +507,4 @@ int memory_add_physaddr_to_nid(u64 start)
return ret;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
+#endif