aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-18 18:37:14 +0100
committerIngo Molnar <mingo@elte.hu>2009-01-18 18:37:14 +0100
commitb2b062b8163391c42b3219d466ca1ac9742b9c7b (patch)
treef3f920c09b8de694b1bc1d4b878cfd2b0b98c913 /arch/x86/mm
parenta9de18eb761f7c1c860964b2e5addc1a35c7e861 (diff)
parent99937d6455cea95405ac681c86a857d0fcd530bd (diff)
Merge branch 'core/percpu' into stackprotector
Conflicts: arch/x86/include/asm/pda.h arch/x86/include/asm/system.h Also, moved include/asm-x86/stackprotector.h to arch/x86/include/asm. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c26
-rw-r--r--arch/x86/mm/init_32.c13
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/k8topology_64.c20
-rw-r--r--arch/x86/mm/numa_32.c4
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/pageattr.c10
-rw-r--r--arch/x86/mm/pat.c127
-rw-r--r--arch/x86/mm/srat_64.c2
9 files changed, 127 insertions, 83 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c056b5d6a9..37242c405f1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -535,7 +535,7 @@ static int vmalloc_fault(unsigned long address)
happen within a race in page table update. In the later
case just flush. */
- pgd = pgd_offset(current->mm ?: &init_mm, address);
+ pgd = pgd_offset(current->active_mm, address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
@@ -670,7 +670,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(in_atomic() || !mm))
goto bad_area_nosemaphore;
-again:
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -866,25 +865,14 @@ no_context:
oops_end(flags, regs, sig);
#endif
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
out_of_memory:
+ /*
+ * We ran out of memory, call the OOM killer, and return the userspace
+ * (which will retry the fault, or kill us if we got oom-killed).
+ */
up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- /*
- * Re-lookup the vma - in theory the vma tree might
- * have changed:
- */
- goto again;
- }
-
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & PF_USER)
- do_group_exit(SIGKILL);
- goto no_context;
+ pagefault_out_of_memory();
+ return;
do_sigbus:
up_read(&mm->mmap_sem);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5bb096..4a6989e47a5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
-#include <asm/smp.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -328,6 +327,8 @@ int devmem_is_allowed(unsigned long pagenr)
{
if (pagenr <= 256)
return 1;
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ return 0;
if (!page_is_ram(pagenr))
return 1;
return 0;
@@ -435,8 +436,12 @@ static void __init set_highmem_pages_init(void)
#endif /* !CONFIG_NUMA */
#else
-# define permanent_kmaps_init(pgd_base) do { } while (0)
-# define set_highmem_pages_init() do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
#endif /* CONFIG_HIGHMEM */
void __init native_pagetable_setup_start(pgd_t *base)
@@ -1075,7 +1080,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- return __add_pages(zone, start_pfn, nr_pages);
+ return __add_pages(nid, zone, start_pfn, nr_pages);
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d24d42..23f68e77ad1 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
if (last_mapped_pfn > max_pfn_mapped)
max_pfn_mapped = last_mapped_pfn;
- ret = __add_pages(zone, start_pfn, nr_pages);
+ ret = __add_pages(nid, zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
return ret;
@@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr)
{
if (pagenr <= 256)
return 1;
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+ return 0;
if (!page_is_ram(pagenr))
return 1;
return 0;
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 41f1b5c00a1..268f8255280 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -81,7 +81,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned numnodes, cores, bits, apicid_base;
unsigned long prevbase;
struct bootnode nodes[8];
- unsigned char nodeids[8];
int i, j, nb, found = 0;
u32 nodeid, reg;
@@ -110,7 +109,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 7;
- nodeids[i] = nodeid;
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
@@ -179,9 +177,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodes[nodeid].start = base;
nodes[nodeid].end = limit;
- e820_register_active_regions(nodeid,
- nodes[nodeid].start >> PAGE_SHIFT,
- nodes[nodeid].end >> PAGE_SHIFT);
prevbase = base;
@@ -211,12 +206,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
}
for (i = 0; i < 8; i++) {
- if (nodes[i].start != nodes[i].end) {
- nodeid = nodeids[i];
- for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(nodeid << bits) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ if (nodes[i].start == nodes[i].end)
+ continue;
+
+ e820_register_active_regions(i,
+ nodes[i].start >> PAGE_SHIFT,
+ nodes[i].end >> PAGE_SHIFT);
+ for (j = apicid_base; j < cores + apicid_base; j++)
+ apicid_to_node[(i << bits) + j] = i;
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
numa_init_array();
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8518c678d83..d1f7439d173 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -239,7 +239,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
start_pfn = node_remap_start_pfn[node];
size = node_remap_size[node];
- printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+ printk(KERN_DEBUG "%s: node %d\n", __func__, node);
for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
@@ -251,7 +251,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
PAGE_KERNEL_LARGE_EXEC));
printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
- __FUNCTION__, vaddr, start_pfn + pfn);
+ __func__, vaddr, start_pfn + pfn);
}
}
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d4..71a14f89f89 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
int rr, i;
rr = first_node(node_online_map);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
memnodemap[0] = 0;
node_set_online(0);
node_set(0, node_possible_map);
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
numa_set_node(i, 0);
e820_register_active_regions(0, start_pfn, last_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f2..4cf30dee816 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -555,10 +555,12 @@ repeat:
if (!pte_val(old_pte)) {
if (!primary)
return 0;
- WARN(1, KERN_WARNING "CPA: called for zero pte. "
- "vaddr = %lx cpa->vaddr = %lx\n", address,
- *cpa->vaddr);
- return -EINVAL;
+
+ /*
+ * Special error value returned, indicating that the mapping
+ * did not exist at this address.
+ */
+ return -EFAULT;
}
if (level == PG_LEVEL_4K) {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 85cbd3cd372..3be399013de 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -333,11 +333,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
req_type & _PAGE_CACHE_MASK);
}
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type, new_type);
- else if (is_range_ram < 0)
- return -EINVAL;
+ /*
+ * For legacy reasons, some parts of the physical address range in the
+ * legacy 1MB region is treated as non-RAM (even when listed as RAM in
+ * the e820 tables). So we will track the memory attributes of this
+ * legacy 1MB region using the linear memtype_list always.
+ */
+ if (end >= ISA_END_ADDRESS) {
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return reserve_ram_pages_type(start, end, req_type,
+ new_type);
+ else if (is_range_ram < 0)
+ return -EINVAL;
+ }
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
- return -EINVAL;
+ /*
+ * For legacy reasons, some parts of the physical address range in the
+ * legacy 1MB region is treated as non-RAM (even when listed as RAM in
+ * the e820 tables). So we will track the memory attributes of this
+ * legacy 1MB region using the linear memtype_list always.
+ */
+ if (end >= ISA_END_ADDRESS) {
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ else if (is_range_ram < 0)
+ return -EINVAL;
+ }
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
@@ -505,6 +522,35 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
}
#endif /* CONFIG_STRICT_DEVMEM */
+/*
+ * Change the memory type for the physial address range in kernel identity
+ * mapping space if that range is a part of identity map.
+ */
+static int kernel_map_sync_memtype(u64 base, unsigned long size,
+ unsigned long flags)
+{
+ unsigned long id_sz;
+ int ret;
+
+ if (!pat_enabled || base >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < base + size) ?
+ __pa(high_memory) - base :
+ size;
+
+ ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags);
+ /*
+ * -EFAULT return means that the addr was not valid and did not have
+ * any identity mapping. That case is a success for
+ * kernel_map_sync_memtype.
+ */
+ if (ret == -EFAULT)
+ ret = 0;
+
+ return ret;
+}
+
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
@@ -555,9 +601,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (retval < 0)
return 0;
- if (((pfn < max_low_pfn_mapped) ||
- (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
- ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+ if (kernel_map_sync_memtype(offset, size, flags)) {
free_memtype(offset, offset + size);
printk(KERN_INFO
"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -601,12 +645,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
* Reserved non RAM regions only and after successful reserve_memtype,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
-static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
+ int strict_prot)
{
int is_ram = 0;
- int id_sz, ret;
+ int ret;
unsigned long flags;
- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+ unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
is_ram = pagerange_is_ram(paddr, paddr + size);
@@ -625,26 +670,27 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
return ret;
if (flags != want_flags) {
- free_memtype(paddr, paddr + size);
- printk(KERN_ERR
- "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
- current->comm, current->pid,
- cattr_name(want_flags),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size),
- cattr_name(flags));
- return -EINVAL;
+ if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+ free_memtype(paddr, paddr + size);
+ printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
+ " for %Lx-%Lx, got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_flags),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size),
+ cattr_name(flags));
+ return -EINVAL;
+ }
+ /*
+ * We allow returning different type than the one requested in
+ * non strict case.
+ */
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
+ (~_PAGE_CACHE_MASK)) |
+ flags);
}
- /* Need to keep identity mapping in sync */
- if (paddr >= __pa(high_memory))
- return 0;
-
- id_sz = (__pa(high_memory) < paddr + size) ?
- __pa(high_memory) - paddr :
- size;
-
- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+ if (kernel_map_sync_memtype(paddr, size, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR
"%s:%d reserve_pfn_range ioremap_change_attr failed %s "
@@ -689,6 +735,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
unsigned long vma_start = vma->vm_start;
unsigned long vma_end = vma->vm_end;
unsigned long vma_size = vma_end - vma_start;
+ pgprot_t pgprot;
if (!pat_enabled)
return 0;
@@ -702,7 +749,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
WARN_ON_ONCE(1);
return -EINVAL;
}
- return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
+ pgprot = __pgprot(prot);
+ return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
/* reserve entire vma page by page, using pfn and prot from pte */
@@ -710,7 +758,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
continue;
- retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
+ pgprot = __pgprot(prot);
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
if (retval)
goto cleanup_ret;
}
@@ -741,7 +790,7 @@ cleanup_ret:
* Note that this function can be called with caller trying to map only a
* subrange/page inside the vma.
*/
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
{
int retval = 0;
@@ -758,14 +807,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- return reserve_pfn_range(paddr, vma_size, prot);
+ return reserve_pfn_range(paddr, vma_size, prot, 0);
}
/* reserve page by page using pfn and size */
base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
for (i = 0; i < size; i += PAGE_SIZE) {
paddr = base_paddr + i;
- retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
if (retval)
goto cleanup_ret;
}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14f..09737c8af07 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
if (!node_online(i))
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
int node = early_cpu_to_node(i);
if (node == NUMA_NO_NODE)