author     Ingo Molnar <mingo@elte.hu>   2008-12-31 08:31:57 +0100
committer  Ingo Molnar <mingo@elte.hu>   2008-12-31 08:31:57 +0100
commit     a9de18eb761f7c1c860964b2e5addc1a35c7e861 (patch)
tree       886e75fdfd09690cd262ca69cb7f5d1d42b48602 /arch/x86/mm
parent     b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40 (diff)
parent     6a94cb73064c952255336cc57731904174b2c58f (diff)
Merge branch 'linus' into stackprotector
Conflicts:
	arch/x86/include/asm/pda.h
	kernel/fork.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/Makefile          |    5
-rw-r--r--   arch/x86/mm/fault.c           |   60
-rw-r--r--   arch/x86/mm/gup.c             |    2
-rw-r--r--   arch/x86/mm/highmem_32.c      |    1
-rw-r--r--   arch/x86/mm/init_32.c         |   36
-rw-r--r--   arch/x86/mm/init_64.c         |   77
-rw-r--r--   arch/x86/mm/iomap_32.c        |   59
-rw-r--r--   arch/x86/mm/ioremap.c         |   29
-rw-r--r--   arch/x86/mm/memtest.c         |    7
-rw-r--r--   arch/x86/mm/mmio-mod.c        |   87
-rw-r--r--   arch/x86/mm/numa_32.c         |   35
-rw-r--r--   arch/x86/mm/pageattr.c        |   13
-rw-r--r--   arch/x86/mm/pat.c             |  240
-rw-r--r--   arch/x86/mm/pf_in.c           |  121
-rw-r--r--   arch/x86/mm/testmmiotrace.c   |    4
15 files changed, 575 insertions, 201 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 59f89b434b4..d8cc96a2738 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,16 +1,15 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o
-obj-$(CONFIG_X86_32) += pgtable_32.o
+obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d18ea136d8a..4c056b5d6a9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -54,7 +54,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
@@ -394,7 +394,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
if (pte && pte_present(*pte) && !pte_exec(*pte))
printk(KERN_CRIT "kernel tried to execute "
"NX-protected page - exploit attempt? "
- "(uid: %d)\n", current->uid);
+ "(uid: %d)\n", current_uid());
}
#endif
@@ -414,6 +414,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
unsigned long flags = oops_begin();
+ int sig = SIGKILL;
struct task_struct *tsk;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -424,8 +425,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
if (__die("Bad pagetable", regs, error_code))
- regs = NULL;
- oops_end(flags, regs, SIGKILL);
+ sig = 0;
+ oops_end(flags, regs, sig);
}
#endif
@@ -593,6 +594,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_64
unsigned long flags;
+ int sig;
#endif
tsk = current;
@@ -643,24 +645,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
}
-#ifdef CONFIG_X86_32
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
- fault has been handled. */
- if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
- local_irq_enable();
-
/*
- * If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * It's safe to allow irq's after cr2 has been saved and the
+ * vmalloc fault has been handled.
+ *
+ * User-mode registers count as a user access even for any
+ * potential system fault or CPU buglet.
*/
- if (in_atomic() || !mm)
- goto bad_area_nosemaphore;
-#else /* CONFIG_X86_64 */
- if (likely(regs->flags & X86_EFLAGS_IF))
+ if (user_mode_vm(regs)) {
+ local_irq_enable();
+ error_code |= PF_USER;
+ } else if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
+#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code);
+#endif
/*
* If we're in an interrupt, have no user context or are running in an
@@ -669,15 +670,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(in_atomic() || !mm))
goto bad_area_nosemaphore;
- /*
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet.
- */
- if (user_mode_vm(regs))
- error_code |= PF_USER;
again:
-#endif
- /* When running in the kernel we expect faults to occur only to
+ /*
+ * When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
* erroneous fault occurring in a code path which already holds mmap_sem
@@ -740,9 +735,6 @@ good_area:
goto bad_area;
}
-#ifdef CONFIG_X86_32
-survive:
-#endif
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -866,11 +858,12 @@ no_context:
bust_spinlocks(0);
do_exit(SIGKILL);
#else
+ sig = SIGKILL;
if (__die("Oops", regs, error_code))
- regs = NULL;
+ sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags, regs, SIGKILL);
+ oops_end(flags, regs, sig);
#endif
/*
@@ -881,12 +874,11 @@ out_of_memory:
up_read(&mm->mmap_sem);
if (is_global_init(tsk)) {
yield();
-#ifdef CONFIG_X86_32
- down_read(&mm->mmap_sem);
- goto survive;
-#else
+ /*
+ * Re-lookup the vma - in theory the vma tree might
+ * have changed:
+ */
goto again;
-#endif
}
printk("VM: killing process %s\n", tsk->comm);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 4ba373c5b8c..be54176e9eb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
+ (void __user *)start, len)))
goto slow_irqon;
/*
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 165c871ba9a..bcc079c282d 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -137,6 +137,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
return (void*) vaddr;
}
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
struct page *kmap_atomic_to_page(void *ptr)
{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c..8655b5bb096 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
@@ -67,7 +68,7 @@ static unsigned long __meminitdata table_top;
static int __initdata after_init_bootmem;
-static __init void *alloc_low_page(unsigned long *phys)
+static __init void *alloc_low_page(void)
{
unsigned long pfn = table_end++;
void *adr;
@@ -77,7 +78,6 @@ static __init void *alloc_low_page(unsigned long *phys)
adr = __va(pfn * PAGE_SIZE);
memset(adr, 0, PAGE_SIZE);
- *phys = pfn * PAGE_SIZE;
return adr;
}
@@ -92,16 +92,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
pmd_t *pmd_table;
#ifdef CONFIG_X86_PAE
- unsigned long phys;
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
if (after_init_bootmem)
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
else
- pmd_table = (pmd_t *)alloc_low_page(&phys);
+ pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+ return pmd_table;
}
#endif
pud = pud_offset(pgd, 0);
@@ -126,10 +127,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (!page_table)
page_table =
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
- } else {
- unsigned long phys;
- page_table = (pte_t *)alloc_low_page(&phys);
- }
+ } else
+ page_table = (pte_t *)alloc_low_page();
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -334,7 +333,6 @@ int devmem_is_allowed(unsigned long pagenr)
return 0;
}
-#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;
@@ -357,6 +355,7 @@ static void __init kmap_init(void)
kmap_prot = PAGE_KERNEL;
}
+#ifdef CONFIG_HIGHMEM
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
unsigned long vaddr;
@@ -436,7 +435,6 @@ static void __init set_highmem_pages_init(void)
#endif /* !CONFIG_NUMA */
#else
-# define kmap_init() do { } while (0)
# define permanent_kmaps_init(pgd_base) do { } while (0)
# define set_highmem_pages_init() do { } while (0)
#endif /* CONFIG_HIGHMEM */
@@ -970,7 +968,7 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;
- start_periodic_check_for_corruption();
+ pci_iommu_alloc();
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
@@ -1041,11 +1039,25 @@ void __init mem_init(void)
(unsigned long)&_text, (unsigned long)&_etext,
((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+ /*
+ * Check boundaries twice: Some fundamental inconsistencies can
+ * be detected at build time already.
+ */
+#define __FIXADDR_TOP (-PAGE_SIZE)
+#ifdef CONFIG_HIGHMEM
+ BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+ BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
+#endif
+#define high_memory (-128UL << 20)
+ BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
+#undef high_memory
+#undef __FIXADDR_TOP
+
#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
- BUG_ON(VMALLOC_START > VMALLOC_END);
+ BUG_ON(VMALLOC_START >= VMALLOC_END);
BUG_ON((unsigned long)high_memory > VMALLOC_START);
if (boot_cpu_data.wp_works_ok < 0)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d4941..9f7a0d24d42 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
* pagetable pages as RO. So assume someone who pre-setup
* these mappings are more intelligent.
*/
- if (pte_val(*pte))
+ if (pte_val(*pte)) {
+ pages++;
continue;
+ }
if (0)
printk(" pte=%p addr=%lx pte=%016lx\n",
@@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* not differ with respect to page frame and
* attributes.
*/
- if (page_size_mask & (1 << PG_LEVEL_2M))
+ if (page_size_mask & (1 << PG_LEVEL_2M)) {
+ pages++;
continue;
+ }
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
}
@@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* not differ with respect to page frame and
* attributes.
*/
- if (page_size_mask & (1 << PG_LEVEL_1G))
+ if (page_size_mask & (1 << PG_LEVEL_1G)) {
+ pages++;
continue;
+ }
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
}
@@ -665,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
unsigned long last_map_addr = 0;
unsigned long page_size_mask = 0;
unsigned long start_pfn, end_pfn;
+ unsigned long pos;
struct map_range mr[NR_RANGE_MR];
int nr_range, i;
int use_pse, use_gbpages;
- printk(KERN_INFO "init_memory_mapping\n");
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
/*
* Find space for the kernel direct mapping tables.
@@ -704,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
/* head if not big page alignment ?*/
start_pfn = start >> PAGE_SHIFT;
- end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+ pos = start_pfn << PAGE_SHIFT;
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
<< (PMD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* big page (2M) range*/
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
<< (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
<< (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* big page (1G) range */
- start_pfn = end_pfn;
- end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
page_size_mask &
((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* tail is not big page (1G) alignment */
- start_pfn = end_pfn;
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* tail is not big page (2M) alignment */
- start_pfn = end_pfn;
+ start_pfn = pos>>PAGE_SHIFT;
end_pfn = end>>PAGE_SHIFT;
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
@@ -831,12 +853,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- last_mapped_pfn = init_memory_mapping(start, start + size-1);
+ last_mapped_pfn = init_memory_mapping(start, start + size);
if (last_mapped_pfn > max_pfn_mapped)
max_pfn_mapped = last_mapped_pfn;
ret = __add_pages(zone, start_pfn, nr_pages);
- WARN_ON(1);
+ WARN_ON_ONCE(ret);
return ret;
}
@@ -878,8 +900,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
-
- start_periodic_check_for_corruption();
+ unsigned long absent_pages;
pci_iommu_alloc();
@@ -893,8 +914,9 @@ void __init mem_init(void)
#else
totalram_pages = free_all_bootmem();
#endif
- reservedpages = max_pfn - totalram_pages -
- absent_pages_in_range(0, max_pfn);
+
+ absent_pages = absent_pages_in_range(0, max_pfn);
+ reservedpages = max_pfn - totalram_pages - absent_pages;
after_bootmem = 1;
codesize = (unsigned long) &_etext - (unsigned long) &_text;
@@ -911,10 +933,11 @@ void __init mem_init(void)
VSYSCALL_END - VSYSCALL_START);
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
- "%ldk reserved, %ldk data, %ldk init)\n",
+ "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
max_pfn << (PAGE_SHIFT-10),
codesize >> 10,
+ absent_pages << (PAGE_SHIFT-10),
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
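A worked example of the reworked range splitting above (not part of the patch; the addresses are hypothetical): with 2M pages available, init_memory_mapping(0x1000, 0x40200000) would now record

	0x00001000 - 0x00200000   4k pages   (unaligned head)
	0x00200000 - 0x40000000   2M pages
	0x40000000 - 0x40200000   2M pages   (tail below the 1G boundary)

where each piece is saved only when start_pfn < end_pfn, so the empty or backwards sub-ranges that the new 'pos' bookkeeping guards against never reach save_mr() (adjacent entries with the same page size mask may still be merged later).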
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
new file mode 100644
index 00000000000..d0151d8ce45
--- /dev/null
+++ b/arch/x86/mm/iomap_32.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <asm/iomap.h>
+#include <linux/module.h>
+
+/* Map 'pfn' using fixed map 'type' and protections 'prot'
+ */
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+{
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+
+ pagefault_disable();
+
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
+ arch_flush_lazy_mmu_mode();
+
+ return (void*) vaddr;
+}
+EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type)
+{
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+ /*
+ * Force other mappings to Oops if they'll try to access this pte
+ * without first remap it. Keeping stale mappings around is a bad idea
+ * also, in case the page changes cacheability attributes or becomes
+ * a protected page in a hypervisor.
+ */
+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ kpte_clear_flush(kmap_pte-idx, vaddr);
+
+ arch_flush_lazy_mmu_mode();
+ pagefault_enable();
+}
+EXPORT_SYMBOL_GPL(iounmap_atomic);
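A minimal usage sketch of the new atomic iomap interface (not part of the patch; the pfn and offset are hypothetical, and the i915 GEM code was the intended first user):

#include <linux/types.h>
#include <asm/iomap.h>

static u32 example_read_dword(unsigned long pfn, unsigned int offset)
{
	/* map one page by pfn; pagefaults are disabled inside the helper */
	void *vaddr = iomap_atomic_prot_pfn(pfn, KM_USER0, PAGE_KERNEL);
	u32 val = *(volatile u32 *)((char *)vaddr + offset);

	/* clears the fixmap pte again and re-enables pagefaults */
	iounmap_atomic(vaddr, KM_USER0);
	return val;
}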
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e4c43ec71b2..bd85d42819e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -220,6 +220,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return (__force void __iomem *)phys_to_virt(phys_addr);
/*
+ * Check if the request spans more than any BAR in the iomem resource
+ * tree.
+ */
+ WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
+ KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+
+ /*
* Don't allow anybody to remap normal RAM that we're using..
*/
for (pfn = phys_addr >> PAGE_SHIFT;
@@ -381,7 +388,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
unsigned long size)
{
unsigned long flags;
- void *ret;
+ void __iomem *ret;
int err;
/*
@@ -393,11 +400,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
if (err < 0)
return NULL;
- ret = (void *) __ioremap_caller(phys_addr, size, flags,
- __builtin_return_address(0));
+ ret = __ioremap_caller(phys_addr, size, flags,
+ __builtin_return_address(0));
free_memtype(phys_addr, phys_addr + size);
- return (void __iomem *)ret;
+ return ret;
}
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
@@ -616,7 +623,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
__early_set_fixmap(idx, 0, __pgprot(0));
}
-static void *prev_map[FIX_BTMAPS_SLOTS] __initdata;
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
static int __init check_early_ioremap_leak(void)
{
@@ -639,7 +646,7 @@ static int __init check_early_ioremap_leak(void)
}
late_initcall(check_early_ioremap_leak);
-static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
{
unsigned long offset, last_addr;
unsigned int nrpages;
@@ -707,23 +714,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size,
if (early_ioremap_debug)
printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
- prev_map[slot] = (void *) (offset + fix_to_virt(idx0));
+ prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
return prev_map[slot];
}
/* Remap an IO device */
-void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
}
/* Remap memory */
-void __init *early_memremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL);
}
-void __init early_iounmap(void *addr, unsigned long size)
+void __init early_iounmap(void __iomem *addr, unsigned long size)
{
unsigned long virt_addr;
unsigned long offset;
@@ -773,7 +780,7 @@ void __init early_iounmap(void *addr, unsigned long size)
--idx;
--nrpages;
}
- prev_map[slot] = 0;
+ prev_map[slot] = NULL;
}
void __this_fixmap_does_not_exist(void)
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 672e17f8262..9cab18b0b85 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -61,9 +61,9 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
last_bad += incr;
} else {
if (start_bad) {
- printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved",
+ printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ reserve_early(start_bad, last_bad + incr, "BAD RAM");
}
start_bad = last_bad = start_phys_aligned;
}
@@ -72,9 +72,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
if (start_bad) {
printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ reserve_early(start_bad, last_bad + incr, "BAD RAM");
}
-
}
/* default is disabled */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 635b50e8558..2c4baa88f2c 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -56,13 +56,6 @@ struct remap_trace {
static DEFINE_PER_CPU(struct trap_reason, pf_reason);
static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
-#if 0 /* XXX: no way gather this info anymore */
-/* Access to this is not per-cpu. */
-static DEFINE_PER_CPU(atomic_t, dropped);
-#endif
-
-static struct dentry *marker_file;
-
static DEFINE_MUTEX(mmiotrace_mutex);
static DEFINE_SPINLOCK(trace_lock);
static atomic_t mmiotrace_enabled;
@@ -75,7 +68,7 @@ static LIST_HEAD(trace_list); /* struct remap_trace */
* and trace_lock.
* - Routines depending on is_enabled() must take trace_lock.
* - trace_list users must hold trace_lock.
- * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
* - pre/post callbacks assume the effect of is_enabled() being true.
*/
@@ -97,44 +90,6 @@ static bool is_enabled(void)
return atomic_read(&mmiotrace_enabled);
}
-#if 0 /* XXX: needs rewrite */
-/*
- * Write callback for the debugfs entry:
- * Read a marker and write it to the mmio trace log
- */
-static ssize_t write_marker(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- char *event = NULL;
- struct mm_io_header *headp;
- ssize_t len = (count > 65535) ? 65535 : count;
-
- event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
- if (!event)
- return -ENOMEM;
-
- headp = (struct mm_io_header *)event;
- headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
- headp->data_len = len;
-
- if (copy_from_user(event + sizeof(*headp), buffer, len)) {
- kfree(event);
- return -EFAULT;
- }
-
- spin_lock_irq(&trace_lock);
-#if 0 /* XXX: convert this to use tracing */
- if (is_enabled())
- relay_write(chan, event, sizeof(*headp) + len);
- else
-#endif
- len = -EINVAL;
- spin_unlock_irq(&trace_lock);
- kfree(event);
- return len;
-}
-#endif
-
static void print_pte(unsigned long address)
{
unsigned int level;
@@ -307,8 +262,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
map.map_id = trace->id;
spin_lock_irq(&trace_lock);
- if (!is_enabled())
+ if (!is_enabled()) {
+ kfree(trace);
goto not_enabled;
+ }
mmio_trace_mapping(&map);
list_add_tail(&trace->list, &trace_list);
@@ -377,6 +334,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
iounmap_trace_core(addr);
}
+int mmiotrace_printk(const char *fmt, ...)
+{
+ int ret = 0;
+ va_list args;
+ unsigned long flags;
+ va_start(args, fmt);
+
+ spin_lock_irqsave(&trace_lock, flags);
+ if (is_enabled())
+ ret = mmio_trace_printk(fmt, args);
+ spin_unlock_irqrestore(&trace_lock, flags);
+
+ va_end(args);
+ return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
static void clear_trace_list(void)
{
struct remap_trace *trace;
@@ -462,26 +436,12 @@ static void leave_uniprocessor(void)
}
#endif
-#if 0 /* XXX: out of order */
-static struct file_operations fops_marker = {
- .owner = THIS_MODULE,
- .write = write_marker
-};
-#endif
-
void enable_mmiotrace(void)
{
mutex_lock(&mmiotrace_mutex);
if (is_enabled())
goto out;
-#if 0 /* XXX: tracing does not support text entries */
- marker_file = debugfs_create_file("marker", 0660, dir, NULL,
- &fops_marker);
- if (!marker_file)
- pr_err(NAME "marker file creation failed.\n");
-#endif
-
if (nommiotrace)
pr_info(NAME "MMIO tracing disabled.\n");
enter_uniprocessor();
@@ -506,11 +466,6 @@ void disable_mmiotrace(void)
clear_trace_list(); /* guarantees: no more kmmio callbacks */
leave_uniprocessor();
- if (marker_file) {
- debugfs_remove(marker_file);
- marker_file = NULL;
- }
-
pr_info(NAME "disabled.\n");
out:
mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f..8518c678d83 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
}
}
+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ * during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+ int node;
+
+ for_each_online_node(node) {
+ unsigned long start_va, start_pfn, size, pfn;
+
+ start_va = (unsigned long)node_remap_start_vaddr[node];
+ start_pfn = node_remap_start_pfn[node];
+ size = node_remap_size[node];
+
+ printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+ for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+ unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+ pgd_t *pgd = pgd_base + pgd_index(vaddr);
+ pud_t *pud = pud_offset(pgd, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+
+ set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+ PAGE_KERNEL_LARGE_EXEC));
+
+ printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+ __FUNCTION__, vaddr, start_pfn + pfn);
+ }
+ }
+}
+#endif
+
static unsigned long calculate_numa_remap_pages(void)
{
int nid;
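For context on the new helper (hedged; the caller below is a made-up sketch, not code from this tree): resume_map_numa_kva() is meant to be called by the 32-bit hibernation resume code once its temporary page tables exist, so that the per-node remapped KVA (and therefore node_data[]) is reachable again:

/* hypothetical caller, after the temporary resume page tables are built */
static void example_prepare_resume_pagetables(pgd_t *resume_pg_dir)
{
	/* ... temporary identity and kernel mappings set up here ... */

	/* re-create each node's KVA remap area in the temporary tables */
	resume_map_numa_kva(resume_pg_dir);
}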
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a9ec89c3fbc..e89d24815f2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -65,23 +65,22 @@ static void split_page_count(int level)
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
-int arch_report_meminfo(char *page)
+void arch_report_meminfo(struct seq_file *m)
{
- int n = sprintf(page, "DirectMap4k: %8lu kB\n",
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
direct_pages_count[PG_LEVEL_4K] << 2);
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
- n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 11);
#else
- n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
+ seq_printf(m, "DirectMap4M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 12);
#endif
#ifdef CONFIG_X86_64
if (direct_gbpages)
- n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
+ seq_printf(m, "DirectMap1G: %8lu kB\n",
direct_pages_count[PG_LEVEL_1G] << 20);
#endif
- return n;
}
#else
static inline void split_page_count(int level) { }
@@ -792,6 +791,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
/* Must avoid aliasing mappings in the highmem code */
kmap_flush_unused();
+ vm_unmap_aliases();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 738fd0f2495..85cbd3cd372 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
}
#else
+/* This check is needed to avoid cache aliasing when PAT is enabled */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
u64 from = ((u64)pfn) << PAGE_SHIFT;
u64 to = from + size;
u64 cursor = from;
+ if (!pat_enabled)
+ return 1;
+
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
printk(KERN_INFO
@@ -592,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
free_memtype(addr, addr + size);
}
+/*
+ * Internal interface to reserve a range of physical memory with prot.
+ * Reserved non RAM regions only and after successful reserve_memtype,
+ * this func also keeps identity mapping (if any) in sync with this new prot.
+ */
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
+{
+ int is_ram = 0;
+ int id_sz, ret;
+ unsigned long flags;
+ unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+ is_ram = pagerange_is_ram(paddr, paddr + size);
+
+ if (is_ram != 0) {
+ /*
+ * For mapping RAM pages, drivers need to call
+ * set_memory_[uc|wc|wb] directly, for reserve and free, before
+ * setting up the PTE.
+ */
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
+ if (ret)
+ return ret;
+
+ if (flags != want_flags) {
+ free_memtype(paddr, paddr + size);
+ printk(KERN_ERR
+ "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_flags),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size),
+ cattr_name(flags));
+ return -EINVAL;
+ }
+
+ /* Need to keep identity mapping in sync */
+ if (paddr >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < paddr + size) ?
+ __pa(high_memory) - paddr :
+ size;
+
+ if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+ free_memtype(paddr, paddr + size);
+ printk(KERN_ERR
+ "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
+ "for %Lx-%Lx\n",
+ current->comm, current->pid,
+ cattr_name(flags),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Internal interface to free a range of physical memory.
+ * Frees non RAM regions only.
+ */
+static void free_pfn_range(u64 paddr, unsigned long size)
+{
+ int is_ram;
+
+ is_ram = pagerange_is_ram(paddr, paddr + size);
+ if (is_ram == 0)
+ free_memtype(paddr, paddr + size);
+}
+
+/*
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ *
+ * If the vma has a linear pfn mapping for the entire range, we get the prot
+ * from pte and reserve the entire vma range with single reserve_pfn_range call.
+ * Otherwise, we reserve the entire vma range, going through the PTEs page
+ * by page to get physical address and protection.
+ */
+int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+ int retval = 0;
+ unsigned long i, j;
+ resource_size_t paddr;
+ unsigned long prot;
+ unsigned long vma_start = vma->vm_start;
+ unsigned long vma_end = vma->vm_end;
+ unsigned long vma_size = vma_end - vma_start;
+
+ if (!pat_enabled)
+ return 0;
+
+ if (is_linear_pfn_mapping(vma)) {
+ /*
+ * reserve the whole chunk covered by vma. We need the
+ * starting address and protection from pte.
+ */
+ if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+ return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
+ }
+
+ /* reserve entire vma page by page, using pfn and prot from pte */
+ for (i = 0; i < vma_size; i += PAGE_SIZE) {
+ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
+ continue;
+
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
+ if (retval)
+ goto cleanup_ret;
+ }
+ return 0;
+
+cleanup_ret:
+ /* Reserve error: Cleanup partial reservation and return error */
+ for (j = 0; j < i; j += PAGE_SIZE) {
+ if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
+ continue;
+
+ free_pfn_range(paddr, PAGE_SIZE);
+ }
+
+ return retval;
+}
+
+/*
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for physical range indicated by pfn and size.
+ *
+ * prot is passed in as a parameter for the new mapping. If the vma has a
+ * linear pfn mapping for the entire range reserve the entire vma range with
+ * single reserve_pfn_range call.
+ * Otherwise, we look at the pfn and size and reserve only the specified range
+ * page by page.
+ *
+ * Note that this function can be called with caller trying to map only a
+ * subrange/page inside the vma.
+ */
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+ unsigned long pfn, unsigned long size)
+{
+ int retval = 0;
+ unsigned long i, j;
+ resource_size_t base_paddr;
+ resource_size_t paddr;
+ unsigned long vma_start = vma->vm_start;
+ unsigned long vma_end = vma->vm_end;
+ unsigned long vma_size = vma_end - vma_start;
+
+ if (!pat_enabled)
+ return 0;
+
+ if (is_linear_pfn_mapping(vma)) {
+ /* reserve the whole chunk starting from vm_pgoff */
+ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return reserve_pfn_range(paddr, vma_size, prot);
+ }
+
+ /* reserve page by page using pfn and size */
+ base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ paddr = base_paddr + i;
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+ if (retval)
+ goto cleanup_ret;
+ }
+ return 0;
+
+cleanup_ret:
+ /* Reserve error: Cleanup partial reservation and return error */
+ for (j = 0; j < i; j += PAGE_SIZE) {
+ paddr = base_paddr + j;
+ free_pfn_range(paddr, PAGE_SIZE);
+ }
+
+ return retval;
+}
+
+/*
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+ unsigned long size)
+{
+ unsigned long i;
+ resource_size_t paddr;
+ unsigned long prot;
+ unsigned long vma_start = vma->vm_start;
+ unsigned long vma_end = vma->vm_end;
+ unsigned long vma_size = vma_end - vma_start;
+
+ if (!pat_enabled)
+ return;
+
+ if (is_linear_pfn_mapping(vma)) {
+ /* free the whole chunk starting from vm_pgoff */
+ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ free_pfn_range(paddr, vma_size);
+ return;
+ }
+
+ if (size != 0 && size != vma_size) {
+ /* free page by page, using pfn and size */
+ paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ paddr = paddr + i;
+ free_pfn_range(paddr, PAGE_SIZE);
+ }
+ } else {
+ /* free entire vma, page by page, using the pfn from pte */
+ for (i = 0; i < vma_size; i += PAGE_SIZE) {
+ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
+ continue;
+
+ free_pfn_range(paddr, PAGE_SIZE);
+ }
+ }
+}
+
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+ if (pat_enabled)
+ return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
+ else
+ return pgprot_noncached(prot);
+}
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
/* get Nth element of the linked list */
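A hedged illustration of how the new tracking hooks are exercised (not part of the patch; mydrv_mmap and my_bar_phys are made-up names): a driver mmap handler that covers its whole VMA with remap_pfn_range() is a linear pfn mapping, so the generic mm code can reserve the range with a single reserve_pfn_range() via track_pfn_vma_new(), and free it again via untrack_pfn_vma() at unmap time:

#include <linux/fs.h>
#include <linux/mm.h>

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long pfn = my_bar_phys >> PAGE_SHIFT;	/* hypothetical MMIO BAR */
	unsigned long size = vma->vm_end - vma->vm_start;

	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	/* remap_pfn_range() is where track_pfn_vma_new() is expected to run */
	return remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot);
}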
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index efa1911e20c..df3d5c861cd 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
static unsigned int mw64[] = { 0x89, 0x8B };
#endif /* not __i386__ */
-static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
- int *rexr)
+struct prefix_bits {
+ unsigned shorted:1;
+ unsigned enlarged:1;
+ unsigned rexr:1;
+ unsigned rex:1;
+};
+
+static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
{
int i;
unsigned char *p = addr;
- *shorted = 0;
- *enlarged = 0;
- *rexr = 0;
+ prf->shorted = 0;
+ prf->enlarged = 0;
+ prf->rexr = 0;
+ prf->rex = 0;
restart:
for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
if (*p == prefix_codes[i]) {
if (*p == 0x66)
- *shorted = 1;
+ prf->shorted = 1;
#ifdef __amd64__
if ((*p & 0xf8) == 0x48)
- *enlarged = 1;
+ prf->enlarged = 1;
if ((*p & 0xf4) == 0x44)
- *rexr = 1;
+ prf->rexr = 1;
+ if ((*p & 0xf0) == 0x40)
+ prf->rex = 1;
#endif
p++;
goto restart;
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
{
unsigned int opcode;
unsigned char *p;
- int shorted, enlarged, rexr;
+ struct prefix_bits prf;
int i;
enum reason_type rv = OTHERS;
p = (unsigned char *)ins_addr;
- p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += skip_prefix(p, &prf);
p += get_opcode(p, &opcode);
CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
{
unsigned int opcode;
unsigned char *p;
- int i, shorted, enlarged, rexr;
+ struct prefix_bits prf;
+ int i;
p = (unsigned char *)ins_addr;
- p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += skip_prefix(p, &prf);
p += get_opcode(p, &opcode);
for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
for (i = 0; i < ARRAY_SIZE(rw32); i++)
if (rw32[i] == opcode)
- return (shorted ? 2 : (enlarged ? 8 : 4));
+ return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
{
unsigned int opcode;
unsigned char *p;
- int i, shorted, enlarged, rexr;
+ struct prefix_bits prf;
+ int i;
p = (unsigned char *)ins_addr;
- p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += skip_prefix(p, &prf);
p += get_opcode(p, &opcode);
for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
for (i = 0; i < ARRAY_SIZE(mw32); i++)
if (mw32[i] == opcode)
- return shorted ? 2 : 4;
+ return prf.shorted ? 2 : 4;
for (i = 0; i < ARRAY_SIZE(mw64); i++)
if (mw64[i] == opcode)
- return shorted ? 2 : (enlarged ? 8 : 4);
+ return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
return 0;
@@ -238,7 +249,7 @@ enum {
#endif
};
-static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
{
unsigned char *rv = NULL;
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
case arg_DL:
rv = (unsigned char *)&regs->dx;
break;
- case arg_AH:
- rv = 1 + (unsigned char *)&regs->ax;
- break;
- case arg_BH:
- rv = 1 + (unsigned char *)&regs->bx;
- break;
- case arg_CH:
- rv = 1 + (unsigned char *)&regs->cx;
- break;
- case arg_DH:
- rv = 1 + (unsigned char *)&regs->dx;
- break;
#ifdef __amd64__
case arg_R8:
rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
break;
#endif
default:
- printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
break;
}
+
+ if (rv)
+ return rv;
+
+ if (rex) {
+ /*
+ * If REX prefix exists, access low bytes of SI etc.
+ * instead of AH etc.
+ */
+ switch (no) {
+ case arg_SI:
+ rv = (unsigned char *)&regs->si;
+ break;
+ case arg_DI:
+ rv = (unsigned char *)&regs->di;
+ break;
+ case arg_BP:
+ rv = (unsigned char *)&regs->bp;
+ break;
+ case arg_SP:
+ rv = (unsigned char *)&regs->sp;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (no) {
+ case arg_AH:
+ rv = 1 + (unsigned char *)&regs->ax;
+ break;
+ case arg_BH:
+ rv = 1 + (unsigned char *)&regs->bx;
+ break;
+ case arg_CH:
+ rv = 1 + (unsigned char *)&regs->cx;
+ break;
+ case arg_DH:
+ rv = 1 + (unsigned char *)&regs->dx;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!rv)
+ printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+
return rv;
}
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
unsigned char mod_rm;
int reg;
unsigned char *p;
- int i, shorted, enlarged, rexr;
+ struct prefix_bits prf;
+ int i;
unsigned long rv;
p = (unsigned char *)ins_addr;
- p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += skip_prefix(p, &prf);
p += get_opcode(p, &opcode);
for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
do_work:
mod_rm = *p;
- reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+ reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
switch (get_ins_reg_width(ins_addr)) {
case 1:
- return *get_reg_w8(reg, regs);
+ return *get_reg_w8(reg, prf.rex, regs);
case 2:
return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
unsigned char mod_rm;
unsigned char mod;
unsigned char *p;
- int i, shorted, enlarged, rexr;
+ struct prefix_bits prf;
+ int i;
unsigned long rv;
p = (unsigned char *)ins_addr;
- p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += skip_prefix(p, &prf);
p += get_opcode(p, &opcode);
for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
if (imm_wop[i] == opcode) {
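A hedged worked example of the new prefix decoding (not part of the patch; the byte stream is made up, and skip_prefix() is static to pf_in.c, so this is purely illustrative): for the x86-64 instruction 49 89 07, i.e. mov %rax,(%r15):

	unsigned char insn[] = { 0x49, 0x89, 0x07 };
	struct prefix_bits prf;
	int skipped = skip_prefix(insn, &prf);
	/*
	 * skipped      == 1   (one REX prefix byte consumed)
	 * prf.rex      == 1   (0x49 & 0xf0 == 0x40)
	 * prf.enlarged == 1   (0x49 & 0xf8 == 0x48, REX.W: 64-bit operand)
	 * prf.rexr     == 0   (0x49 & 0xf4 != 0x44, no ModRM reg extension)
	 * prf.shorted  == 0   (no 0x66 operand-size prefix)
	 */

With prf.rex set, get_reg_w8() resolves register numbers 4-7 to the low bytes of %sp/%bp/%si/%di instead of %ah/%ch/%dh/%bh, which is the behaviour the rewritten switch above provides.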
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index d877c5b423e..ab50a8d7402 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -3,6 +3,7 @@
*/
#include <linux/module.h>
#include <linux/io.h>
+#include <linux/mmiotrace.h>
#define MODULE_NAME "testmmiotrace"
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
static void do_write_test(void __iomem *p)
{
unsigned int i;
+ mmiotrace_printk("Write test.\n");
for (i = 0; i < 256; i++)
iowrite8(i, p + i);
for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
static void do_read_test(void __iomem *p)
{
unsigned int i;
+ mmiotrace_printk("Read test.\n");
for (i = 0; i < 256; i++)
ioread8(p + i);
for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
return;
}
+ mmiotrace_printk("ioremap returned %p.\n", p);
do_write_test(p);
do_read_test(p);
iounmap(p);