about | summary | refs | log | tree | commit | diff
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- arch/x86_64/kernel/aperture.c 2
-rw-r--r-- arch/x86_64/kernel/e820.c 36
-rw-r--r-- arch/x86_64/kernel/entry.S 28
-rw-r--r-- arch/x86_64/kernel/mce.c 8
-rw-r--r-- arch/x86_64/kernel/nmi.c 7
-rw-r--r-- arch/x86_64/kernel/pci-dma.c 2
-rw-r--r-- arch/x86_64/kernel/process.c 10
-rw-r--r-- arch/x86_64/kernel/ptrace.c 5
-rw-r--r-- arch/x86_64/kernel/setup.c 4
-rw-r--r-- arch/x86_64/kernel/time.c 4
-rw-r--r-- arch/x86_64/kernel/vmlinux.lds.S 2
-rw-r--r-- arch/x86_64/kernel/x8664_ksyms.c 3
12 files changed, 77 insertions, 34 deletions
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index fffd6b0a2fa..70b9d21ed67 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
return 0;
}
- if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
+ if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
return 0;
}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 293cd71a266..62776c07cff 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -80,7 +80,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
return 0;
}
-int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int __meminit
+e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
@@ -94,6 +99,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
return 0;
}
+/*
+ * Return 1 if the entire range <start,end> is mapped with type, else 0.
+ * A type of 0 matches entries of any type.
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case.
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* skip entries that do not overlap <start,end> at all */
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+
+ /* if the entry covers the current start of <start,end>, advance
+ * start to the end of the entry: everything before it is covered
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, the whole range is covered */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
/*
* Find a free area in a specific range.
*/
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 8538bfea30e..c946e4fe67a 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -180,6 +180,10 @@ rff_trace:
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we haven't.
+ *
+ * When the user can change the frames, always force IRET. That is because
+ * it deals with non-canonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
*/
ENTRY(system_call)
@@ -254,7 +258,10 @@ sysret_signal:
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
- jmp sysret_check
+ /* Use IRET because user could have changed frame. This
+ works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+ cli
+ jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -280,7 +287,8 @@ tracesys:
call syscall_trace_leave
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
- jmp ret_from_sys_call
+ /* Use IRET because user could have changed frame */
+ jmp int_ret_from_sys_call
CFI_ENDPROC
/*
@@ -408,25 +416,9 @@ ENTRY(stub_execve)
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
- GET_THREAD_INFO(%rcx)
- bt $TIF_IA32,threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
- jc exec_32bit
RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
-
-exec_32bit:
- CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 10b3e348fc9..6f0790e8b6d 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -29,6 +29,8 @@
#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6
+atomic_t mce_entry;
+
static int mce_dont_init;
/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
int i;
int panicm_found = 0;
+ atomic_inc(&mce_entry);
+
if (regs)
notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
if (!banks)
- return;
+ goto out2;
memset(&m, 0, sizeof(struct mce));
m.cpu = safe_smp_processor_id();
@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
out:
/* Last thing done in the machine check exception to clear state. */
wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ out2:
+ atomic_dec(&mce_entry);
}
/*
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index d9e4067faf0..4e6357fe0ec 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -34,6 +34,7 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/local.h>
+#include <asm/mce.h>
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
__get_cpu_var(nmi_touch) = 0;
touched = 1;
}
+#ifdef CONFIG_X86_MCE
+ /* Could check oops_in_progress here too, but it's safer
+ not to */
+ if (atomic_read(&mce_entry) > 0)
+ touched = 1;
+#endif
if (!touched && __get_cpu_var(last_irq_sum) == sum) {
/*
* Ayiee, looks like this CPU is stuck ...
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 03c9eeedb0f..af035ede70c 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -48,9 +48,11 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
{
struct page *page;
int node;
+#ifdef CONFIG_PCI
if (dev->bus == &pci_bus_type)
node = pcibus_to_node(to_pci_dev(dev)->bus);
else
+#endif
node = numa_node_id();
page = alloc_pages_node(node, gfp, order);
return page ? page_address(page) : NULL;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 70dd8e5c688..1c44b53cb15 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -781,10 +781,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
}
case ARCH_GET_GS: {
unsigned long base;
+ unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
- else if (doit)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else if (doit) {
+ asm("movl %%gs,%0" : "=r" (gsindex));
+ if (gsindex)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gs;
+ }
else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index d44b2c1e63a..da8e7903d81 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -274,11 +274,6 @@ static int putreg(struct task_struct *child,
return -EIO;
value &= 0xffff;
break;
- case offsetof(struct user_regs_struct, rip):
- /* Check if the new RIP address is canonical */
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- break;
}
put_stack_long(child, regno - sizeof(struct pt_regs), value);
return 0;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0856ad444f9..c50b06765a8 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -353,8 +353,10 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (fullarg(from, "enable_timer_pin_1"))
disable_timer_pin_1 = -1;
- if (fullarg(from, "nolapic") || fullarg(from, "disableapic"))
+ if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
disable_apic = 1;
+ }
if (fullarg(from, "noapic"))
skip_ioapic_setup = 1;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index ef8bc46dc14..7392570f975 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -726,7 +726,7 @@ static __init int late_hpet_init(void)
unsigned int ntimer;
if (!vxtime.hpet_address)
- return -1;
+ return 0;
memset(&hd, 0, sizeof (hd));
@@ -917,6 +917,8 @@ void __init time_init(void)
vxtime.hpet_address = 0;
if (hpet_use_timer) {
+ /* set tick_nsec to use the proper rate for HPET */
+ tick_nsec = TICK_NSEC_HPET;
cpu_khz = hpet_calibrate_tsc();
timename = "HPET";
#ifdef CONFIG_X86_PM_TIMER
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 39ff0708f80..b81f473c4a1 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -65,7 +65,7 @@ SECTIONS
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
*(.data.cacheline_aligned)
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
*(.data.read_mostly)
}
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index d78f46056bd..1def21c9f7c 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -112,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
#undef memcpy
#undef memset
#undef memmove
-#undef strlen
extern void * memset(void *,int,__kernel_size_t);
extern size_t strlen(const char *);
@@ -121,8 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);