aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c2
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/irqflags.h8
-rw-r--r--arch/x86/include/asm/pgtable.h12
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h9
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/apic/ipi.c3
-rw-r--r--arch/x86/kernel/apic/probe_64.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c42
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c23
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c4
-rw-r--r--arch/x86/kernel/efi_64.c6
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/pci-dma.c6
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/reboot.c42
-rw-r--r--arch/x86/kernel/setup_percpu.c14
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c1
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S142
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--arch/x86/lib/msr.c26
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c14
-rw-r--r--arch/x86/mm/pageattr.c39
-rw-r--r--arch/x86/mm/pat.c3
-rw-r--r--arch/x86/mm/pgtable.c1
-rw-r--r--arch/x86/mm/tlb.c21
-rw-r--r--arch/x86/xen/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c24
48 files changed, 485 insertions, 285 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8..13ffa5df37d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_PERF_COUNTERS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
- select HAVE_PERF_COUNTERS if (!M386 && !M486)
config X86_IO_APIC
def_bool y
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e2ff504b4dd..f8ed0658404 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index c580c5ec1ca..d3ec8d588d4 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -636,7 +636,7 @@ static int __init aesni_init(void)
int err;
if (!cpu_has_aes) {
- printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
+ printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
return -ENODEV;
}
if ((err = crypto_register_alg(&aesni_alg)))
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index edc90f23e70..8406ed7f992 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
efi_call_virt(f, a1, a2, a3, a4, a5, a6)
-#define efi_ioremap(addr, size) ioremap_cache(addr, size)
+#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
#else /* !CONFIG_X86_32 */
@@ -84,7 +84,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
+extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
+ u32 type);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 2bdab21f089..c6ccbe7e81a 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,9 +12,15 @@ static inline unsigned long native_save_fl(void)
{
unsigned long flags;
+ /*
+ * Note: this needs to be "=r" not "=rm", because we have the
+ * stack offset from what gcc expects at the time the "pop" is
+ * executed, and so a memory reference with respect to the stack
+ * would end up using the wrong address.
+ */
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
- : "=g" (flags)
+ : "=r" (flags)
: /* no input */
: "memory");
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3cc06e3fceb..16748077559 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PGTABLE_H
#include <asm/page.h>
+#include <asm/e820.h>
#include <asm/pgtable_types.h>
@@ -269,10 +270,17 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
-static inline int is_new_memtype_allowed(unsigned long flags,
- unsigned long new_flags)
+static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
+ unsigned long flags,
+ unsigned long new_flags)
{
/*
+ * PAT type is always WB for ISA. So no need to check.
+ */
+ if (is_ISA_range(paddr, paddr + size - 1))
+ return 1;
+
+ /*
* Certain new memtypes are not allowed with certain
* requested memtype:
* - request is uncached, return cannot be write-back
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index bddd44f2f0a..80e2984f521 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,7 +133,7 @@ struct bau_msg_payload {
* see table 4.2.3.0.1 in broacast_assist spec.
*/
struct bau_msg_header {
- unsigned int dest_subnodeid:6; /* must be zero */
+ unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
/* bits 20:6 */ /* first bit in node_map */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 341070f7ad5..77a68505419 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -175,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
#define UV_GLOBAL_MMR64_PNODE_BITS(p) \
- ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+ (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
#define UV_APIC_PNODE_SHIFT 6
@@ -327,6 +327,7 @@ struct uv_blade_info {
unsigned short nr_possible_cpus;
unsigned short nr_online_cpus;
unsigned short pnode;
+ short memory_nid;
};
extern struct uv_blade_info *uv_blade_info;
extern short *uv_node_to_blade;
@@ -363,6 +364,12 @@ static inline int uv_blade_to_pnode(int bid)
return uv_blade_info[bid].pnode;
}
+/* Nid of memory node on blade. -1 if no blade-local memory */
+static inline int uv_blade_to_memory_nid(int bid)
+{
+ return uv_blade_info[bid].memory_nid;
+}
+
/* Determine the number of possible cpus on a blade */
static inline int uv_blade_nr_possible_cpus(int bid)
{
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 676debfc170..128111d8ffe 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,6 +20,7 @@
#include <linux/bitops.h>
#include <linux/ioport.h>
#include <linux/suspend.h>
+#include <linux/kmemleak.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/iommu.h>
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void)
* code for safe
*/
p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ /*
+ * Kmemleak should not scan this block as it may not be mapped via the
+ * kernel direct mapping.
+ */
+ kmemleak_ignore(p);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%p,%uK)\n",
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2284a4812b6..d2ed6c5ddc8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
return irq;
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dbf5445727a..6ef00ba4c88 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
+ if (WARN_ONCE(!mask, "empty IPI mask"))
+ return;
+
local_irq_save(flags);
WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index bc3e880f9b8..fcec2f1d34a 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = {
NULL,
};
+static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void)
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
+ if (is_vsmp_box()) {
+ /* need to update phys_pkg_id */
+ apic->phys_pkg_id = apicid_phys_pkg_id;
+ }
+
/*
* Now that apic routing model is selected, configure the
* fault handling for intr remapping.
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8e4cbb255c3..a5371ec3677 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled();
}
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
+/*
+ * need to use more than cpu 0, because we need more vectors when
+ * MSI-X are used.
+ */
static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}
/*
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id)
static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
{
- return current_cpu_data.initial_apicid >> index_msb;
+ return initial_apicid >> index_msb;
}
static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a284359627e..a8989aadc99 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
+/*
+ * need to use more than cpu 0, because we need more vectors when
+ * MSI-X are used.
+ */
static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}
static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id)
static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
{
- return current_cpu_data.initial_apicid >> index_msb;
+ return initial_apicid >> index_msb;
}
static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 096d19aea2f..601159374e8 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -46,7 +46,7 @@ static int early_get_nodeid(void)
return node_id.s.node_id;
}
-static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
if (!strcmp(oem_table_id, "UVL"))
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector)
apic_write(APIC_SELF_IPI, vector);
}
-struct apic apic_x2apic_uv_x = {
+struct apic __refdata apic_x2apic_uv_x = {
.name = "UV large system",
.probe = NULL,
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = {
.apic_id_registered = uv_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 1, /* logical */
+ .irq_dest_mode = 0, /* physical */
.target_cpus = uv_target_cpus,
.disable_esr = 0,
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
BUG();
}
-static __init void map_low_mmrs(void)
-{
- init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
- init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
-}
-
enum map_type {map_wb, map_uc};
static __init void map_high(char *id, unsigned long base, int shift,
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode)
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
}
-static __init void map_config_high(int max_pnode)
-{
- union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
- int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
- cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
- if (cfg.s.enable)
- map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
-}
-
-static __init void map_mmr_high(int max_pnode)
-{
- union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
- int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
-
- mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
- if (mmr.s.enable)
- map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
-}
-
static __init void map_mmioh_high(int max_pnode)
{
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -566,8 +540,6 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
- map_low_mmrs();
-
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
@@ -591,6 +563,8 @@ void __init uv_system_init(void)
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
BUG_ON(!uv_blade_info);
+ for (blade = 0; blade < uv_num_possible_blades(); blade++)
+ uv_blade_info[blade].memory_nid = -1;
get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
@@ -629,6 +603,9 @@ void __init uv_system_init(void)
lcpu = uv_blade_info[blade].nr_possible_cpus;
uv_blade_info[blade].nr_possible_cpus++;
+ /* Any node on the blade, else will contain -1. */
+ uv_blade_info[blade].memory_nid = nid;
+
uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
uv_cpu_hub_info(cpu)->m_val = m_val;
@@ -662,11 +639,10 @@ void __init uv_system_init(void)
pnode = (paddr >> m_val) & pnode_mask;
blade = boot_pnode_to_blade(pnode);
uv_node_to_blade[nid] = blade;
+ max_pnode = max(pnode, max_pnode);
}
map_gru_high(max_pnode);
- map_mmr_high(max_pnode);
- map_config_high(max_pnode);
map_mmioh_high(max_pnode);
uv_cpu_init();
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 79302e9a33a..442b5508893 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -811,7 +811,7 @@ static int apm_do_idle(void)
u8 ret = 0;
int idled = 0;
int polling;
- int err;
+ int err = 0;
polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3efcb2b96a1..c1f253dac15 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
endif
+# Make sure load_percpu_segment has no stackprotector
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_common.o := $(nostackp)
+
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1c..63fddcd082c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ /*
+ * Some BIOSes incorrectly force this feature, but only K8
+ * revision D (model = 0x14) and later actually support it.
+ */
+ if (c->x86_model < 0x14)
+ clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
}
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9..5ce60a88027 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ display_cacheinfo(c);
+#else
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+ if (c->cpuid_level == -1) {
+ /* No cpuid. It must be an ancient CPU */
+ if (c->x86 == 4)
+ strcpy(c->x86_model_id, "486");
+ else if (c->x86 == 3)
+ strcpy(c->x86_model_id, "386");
+ }
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
- display_cacheinfo(c);
-#else
- /* Not much we can do here... */
- /* Check if at least it has cpuid */
- if (c->cpuid_level == -1) {
- /* No cpuid. It must be an ancient CPU */
- if (c->x86 == 4)
- strcpy(c->x86_model_id, "486");
- else if (c->x86 == 3)
- strcpy(c->x86_model_id, "386");
- }
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
- .c_init = default_init,
- .c_vendor = "Unknown",
- .c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1cfb623ce11..01213048f62 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1226,8 +1226,13 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
-static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int mce_cpu_quirks(struct cpuinfo_x86 *c)
{
+ if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
+ pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
+ return -EOPNOTSUPP;
+ }
+
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
if (c->x86 == 15 && banks > 4) {
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
monarch_timeout < 0)
monarch_timeout = USEC_PER_SEC;
+
+ /*
+ * There are also broken BIOSes on some Pentium M and
+ * earlier systems:
+ */
+ if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
+ mce_bootlog = 0;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
if (mce_bootlog != 0)
mce_panic_timeout = 30;
+
+ return 0;
}
static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;
- if (mce_cap_init() < 0) {
+ if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
mce_disabled = 1;
return;
}
- mce_cpu_quirks(c);
machine_check_vector = do_machine_check;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd..5957a93e517 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+static DEFINE_PER_CPU(bool, thermal_throttle_active);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -96,27 +97,33 @@ static int therm_throt_process(int curr)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
+ bool was_throttled = __get_cpu_var(thermal_throttle_active);
+ bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
- if (curr)
+ if (is_throttled)
__get_cpu_var(thermal_throttle_count)++;
- if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+ if (!(was_throttled ^ is_throttled) &&
+ time_before64(tmp_jiffs, __get_cpu_var(next_check)))
return 0;
__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
/* if we just entered the thermal event */
- if (curr) {
+ if (is_throttled) {
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
- "cpu clock throttled (total events = %lu)\n", cpu,
- __get_cpu_var(thermal_throttle_count));
+ "cpu clock throttled (total events = %lu)\n",
+ cpu, __get_cpu_var(thermal_throttle_count));
add_taint(TAINT_MACHINE_CHECK);
- } else {
- printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+ return 1;
+ }
+ if (was_throttled) {
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ return 1;
}
- return 1;
+ return 0;
}
#ifdef CONFIG_SYSFS
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f90095..900332b800f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
+ int apic;
u64 max_period;
u64 intel_ctrl;
};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
static bool reserve_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
+#endif
return true;
+#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
enable_lapic_nmi_watchdog();
return false;
+#endif
}
static void release_pmc_hardware(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
+#endif
}
static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
+ } else {
+ /*
+ * If we have a PMU initialized but no APIC
+ * interrupts, we cannot sample hardware
+ * counters (user-space has to fall back and
+ * sample via a hrtimer based software counter):
+ */
+ if (!x86_pmu.apic)
+ return -EOPNOTSUPP;
}
counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
void set_perf_counter_pending(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
}
void perf_counters_lapic_init(void)
{
- if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!x86_pmu.apic || !x86_pmu_initialized())
return;
/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
}
static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
regs = args->regs;
+#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
+ .apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
+ .apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
+ .apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
return -ENODEV;
}
+ x86_pmu = p6_pmu;
+
if (!cpu_has_apic) {
- pr_info("no Local APIC, try rebooting with lapic");
- return -ENODEV;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+ x86_pmu.apic = 0;
}
- x86_pmu = p6_pmu;
-
return 0;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 96f7ac0bbf0..fe26ba3e345 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
*/
c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
if (c16) {
- for (i = 0; i < sizeof(vendor) && *c16; ++i)
+ for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
vendor[i] = *c16++;
vendor[i] = '\0';
} else
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void)
&& end_pfn <= max_pfn_mapped))
va = __va(md->phys_addr);
else
- va = efi_ioremap(md->phys_addr, size);
+ va = efi_ioremap(md->phys_addr, size, md->type);
md->virt_addr = (u64) (unsigned long) va;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 22c3b7828c5..ac0621a7ac3 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void)
early_runtime_code_mapping_set_exec(0);
}
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
+ u32 type)
{
unsigned long last_map_pfn;
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return ioremap(phys_addr, size);
+
last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
return NULL;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8663afb5653..cc827ac9e8d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
* which will be freed later
*/
-#ifndef CONFIG_HOTPLUG_CPU
-.section .init.text,"ax",@progbits
-#endif
+__CPUINIT
#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
@@ -602,7 +600,7 @@ ignore_int:
#endif
iret
-.section .cpuinit.data,"wa"
+ __REFDATA
.align 4
ENTRY(initial_code)
.long i386_start_kernel
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1a041bcf506..fa80f60e960 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -3,6 +3,7 @@
#include <linux/dmar.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
+#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
@@ -88,6 +89,11 @@ void __init dma32_reserve_bootmem(void)
size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
+ /*
+ * Kmemleak should not scan this block as it may not be mapped via the
+ * kernel direct mapping.
+ */
+ kmemleak_ignore(dma32_bootmem_ptr);
if (dma32_bootmem_ptr)
dma32_bootmem_size = size;
else
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 994dd6a4a2a..071166a4ba8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -519,16 +519,12 @@ static void c1e_idle(void)
if (!cpumask_test_cpu(cpu, c1e_mask)) {
cpumask_set_cpu(cpu, c1e_mask);
/*
- * Force broadcast so ACPI can not interfere. Needs
- * to run with interrupts enabled as it uses
- * smp_function_call.
+ * Force broadcast so ACPI can not interfere.
*/
- local_irq_enable();
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
&cpu);
printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
cpu);
- local_irq_disable();
}
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 508e982dd07..a06e8d10184 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/efi.h>
+#include <linux/dmi.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -17,7 +18,6 @@
#include <asm/cpu.h>
#ifdef CONFIG_X86_32
-# include <linux/dmi.h>
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
#else
@@ -404,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart);
#endif /* CONFIG_X86_32 */
+/*
+ * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
+ */
+static int __init set_pci_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_CF9) {
+ reboot_type = BOOT_CF9;
+ printk(KERN_INFO "%s series board detected. "
+ "Selecting PCI-method for reboots.\n", d->ident);
+ }
+ return 0;
+}
+
+static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
+ { /* Handle problems with rebooting on Apple MacBook5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBook5",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
+ },
+ },
+ { /* Handle problems with rebooting on Apple MacBookPro5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBookPro5",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
+ },
+ },
+ { }
+};
+
+static int __init pci_reboot_init(void)
+{
+ dmi_check_system(pci_reboot_dmi_table);
+ return 0;
+}
+core_initcall(pci_reboot_init);
+
static inline void kb_wait(void)
{
int i;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 29a3eef7cf4..07d81916f21 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
if (!chosen) {
size_t vm_size = VMALLOC_END - VMALLOC_START;
- size_t tot_size = num_possible_cpus() * PMD_SIZE;
+ size_t tot_size = nr_cpu_ids * PMD_SIZE;
/* on non-NUMA, embedding is better */
if (!pcpu_need_numa())
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
/* allocate pointer array and alloc large pages */
- map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+ map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
pcpul_map = alloc_bootmem(map_size);
for_each_possible_cpu(cpu) {
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
/* allocate address and map */
pcpul_vm.flags = VM_ALLOC;
- pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
+ pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
vm_area_register_early(&pcpul_vm, PMD_SIZE);
for_each_possible_cpu(cpu) {
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
PMD_SIZE, pcpul_vm.addr, NULL);
/* sort pcpul_map array for pcpu_lpage_remapped() */
- for (i = 0; i < num_possible_cpus() - 1; i++)
- for (j = i + 1; j < num_possible_cpus(); j++)
+ for (i = 0; i < nr_cpu_ids - 1; i++)
+ for (j = i + 1; j < nr_cpu_ids; j++)
if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
struct pcpul_ent tmp = pcpul_map[i];
pcpul_map[i] = pcpul_map[j];
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr)
{
void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
- int left = 0, right = num_possible_cpus() - 1;
+ int left = 0, right = nr_cpu_ids - 1;
int pos;
/* pcpul in use at all? */
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
pcpu4k_nr_static_pages = PFN_UP(static_size);
/* unaligned allocations can't be freed, round up to page size */
- pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
+ pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
* sizeof(pcpu4k_pages[0]));
pcpu4k_pages = alloc_bootmem(pages_size);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c578751e94..81e58238c4c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 8ccabb8a2f6..77b9689f8ed 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode)
* note that base_dest_nodeid is actually a nasid.
*/
ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ ad2->header.dest_subnodeid = 0x10; /* the LB */
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d..71f4368b357 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* use the TSC value at the transitions to calculate a pretty
* good value for the TSC frequencty.
*/
+static inline int pit_verify_msb(unsigned char val)
+{
+ /* Ignore LSB */
+ inb(0x42);
+ return inb(0x42) == val;
+}
+
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
int count;
u64 tsc = 0;
for (count = 0; count < 50000; count++) {
- /* Ignore LSB */
- inb(0x42);
- if (inb(0x42) != val)
+ if (!pit_verify_msb(val))
break;
tsc = get_cycles();
}
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
* to do that is to just read back the 16-bit counter
* once from the PIT.
*/
- inb(0x42);
- inb(0x42);
+ pit_verify_msb(0);
if (pit_expect_msb(0xff, &tsc, &d1)) {
for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
* Iterate until the error is less than 500 ppm
*/
delta -= tsc;
- if (d1+d2 < delta >> 11)
- goto success;
+ if (d1+d2 >= delta >> 11)
+ continue;
+
+ /*
+ * Check the PIT one more time to verify that
+ * all TSC reads were stable wrt the PIT.
+ *
+ * This also guarantees serialization of the
+ * last cycle read ('d2') in pit_expect_msb.
+ */
+ if (!pit_verify_msb(0xfe - i))
+ break;
+ goto success;
}
}
printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2..95a7289e4b0 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
ap.ds = __USER_DS;
ap.es = __USER_DS;
ap.fs = __KERNEL_PERCPU;
- ap.gs = 0;
+ ap.gs = __KERNEL_STACK_CANARY;
ap.eflags = 0;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 59f31d2dd43..9fc178255c0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -46,11 +46,10 @@ PHDRS {
data PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_X86_64
user PT_LOAD FLAGS(7); /* RWE */
- data.init PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(7); /* RWE */
#endif
- data.init2 PT_LOAD FLAGS(7); /* RWE */
+ init PT_LOAD FLAGS(7); /* RWE */
#endif
note PT_NOTE FLAGS(0); /* ___ */
}
@@ -103,65 +102,43 @@ SECTIONS
__stop___ex_table = .;
} :text = 0x9090
- RODATA
+ RO_DATA(PAGE_SIZE)
/* Data */
- . = ALIGN(PAGE_SIZE);
.data : AT(ADDR(.data) - LOAD_OFFSET) {
/* Start of data section */
_sdata = .;
- DATA_DATA
- CONSTRUCTORS
- } :data
+
+ /* init_task */
+ INIT_TASK_DATA(THREAD_SIZE)
#ifdef CONFIG_X86_32
- /* 32 bit has nosave before _edata */
- . = ALIGN(PAGE_SIZE);
- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
- __nosave_begin = .;
- *(.data.nosave)
- . = ALIGN(PAGE_SIZE);
- __nosave_end = .;
- }
+ /* 32 bit has nosave before _edata */
+ NOSAVE_DATA
#endif
- . = ALIGN(PAGE_SIZE);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
- *(.data.page_aligned)
+ PAGE_ALIGNED_DATA(PAGE_SIZE)
*(.data.idt)
- }
-#ifdef CONFIG_X86_32
- . = ALIGN(32);
-#else
- . = ALIGN(PAGE_SIZE);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-#endif
- .data.cacheline_aligned :
- AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
- *(.data.cacheline_aligned)
- }
+ CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
- /* rarely changed data like cpu maps */
-#ifdef CONFIG_X86_32
- . = ALIGN(32);
-#else
- . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
-#endif
- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
- *(.data.read_mostly)
+ DATA_DATA
+ CONSTRUCTORS
+
+ /* rarely changed data like cpu maps */
+ READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
/* End of data section */
_edata = .;
- }
+ } :data
#ifdef CONFIG_X86_64
#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
- SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
- SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
+ PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
+ PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
@@ -227,35 +204,29 @@ SECTIONS
#endif /* CONFIG_X86_64 */
- /* init_task */
- . = ALIGN(THREAD_SIZE);
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
+ /* Init code and data - will be freed after init */
+ . = ALIGN(PAGE_SIZE);
+ .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
+ __init_begin = .; /* paired with __init_end */
}
-#ifdef CONFIG_X86_64
- :data.init
-#endif
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
- * smp_locks might be freed after init
- * start/end must be page aligned
+ * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
+ * output PHDR, so the next output section - .init.text - should
+ * start another segment - init.
*/
- . = ALIGN(PAGE_SIZE);
- .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
- __smp_locks = .;
- *(.smp_locks)
- __smp_locks_end = .;
- . = ALIGN(PAGE_SIZE);
- }
+ PERCPU_VADDR(0, :percpu)
+#endif
- /* Init code and data - will be freed after init */
- . = ALIGN(PAGE_SIZE);
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
- __init_begin = .; /* paired with __init_end */
_sinittext = .;
INIT_TEXT
_einittext = .;
}
+#ifdef CONFIG_X86_64
+ :init
+#endif
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
@@ -326,17 +297,7 @@ SECTIONS
}
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
- /*
- * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
- * output PHDR, so the next output section - __data_nosave - should
- * start another section data.init2. Also, pda should be at the head of
- * percpu area. Preallocate it and define the percpu offset symbol
- * so that it can be accessed as a percpu variable.
- */
- . = ALIGN(PAGE_SIZE);
- PERCPU_VADDR(0, :percpu)
-#else
+#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU(PAGE_SIZE)
#endif
@@ -347,15 +308,22 @@ SECTIONS
__init_end = .;
}
+ /*
+ * smp_locks might be freed after init
+ * start/end must be page aligned
+ */
+ . = ALIGN(PAGE_SIZE);
+ .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+ __smp_locks = .;
+ *(.smp_locks)
+ __smp_locks_end = .;
+ . = ALIGN(PAGE_SIZE);
+ }
+
#ifdef CONFIG_X86_64
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
- . = ALIGN(PAGE_SIZE);
- __nosave_begin = .;
- *(.data.nosave)
- . = ALIGN(PAGE_SIZE);
- __nosave_end = .;
- } :data.init2
- /* use another section data.init2, see PERCPU_VADDR() above */
+ NOSAVE_DATA
+ }
#endif
/* BSS */
@@ -393,8 +361,8 @@ SECTIONS
#ifdef CONFIG_X86_32
-ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
- "kernel image bigger than KERNEL_IMAGE_SIZE")
+. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE");
#else
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
@@ -407,12 +375,12 @@ INIT_PER_CPU(irq_stack_union);
/*
* Build-time check on the image size:
*/
-ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
- "kernel image bigger than KERNEL_IMAGE_SIZE")
+. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-ASSERT((per_cpu__irq_stack_union == 0),
- "irq_stack_union is not at start of per-cpu area");
+. = ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
@@ -420,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0),
#ifdef CONFIG_KEXEC
#include <asm/kexec.h>
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
- "kexec control code size is too big")
+. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big");
#endif
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee..21f68e00524 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+ if (!ps->pit_timer.period)
+ return 0;
+
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911b..0ef5bb2b404 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
*
* If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
* containing more mappings.
+ *
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
+ *
*/
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
{
struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
unsigned long *rmapp;
- int i;
+ int i, count = 0;
if (!is_rmap_pte(*spte))
- return;
+ return count;
gfn = unalias_gfn(vcpu->kvm, gfn);
sp = page_header(__pa(spte));
sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+ while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
desc = desc->more;
+ count += RMAP_EXT;
+ }
if (desc->shadow_ptes[RMAP_EXT-1]) {
desc->more = mmu_alloc_rmap_desc(vcpu);
desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
;
desc->shadow_ptes[i] = spte;
}
+ return count;
}
static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
return young;
}
+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+{
+ unsigned long *rmapp;
+
+ gfn = unalias_gfn(vcpu->kvm, gfn);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+ kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
{
+ int used_pages;
+
+ used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
+ used_pages = max(0, used_pages);
+
/*
* If we set the number of mmu pages to be smaller be than the
* number of actived pages , we must to free some mmu pages before we
* change the value
*/
- if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
- kvm_nr_mmu_pages) {
- int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
- - kvm->arch.n_free_mmu_pages;
-
- while (n_used_mmu_pages > kvm_nr_mmu_pages) {
+ if (used_pages > kvm_nr_mmu_pages) {
+ while (used_pages > kvm_nr_mmu_pages) {
struct kvm_mmu_page *page;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(kvm, page);
- n_used_mmu_pages--;
+ used_pages--;
}
kvm->arch.n_free_mmu_pages = 0;
}
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
+ int rmap_count;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
if (!was_rmapped) {
- rmap_add(vcpu, shadow_pte, gfn, largepage);
+ rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
if (!is_rmap_pte(*shadow_pte))
kvm_release_pfn_clean(pfn);
+ if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+ rmap_recycle(vcpu, gfn, largepage);
} else {
if (was_writeble)
kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69..b1f658ad2f0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svm->vmcb->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
+ svm->asid_generation = 0;
}
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
- svm->vcpu.cpu = svm_data->cpu;
svm->asid_generation = svm_data->asid_generation;
svm->vmcb->control.asid = svm_data->next_asid++;
}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
- if (svm->vcpu.cpu != cpu ||
- svm->asid_generation != svm_data->asid_generation)
+ /* FIXME: handle wraparound of asid_generation */
+ if (svm->asid_generation != svm_data->asid_generation)
new_asid(svm, svm_data);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c6..29f912927a5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
- preempt_enable();
local_irq_enable();
+ preempt_enable();
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, "emulation failure");
- return;
+ break;
}
if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
schedule();
}
- local_irq_disable();
preempt_disable();
+ local_irq_disable();
vmx->invalid_state_emulation_result = err;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41..3d452901182 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
}
+static bool valid_pat_type(unsigned t)
+{
+ return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+ return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ int i;
+
+ if (!msr_mtrr_valid(msr))
+ return false;
+
+ if (msr == MSR_IA32_CR_PAT) {
+ for (i = 0; i < 8; i++)
+ if (!valid_pat_type((data >> (i * 8)) & 0xff))
+ return false;
+ return true;
+ } else if (msr == MSR_MTRRdefType) {
+ if (data & ~0xcff)
+ return false;
+ return valid_mtrr_type(data & 0xff);
+ } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+ for (i = 0; i < 8 ; i++)
+ if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+ return false;
+ return true;
+ }
+
+ /* variable MTRRs */
+ return valid_mtrr_type(data & 0xff);
+}
+
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
- if (!msr_mtrr_valid(msr))
+ if (!mtrr_valid(vcpu, msr, data))
return 1;
if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
goto out;
r = -E2BIG;
- if (n < num_msrs_to_save)
+ if (n < msr_list.nmsrs)
goto out;
r = -EFAULT;
if (copy_to_user(user_msr_list->indices, &msrs_to_save,
num_msrs_to_save * sizeof(u32)))
goto out;
- if (copy_to_user(user_msr_list->indices
- + num_msrs_to_save * sizeof(u32),
+ if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
&emulated_msrs,
ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
goto out;
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 1440b9c0547..caa24aca811 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -89,16 +89,13 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
rv.msrs = msrs;
rv.msr_no = msr_no;
- preempt_disable();
- /*
- * FIXME: handle the CPU we're executing on separately for now until
- * smp_call_function_many has been fixed to not skip it.
- */
- this_cpu = raw_smp_processor_id();
- smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1);
+ this_cpu = get_cpu();
+
+ if (cpumask_test_cpu(this_cpu, mask))
+ __rdmsr_on_cpu(&rv);
smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1);
- preempt_enable();
+ put_cpu();
}
EXPORT_SYMBOL(rdmsr_on_cpus);
@@ -121,16 +118,13 @@ void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
rv.msrs = msrs;
rv.msr_no = msr_no;
- preempt_disable();
- /*
- * FIXME: handle the CPU we're executing on separately for now until
- * smp_call_function_many has been fixed to not skip it.
- */
- this_cpu = raw_smp_processor_id();
- smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1);
+ this_cpu = get_cpu();
+
+ if (cpumask_test_cpu(this_cpu, mask))
+ __wrmsr_on_cpu(&rv);
smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1);
- preempt_enable();
+ put_cpu();
}
EXPORT_SYMBOL(wrmsr_on_cpus);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6176fe8f29e..ea56b8cbb6a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -796,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
return ret;
#else
- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ reserve_bootmem(phys, len, flags);
#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 2c55ed09865..528bf954eb7 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs,
kmemcheck_shadow_set(shadow, size);
}
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+ enum kmemcheck_shadow status;
+ void *shadow;
+
+ shadow = kmemcheck_shadow_lookup(addr);
+ if (!shadow)
+ return true;
+
+ status = kmemcheck_shadow_test(shadow, size);
+
+ return status == KMEMCHECK_SHADOW_INITIALIZED;
+}
+
/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
unsigned long addr, unsigned int size)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b734d7a896..7e600c1962d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -591,9 +591,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
unsigned int level;
pte_t *kpte, old_pte;
- if (cpa->flags & CPA_PAGES_ARRAY)
- address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ address = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
@@ -697,9 +700,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the direct
* mapping already:
*/
- if (cpa->flags & CPA_PAGES_ARRAY)
- vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ vaddr = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
vaddr = cpa->vaddr[cpa->curpage];
else
vaddr = *cpa->vaddr;
@@ -997,12 +1003,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
+ unsigned long addr_copy = addr;
+
ret = change_page_attr_set(&addr, numpages,
__pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
if (!ret) {
- ret = change_page_attr_set(&addr, numpages,
- __pgprot(_PAGE_CACHE_WC), 0);
+ ret = change_page_attr_set_clr(&addr_copy, numpages,
+ __pgprot(_PAGE_CACHE_WC),
+ __pgprot(_PAGE_CACHE_MASK),
+ 0, 0, NULL);
}
return ret;
}
@@ -1119,7 +1128,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
int free_idx;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
goto err_out;
@@ -1132,7 +1143,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
err_out:
free_idx = i;
for (i = 0; i < free_idx; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
@@ -1161,7 +1174,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
return retval;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e6718bb2806..352aa9e927e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -623,7 +623,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return ret;
if (flags != want_flags) {
- if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+ if (strict_prot ||
+ !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
" for %Lx-%Lx, got %s\n",
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index af8f9650058..ed34f5e3599 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve)
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
(int)-reserve);
__FIXADDR_TOP = -reserve - PAGE_SIZE;
- __VMALLOC_RESERVE += reserve;
#endif
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 821e97017e9..c814e144a3f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -183,18 +183,17 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = mm;
f->flush_va = va;
- cpumask_andnot(to_cpumask(f->flush_cpumask),
- cpumask, cpumask_of(smp_processor_id()));
-
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
- INVALIDATE_TLB_VECTOR_START + sender);
+ if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
+ INVALIDATE_TLB_VECTOR_START + sender);
- while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
- cpu_relax();
+ while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
+ cpu_relax();
+ }
f->flush_mm = NULL;
f->flush_va = 0;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 172438f86a0..7410640db17 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -5,6 +5,10 @@ CFLAGS_REMOVE_time.o = -pg
CFLAGS_REMOVE_irq.o = -pg
endif
+# Make sure early boot has no stackprotector
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_enlighten.o := $(nostackp)
+
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0a1700a2be9..eb33aaa8415 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -215,6 +215,7 @@ static __init void xen_init_cpuid_mask(void)
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
+ cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
/* cpuid claims we support xsave; try enabling it to see what happens */
@@ -974,10 +975,6 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
- BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
-
- xen_setup_features();
-
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
@@ -986,8 +983,15 @@ asmlinkage void __init xen_start_kernel(void)
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
- xen_init_irq_ops();
+#ifdef CONFIG_X86_64
+ /*
+ * Setup percpu state. We only need to do this for 64-bit
+ * because 32-bit already has %fs set properly.
+ */
+ load_percpu_segment(0);
+#endif
+ xen_init_irq_ops();
xen_init_cpuid_mask();
#ifdef CONFIG_X86_LOCAL_APIC
@@ -997,6 +1001,8 @@ asmlinkage void __init xen_start_kernel(void)
set_xen_basic_apic_ops();
#endif
+ xen_setup_features();
+
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1004,13 +1010,6 @@ asmlinkage void __init xen_start_kernel(void)
machine_ops = xen_machine_ops;
-#ifdef CONFIG_X86_64
- /*
- * Setup percpu state. We only need to do this for 64-bit
- * because 32-bit already has %fs set properly.
- */
- load_percpu_segment(0);
-#endif
/*
* The only reliable way to retain the initial address of the
* percpu gdt_page is to remember it here, so we can go and
@@ -1061,6 +1060,7 @@ asmlinkage void __init xen_start_kernel(void)
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
new_cpu_data.hard_math = 1;
+ new_cpu_data.wp_works_ok = 1;
new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif