aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-09-24 09:56:20 +0200
committerIngo Molnar <mingo@elte.hu>2008-09-24 09:56:20 +0200
commitebdd90a8cb2e3963f55499850f02ce6003558b55 (patch)
treed153f917ed41d257ddafa22f9cc2201bfddf8f9c /arch/x86/kernel
parent3c9339049df5cc3a468c11de6c4101a1ea8c3d83 (diff)
parent72d31053f62c4bc464c2783974926969614a8649 (diff)
Merge commit 'v2.6.27-rc7' into x86/pebs
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/cstate.c3
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/alternative.c36
-rw-r--r--arch/x86/kernel/amd_iommu.c38
-rw-r--r--arch/x86/kernel/amd_iommu_init.c28
-rw-r--r--arch/x86/kernel/apic_32.c22
-rw-r--r--arch/x86/kernel/apic_64.c7
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c17
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/bugs.c6
-rw-r--r--arch/x86/kernel/cpu/centaur.c11
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/common_64.c74
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c10
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c12
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c3
-rw-r--r--arch/x86/kernel/cpu/cyrix.c50
-rw-r--r--arch/x86/kernel/cpu/feature_names.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c20
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c5
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c8
-rw-r--r--arch/x86/kernel/cpuid.c15
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/efi_32.c4
-rw-r--r--arch/x86/kernel/genapic_64.c1
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c10
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c43
-rw-r--r--arch/x86/kernel/io_apic_32.c6
-rw-r--r--arch/x86/kernel/io_apic_64.c25
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kprobes.c6
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/machine_kexec_32.c57
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/mfgpt_32.c52
-rw-r--r--arch/x86/kernel/microcode.c17
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c2
-rw-r--r--arch/x86/kernel/mpparse.c17
-rw-r--r--arch/x86/kernel/msr.c40
-rw-r--r--arch/x86/kernel/nmi.c28
-rw-r--r--arch/x86/kernel/numaq_32.c2
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c172
-rw-r--r--arch/x86/kernel/pci-dma.c157
-rw-r--r--arch/x86/kernel/pci-gart_64.c14
-rw-r--r--arch/x86/kernel/pci-nommu.c14
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/reboot.c11
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S178
-rw-r--r--arch/x86/kernel/setup.c42
-rw-r--r--arch/x86/kernel/setup_percpu.c21
-rw-r--r--arch/x86/kernel/signal_64.c11
-rw-r--r--arch/x86/kernel/smpboot.c68
-rw-r--r--arch/x86/kernel/smpcommon.c17
-rw-r--r--arch/x86/kernel/tlb_uv.c3
-rw-r--r--arch/x86/kernel/traps_64.c9
-rw-r--r--arch/x86/kernel/tsc.c242
-rw-r--r--arch/x86/kernel/tsc_sync.c6
-rw-r--r--arch/x86/kernel/visws_quirks.c6
-rw-r--r--arch/x86/kernel/vmi_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
73 files changed, 1131 insertions, 668 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d7129..bfd10fd211c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
@@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
struct acpi_mcfg_allocation *pci_mmcfg_config;
int pci_mmcfg_config_num;
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+ if (!strcmp(mcfg->header.oem_id, "SGI"))
+ acpi_mcfg_64bit_base_addr = TRUE;
+
+ return 0;
+}
+
int __init acpi_parse_mcfg(struct acpi_table_header *header)
{
struct acpi_table_mcfg *mcfg;
@@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header)
}
memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+ acpi_mcfg_oem_check(mcfg);
+
for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
+ if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+ !acpi_mcfg_64bit_base_addr) {
printk(KERN_ERR PREFIX
"MMCONFIG not in low 4GB of memory\n");
kfree(pci_mmcfg_config);
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 9220cf46aa1..c2502eb9aa8 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct cpuinfo_x86 *c = &cpu_data(cpu);
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(new_mask, cpu);
int retval;
unsigned int eax, ebx, ecx, edx;
unsigned int edx_part;
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
/* Make sure we are running on right CPU */
saved_mask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, new_mask);
+ retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (retval)
return -1;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fa2161d5003..426e5d91b63 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags;
/* address in low memory of the wakeup routine. */
static unsigned long acpi_realmode;
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
static char temp_stack[10240];
#endif
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void)
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
- header->pmode_cr4 = read_cr4();
+ header->pmode_cr4 = read_cr4_safe();
header->realmode_flags = acpi_realmode_flags;
header->real_magic = 0x12345678;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb37b55..65a0c1b4869 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
{
- return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return k8_nops;
}
#else /* CONFIG_X86_64 */
-static const struct nop {
- int cpuid;
- const unsigned char *const *noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { X86_FEATURE_P4, p6_nops },
- { X86_FEATURE_P3, p6_nops },
- { -1, NULL }
-};
-
const unsigned char *const *find_nop_table(void)
{
- const unsigned char *const *noptable = intel_nops;
- int i;
-
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- return noptable;
+ if (boot_cpu_has(X86_FEATURE_K8))
+ return k8_nops;
+ else if (boot_cpu_has(X86_FEATURE_K7))
+ return k7_nops;
+ else if (boot_cpu_has(X86_FEATURE_NOPL))
+ return p6_nops;
+ else
+ return intel_nops;
}
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac8..69b4d060b21 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EXIT_LOOP_COUNT 10000000
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -68,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
u8 *target;
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- target = (iommu->cmd_buf + tail);
+ target = iommu->cmd_buf + tail;
memcpy_toio(target, cmd, sizeof(*cmd));
tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
@@ -104,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret;
+ int ret, ready = 0;
+ unsigned status = 0;
struct iommu_cmd cmd;
- volatile u64 ready = 0;
- unsigned long ready_phys = virt_to_phys(&ready);
unsigned long i = 0;
memset(&cmd, 0, sizeof(cmd));
- cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- cmd.data[1] = upper_32_bits(ready_phys);
- cmd.data[2] = 1; /* value written to 'ready' */
+ cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
iommu->need_sync = 0;
@@ -125,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
- cpu_relax();
+ /* wait for the bit to become one */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
+ /* set bit back to zero */
+ status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+ writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
@@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
- cmd.data[2] = LOW_U32(address);
+ cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
@@ -185,7 +185,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
int s = 0;
- unsigned pages = to_pages(address, size);
+ unsigned pages = iommu_num_pages(address, size);
address &= PAGE_MASK;
@@ -557,8 +557,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = to_pages(iommu->exclusion_start,
- iommu->exclusion_length);
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
@@ -667,7 +667,7 @@ static int get_device_resources(struct device *dev,
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
/* device not translated by any IOMMU in the system? */
- if (_bdf >= amd_iommu_last_bdf) {
+ if (_bdf > amd_iommu_last_bdf) {
*iommu = NULL;
*domain = NULL;
*bdf = 0xffff;
@@ -767,7 +767,7 @@ static dma_addr_t __map_single(struct device *dev,
unsigned int pages;
int i;
- pages = to_pages(paddr, size);
+ pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +802,7 @@ static void __unmap_single(struct amd_iommu *iommu,
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
- pages = to_pages(dma_addr, size);
+ pages = iommu_num_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1085,7 +1085,7 @@ void prealloc_protection_domains(void)
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
- if (devid >= amd_iommu_last_bdf)
+ if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
if (domain_for_device(devid))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb13..a69cc0f5204 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
set_device_exclusion_range(m->devid, m);
break;
case ACPI_IVMD_TYPE_ALL:
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
set_device_exclusion_range(i, m);
break;
case ACPI_IVMD_TYPE_RANGE:
@@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
}
/*
+ * Init the device table to not allow DMA access for devices and
+ * suppress all page faults
+ */
+static void init_device_table(void)
+{
+ u16 devid;
+
+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+ set_dev_entry_bit(devid, DEV_ENTRY_VALID);
+ set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+ set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
+ }
+}
+
+/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
*/
@@ -931,10 +946,13 @@ int __init amd_iommu_init(void)
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
+ /* init the device table */
+ init_device_table();
+
/*
* let all alias entries point to itself
*/
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
amd_iommu_alias_table[i] = i;
/*
@@ -954,15 +972,15 @@ int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
- ret = amd_iommu_init_dma_ops();
+ ret = sysdev_class_register(&amd_iommu_sysdev_class);
if (ret)
goto free;
- ret = sysdev_class_register(&amd_iommu_sysdev_class);
+ ret = sysdev_register(&device_amd_iommu);
if (ret)
goto free;
- ret = sysdev_register(&device_amd_iommu);
+ ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c89835837..f88bd0d982b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1454,8 +1454,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
}
}
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
-
void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
@@ -1482,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
@@ -1720,15 +1712,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
{
- if (strcmp("debug", str) == 0)
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "debug") == 0)
apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
+ else if (strcmp(arg, "verbose") == 0)
apic_verbosity = APIC_VERBOSE;
- return 1;
+
+ return 0;
}
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 7f1f030da7e..446c062e831 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,7 +90,6 @@ static unsigned long apic_phys;
unsigned long mp_lapic_addr;
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Get the LAPIC version
*/
@@ -1062,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 84a8220a607..a6ef672adbb 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ /*
+ * There is a known erratum on Pentium III and Core Solo
+ * and Core Duo CPUs.
+ * " Page with PAT set to WC while associated MTRR is UC
+ * may consolidate to UC "
+ * Because of this erratum, it is better to stick with
+ * setting WC in MTRR rather than using PAT on these CPUs.
+ *
+ * Enable PAT WC only on P4, Core 2 or later CPUs.
+ */
+ if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
return;
- break;
+
+ pat_disable("PAT WC disabled due to known CPU erratum.");
+ return;
+
case X86_VENDOR_AMD:
case X86_VENDOR_CENTAUR:
case X86_VENDOR_TRANSMETA:
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cae9cabc303..18514ed2610 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1<<8))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
+
+ /* Set MTRR capability flag if appropriate */
+ if (c->x86_model == 13 || c->x86_model == 9 ||
+ (c->x86_model == 8 && c->x86_mask >= 8))
+ set_cpu_cap(c, X86_FEATURE_K6_MTRR);
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
mbytes);
}
- /* Set MTRR capability flag if appropriate */
- if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
- set_cpu_cap(c, X86_FEATURE_K6_MTRR);
break;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e8..c8e315f1aa8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
*/
static void __init check_fpu(void)
{
+ s32 fdiv_bug;
+
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
+
+ boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e0f45edd6a5..a0534c04d38 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -314,6 +314,16 @@ enum {
EAMD3D = 1<<20,
};
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ switch (c->x86) {
+ case 5:
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+ break;
+ }
+}
+
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
+ .c_early_init = early_init_centaur,
.c_init = init_centaur,
.c_size_cache = centaur_size_cache,
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa3..4e456bd955b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void)
get_cpu_vendor(c, 1);
+ early_get_cap(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
+}
- early_get_cap(c);
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6; unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. In the latter case it doesn't even *fail*
+ * reliably, so probing for it doesn't even work. Disable it completely
+ * unless we can find a reliable way to detect all the broken cases.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
}
init_scattered_cpuid_features(c);
+ detect_nopl(c);
}
-
}
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017..a11f5d4477c 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -18,6 +18,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
+#include <asm/asm.h>
#include <asm/numa.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
@@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void)
}
}
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. Hence, probe for it based on first
+ * principles.
+ *
+ * Note: no 64-bit chip is known to lack these, but put the code here
+ * for consistency with 32 bits, and to make it utterly trivial to
+ * diagnose the problem should it ever surface.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ const u32 nopl_signature = 0x888c53b1; /* Random number */
+ u32 has_nopl = nopl_signature;
+
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
+ if (c->x86 >= 6) {
+ asm volatile("\n"
+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: xor %0,%0\n"
+ " jmp 2b\n"
+ " .previous\n"
+ _ASM_EXTABLE(1b,3b)
+ : "+a" (has_nopl));
+
+ if (has_nopl == nopl_signature)
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+ }
+}
+
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
void __init early_cpu_init(void)
@@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86_phys_bits = eax & 0xff;
}
+ detect_nopl(c);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
@@ -493,17 +529,20 @@ void pda_init(int cpu)
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
+ pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
+ if (!pda->irqstackptr) {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d",
+ cpu);
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ }
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
-
- pda->irqstackptr += IRQSTACKSIZE-64;
}
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@ -601,19 +640,22 @@ void __cpuinit cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (!orig_ist->ist[0]) {
static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
};
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception stack %ld %d\n",
- v, cpu);
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception "
+ "stack %ld %d\n", v, cpu);
+ }
+ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
+ (unsigned long)estacks;
}
- estacks += PAGE_SIZE << order[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
}
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596..efae3b22a0f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+ tristate "VIA C7 Enhanced PowerSaver"
select CPU_FREQ_TABLE
- depends on X86_32 && EXPERIMENTAL
+ depends on X86_32
help
This adds the CPUFreq driver for VIA C7 processors.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ff2fff56f0a..dd097b83583 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd)
static void drv_write(struct drv_cmd *cmd)
{
cpumask_t saved_mask = current->cpus_allowed;
- cpumask_of_cpu_ptr_declare(cpu_mask);
unsigned int i;
for_each_cpu_mask_nr(i, cmd->mask) {
- cpumask_of_cpu_ptr_next(cpu_mask, i);
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
do_drv_write(cmd);
}
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu)
} aperf_cur, mperf_cur;
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(cpu_mask, cpu);
unsigned int perf_percent;
unsigned int retval;
saved_mask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (get_cpu() != cpu) {
/* We were not able to run on requested processor */
put_cpu();
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu)
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- cpumask_of_cpu_ptr(cpu_mask, cpu);
struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
unsigned int freq;
unsigned int cached_freq;
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
}
cached_freq = data->freq_table[data->acpi_data->state].frequency;
- freq = extract_freq(get_cur_val(cpu_mask), data);
+ freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
if (freq != cached_freq) {
/*
* The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f56..e4a4bf870e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
* It is important that the frequencies
* are listed in ascending order here!
*/
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
{1000, 0x02, 0x18},
{2000, 0x02, 0x10},
{4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 53c7b693697..84bb395038d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
-
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
return data[pstate].frequency;
}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
wrmsr(MSR_FIDVID_CTL, lo, hi);
}
-
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
@@ -479,12 +476,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
static int check_supported_cpu(unsigned int cpu)
{
cpumask_t oldmask;
- cpumask_of_cpu_ptr(cpu_mask, cpu);
u32 eax, ebx, ecx, edx;
unsigned int rc = 0;
oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -1017,7 +1013,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
{
cpumask_t oldmask;
- cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
u32 checkfid;
u32 checkvid;
@@ -1032,7 +1027,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1107,7 +1102,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
cpumask_t oldmask;
- cpumask_of_cpu_ptr_declare(newmask);
int rc;
if (!cpu_online(pol->cpu))
@@ -1159,8 +1153,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
- cpumask_of_cpu_ptr_next(newmask, pol->cpu);
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1182,7 +1175,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
set_cpus_allowed_ptr(current, &oldmask);
if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = *newmask;
+ pol->cpus = cpumask_of_cpu(pol->cpu);
else
pol->cpus = per_cpu(cpu_core_map, pol->cpu);
data->available_cores = &(pol->cpus);
@@ -1248,7 +1241,6 @@ static unsigned int powernowk8_get (unsigned int cpu)
{
struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
- cpumask_of_cpu_ptr(newmask, cpu);
unsigned int khz = 0;
unsigned int first;
@@ -1258,7 +1250,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
if (!data)
return -EINVAL;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR PFX
"limiting to CPU %d failed in powernowk8_get\n", cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index ca2ac13b7af..15e13c01cc3 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
unsigned l, h;
unsigned clock_freq;
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(new_mask, cpu);
saved_mask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, new_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu)
return 0;
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy,
* Best effort undo..
*/
- if (!cpus_empty(*covered_cpus)) {
- cpumask_of_cpu_ptr_declare(new_mask);
-
+ if (!cpus_empty(*covered_cpus))
for_each_cpu_mask_nr(j, *covered_cpus) {
- cpumask_of_cpu_ptr_next(new_mask, j);
- set_cpus_allowed_ptr(current, new_mask);
+ set_cpus_allowed_ptr(current,
+ &cpumask_of_cpu(j));
wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
}
- }
tmp = freqs.new;
freqs.new = freqs.old;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2f3728dc24f..191f7263c61 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
static unsigned int speedstep_get(unsigned int cpu)
{
- cpumask_of_cpu_ptr(newmask, cpu);
- return _speedstep_get(newmask);
+ return _speedstep_get(&cpumask_of_cpu(cpu));
}
/**
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 3fd7a67bb06..898a5a2002e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,13 +15,11 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
- unsigned long flags;
/* we test for DEVID by checking whether CCR3 is writable */
- local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, ccr3 ^ 0x80);
getCx86(0xc0); /* dummy to change bus */
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
*dir0 = getCx86(CX86_DIR0);
*dir1 = getCx86(CX86_DIR1);
}
- local_irq_restore(flags);
}
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __do_cyrix_devid(dir0, dir1);
+ local_irq_restore(flags);
+}
/*
* Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
* order to identify the Cyrix CPU model after we're out of setup.c
@@ -134,23 +139,6 @@ static void __cpuinit set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
}
-static void __cpuinit set_cx86_inc(void)
-{
- unsigned char ccr3;
-
- printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- /* PCR1 -- Performance Control */
- /* Incrementor on, whatever that is */
- setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
- /* PCR0 -- Performance Control */
- /* Incrementor Margin 10 */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-}
-
/*
* Configure later MediaGX and/or Geode processor.
*/
@@ -174,11 +162,28 @@ static void __cpuinit geode_configure(void)
set_cx86_memwb();
set_cx86_reorder();
- set_cx86_inc();
local_irq_restore(flags);
}
+static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
+{
+ unsigned char dir0, dir0_msn, dir1 = 0;
+
+ __do_cyrix_devid(&dir0, &dir1);
+ dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+
+ switch (dir0_msn) {
+ case 3: /* 6x86/6x86L */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ case 5: /* 6x86MX/M II */
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
+ break;
+ }
+}
static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
{
@@ -434,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
+ .c_early_init = early_init_cyrix,
.c_init = init_cyrix,
.c_identify = cyrix_identify,
};
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cb..c9017799497 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
NULL, NULL, NULL, NULL,
"constant_tsc", "up", NULL, "arch_perfmon",
"pebs", "bts", NULL, NULL,
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "rep_good", NULL, NULL, NULL,
+ "nopl", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 650d40f7912..6b0a10b002f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
unsigned long j;
int retval;
cpumask_t oldmask;
- cpumask_of_cpu_ptr(newmask, cpu);
if (num_cache_leaves == 0)
return -ENOENT;
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
return -ENOMEM;
oldmask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, newmask);
+ retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (retval)
goto out;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 65a339678ec..726a5fcdf34 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = {
};
DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
@@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
mce_create_device(cpu);
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
mce_remove_device(cpu);
break;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 88736cadbaa..5eb390a4b2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
deallocate_threshold_block(cpu, bank);
free_out:
+ kobject_del(b->kobj);
kobject_put(b->kobj);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu)
}
/* get notified when a cpu comes on/off */
-static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+ unsigned int cpu)
{
- /* cpu was unsigned int to begin with */
- unsigned int cpu = (unsigned long)hcpu;
-
if (cpu >= NR_CPUS)
- goto out;
+ return;
switch (action) {
case CPU_ONLINE:
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
default:
break;
}
- out:
- return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
- .notifier_call = threshold_cpu_callback,
-};
-
static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void)
if (err)
return err;
}
- register_hotcpu_notifier(&threshold_cpu_notifier);
+ threshold_cpu_callback = amd_64_threshold_cpu_callback;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 509bd3d9eac..cb7d3b6a80e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
unsigned int mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int tmp, hi;
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
@@ -392,8 +393,23 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask. */
- mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
- | mask_lo >> PAGE_SHIFT;
+ tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask_lo = size_or_mask | tmp;
+ /* Expand tmp with high bits to all 1s*/
+ hi = fls(tmp);
+ if (hi > 0) {
+ tmp |= ~((1<<(hi - 1)) - 1);
+
+ if (tmp != mask_lo) {
+ static int once = 1;
+
+ if (once) {
+ printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+ once = 0;
+ }
+ mask_lo = tmp;
+ }
+ }
/* This works correctly if size is a power of two, i.e. a
contiguous range. */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6f23969c8fa..b117d7f8a56 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- if (!kvm_para_available()) {
- printk(KERN_WARNING
+ WARN(!kvm_para_available(), KERN_WARNING
"WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
- }
return 0;
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index de7439f82b9..05cc22dbd4f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz)
perfctr_msr = MSR_P4_IQ_PERFCTR1;
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+ /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+ if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+ cccr_val = P4_CCCR_OVF_PMI0;
+ else
+ cccr_val = P4_CCCR_OVF_PMI1;
+ cccr_val |= P4_CCCR_ESCR_SELECT(4);
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 14b11b3be31..8e9cd6a8ec1 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -89,6 +89,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
struct cpuid_regs cmd;
int cpu = iminor(file->f_path.dentry->d_inode);
u64 pos = *ppos;
+ ssize_t bytes = 0;
+ int err = 0;
if (count % 16)
return -EINVAL; /* Invalid chunk size */
@@ -96,14 +98,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
for (; count; count -= 16) {
cmd.eax = pos;
cmd.ecx = pos >> 32;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
- if (copy_to_user(tmp, &cmd, 16))
- return -EFAULT;
+ err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+ if (err)
+ break;
+ if (copy_to_user(tmp, &cmd, 16)) {
+ err = -EFAULT;
+ break;
+ }
tmp += 16;
+ bytes += 16;
*ppos = ++pos;
}
- return tmp - buf;
+ return bytes ? bytes : err;
}
static int cpuid_open(struct inode *inode, struct file *file)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9af89078f7b..66e48aa2dd1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p)
if (!p)
return -EINVAL;
- if (!strcmp(p, "exactmap")) {
+ if (!strncmp(p, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
/*
* If we are doing a crash dump, we still need to know
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 4b63c8e1f13..5cab48ee61a 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void)
* directory. If I have PAE, I just need to duplicate one entry in
* page directory.
*/
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
efi_bak_pg_dir_pointer[0].pgd =
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void)
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd21..eaff0bbb144 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
+EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888..bfa837cb16b 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -222,7 +222,7 @@ static __init void map_low_mmrs(void)
enum map_type {map_wb, map_uc};
-static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
{
unsigned long bytes, paddr;
@@ -293,7 +293,9 @@ static __init void uv_rtc_init(void)
sn_rtc_cycles_per_second = ticks_per_sec;
}
-static __init void uv_system_init(void)
+static bool uv_system_inited;
+
+void __init uv_system_init(void)
{
union uvh_si_addr_map_config_u m_n_config;
union uvh_node_id_u node_id;
@@ -383,6 +385,7 @@ static __init void uv_system_init(void)
map_mmr_high(max_pnode);
map_config_high(max_pnode);
map_mmioh_high(max_pnode);
+ uv_system_inited = true;
}
/*
@@ -391,8 +394,7 @@ static __init void uv_system_init(void)
*/
void __cpuinit uv_cpu_init(void)
{
- if (!uv_node_to_blade)
- uv_system_init();
+ BUG_ON(!uv_system_inited);
uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1b318e903bf..9bfc4d72fb2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f67e93441ca..a7010c3a377 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP
1:
#endif /* CONFIG_SMP */
jmp *(initial_code)
-.align 4
-ENTRY(initial_code)
- .long i386_start_kernel
/*
* We depend on ET to be correct. This checks for 287/387.
@@ -601,6 +598,11 @@ ignore_int:
#endif
iret
+.section .cpuinit.data,"wa"
+.align 4
+ENTRY(initial_code)
+ .long i386_start_kernel
+
.section .text
/*
* Real beginning of normal "text" segment
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad2b15a1334..73deaffadd0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void)
/* Calculate the min / max delta */
hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
&hpet_clockevent);
- hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
- &hpet_clockevent);
+ /* 5 usec minimum reprogramming delta. */
+ hpet_clockevent.min_delta_ns = 5000;
/*
* Start hpet with the boot cpu mask and make it
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
}
static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
+ struct clock_event_device *evt)
{
- unsigned long cnt;
+ u32 cnt;
cnt = hpet_readl(HPET_COUNTER);
- cnt += delta;
+ cnt += (u32) delta;
hpet_writel(cnt, HPET_T0_CMP);
- return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0;
+ /*
+ * We need to read back the CMP register to make sure that
+ * what we wrote hit the chip before we compare it to the
+ * counter.
+ */
+ WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+
+ return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
/*
@@ -359,6 +366,7 @@ static int hpet_clocksource_register(void)
int __init hpet_enable(void)
{
unsigned long id;
+ int i;
if (!is_hpet_capable())
return 0;
@@ -369,6 +377,29 @@ int __init hpet_enable(void)
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
+
+ /*
+ * AMD SB700 based systems with spread spectrum enabled use a
+ * SMM based HPET emulation to provide proper frequency
+ * setting. The SMM code is initialized with the first HPET
+ * register access and takes some time to complete. During
+ * this time the config register reads 0xffffffff. We check
+ * for max. 1000 loops whether the config register reads a non
+ * 0xffffffff value to make sure that HPET is up and running
+ * before we go further. A counting loop is safe, as the HPET
+ * access takes thousands of CPU cycles. On non SB700 based
+ * machines this check is only done once and has no side
+ * effects.
+ */
+ for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
+ if (i == 1000) {
+ printk(KERN_WARNING
+ "HPET config register value = 0xFFFFFFFF. "
+ "Disabling HPET\n");
+ goto out_nohpet;
+ }
+ }
+
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c..09cddb57bec 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
int timer_through_8259 __initdata;
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
return vector;
}
-void setup_vector_irq(int cpu)
-{
-}
-
static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d170..61a83b70c18 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
+
static int __assign_irq_vector(int irq, cpumask_t mask)
{
/*
@@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq)
cpus_clear(cfg->domain);
}
-static void __setup_vector_irq(int cpu)
+void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
}
}
-void setup_vector_irq(int cpu)
-{
- spin_lock(&vector_lock);
- __setup_vector_irq(smp_processor_id());
- spin_unlock(&vector_lock);
-}
-
-
static struct irq_chip ioapic_chip;
static void ioapic_register_intr(int irq, unsigned long trigger)
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 1c3a66a67f8..720d2607aac 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "30BF")
}
},
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "Presario F700",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30D3")
+ }
+ },
{ }
};
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index f2d43bc7551..ff7d3b0124f 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
if (PageHighMem(pg)) {
data = ioremap_cache(pa_data, sizeof(*data));
if (!data) {
+ kfree(node);
error = -ENXIO;
goto err_dir;
}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0..6c27679ec6a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->ip = (unsigned long)p->ainsn.insn;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3..b68e21f06f4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
- cpumask_of_cpu_ptr_declare(mask);
-
preempt_disable();
load_LDT(pc);
- cpumask_of_cpu_ptr_next(mask, smp_processor_id());
- if (!cpus_equal(current->mm->cpu_vm_mask, *mask))
+ if (!cpus_equal(current->mm->cpu_vm_mask,
+ cpumask_of_cpu(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55a..0732adba05c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -22,6 +23,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/cacheflush.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -77,7 +79,7 @@ static void load_segments(void)
/*
* A architecture hook called to validate the
* proposed image and prepare the control pages
- * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
* have been allocated, but the segments have yet
* been copied into the kernel.
*
@@ -85,10 +87,12 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
- * Currently nothing.
+ * Make control page executable.
*/
int machine_kexec_prepare(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_x(image->control_code_page, 1);
return 0;
}
@@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_nx(image->control_code_page, 1);
}
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
+ asmlinkage unsigned long
+ (*relocate_kernel_ptr)(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
- tracer_disable();
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /* We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page);
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
+ relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
#ifdef CONFIG_X86_PAE
@@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
/* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
@@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image)
set_idt(phys_to_virt(0),0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start, cpu_has_pae);
+ image->start = relocate_kernel_ptr((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start, cpu_has_pae,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a..c43caa3a91f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 07c0f828f48..3b599518c32 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -33,6 +33,8 @@
#include <linux/module.h>
#include <asm/geode.h>
+#define MFGPT_DEFAULT_IRQ 7
+
static struct mfgpt_timer_t {
unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
}
EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
-int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
+int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
{
- u32 val, dummy;
- int offset;
+ u32 zsel, lpc, dummy;
+ int shift;
if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
return -EIO;
- if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ /*
+ * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+ * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+ * 2, and we mustn't use nor change it.
+ * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+ * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+ * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+ */
+ rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+ shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
+ if (((zsel >> shift) & 0xF) == 2)
return -EIO;
- rdmsr(MSR_PIC_ZSEL_LOW, val, dummy);
+ /* Choose IRQ: if none supplied, keep IRQ already set or use default */
+ if (!*irq)
+ *irq = (zsel >> shift) & 0xF;
+ if (!*irq)
+ *irq = MFGPT_DEFAULT_IRQ;
- offset = (timer % 4) * 4;
-
- val &= ~((0xF << offset) | (0xF << (offset + 16)));
+ /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+ if (*irq < 1 || *irq == 2 || *irq > 15)
+ return -EIO;
+ rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+ if (lpc & (1 << *irq))
+ return -EIO;
+ /* All chosen and checked - go for it */
+ if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ return -EIO;
if (enable) {
- val |= (irq & 0x0F) << (offset);
- val |= (irq & 0x0F) << (offset + 16);
+ zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+ wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
}
- wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
return 0;
}
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
static u16 mfgpt_event_clock;
-static int irq = 7;
+static int irq;
static int __init mfgpt_setup(char *str)
{
get_option(&str, &irq);
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void)
mfgpt_event_clock = timer;
/* Set up the IRQ on the MFGPT side */
- if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) {
+ if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
return -EIO;
}
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
- "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
+ "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
+ timer, irq);
clockevents_register_device(&mfgpt_clockevent);
return 0;
err:
- geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq);
+ geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
printk(KERN_ERR
"mfgpt-timer: Unable to set up the MFGPT clock source\n");
return -EIO;
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590..652fa5c38eb 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,7 +388,6 @@ static int do_microcode_update (void)
void *new_mc = NULL;
int cpu;
cpumask_t old;
- cpumask_of_cpu_ptr_declare(newmask);
old = current->cpus_allowed;
@@ -405,8 +404,7 @@ static int do_microcode_update (void)
if (!uci->valid)
continue;
- cpumask_of_cpu_ptr_next(newmask, cpu);
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = get_maching_microcode(new_mc, cpu);
if (error < 0)
goto out;
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu)
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
unsigned int val[2];
int err = 0;
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu)
return 0;
old = current->cpus_allowed;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* Check if the microcode we have in memory matches the CPU */
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu)
static void microcode_init_cpu(int cpu, int resume)
{
cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
old = current->cpus_allowed;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev,
if (end == buf)
return -EINVAL;
if (val == 1) {
- cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
-
- old = current->cpus_allowed;
+ cpumask_t old = current->cpus_allowed;
get_online_cpus();
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid)
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index fdfdc550b36..efc2f361fe8 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
{}
};
-void __init check_enable_amd_mmconf_dmi(void)
+void __cpuinit check_enable_amd_mmconf_dmi(void)
{
dmi_check_system(mmconf_dmi_table);
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed..b3fb430725c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
{
int apicid;
char *bootup_cpu = "";
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
- printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
{
- printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
}
-static void construct_ioapic_table(int mpc_default_type)
+static void __init construct_ioapic_table(int mpc_default_type)
{
struct mpc_config_ioapic ioapic;
struct mpc_config_bus bus;
@@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type)
construct_default_ioirq_mptable(mpc_default_type);
}
#else
-static inline void construct_ioapic_table(int mpc_default_type) { }
+static inline void __init construct_ioapic_table(int mpc_default_type) { }
#endif
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
+ bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9fd80955244..2e2af5d1819 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf,
u32 data[2];
u32 reg = *ppos;
int cpu = iminor(file->f_path.dentry->d_inode);
- int err;
+ int err = 0;
+ ssize_t bytes = 0;
if (count % 8)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 8) {
err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
- if (err)
- return -EIO;
- if (copy_to_user(tmp, &data, 8))
- return -EFAULT;
+ if (err) {
+ if (err == -EFAULT) /* Fix idiotic error code */
+ err = -EIO;
+ break;
+ }
+ if (copy_to_user(tmp, &data, 8)) {
+ err = -EFAULT;
+ break;
+ }
tmp += 2;
+ bytes += 8;
}
- return ((char __user *)tmp) - buf;
+ return bytes ? bytes : err;
}
static ssize_t msr_write(struct file *file, const char __user *buf,
@@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
u32 data[2];
u32 reg = *ppos;
int cpu = iminor(file->f_path.dentry->d_inode);
- int err;
+ int err = 0;
+ ssize_t bytes = 0;
if (count % 8)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 8) {
- if (copy_from_user(&data, tmp, 8))
- return -EFAULT;
+ if (copy_from_user(&data, tmp, 8)) {
+ err = -EFAULT;
+ break;
+ }
err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
- if (err)
- return -EIO;
+ if (err) {
+ if (err == -EFAULT) /* Fix idiotic error code */
+ err = -EIO;
+ break;
+ }
tmp += 2;
+ bytes += 8;
}
- return ((char __user *)tmp) - buf;
+ return bytes ? bytes : err;
}
static int msr_open(struct inode *inode, struct file *file)
@@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file)
ret = -EIO; /* MSR not supported */
out:
unlock_kernel();
- return 0;
+ return ret;
}
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ac6d51222e7..abb78a2cc4a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+ printk(KERN_CONT "\n");
+
+ printk(KERN_WARNING
+ "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+ cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+ printk(KERN_WARNING
+ "Please report this to bugzilla.kernel.org,\n");
+ printk(KERN_WARNING
+ "and attach the output of the 'dmesg' command.\n");
+
+ per_cpu(wd_enabled, cpu) = 0;
+ atomic_dec(&nmi_active);
+}
+
int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void)
for_each_online_cpu(cpu) {
if (!per_cpu(wd_enabled, cpu))
continue;
- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- printk(KERN_WARNING "WARNING: CPU#%d: NMI "
- "appears to be stuck (%d->%d)!\n",
- cpu,
- prev_nmi_count[cpu],
- get_nmi_count(cpu));
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
- }
+ if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+ report_broken_nmi(cpu, prev_nmi_count);
}
endflag = 1;
if (!atomic_read(&nmi_active)) {
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index b8c45610b20..eecc8c18f01 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -73,7 +73,7 @@ static void __init smp_dump_qct(void)
}
-void __init numaq_tsc_disable(void)
+void __cpuinit numaq_tsc_disable(void)
{
if (!found_numaq)
return;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d79..300da17e61c 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = {
.spin_unlock = __ticket_spin_unlock,
#endif
};
-EXPORT_SYMBOL_GPL(pv_lock_ops);
+EXPORT_SYMBOL(pv_lock_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7..dcdac6c826e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <linux/pci_ids.h>
@@ -36,6 +37,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
+
#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
@@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
static void calioc2_tce_cache_blast(struct iommu_table *tbl);
static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
static struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
@@ -339,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
/* were we called with bad_dma_address? */
badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
- printk(KERN_ERR "Calgary: driver tried unmapping bad DMA "
+ WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
- WARN_ON(1);
return;
}
@@ -410,22 +413,6 @@ static void calgary_unmap_sg(struct device *dev,
}
}
-static int calgary_nontranslate_map_sg(struct device* dev,
- struct scatterlist *sg, int nelems, int direction)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nelems, i) {
- struct page *p = sg_page(s);
-
- BUG_ON(!p);
- s->dma_address = virt_to_bus(sg_virt(s));
- s->dma_length = s->length;
- }
- return nelems;
-}
-
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
@@ -436,9 +423,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
unsigned long entry;
int i;
- if (!translation_enabled(tbl))
- return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
-
for_each_sg(sg, s, nelems, i) {
BUG_ON(!sg_page(s));
@@ -474,7 +458,6 @@ error:
static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int direction)
{
- dma_addr_t dma_handle = bad_dma_address;
void *vaddr = phys_to_virt(paddr);
unsigned long uaddr;
unsigned int npages;
@@ -483,12 +466,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
- if (translation_enabled(tbl))
- dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
- else
- dma_handle = virt_to_bus(vaddr);
-
- return dma_handle;
+ return iommu_alloc(dev, tbl, vaddr, npages, direction);
}
static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
@@ -497,9 +475,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
- if (!translation_enabled(tbl))
- return;
-
npages = num_dma_pages(dma_handle, size);
iommu_free(tbl, dma_handle, npages);
}
@@ -522,18 +497,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
goto error;
memset(ret, 0, size);
- if (translation_enabled(tbl)) {
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == bad_dma_address)
- goto free;
-
- *dma_handle = mapping;
- } else /* non translated slot */
- *dma_handle = virt_to_bus(ret);
-
+ /* set up tces to cover the allocated range */
+ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
+ if (mapping == bad_dma_address)
+ goto free;
+ *dma_handle = mapping;
return ret;
-
free:
free_pages((unsigned long)ret, get_order(size));
ret = NULL;
@@ -541,7 +510,7 @@ error:
return ret;
}
-static const struct dma_mapping_ops calgary_dma_ops = {
+static struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
@@ -830,7 +799,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
- tce_free(tbl, 0, tbl->it_size);
+
+ if (is_kdump_kernel())
+ calgary_init_bitmap_from_tce_table(tbl);
+ else
+ tce_free(tbl, 0, tbl->it_size);
if (is_calgary(dev->device))
tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1182,10 @@ static int __init calgary_init(void)
if (ret)
return ret;
+ /* Purely for kdump kernel case */
+ if (is_kdump_kernel())
+ get_tce_space_from_tar();
+
do {
dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
if (!dev)
@@ -1230,6 +1207,16 @@ static int __init calgary_init(void)
goto error;
} while (1);
+ dev = NULL;
+ for_each_pci_dev(dev) {
+ struct iommu_table *tbl;
+
+ tbl = find_iommu_table(&dev->dev);
+
+ if (translation_enabled(tbl))
+ dev->dev.archdata.dma_ops = &calgary_dma_ops;
+ }
+
return ret;
error:
@@ -1251,6 +1238,7 @@ error:
calgary_disable_translation(dev);
calgary_free_bus(dev);
pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
+ dev->dev.archdata.dma_ops = NULL;
} while (1);
return ret;
@@ -1280,13 +1268,15 @@ static inline int __init determine_tce_table_size(u64 ram)
static int __init build_detail_arrays(void)
{
unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
+ unsigned numnodes, i;
+ int scal_detail_size, rio_detail_size;
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+ numnodes = rio_table_hdr->num_scal_dev;
+ if (numnodes > MAX_NUMNODES){
printk(KERN_WARNING
"Calgary: MAX_NUMNODES too low! Defined as %d, "
"but system has %d nodes.\n",
- MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ MAX_NUMNODES, numnodes);
return -ENODEV;
}
@@ -1307,8 +1297,7 @@ static int __init build_detail_arrays(void)
}
ptr = ((unsigned long)rio_table_hdr) + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev;
- i++, ptr += scal_detail_size)
+ for (i = 0; i < numnodes; i++, ptr += scal_detail_size)
scal_devs[i] = (struct scal_detail *)ptr;
for (i = 0; i < rio_table_hdr->num_rio_dev;
@@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
return (val != 0xffffffff);
}
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+ u64 *tp;
+ unsigned int index;
+ tp = ((u64 *)tbl->it_base);
+ for (index = 0 ; index < tbl->it_size; index++) {
+ if (*tp != 0x0)
+ set_bit(index, tbl->it_map);
+ tp++;
+ }
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void __init get_tce_space_from_tar(void)
+{
+ int bus;
+ void __iomem *target;
+ unsigned long tce_space;
+
+ for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+ struct calgary_bus_info *info = &bus_info[bus];
+ unsigned short pci_device;
+ u32 val;
+
+ val = read_pci_config(bus, 0, 0, 0);
+ pci_device = (val & 0xFFFF0000) >> 16;
+
+ if (!is_cal_pci_dev(pci_device))
+ continue;
+ if (info->translation_disabled)
+ continue;
+
+ if (calgary_bus_has_devices(bus, pci_device) ||
+ translate_empty_slots) {
+ target = calgary_reg(bus_info[bus].bbar,
+ tar_offset(bus));
+ tce_space = be64_to_cpu(readq(target));
+ tce_space = tce_space & TAR_SW_BITS;
+
+ tce_space = tce_space & (~specified_table_size);
+ info->tce_space = (u64 *)__va(tce_space);
+ }
+ }
+ return;
+}
+
void __init detect_calgary(void)
{
int bus;
@@ -1394,7 +1438,8 @@ void __init detect_calgary(void)
return;
}
- specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+ specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+ saved_max_pfn : max_pfn) * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1457,16 @@ void __init detect_calgary(void)
if (calgary_bus_has_devices(bus, pci_device) ||
translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
+ /*
+ * If it is kdump kernel, find and use tce tables
+ * from first kernel, else allocate tce tables here
+ */
+ if (!is_kdump_kernel()) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ info->tce_space = tbl;
+ }
calgary_found = 1;
}
}
@@ -1430,6 +1481,10 @@ void __init detect_calgary(void)
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
"CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
debugging ? "enabled" : "disabled");
+
+ /* swiotlb for devices that aren't behind the Calgary. */
+ if (max_pfn > MAX_DMA32_PFN)
+ swiotlb = 1;
}
return;
@@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void)
{
int ret;
- if (no_iommu || swiotlb)
+ if (no_iommu || (swiotlb && !calgary_detected))
return -ENODEV;
if (!calgary_detected)
@@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void)
if (ret) {
printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
"falling back to no_iommu\n", ret);
- if (max_pfn > MAX_DMA32_PFN)
- printk(KERN_ERR "WARNING more than 4GB of memory, "
- "32bit PCI may malfunction.\n");
return ret;
}
force_iommu = 1;
bad_dma_address = 0x0;
- dma_ops = &calgary_dma_ops;
+ /* dma_ops is set to swiotlb or nommu */
+ if (!dma_ops)
+ dma_ops = &nommu_dma_ops;
return 0;
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cbecb05551b..87d4d6964ec 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
static int forbid_dac __read_mostly;
-const struct dma_mapping_ops *dma_ops;
+struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
pci_swiotlb_init();
}
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+ return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
#endif
/*
@@ -192,126 +200,10 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
-#ifdef CONFIG_X86_32
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base = NULL;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem);
- out:
- if (mem_base)
- iounmap(mem_base);
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if (!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pos, err;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
-
- pages >>= PAGE_SHIFT;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
- dma_addr_t *dma_handle, void **ret)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- *ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(*ret, 0, size);
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- *ret = NULL;
- }
- return (mem != NULL);
-}
-
-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- if (mem && vaddr >= mem->virt_base && vaddr <
- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- return 1;
- }
- return 0;
-}
-#else
-#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
-#define dma_release_coherent(dev, order, vaddr) (0)
-#endif /* CONFIG_X86_32 */
-
int dma_supported(struct device *dev, u64 mask)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
dev_info(dev, "PCI: Disallowing DAC for device\n");
@@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask)
}
#endif
- if (dma_ops->dma_supported)
- return dma_ops->dma_supported(dev, mask);
+ if (ops->dma_supported)
+ return ops->dma_supported(dev, mask);
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
@@ -367,6 +259,7 @@ void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
void *memory = NULL;
struct page *page;
unsigned long dma_mask = 0;
@@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
return memory;
if (!dev) {
@@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* Let low level make its own zone decisions */
gfp &= ~(GFP_DMA32|GFP_DMA);
- if (dma_ops->alloc_coherent)
- return dma_ops->alloc_coherent(dev, size,
+ if (ops->alloc_coherent)
+ return ops->alloc_coherent(dev, size,
dma_handle, gfp);
return NULL;
}
@@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
}
- if (dma_ops->alloc_coherent) {
+ if (ops->alloc_coherent) {
free_pages((unsigned long)memory, get_order(size));
gfp &= ~(GFP_DMA|GFP_DMA32);
- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
+ return ops->alloc_coherent(dev, size, dma_handle, gfp);
}
- if (dma_ops->map_simple) {
- *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
+ if (ops->map_simple) {
+ *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
size,
PCI_DMA_BIDIRECTIONAL);
if (*dma_handle != bad_dma_address)
@@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t bus)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
int order = get_order(size);
WARN_ON(irqs_disabled()); /* for portability */
- if (dma_release_coherent(dev, order, vaddr))
+ if (dma_release_from_coherent(dev, order, vaddr))
return;
- if (dma_ops->unmap_single)
- dma_ops->unmap_single(dev, bus, size, 0);
+ if (ops->unmap_single)
+ ops->unmap_single(dev, bus, size, 0);
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index df5f142657d..49285f8fd4d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
- unsigned long npages = to_pages(phys_mem, size);
+ unsigned long npages = iommu_num_pages(phys_mem, size);
unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
+ npages = iommu_num_pages(dma_addr, size);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
CLEAR_LEAK(iommu_page + i);
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
}
addr = phys_addr;
- pages = to_pages(s->offset, s->length);
+ pages = iommu_num_pages(s->offset, s->length);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
seg_size += s->length;
need = nextneed;
- pages += to_pages(s->offset, s->length);
+ pages += iommu_num_pages(s->offset, s->length);
ps = s;
}
if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
@@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amd64_init(void);
-static const struct dma_mapping_ops gart_dma_ops = {
- .mapping_error = NULL,
+static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
.map_simple = gart_map_simple,
.unmap_single = gart_unmap_single,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff..3f91f71cdc3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}
-/* Make sure we keep the same behaviour */
-static int nommu_mapping_error(dma_addr_t dma_addr)
-{
-#ifdef CONFIG_X86_32
- return 0;
-#else
- return (dma_addr == bad_dma_address);
-#endif
-}
-
-
-const struct dma_mapping_ops nommu_dma_ops = {
+struct dma_mapping_ops nommu_dma_ops = {
.map_single = nommu_map_single,
.map_sg = nommu_map_sg,
- .mapping_error = nommu_mapping_error,
.is_phys = 1,
};
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c2..c4ce0332759 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
}
-const struct dma_mapping_ops swiotlb_dma_ops = {
+struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 68f4ae2ac99..ae584f87d06 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -95,7 +95,6 @@ static inline void play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
- wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
@@ -104,8 +103,8 @@ static inline void play_dead(void)
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 91ffce47af8..30ce4734eda 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state);
static inline void play_dead(void)
{
idle_task_exit();
- wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 06a9f643817..724adfc63cb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -414,25 +414,20 @@ void native_machine_shutdown(void)
/* The boot cpu is always logical cpu 0 */
int reboot_cpu_id = 0;
- cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
- cpu_online(reboot_cpu)) {
+ cpu_online(reboot_cpu))
reboot_cpu_id = reboot_cpu;
- cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
- }
#endif
/* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_online(reboot_cpu_id)) {
+ if (!cpu_online(reboot_cpu_id))
reboot_cpu_id = smp_processor_id();
- cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
- }
/* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470..6f50664b2ba 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,45 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PAE_PGD_ATTR (_PAGE_PRESENT)
+/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define ESP DATA(0x0)
+#define CR0 DATA(0x4)
+#define CR3 DATA(0x8)
+#define CR4 DATA(0xc)
+
+/* other data */
+#define CP_VA_CONTROL_PAGE DATA(0x10)
+#define CP_PA_PGD DATA(0x14)
+#define CP_PA_SWAP_PAGE DATA(0x18)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
+
.text
.align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
- movl 8(%esp), %ebp /* list of pages */
+ /* Save the CPU context, used for jumping back */
+
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushf
+
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %esp, ESP(%edi)
+ movl %cr0, %eax
+ movl %eax, CR0(%edi)
+ movl %cr3, %eax
+ movl %eax, CR3(%edi)
+ movl %cr4, %eax
+ movl %eax, CR4(%edi)
#ifdef CONFIG_X86_PAE
/* map the control page at its virtual address */
@@ -138,15 +172,25 @@ relocate_kernel:
relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
- movl 8(%esp), %ebp /* list of pages */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
+ movl 20+4(%esp), %ebx /* page_list */
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl 20+12(%esp), %edx /* start address */
+ movl 20+16(%esp), %ecx /* cpu_has_pae */
+ movl 20+20(%esp), %esi /* preserve_context */
/* zero out flags, and disable interrupts */
pushl $0
popfl
+ /* save some information for jumping back */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %edi, CP_VA_CONTROL_PAGE(%edi)
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, CP_PA_PGD(%edi)
+ movl PTR(PA_SWAP_PAGE)(%ebp), %eax
+ movl %eax, CP_PA_SWAP_PAGE(%edi)
+ movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
+
/* get physical address of control page now */
/* this is impossible after page table switch */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +241,90 @@ identity_mapped:
xorl %eax, %eax
movl %eax, %cr3
+ movl CP_PA_SWAP_PAGE(%edi), %eax
+ pushl %eax
+ pushl %ebx
+ call swap_pages
+ addl $8, %esp
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ testl %esi, %esi
+ jnz 1f
+ xorl %edi, %edi
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
+ ret
+1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+ addl $PAGE_SIZE, %esp
+2:
+ call *%edx
+
+ /* get the re-entry point of the peer system */
+ movl 0(%esp), %ebp
+ call 1f
+1:
+ popl %ebx
+ subl $(1b - relocate_kernel), %ebx
+ movl CP_VA_CONTROL_PAGE(%ebx), %edi
+ lea PAGE_SIZE(%ebx), %esp
+ movl CP_PA_SWAP_PAGE(%ebx), %eax
+ movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
+ pushl %eax
+ pushl %edx
+ call swap_pages
+ addl $8, %esp
+ movl CP_PA_PGD(%ebx), %eax
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $(1<<31), %eax
+ movl %eax, %cr0
+ lea PAGE_SIZE(%edi), %esp
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
+ ret
+
+virtual_mapped:
+ movl CR4(%edi), %eax
+ movl %eax, %cr4
+ movl CR3(%edi), %eax
+ movl %eax, %cr3
+ movl CR0(%edi), %eax
+ movl %eax, %cr0
+ movl ESP(%edi), %esp
+ movl %ebp, %eax
+
+ popf
+ popl %ebp
+ popl %edi
+ popl %esi
+ popl %ebx
+ ret
+
/* Do the copies */
- movl %ebx, %ecx
+swap_pages:
+ movl 8(%esp), %edx
+ movl 4(%esp), %ecx
+ pushl %ebp
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ movl %ecx, %ebx
jmp 1f
0: /* top, read another word from the indirection page */
@@ -226,27 +352,31 @@ identity_mapped:
movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi
+ movl %edi, %eax
+ movl %esi, %ebp
+
+ movl %edx, %edi
movl $1024, %ecx
rep ; movsl
- jmp 0b
-
-3:
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB, it's handy, and not processor dependent.
- */
- xorl %eax, %eax
- movl %eax, %cr3
+ movl %ebp, %edi
+ movl %eax, %esi
+ movl $1024, %ecx
+ rep ; movsl
- /* set all of the registers to known values */
- /* leave %esp alone */
+ movl %eax, %edi
+ movl %edx, %esi
+ movl $1024, %ecx
+ rep ; movsl
- xorl %eax, %eax
- xorl %ebx, %ebx
- xorl %ecx, %ecx
- xorl %edx, %edx
- xorl %esi, %esi
- xorl %edi, %edi
- xorl %ebp, %ebp
+ lea PAGE_SIZE(%ebp), %esi
+ jmp 0b
+3:
+ popl %esi
+ popl %edi
+ popl %ebx
+ popl %ebp
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf..9838f2539df 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void)
* @size: Size of the crashkernel memory to reserve.
* Returns the base address on success, and -1ULL on failure.
*/
-unsigned long long find_and_reserve_crashkernel(unsigned long long size)
+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
{
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long start = 0LL;
@@ -670,6 +670,18 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
+#ifdef CONFIG_X86_64
+ check_efer();
+#endif
+
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+ /*
+ * Must be before kernel pagetables are setup
+ * or fixmap area is touched.
+ */
+ vmi_init();
+#endif
+
/* after early param, so could get panic from serial */
reserve_early_setup_data();
@@ -730,7 +742,6 @@ void __init setup_arch(char **cmdline_p)
#else
num_physpages = max_pfn;
- check_efer();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
@@ -788,10 +799,6 @@ void __init setup_arch(char **cmdline_p)
initmem_init(0, max_pfn);
-#ifdef CONFIG_X86_64
- dma32_reserve_bootmem();
-#endif
-
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -806,20 +813,21 @@ void __init setup_arch(char **cmdline_p)
#endif
reserve_crashkernel();
+#ifdef CONFIG_X86_64
+ /*
+ * dma32_reserve_bootmem() allocates bootmem which may conflict
+ * with the crashkernel command line, so do that after
+ * reserve_crashkernel()
+ */
+ dma32_reserve_bootmem();
+#endif
+
reserve_ibft_region();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be after max_low_pfn is determined, and before kernel
- * pagetables are setup.
- */
- vmi_init();
-#endif
-
paravirt_pagetable_setup_start(swapper_pg_dir);
paging_init();
paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -856,12 +864,6 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
- if (def_to_bigsmp)
- printk(KERN_WARNING "More than 8 CPUs detected and "
- "CONFIG_X86_PC cannot handle it.\nUse "
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-#endif
kvm_guest_init();
e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c00..76e305e064f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
#endif
}
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-cpumask_t *cpumask_of_cpu_map __read_mostly;
-EXPORT_SYMBOL(cpumask_of_cpu_map);
-
-/* requires nr_cpu_ids to be initialized */
-static void __init setup_cpumask_of_cpu(void)
-{
- int i;
-
- /* alloc_bootmem zeroes memory */
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
- for (i = 0; i < nr_cpu_ids; i++)
- cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
#ifdef CONFIG_X86_32
/*
* Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
-
- /* Setup cpumask_of_cpu map */
- setup_cpumask_of_cpu();
}
#endif
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b45ef8ddd65..ca316b5b742 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf)
clts();
task_thread_info(current)->status |= TS_USEDFPU;
}
- return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+ err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+ if (unlikely(err)) {
+ /*
+ * Encountered an error while doing the restore from the
+ * user buffer, clear the fpu state.
+ */
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+ return err;
}
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4..7985c5b3f91 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
+ *
+ * We need to hold vector_lock so there the set of online cpus
+ * does not change while we are assigning vectors to cpus. Holding
+ * this lock ensures we don't half assign or remove an irq from a cpu.
*/
ipi_call_lock_irq();
-#ifdef CONFIG_X86_IO_APIC
- setup_vector_irq(smp_processor_id());
-#endif
+ lock_vector_lock();
+ __setup_vector_irq(smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_vector_lock();
ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -752,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
}
#ifdef CONFIG_X86_64
+
+/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
+static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
+{
+ if (!after_bootmem)
+ free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
+}
+
/*
* Allocate node local memory for the AP pda.
*
@@ -780,8 +792,7 @@ int __cpuinit get_local_pda(int cpu)
if (oldpda) {
memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
+ free_bootmem_pda(oldpda);
}
newpda->in_bootmem = 0;
@@ -1044,6 +1055,34 @@ static __init void disable_smp(void)
static int __init smp_sanity_check(unsigned max_cpus)
{
preempt_disable();
+
+#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+ if (def_to_bigsmp && nr_cpu_ids > 8) {
+ unsigned int cpu;
+ unsigned nr;
+
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+
+ nr = 0;
+ for_each_present_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_present_map);
+ nr++;
+ }
+
+ nr = 0;
+ for_each_possible_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_possible_map);
+ nr++;
+ }
+
+ nr_cpu_ids = 8;
+ }
+#endif
+
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
"by the BIOS.\n", hard_smp_processor_id());
@@ -1182,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
setup_boot_clock();
+
+ if (is_uv_system())
+ uv_system_init();
out:
preempt_enable();
}
@@ -1336,7 +1378,9 @@ int __cpu_disable(void)
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
+ lock_vector_lock();
remove_cpu_from_maps(cpu);
+ unlock_vector_lock();
fixup_irqs(cpu_online_map);
return 0;
}
@@ -1370,17 +1414,3 @@ void __cpu_die(unsigned int cpu)
BUG();
}
#endif
-
-/*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
-static int __init parse_maxcpus(char *arg)
-{
- extern unsigned int maxcpus;
-
- if (arg)
- maxcpus = simple_strtoul(arg, NULL, 0);
- return 0;
-}
-early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 99941b37eca..397e309839d 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -8,18 +8,21 @@
DEFINE_PER_CPU(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-/* Initialize the CPU's GDT. This is either the boot CPU doing itself
- (still using the master per-cpu area), or a CPU doing it for a
- secondary which will soon come up. */
+/*
+ * Initialize the CPU's GDT. This is either the boot CPU doing itself
+ * (still using the master per-cpu area), or a CPU doing it for a
+ * secondary which will soon come up.
+ */
__cpuinit void init_gdt(int cpu)
{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+ struct desc_struct gdt;
- pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
- __per_cpu_offset[cpu], 0xFFFFF,
+ pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
+ gdt.s = 1;
- gdt[GDT_ENTRY_PERCPU].s = 1;
+ write_gdt_entry(get_cpu_gdt_table(cpu),
+ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index d0fbb7712ab..8b8c0d6640f 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -17,6 +17,7 @@
#include <asm/genapic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
+#include <asm/irq_vectors.h>
#include <mach_apic.h>
@@ -783,7 +784,7 @@ static int __init uv_bau_init(void)
uv_init_blade(blade, node, cur_cpu);
cur_cpu += uv_blade_nr_possible_cpus(blade);
}
- set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
+ alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
return 0;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3f18d73f420..513caaca711 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
- restore_fpu_checking(&me->thread.xstate->fxsave);
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+ stts();
+ force_sig(SIGSEGV, me);
+ return;
+ }
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7603c055390..8f98e9de1b8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
/*
* Read TSC and the reference counters. Take care of SMI disturbance
*/
-static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *pm, u64 *hpet)
{
u64 t1, t2;
int i;
@@ -122,80 +122,216 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
return ULLONG_MAX;
}
-/**
- * native_calibrate_tsc - calibrate the tsc on boot
+/*
+ * Try to calibrate the TSC against the Programmable
+ * Interrupt Timer and return the frequency of the TSC
+ * in kHz.
+ *
+ * Return ULONG_MAX on failure to calibrate.
*/
-unsigned long native_calibrate_tsc(void)
+static unsigned long pit_calibrate_tsc(void)
{
- unsigned long flags;
- u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled();
- unsigned int tsc_khz_val = 0;
-
- local_irq_save(flags);
-
- tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ u64 tsc, t1, t2, delta;
+ unsigned long tscmin, tscmax;
+ int pitcnt;
+ /* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ /*
+ * Setup CTC channel 2* for mode 0, (interrupt on terminal
+ * count mode), binary count. Set the latch register to 50ms
+ * (LSB then MSB) to begin countdown.
+ */
outb(0xb0, 0x43);
outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- tr1 = get_cycles();
- while ((inb(0x61) & 0x20) == 0);
- tr2 = get_cycles();
- tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+ tsc = t1 = t2 = get_cycles();
- local_irq_restore(flags);
+ pitcnt = 0;
+ tscmax = 0;
+ tscmin = ULONG_MAX;
+ while ((inb(0x61) & 0x20) == 0) {
+ t2 = get_cycles();
+ delta = t2 - tsc;
+ tsc = t2;
+ if ((unsigned long) delta < tscmin)
+ tscmin = (unsigned int) delta;
+ if ((unsigned long) delta > tscmax)
+ tscmax = (unsigned int) delta;
+ pitcnt++;
+ }
/*
- * Preset the result with the raw and inaccurate PIT
- * calibration value
+ * Sanity checks:
+ *
+ * If we were not able to read the PIT more than 5000
+ * times, then we have been hit by a massive SMI
+ *
+ * If the maximum is 10 times larger than the minimum,
+ * then we got hit by an SMI as well.
*/
- delta = (tr2 - tr1);
+ if (pitcnt < 5000 || tscmax > 10 * tscmin)
+ return ULONG_MAX;
+
+ /* Calculate the PIT value */
+ delta = t2 - t1;
do_div(delta, 50);
- tsc_khz_val = delta;
+ return delta;
+}
+
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+ u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
+ unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
+ unsigned long flags;
+ int hpet = is_hpet_enabled(), i;
+
+ /*
+ * Run 5 calibration loops to get the lowest frequency value
+ * (the best estimate). We use two different calibration modes
+ * here:
+ *
+ * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
+ * load a timeout of 50ms. We read the time right after we
+ * started the timer and wait until the PIT count down reaches
+ * zero. In each wait loop iteration we read the TSC and check
+ * the delta to the previous read. We keep track of the min
+ * and max values of that delta. The delta is mostly defined
+ * by the IO time of the PIT access, so we can detect when a
+ * SMI/SMM disturbance happend between the two reads. If the
+ * maximum time is significantly larger than the minimum time,
+ * then we discard the result and have another try.
+ *
+ * 2) Reference counter. If available we use the HPET or the
+ * PMTIMER as a reference to check the sanity of that value.
+ * We use separate TSC readouts and check inside of the
+ * reference read for a SMI/SMM disturbance. We dicard
+ * disturbed values here as well. We do that around the PIT
+ * calibration delay loop as we have to wait for a certain
+ * amount of time anyway.
+ */
+ for (i = 0; i < 5; i++) {
+ unsigned long tsc_pit_khz;
+
+ /*
+ * Read the start value and the reference count of
+ * hpet/pmtimer when available. Then do the PIT
+ * calibration, which will take at least 50ms, and
+ * read the end value.
+ */
+ local_irq_save(flags);
+ tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ tsc_pit_khz = pit_calibrate_tsc();
+ tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+ local_irq_restore(flags);
+
+ /* Pick the lowest PIT TSC calibration so far */
+ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
+
+ /* hpet or pmtimer available ? */
+ if (!hpet && !pm1 && !pm2)
+ continue;
+
+ /* Check, whether the sampling was disturbed by an SMI */
+ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
+ continue;
+
+ tsc2 = (tsc2 - tsc1) * 1000000LL;
+
+ if (hpet) {
+ if (hpet2 < hpet1)
+ hpet2 += 0x100000000ULL;
+ hpet2 -= hpet1;
+ tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+ do_div(tsc1, 1000000);
+ } else {
+ if (pm2 < pm1)
+ pm2 += (u64)ACPI_PM_OVRRUN;
+ pm2 -= pm1;
+ tsc1 = pm2 * 1000000000LL;
+ do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ }
+
+ do_div(tsc2, tsc1);
+ tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+ }
+
+ /*
+ * Now check the results.
+ */
+ if (tsc_pit_min == ULONG_MAX) {
+ /* PIT gave no useful value */
+ printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+
+ /* We don't have an alternative source, disable TSC */
+ if (!hpet && !pm1 && !pm2) {
+ printk("TSC: No reference (HPET/PMTIMER) available\n");
+ return 0;
+ }
+
+ /* The alternative source failed as well, disable TSC */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
+ "failed due to SMI disturbance.\n");
+ return 0;
+ }
+
+ /* Use the alternative source */
+ printk(KERN_INFO "TSC: using %s reference calibration\n",
+ hpet ? "HPET" : "PMTIMER");
+
+ return tsc_ref_min;
+ }
- /* hpet or pmtimer available ? */
+ /* We don't have an alternative source, use the PIT calibration value */
if (!hpet && !pm1 && !pm2) {
- printk(KERN_INFO "TSC calibrated against PIT\n");
- goto out;
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
- /* Check, whether the sampling was disturbed by an SMI */
- if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
- printk(KERN_WARNING "TSC calibration disturbed by SMI, "
- "using PIT calibration result\n");
- goto out;
+ /* The alternative source failed, use the PIT calibration value */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
+ "to SMI disturbance. Using PIT calibration\n");
+ return tsc_pit_min;
}
- tsc2 = (tsc2 - tsc1) * 1000000LL;
-
- if (hpet) {
- printk(KERN_INFO "TSC calibrated against HPET\n");
- if (hpet2 < hpet1)
- hpet2 += 0x100000000ULL;
- hpet2 -= hpet1;
- tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
- do_div(tsc1, 1000000);
- } else {
- printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
- if (pm2 < pm1)
- pm2 += (u64)ACPI_PM_OVRRUN;
- pm2 -= pm1;
- tsc1 = pm2 * 1000000000LL;
- do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ /* Check the reference deviation */
+ delta = ((u64) tsc_pit_min) * 100;
+ do_div(delta, tsc_ref_min);
+
+ /*
+ * If both calibration results are inside a 5% window, the we
+ * use the lower frequency of those as it is probably the
+ * closest estimate.
+ */
+ if (delta >= 95 && delta <= 105) {
+ printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
+ hpet ? "HPET" : "PMTIMER");
+ printk(KERN_INFO "TSC: using %s calibration value\n",
+ tsc_pit_min <= tsc_ref_min ? "PIT" :
+ hpet ? "HPET" : "PMTIMER");
+ return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
}
- do_div(tsc2, tsc1);
- tsc_khz_val = tsc2;
+ printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+ hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-out:
- return tsc_khz_val;
+ /*
+ * The calibration values differ too much. In doubt, we use
+ * the PIT value as we know that there are PMTIMERs around
+ * running at double speed.
+ */
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
-
#ifdef CONFIG_X86_32
/* Only called from the Powernow K7 cpu freq driver */
int recalibrate_cpu_khz(void)
@@ -314,7 +450,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
mark_tsc_unstable("cpufreq changes");
}
- set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
+ set_cyc2ns_scale(tsc_khz, freq->cpu);
return 0;
}
@@ -325,6 +461,10 @@ static struct notifier_block time_cpufreq_notifier_block = {
static int __init cpufreq_tsc(void)
{
+ if (!cpu_has_tsc)
+ return 0;
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return 0;
cpufreq_register_notifier(&time_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
return 0;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0577825cf89..9ffb01c31c4 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void)
__raw_spin_unlock(&sync_lock);
}
}
- if (!(now-start)) {
- printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
+ WARN(!(now-start),
+ "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
now-start, end-start);
- WARN_ON(1);
- }
}
/*
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 41e01b145c4..594ef47f0a6 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early)
return 1;
}
-extern unsigned int __cpuinitdata maxcpus;
-
/*
* The Visual Workstation is Intel MP compliant in the hardware
* sense, but it doesn't have a BIOS(-configuration table).
@@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve)
ncpus = CO_CPU_MAX;
}
- if (ncpus > maxcpus)
- ncpus = maxcpus;
+ if (ncpus > setup_max_cpus)
+ ncpus = setup_max_cpus;
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922..6ca515d6db5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
#include <asm/timer.h>
#include <asm/vmi_time.h>
#include <asm/kmap_types.h>
+#include <asm/setup.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
if (vmi_ops.set_linear_mapping)
- vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
+ vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
}
/*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index cdb2363697d..af5bdad8460 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -209,3 +209,11 @@ SECTIONS
DWARF_DEBUG
}
+
+#ifdef CONFIG_KEXEC
+/* Link time checks */
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif