aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-08-19 03:34:07 +0200
committerIngo Molnar <mingo@elte.hu>2008-08-19 03:34:07 +0200
commit2879a927bb7a3cf91ae3906a5e59215f9c17dd75 (patch)
tree870bdd1bd530a3d5d2abd10539700446b2878188 /arch/x86/kernel
parent7e7b43892b87b6be259479ef4de14029dcb4012f (diff)
parent20211e4d344729f4d4c93da37a590fc1c3a1fd9b (diff)
Merge branch 'x86/oprofile' into oprofile
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/cstate.c3
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/amd_iommu.c36
-rw-r--r--arch/x86/kernel/amd_iommu_init.c28
-rw-r--r--arch/x86/kernel/apic_32.c22
-rw-r--r--arch/x86/kernel/apic_64.c7
-rw-r--r--arch/x86/kernel/cpu/bugs.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c10
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c125
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c12
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c8
-rw-r--r--arch/x86/kernel/efi_32.c4
-rw-r--r--arch/x86/kernel/genapic_64.c1
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c6
-rw-r--r--arch/x86/kernel/io_apic_64.c25
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/machine_kexec_32.c57
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/mfgpt_32.c52
-rw-r--r--arch/x86/kernel/microcode.c17
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c2
-rw-r--r--arch/x86/kernel/mpparse.c17
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c28
-rw-r--r--arch/x86/kernel/pci-calgary_64.c75
-rw-r--r--arch/x86/kernel/pci-dma.c157
-rw-r--r--arch/x86/kernel/pci-gart_64.c14
-rw-r--r--arch/x86/kernel/pci-nommu.c14
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/reboot.c11
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S178
-rw-r--r--arch/x86/kernel/setup.c37
-rw-r--r--arch/x86/kernel/setup_percpu.c21
-rw-r--r--arch/x86/kernel/signal_64.c11
-rw-r--r--arch/x86/kernel/smpboot.c65
-rw-r--r--arch/x86/kernel/smpcommon.c17
-rw-r--r--arch/x86/kernel/traps_64.c9
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/visws_quirks.c6
-rw-r--r--arch/x86/kernel/vmi_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
53 files changed, 672 insertions, 514 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d7129..bfd10fd211c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
@@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
struct acpi_mcfg_allocation *pci_mmcfg_config;
int pci_mmcfg_config_num;
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+ if (!strcmp(mcfg->header.oem_id, "SGI"))
+ acpi_mcfg_64bit_base_addr = TRUE;
+
+ return 0;
+}
+
int __init acpi_parse_mcfg(struct acpi_table_header *header)
{
struct acpi_table_mcfg *mcfg;
@@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header)
}
memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+ acpi_mcfg_oem_check(mcfg);
+
for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
+ if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+ !acpi_mcfg_64bit_base_addr) {
printk(KERN_ERR PREFIX
"MMCONFIG not in low 4GB of memory\n");
kfree(pci_mmcfg_config);
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 9220cf46aa1..c2502eb9aa8 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct cpuinfo_x86 *c = &cpu_data(cpu);
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(new_mask, cpu);
int retval;
unsigned int eax, ebx, ecx, edx;
unsigned int edx_part;
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
/* Make sure we are running on right CPU */
saved_mask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, new_mask);
+ retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (retval)
return -1;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fa2161d5003..426e5d91b63 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags;
/* address in low memory of the wakeup routine. */
static unsigned long acpi_realmode;
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
static char temp_stack[10240];
#endif
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void)
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
- header->pmode_cr4 = read_cr4();
+ header->pmode_cr4 = read_cr4_safe();
header->realmode_flags = acpi_realmode_flags;
header->real_magic = 0x12345678;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac8..de39e1f2ede 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EXIT_LOOP_COUNT 10000000
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -104,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret;
+ int ret, ready = 0;
+ unsigned status = 0;
struct iommu_cmd cmd;
- volatile u64 ready = 0;
- unsigned long ready_phys = virt_to_phys(&ready);
unsigned long i = 0;
memset(&cmd, 0, sizeof(cmd));
- cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- cmd.data[1] = upper_32_bits(ready_phys);
- cmd.data[2] = 1; /* value written to 'ready' */
+ cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
iommu->need_sync = 0;
@@ -125,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
- cpu_relax();
+ /* wait for the bit to become one */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
}
+ /* set bit back to zero */
+ status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+ writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
@@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
- cmd.data[2] = LOW_U32(address);
+ cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
@@ -185,7 +185,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
int s = 0;
- unsigned pages = to_pages(address, size);
+ unsigned pages = iommu_num_pages(address, size);
address &= PAGE_MASK;
@@ -557,8 +557,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = to_pages(iommu->exclusion_start,
- iommu->exclusion_length);
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
@@ -667,7 +667,7 @@ static int get_device_resources(struct device *dev,
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
/* device not translated by any IOMMU in the system? */
- if (_bdf >= amd_iommu_last_bdf) {
+ if (_bdf > amd_iommu_last_bdf) {
*iommu = NULL;
*domain = NULL;
*bdf = 0xffff;
@@ -767,7 +767,7 @@ static dma_addr_t __map_single(struct device *dev,
unsigned int pages;
int i;
- pages = to_pages(paddr, size);
+ pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +802,7 @@ static void __unmap_single(struct amd_iommu *iommu,
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
- pages = to_pages(dma_addr, size);
+ pages = iommu_num_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1085,7 +1085,7 @@ void prealloc_protection_domains(void)
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
- if (devid >= amd_iommu_last_bdf)
+ if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
if (domain_for_device(devid))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb13..a69cc0f5204 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
set_device_exclusion_range(m->devid, m);
break;
case ACPI_IVMD_TYPE_ALL:
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
set_device_exclusion_range(i, m);
break;
case ACPI_IVMD_TYPE_RANGE:
@@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
}
/*
+ * Init the device table to not allow DMA access for devices and
+ * suppress all page faults
+ */
+static void init_device_table(void)
+{
+ u16 devid;
+
+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+ set_dev_entry_bit(devid, DEV_ENTRY_VALID);
+ set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+ set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
+ }
+}
+
+/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
*/
@@ -931,10 +946,13 @@ int __init amd_iommu_init(void)
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
+ /* init the device table */
+ init_device_table();
+
/*
* let all alias entries point to itself
*/
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
amd_iommu_alias_table[i] = i;
/*
@@ -954,15 +972,15 @@ int __init amd_iommu_init(void)
if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
goto free;
- ret = amd_iommu_init_dma_ops();
+ ret = sysdev_class_register(&amd_iommu_sysdev_class);
if (ret)
goto free;
- ret = sysdev_class_register(&amd_iommu_sysdev_class);
+ ret = sysdev_register(&device_amd_iommu);
if (ret)
goto free;
- ret = sysdev_register(&device_amd_iommu);
+ ret = amd_iommu_init_dma_ops();
if (ret)
goto free;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 0059e7a8a9e..0ff576d026a 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1458,8 +1458,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
}
}
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
-
void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
@@ -1486,12 +1484,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
@@ -1724,15 +1716,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
{
- if (strcmp("debug", str) == 0)
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "debug") == 0)
apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
+ else if (strcmp(arg, "verbose") == 0)
apic_verbosity = APIC_VERBOSE;
- return 1;
+
+ return 0;
}
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index e571351f2a9..57744f4a75b 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,7 +90,6 @@ static unsigned long apic_phys;
unsigned long mp_lapic_addr;
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Get the LAPIC version
*/
@@ -1066,12 +1065,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
return;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
num_processors++;
cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e8..c8e315f1aa8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
*/
static void __init check_fpu(void)
{
+ s32 fdiv_bug;
+
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
+
+ boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596..efae3b22a0f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+ tristate "VIA C7 Enhanced PowerSaver"
select CPU_FREQ_TABLE
- depends on X86_32 && EXPERIMENTAL
+ depends on X86_32
help
This adds the CPUFreq driver for VIA C7 processors.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ff2fff56f0a..dd097b83583 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd)
static void drv_write(struct drv_cmd *cmd)
{
cpumask_t saved_mask = current->cpus_allowed;
- cpumask_of_cpu_ptr_declare(cpu_mask);
unsigned int i;
for_each_cpu_mask_nr(i, cmd->mask) {
- cpumask_of_cpu_ptr_next(cpu_mask, i);
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
do_drv_write(cmd);
}
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu)
} aperf_cur, mperf_cur;
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(cpu_mask, cpu);
unsigned int perf_percent;
unsigned int retval;
saved_mask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (get_cpu() != cpu) {
/* We were not able to run on requested processor */
put_cpu();
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu)
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- cpumask_of_cpu_ptr(cpu_mask, cpu);
struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
unsigned int freq;
unsigned int cached_freq;
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
}
cached_freq = data->freq_table[data->acpi_data->state].frequency;
- freq = extract_freq(get_cur_val(cpu_mask), data);
+ freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
if (freq != cached_freq) {
/*
* The dreaded BIOS frequency change behind our back.
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f56..e4a4bf870e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
* It is important that the frequencies
* are listed in ascending order here!
*/
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
{1000, 0x02, 0x18},
{2000, 0x02, 0x10},
{4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 53c7b693697..4e7271999a7 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
-
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
return data[pstate].frequency;
}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
wrmsr(MSR_FIDVID_CTL, lo, hi);
}
-
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
@@ -479,12 +476,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
static int check_supported_cpu(unsigned int cpu)
{
cpumask_t oldmask;
- cpumask_of_cpu_ptr(cpu_mask, cpu);
u32 eax, ebx, ecx, edx;
unsigned int rc = 0;
oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
@@ -741,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
{
- if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+ data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+ data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+ data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+ data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+ acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+ if (!acpi_perf_data)
+ return -ENODEV;
+
+ if (acpi_processor_preregister_performance(acpi_perf_data))
+ return -ENODEV;
+ else
+ preregister_valid = 1;
+ return 0;
}
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val;
+ int cpu = 0;
- if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+ data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+ if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
return -EIO;
}
/* verify the data contained in the ACPI structures */
- if (data->acpi_data.state_count <= 1) {
+ if (data->acpi_data->state_count <= 1) {
dprintk("No ACPI P-States\n");
goto err_out;
}
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
+ data->acpi_data->control_register.space_id,
+ data->acpi_data->status_register.space_id);
goto err_out;
}
/* fill in data->powernow_table */
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data.state_count + 1)), GFP_KERNEL);
+ * (data->acpi_data->state_count + 1)), GFP_KERNEL);
if (!powernow_table) {
dprintk("powernow_table memory alloc failure\n");
goto err_out;
@@ -791,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
+ powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data->state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
- data->numps = data->acpi_data.state_count;
+ data->numps = data->acpi_data->state_count;
if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -804,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ /* determine affinity, from ACPI if available */
+ if (preregister_valid) {
+ if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+ (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+ data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+ else
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ } else {
+ /* best guess from family if not */
+ if (cpu_family == CPU_HW_PSTATE)
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ else
+ data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+ }
+
return 0;
err_out_mem:
kfree(powernow_table);
err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data.state_count = 0;
+ data->acpi_data->state_count = 0;
return -ENODEV;
}
@@ -825,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 index;
- index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -844,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
powernow_table[i].index = index;
- powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+ powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
}
return 0;
}
@@ -853,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
{
int i;
int cntlofreq = 0;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+ fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
} else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ fid = data->acpi_data->states[i].control & FID_MASK;
+ vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -903,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
cntlofreq = i;
}
- if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
powernow_table[i].frequency,
- (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+ (unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -916,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
- if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ if (data->acpi_data->state_count)
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
}
#else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -1017,7 +1048,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
{
cpumask_t oldmask;
- cpumask_of_cpu_ptr(cpu_mask, pol->cpu);
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
u32 checkfid;
u32 checkvid;
@@ -1032,7 +1062,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpu_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1106,8 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
- cpumask_t oldmask;
- cpumask_of_cpu_ptr_declare(newmask);
+ cpumask_t oldmask = CPU_MASK_ALL;
int rc;
if (!cpu_online(pol->cpu))
@@ -1159,8 +1188,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
- cpumask_of_cpu_ptr_next(newmask, pol->cpu);
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1181,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed_ptr(current, &oldmask);
- if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = *newmask;
- else
- pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+ pol->cpus = data->starting_core_affinity;
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1248,7 +1273,6 @@ static unsigned int powernowk8_get (unsigned int cpu)
{
struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
- cpumask_of_cpu_ptr(newmask, cpu);
unsigned int khz = 0;
unsigned int first;
@@ -1258,7 +1282,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
if (!data)
return -EINVAL;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu) {
printk(KERN_ERR PFX
"limiting to CPU %d failed in powernowk8_get\n", cpu);
@@ -1308,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
}
if (supported_cpus == num_online_cpus()) {
+ powernow_k8_cpu_preinit_acpi();
printk(KERN_INFO PFX "Found %d %s "
"processors (%d cpu cores) (" VERSION ")\n",
num_online_nodes(),
@@ -1324,6 +1349,10 @@ static void __exit powernowk8_exit(void)
dprintk("exit\n");
cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+ free_percpu(acpi_perf_data);
+#endif
}
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d9..a62612cd4be 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance acpi_data;
+ struct acpi_processor_performance *acpi_data;
#endif
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
cpumask_t *available_cores;
+ cpumask_t starting_core_affinity;
};
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index ca2ac13b7af..15e13c01cc3 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
unsigned l, h;
unsigned clock_freq;
cpumask_t saved_mask;
- cpumask_of_cpu_ptr(new_mask, cpu);
saved_mask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, new_mask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (smp_processor_id() != cpu)
return 0;
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy,
* Best effort undo..
*/
- if (!cpus_empty(*covered_cpus)) {
- cpumask_of_cpu_ptr_declare(new_mask);
-
+ if (!cpus_empty(*covered_cpus))
for_each_cpu_mask_nr(j, *covered_cpus) {
- cpumask_of_cpu_ptr_next(new_mask, j);
- set_cpus_allowed_ptr(current, new_mask);
+ set_cpus_allowed_ptr(current,
+ &cpumask_of_cpu(j));
wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
}
- }
tmp = freqs.new;
freqs.new = freqs.old;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2f3728dc24f..191f7263c61 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
static unsigned int speedstep_get(unsigned int cpu)
{
- cpumask_of_cpu_ptr(newmask, cpu);
- return _speedstep_get(newmask);
+ return _speedstep_get(&cpumask_of_cpu(cpu));
}
/**
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 650d40f7912..6b0a10b002f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
unsigned long j;
int retval;
cpumask_t oldmask;
- cpumask_of_cpu_ptr(newmask, cpu);
if (num_cache_leaves == 0)
return -ENOENT;
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
return -ENOMEM;
oldmask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, newmask);
+ retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
if (retval)
goto out;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index de7439f82b9..05cc22dbd4f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz)
perfctr_msr = MSR_P4_IQ_PERFCTR1;
evntsel_msr = MSR_P4_CRU_ESCR0;
cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+ /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+ if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+ cccr_val = P4_CCCR_OVF_PMI0;
+ else
+ cccr_val = P4_CCCR_OVF_PMI1;
+ cccr_val |= P4_CCCR_ESCR_SELECT(4);
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 4b63c8e1f13..5cab48ee61a 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void)
* directory. If I have PAE, I just need to duplicate one entry in
* page directory.
*/
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
efi_bak_pg_dir_pointer[0].pgd =
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void)
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- cr4 = read_cr4();
+ cr4 = read_cr4_safe();
if (cr4 & X86_CR4_PAE) {
swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd21..eaff0bbb144 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
+EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888..2d7e307c777 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -222,7 +222,7 @@ static __init void map_low_mmrs(void)
enum map_type {map_wb, map_uc};
-static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
{
unsigned long bytes, paddr;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1b318e903bf..9bfc4d72fb2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f67e93441ca..a7010c3a377 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP
1:
#endif /* CONFIG_SMP */
jmp *(initial_code)
-.align 4
-ENTRY(initial_code)
- .long i386_start_kernel
/*
* We depend on ET to be correct. This checks for 287/387.
@@ -601,6 +598,11 @@ ignore_int:
#endif
iret
+.section .cpuinit.data,"wa"
+.align 4
+ENTRY(initial_code)
+ .long i386_start_kernel
+
.section .text
/*
* Real beginning of normal "text" segment
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad2b15a1334..59fd3b6b130 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -359,6 +359,7 @@ static int hpet_clocksource_register(void)
int __init hpet_enable(void)
{
unsigned long id;
+ int i;
if (!is_hpet_capable())
return 0;
@@ -369,6 +370,29 @@ int __init hpet_enable(void)
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
+
+ /*
+ * AMD SB700 based systems with spread spectrum enabled use a
+ * SMM based HPET emulation to provide proper frequency
+ * setting. The SMM code is initialized with the first HPET
+ * register access and takes some time to complete. During
+ * this time the config register reads 0xffffffff. We check
+ * for max. 1000 loops whether the config register reads a non
+ * 0xffffffff value to make sure that HPET is up and running
+ * before we go further. A counting loop is safe, as the HPET
+ * access takes thousands of CPU cycles. On non SB700 based
+ * machines this check is only done once and has no side
+ * effects.
+ */
+ for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
+ if (i == 1000) {
+ printk(KERN_WARNING
+ "HPET config register value = 0xFFFFFFFF. "
+ "Disabling HPET\n");
+ goto out_nohpet;
+ }
+ }
+
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c..09cddb57bec 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
int timer_through_8259 __initdata;
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
return vector;
}
-void setup_vector_irq(int cpu)
-{
-}
-
static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d170..61a83b70c18 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
+
static int __assign_irq_vector(int irq, cpumask_t mask)
{
/*
@@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq)
cpus_clear(cfg->domain);
}
-static void __setup_vector_irq(int cpu)
+void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
}
}
-void setup_vector_irq(int cpu)
-{
- spin_lock(&vector_lock);
- __setup_vector_irq(smp_processor_id());
- spin_unlock(&vector_lock);
-}
-
-
static struct irq_chip ioapic_chip;
static void ioapic_register_intr(int irq, unsigned long trigger)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 3fee2aa50f3..b68e21f06f4 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
- cpumask_of_cpu_ptr_declare(mask);
-
preempt_disable();
load_LDT(pc);
- cpumask_of_cpu_ptr_next(mask, smp_processor_id());
- if (!cpus_equal(current->mm->cpu_vm_mask, *mask))
+ if (!cpus_equal(current->mm->cpu_vm_mask,
+ cpumask_of_cpu(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55a..0732adba05c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -22,6 +23,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/cacheflush.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -77,7 +79,7 @@ static void load_segments(void)
/*
* A architecture hook called to validate the
* proposed image and prepare the control pages
- * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
* have been allocated, but the segments have yet
* been copied into the kernel.
*
@@ -85,10 +87,12 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
- * Currently nothing.
+ * Make control page executable.
*/
int machine_kexec_prepare(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_x(image->control_code_page, 1);
return 0;
}
@@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_nx(image->control_code_page, 1);
}
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
+ asmlinkage unsigned long
+ (*relocate_kernel_ptr)(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
- tracer_disable();
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /* We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page);
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
+ relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
#ifdef CONFIG_X86_PAE
@@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
/* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
@@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image)
set_idt(phys_to_virt(0),0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start, cpu_has_pae);
+ image->start = relocate_kernel_ptr((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start, cpu_has_pae,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a..c43caa3a91f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 07c0f828f48..3b599518c32 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -33,6 +33,8 @@
#include <linux/module.h>
#include <asm/geode.h>
+#define MFGPT_DEFAULT_IRQ 7
+
static struct mfgpt_timer_t {
unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
}
EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
-int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
+int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
{
- u32 val, dummy;
- int offset;
+ u32 zsel, lpc, dummy;
+ int shift;
if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
return -EIO;
- if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ /*
+ * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+ * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+ * 2, and we mustn't use nor change it.
+ * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+ * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+ * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+ */
+ rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+ shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
+ if (((zsel >> shift) & 0xF) == 2)
return -EIO;
- rdmsr(MSR_PIC_ZSEL_LOW, val, dummy);
+ /* Choose IRQ: if none supplied, keep IRQ already set or use default */
+ if (!*irq)
+ *irq = (zsel >> shift) & 0xF;
+ if (!*irq)
+ *irq = MFGPT_DEFAULT_IRQ;
- offset = (timer % 4) * 4;
-
- val &= ~((0xF << offset) | (0xF << (offset + 16)));
+ /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+ if (*irq < 1 || *irq == 2 || *irq > 15)
+ return -EIO;
+ rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+ if (lpc & (1 << *irq))
+ return -EIO;
+ /* All chosen and checked - go for it */
+ if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+ return -EIO;
if (enable) {
- val |= (irq & 0x0F) << (offset);
- val |= (irq & 0x0F) << (offset + 16);
+ zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+ wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
}
- wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
return 0;
}
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
static u16 mfgpt_event_clock;
-static int irq = 7;
+static int irq;
static int __init mfgpt_setup(char *str)
{
get_option(&str, &irq);
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void)
mfgpt_event_clock = timer;
/* Set up the IRQ on the MFGPT side */
- if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) {
+ if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
return -EIO;
}
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
- "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
+ "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
+ timer, irq);
clockevents_register_device(&mfgpt_clockevent);
return 0;
err:
- geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq);
+ geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
printk(KERN_ERR
"mfgpt-timer: Unable to set up the MFGPT clock source\n");
return -EIO;
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 6994c751590..652fa5c38eb 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -388,7 +388,6 @@ static int do_microcode_update (void)
void *new_mc = NULL;
int cpu;
cpumask_t old;
- cpumask_of_cpu_ptr_declare(newmask);
old = current->cpus_allowed;
@@ -405,8 +404,7 @@ static int do_microcode_update (void)
if (!uci->valid)
continue;
- cpumask_of_cpu_ptr_next(newmask, cpu);
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = get_maching_microcode(new_mc, cpu);
if (error < 0)
goto out;
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu)
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
unsigned int val[2];
int err = 0;
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu)
return 0;
old = current->cpus_allowed;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* Check if the microcode we have in memory matches the CPU */
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu)
static void microcode_init_cpu(int cpu, int resume)
{
cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
old = current->cpus_allowed;
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev,
if (end == buf)
return -EINVAL;
if (val == 1) {
- cpumask_t old;
- cpumask_of_cpu_ptr(newmask, cpu);
-
- old = current->cpus_allowed;
+ cpumask_t old = current->cpus_allowed;
get_online_cpus();
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid)
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index fdfdc550b36..efc2f361fe8 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
{}
};
-void __init check_enable_amd_mmconf_dmi(void)
+void __cpuinit check_enable_amd_mmconf_dmi(void)
{
dmi_check_system(mmconf_dmi_table);
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed..b3fb430725c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_config_processor *m)
{
int apicid;
char *bootup_cpu = "";
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
- printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
{
- printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
}
-static void construct_ioapic_table(int mpc_default_type)
+static void __init construct_ioapic_table(int mpc_default_type)
{
struct mpc_config_ioapic ioapic;
struct mpc_config_bus bus;
@@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type)
construct_default_ioirq_mptable(mpc_default_type);
}
#else
-static inline void construct_ioapic_table(int mpc_default_type) { }
+static inline void __init construct_ioapic_table(int mpc_default_type) { }
#endif
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
+ bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9fd80955244..e4393808688 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file)
ret = -EIO; /* MSR not supported */
out:
unlock_kernel();
- return 0;
+ return ret;
}
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ac6d51222e7..abb78a2cc4a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+ printk(KERN_CONT "\n");
+
+ printk(KERN_WARNING
+ "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+ cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+ printk(KERN_WARNING
+ "Please report this to bugzilla.kernel.org,\n");
+ printk(KERN_WARNING
+ "and attach the output of the 'dmesg' command.\n");
+
+ per_cpu(wd_enabled, cpu) = 0;
+ atomic_dec(&nmi_active);
+}
+
int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void)
for_each_online_cpu(cpu) {
if (!per_cpu(wd_enabled, cpu))
continue;
- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- printk(KERN_WARNING "WARNING: CPU#%d: NMI "
- "appears to be stuck (%d->%d)!\n",
- cpu,
- prev_nmi_count[cpu],
- get_nmi_count(cpu));
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
- }
+ if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+ report_broken_nmi(cpu, prev_nmi_count);
}
endflag = 1;
if (!atomic_read(&nmi_active)) {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 19e7fc7c2c4..218d783ed7a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
+
#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
@@ -413,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev,
}
}
-static int calgary_nontranslate_map_sg(struct device* dev,
- struct scatterlist *sg, int nelems, int direction)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nelems, i) {
- struct page *p = sg_page(s);
-
- BUG_ON(!p);
- s->dma_address = virt_to_bus(sg_virt(s));
- s->dma_length = s->length;
- }
- return nelems;
-}
-
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
@@ -439,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
unsigned long entry;
int i;
- if (!translation_enabled(tbl))
- return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
-
for_each_sg(sg, s, nelems, i) {
BUG_ON(!sg_page(s));
@@ -477,7 +459,6 @@ error:
static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int direction)
{
- dma_addr_t dma_handle = bad_dma_address;
void *vaddr = phys_to_virt(paddr);
unsigned long uaddr;
unsigned int npages;
@@ -486,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
- if (translation_enabled(tbl))
- dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
- else
- dma_handle = virt_to_bus(vaddr);
-
- return dma_handle;
+ return iommu_alloc(dev, tbl, vaddr, npages, direction);
}
static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
@@ -500,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
- if (!translation_enabled(tbl))
- return;
-
npages = num_dma_pages(dma_handle, size);
iommu_free(tbl, dma_handle, npages);
}
@@ -525,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
goto error;
memset(ret, 0, size);
- if (translation_enabled(tbl)) {
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == bad_dma_address)
- goto free;
-
- *dma_handle = mapping;
- } else /* non translated slot */
- *dma_handle = virt_to_bus(ret);
-
+ /* set up tces to cover the allocated range */
+ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
+ if (mapping == bad_dma_address)
+ goto free;
+ *dma_handle = mapping;
return ret;
-
free:
free_pages((unsigned long)ret, get_order(size));
ret = NULL;
@@ -544,7 +511,7 @@ error:
return ret;
}
-static const struct dma_mapping_ops calgary_dma_ops = {
+static struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
@@ -1241,6 +1208,16 @@ static int __init calgary_init(void)
goto error;
} while (1);
+ dev = NULL;
+ for_each_pci_dev(dev) {
+ struct iommu_table *tbl;
+
+ tbl = find_iommu_table(&dev->dev);
+
+ if (translation_enabled(tbl))
+ dev->dev.archdata.dma_ops = &calgary_dma_ops;
+ }
+
return ret;
error:
@@ -1262,6 +1239,7 @@ error:
calgary_disable_translation(dev);
calgary_free_bus(dev);
pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
+ dev->dev.archdata.dma_ops = NULL;
} while (1);
return ret;
@@ -1372,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
* Function for kdump case. Get the tce tables from first kernel
* by reading the contents of the base adress register of calgary iommu
*/
-static void get_tce_space_from_tar()
+static void __init get_tce_space_from_tar(void)
{
int bus;
void __iomem *target;
@@ -1503,6 +1481,10 @@ void __init detect_calgary(void)
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
"CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
debugging ? "enabled" : "disabled");
+
+ /* swiotlb for devices that aren't behind the Calgary. */
+ if (max_pfn > MAX_DMA32_PFN)
+ swiotlb = 1;
}
return;
@@ -1519,7 +1501,7 @@ int __init calgary_iommu_init(void)
{
int ret;
- if (no_iommu || swiotlb)
+ if (no_iommu || (swiotlb && !calgary_detected))
return -ENODEV;
if (!calgary_detected)
@@ -1532,15 +1514,14 @@ int __init calgary_iommu_init(void)
if (ret) {
printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
"falling back to no_iommu\n", ret);
- if (max_pfn > MAX_DMA32_PFN)
- printk(KERN_ERR "WARNING more than 4GB of memory, "
- "32bit PCI may malfunction.\n");
return ret;
}
force_iommu = 1;
bad_dma_address = 0x0;
- dma_ops = &calgary_dma_ops;
+ /* dma_ops is set to swiotlb or nommu */
+ if (!dma_ops)
+ dma_ops = &nommu_dma_ops;
return 0;
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index cbecb05551b..87d4d6964ec 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
static int forbid_dac __read_mostly;
-const struct dma_mapping_ops *dma_ops;
+struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
pci_swiotlb_init();
}
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+ return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
#endif
/*
@@ -192,126 +200,10 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
-#ifdef CONFIG_X86_32
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base = NULL;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem);
- out:
- if (mem_base)
- iounmap(mem_base);
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if (!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pos, err;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
-
- pages >>= PAGE_SHIFT;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
- dma_addr_t *dma_handle, void **ret)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- *ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(*ret, 0, size);
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- *ret = NULL;
- }
- return (mem != NULL);
-}
-
-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- if (mem && vaddr >= mem->virt_base && vaddr <
- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- return 1;
- }
- return 0;
-}
-#else
-#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
-#define dma_release_coherent(dev, order, vaddr) (0)
-#endif /* CONFIG_X86_32 */
-
int dma_supported(struct device *dev, u64 mask)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
dev_info(dev, "PCI: Disallowing DAC for device\n");
@@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask)
}
#endif
- if (dma_ops->dma_supported)
- return dma_ops->dma_supported(dev, mask);
+ if (ops->dma_supported)
+ return ops->dma_supported(dev, mask);
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
@@ -367,6 +259,7 @@ void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
void *memory = NULL;
struct page *page;
unsigned long dma_mask = 0;
@@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
return memory;
if (!dev) {
@@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* Let low level make its own zone decisions */
gfp &= ~(GFP_DMA32|GFP_DMA);
- if (dma_ops->alloc_coherent)
- return dma_ops->alloc_coherent(dev, size,
+ if (ops->alloc_coherent)
+ return ops->alloc_coherent(dev, size,
dma_handle, gfp);
return NULL;
}
@@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
}
- if (dma_ops->alloc_coherent) {
+ if (ops->alloc_coherent) {
free_pages((unsigned long)memory, get_order(size));
gfp &= ~(GFP_DMA|GFP_DMA32);
- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
+ return ops->alloc_coherent(dev, size, dma_handle, gfp);
}
- if (dma_ops->map_simple) {
- *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
+ if (ops->map_simple) {
+ *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
size,
PCI_DMA_BIDIRECTIONAL);
if (*dma_handle != bad_dma_address)
@@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t bus)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
int order = get_order(size);
WARN_ON(irqs_disabled()); /* for portability */
- if (dma_release_coherent(dev, order, vaddr))
+ if (dma_release_from_coherent(dev, order, vaddr))
return;
- if (dma_ops->unmap_single)
- dma_ops->unmap_single(dev, bus, size, 0);
+ if (ops->unmap_single)
+ ops->unmap_single(dev, bus, size, 0);
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index df5f142657d..49285f8fd4d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
- unsigned long npages = to_pages(phys_mem, size);
+ unsigned long npages = iommu_num_pages(phys_mem, size);
unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
+ npages = iommu_num_pages(dma_addr, size);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
CLEAR_LEAK(iommu_page + i);
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
}
addr = phys_addr;
- pages = to_pages(s->offset, s->length);
+ pages = iommu_num_pages(s->offset, s->length);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
seg_size += s->length;
need = nextneed;
- pages += to_pages(s->offset, s->length);
+ pages += iommu_num_pages(s->offset, s->length);
ps = s;
}
if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
@@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amd64_init(void);
-static const struct dma_mapping_ops gart_dma_ops = {
- .mapping_error = NULL,
+static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
.map_simple = gart_map_simple,
.unmap_single = gart_unmap_single,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff..3f91f71cdc3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}
-/* Make sure we keep the same behaviour */
-static int nommu_mapping_error(dma_addr_t dma_addr)
-{
-#ifdef CONFIG_X86_32
- return 0;
-#else
- return (dma_addr == bad_dma_address);
-#endif
-}
-
-
-const struct dma_mapping_ops nommu_dma_ops = {
+struct dma_mapping_ops nommu_dma_ops = {
.map_single = nommu_map_single,
.map_sg = nommu_map_sg,
- .mapping_error = nommu_mapping_error,
.is_phys = 1,
};
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c2..c4ce0332759 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
}
-const struct dma_mapping_ops swiotlb_dma_ops = {
+struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53bc653ed5c..3b7a1ddcc0b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -95,7 +95,6 @@ static inline void play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
- wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
@@ -104,8 +103,8 @@ static inline void play_dead(void)
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3fb62a7d9a1..71553b664e2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state);
static inline void play_dead(void)
{
idle_task_exit();
- wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
local_irq_disable();
- while (1)
- halt();
+ /* mask all interrupts, flush any and all caches, and halt */
+ wbinvd_halt();
}
#else
static inline void play_dead(void)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 06a9f643817..724adfc63cb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -414,25 +414,20 @@ void native_machine_shutdown(void)
/* The boot cpu is always logical cpu 0 */
int reboot_cpu_id = 0;
- cpumask_of_cpu_ptr(newmask, reboot_cpu_id);
#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
- cpu_online(reboot_cpu)) {
+ cpu_online(reboot_cpu))
reboot_cpu_id = reboot_cpu;
- cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
- }
#endif
/* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_online(reboot_cpu_id)) {
+ if (!cpu_online(reboot_cpu_id))
reboot_cpu_id = smp_processor_id();
- cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id);
- }
/* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, newmask);
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470..6f50664b2ba 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,45 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PAE_PGD_ATTR (_PAGE_PRESENT)
+/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define ESP DATA(0x0)
+#define CR0 DATA(0x4)
+#define CR3 DATA(0x8)
+#define CR4 DATA(0xc)
+
+/* other data */
+#define CP_VA_CONTROL_PAGE DATA(0x10)
+#define CP_PA_PGD DATA(0x14)
+#define CP_PA_SWAP_PAGE DATA(0x18)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
+
.text
.align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
- movl 8(%esp), %ebp /* list of pages */
+ /* Save the CPU context, used for jumping back */
+
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushf
+
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %esp, ESP(%edi)
+ movl %cr0, %eax
+ movl %eax, CR0(%edi)
+ movl %cr3, %eax
+ movl %eax, CR3(%edi)
+ movl %cr4, %eax
+ movl %eax, CR4(%edi)
#ifdef CONFIG_X86_PAE
/* map the control page at its virtual address */
@@ -138,15 +172,25 @@ relocate_kernel:
relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
- movl 8(%esp), %ebp /* list of pages */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
+ movl 20+4(%esp), %ebx /* page_list */
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl 20+12(%esp), %edx /* start address */
+ movl 20+16(%esp), %ecx /* cpu_has_pae */
+ movl 20+20(%esp), %esi /* preserve_context */
/* zero out flags, and disable interrupts */
pushl $0
popfl
+ /* save some information for jumping back */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %edi, CP_VA_CONTROL_PAGE(%edi)
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, CP_PA_PGD(%edi)
+ movl PTR(PA_SWAP_PAGE)(%ebp), %eax
+ movl %eax, CP_PA_SWAP_PAGE(%edi)
+ movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
+
/* get physical address of control page now */
/* this is impossible after page table switch */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +241,90 @@ identity_mapped:
xorl %eax, %eax
movl %eax, %cr3
+ movl CP_PA_SWAP_PAGE(%edi), %eax
+ pushl %eax
+ pushl %ebx
+ call swap_pages
+ addl $8, %esp
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ testl %esi, %esi
+ jnz 1f
+ xorl %edi, %edi
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
+ ret
+1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+ addl $PAGE_SIZE, %esp
+2:
+ call *%edx
+
+ /* get the re-entry point of the peer system */
+ movl 0(%esp), %ebp
+ call 1f
+1:
+ popl %ebx
+ subl $(1b - relocate_kernel), %ebx
+ movl CP_VA_CONTROL_PAGE(%ebx), %edi
+ lea PAGE_SIZE(%ebx), %esp
+ movl CP_PA_SWAP_PAGE(%ebx), %eax
+ movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
+ pushl %eax
+ pushl %edx
+ call swap_pages
+ addl $8, %esp
+ movl CP_PA_PGD(%ebx), %eax
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $(1<<31), %eax
+ movl %eax, %cr0
+ lea PAGE_SIZE(%edi), %esp
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
+ ret
+
+virtual_mapped:
+ movl CR4(%edi), %eax
+ movl %eax, %cr4
+ movl CR3(%edi), %eax
+ movl %eax, %cr3
+ movl CR0(%edi), %eax
+ movl %eax, %cr0
+ movl ESP(%edi), %esp
+ movl %ebp, %eax
+
+ popf
+ popl %ebp
+ popl %edi
+ popl %esi
+ popl %ebx
+ ret
+
/* Do the copies */
- movl %ebx, %ecx
+swap_pages:
+ movl 8(%esp), %edx
+ movl 4(%esp), %ecx
+ pushl %ebp
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ movl %ecx, %ebx
jmp 1f
0: /* top, read another word from the indirection page */
@@ -226,27 +352,31 @@ identity_mapped:
movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi
+ movl %edi, %eax
+ movl %esi, %ebp
+
+ movl %edx, %edi
movl $1024, %ecx
rep ; movsl
- jmp 0b
-
-3:
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB, it's handy, and not processor dependent.
- */
- xorl %eax, %eax
- movl %eax, %cr3
+ movl %ebp, %edi
+ movl %eax, %esi
+ movl $1024, %ecx
+ rep ; movsl
- /* set all of the registers to known values */
- /* leave %esp alone */
+ movl %eax, %edi
+ movl %edx, %esi
+ movl $1024, %ecx
+ rep ; movsl
- xorl %eax, %eax
- xorl %ebx, %ebx
- xorl %ecx, %ecx
- xorl %edx, %edx
- xorl %esi, %esi
- xorl %edi, %edi
- xorl %ebp, %ebp
+ lea PAGE_SIZE(%ebp), %esi
+ jmp 0b
+3:
+ popl %esi
+ popl %edi
+ popl %ebx
+ popl %ebp
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf..a4656adab53 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void)
* @size: Size of the crashkernel memory to reserve.
* Returns the base address on success, and -1ULL on failure.
*/
-unsigned long long find_and_reserve_crashkernel(unsigned long long size)
+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
{
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long start = 0LL;
@@ -604,6 +604,14 @@ void __init setup_arch(char **cmdline_p)
early_cpu_init();
early_ioremap_init();
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+ /*
+ * Must be before kernel pagetables are setup
+ * or fixmap area is touched.
+ */
+ vmi_init();
+#endif
+
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
edid_info = boot_params.edid_info;
@@ -788,10 +796,6 @@ void __init setup_arch(char **cmdline_p)
initmem_init(0, max_pfn);
-#ifdef CONFIG_X86_64
- dma32_reserve_bootmem();
-#endif
-
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -806,20 +810,21 @@ void __init setup_arch(char **cmdline_p)
#endif
reserve_crashkernel();
+#ifdef CONFIG_X86_64
+ /*
+ * dma32_reserve_bootmem() allocates bootmem which may conflict
+ * with the crashkernel command line, so do that after
+ * reserve_crashkernel()
+ */
+ dma32_reserve_bootmem();
+#endif
+
reserve_ibft_region();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be after max_low_pfn is determined, and before kernel
- * pagetables are setup.
- */
- vmi_init();
-#endif
-
paravirt_pagetable_setup_start(swapper_pg_dir);
paging_init();
paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -856,12 +861,6 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
- if (def_to_bigsmp)
- printk(KERN_WARNING "More than 8 CPUs detected and "
- "CONFIG_X86_PC cannot handle it.\nUse "
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-#endif
kvm_guest_init();
e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c00..76e305e064f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
#endif
}
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-cpumask_t *cpumask_of_cpu_map __read_mostly;
-EXPORT_SYMBOL(cpumask_of_cpu_map);
-
-/* requires nr_cpu_ids to be initialized */
-static void __init setup_cpumask_of_cpu(void)
-{
- int i;
-
- /* alloc_bootmem zeroes memory */
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
- for (i = 0; i < nr_cpu_ids; i++)
- cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
#ifdef CONFIG_X86_32
/*
* Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
-
- /* Setup cpumask_of_cpu map */
- setup_cpumask_of_cpu();
}
#endif
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b45ef8ddd65..ca316b5b742 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf)
clts();
task_thread_info(current)->status |= TS_USEDFPU;
}
- return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+ err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+ if (unlikely(err)) {
+ /*
+ * Encountered an error while doing the restore from the
+ * user buffer, clear the fpu state.
+ */
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+ return err;
}
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4..e139e617f42 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
+ *
+ * We need to hold vector_lock so there the set of online cpus
+ * does not change while we are assigning vectors to cpus. Holding
+ * this lock ensures we don't half assign or remove an irq from a cpu.
*/
ipi_call_lock_irq();
-#ifdef CONFIG_X86_IO_APIC
- setup_vector_irq(smp_processor_id());
-#endif
+ lock_vector_lock();
+ __setup_vector_irq(smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_vector_lock();
ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -752,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
}
#ifdef CONFIG_X86_64
+
+/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
+static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
+{
+ if (!after_bootmem)
+ free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
+}
+
/*
* Allocate node local memory for the AP pda.
*
@@ -780,8 +792,7 @@ int __cpuinit get_local_pda(int cpu)
if (oldpda) {
memcpy(newpda, oldpda, size);
- if (!after_bootmem)
- free_bootmem((unsigned long)oldpda, size);
+ free_bootmem_pda(oldpda);
}
newpda->in_bootmem = 0;
@@ -1044,6 +1055,34 @@ static __init void disable_smp(void)
static int __init smp_sanity_check(unsigned max_cpus)
{
preempt_disable();
+
+#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+ if (def_to_bigsmp && nr_cpu_ids > 8) {
+ unsigned int cpu;
+ unsigned nr;
+
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+
+ nr = 0;
+ for_each_present_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_present_map);
+ nr++;
+ }
+
+ nr = 0;
+ for_each_possible_cpu(cpu) {
+ if (nr >= 8)
+ cpu_clear(cpu, cpu_possible_map);
+ nr++;
+ }
+
+ nr_cpu_ids = 8;
+ }
+#endif
+
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
"by the BIOS.\n", hard_smp_processor_id());
@@ -1336,7 +1375,9 @@ int __cpu_disable(void)
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
+ lock_vector_lock();
remove_cpu_from_maps(cpu);
+ unlock_vector_lock();
fixup_irqs(cpu_online_map);
return 0;
}
@@ -1370,17 +1411,3 @@ void __cpu_die(unsigned int cpu)
BUG();
}
#endif
-
-/*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
-static int __init parse_maxcpus(char *arg)
-{
- extern unsigned int maxcpus;
-
- if (arg)
- maxcpus = simple_strtoul(arg, NULL, 0);
- return 0;
-}
-early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 99941b37eca..397e309839d 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -8,18 +8,21 @@
DEFINE_PER_CPU(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-/* Initialize the CPU's GDT. This is either the boot CPU doing itself
- (still using the master per-cpu area), or a CPU doing it for a
- secondary which will soon come up. */
+/*
+ * Initialize the CPU's GDT. This is either the boot CPU doing itself
+ * (still using the master per-cpu area), or a CPU doing it for a
+ * secondary which will soon come up.
+ */
__cpuinit void init_gdt(int cpu)
{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+ struct desc_struct gdt;
- pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
- __per_cpu_offset[cpu], 0xFFFFF,
+ pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
+ gdt.s = 1;
- gdt[GDT_ENTRY_PERCPU].s = 1;
+ write_gdt_entry(get_cpu_gdt_table(cpu),
+ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3f18d73f420..513caaca711 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
- restore_fpu_checking(&me->thread.xstate->fxsave);
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+ stts();
+ force_sig(SIGSEGV, me);
+ return;
+ }
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7603c055390..46af7167673 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
/*
* Read TSC and the reference counters. Take care of SMI disturbance
*/
-static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *pm, u64 *hpet)
{
u64 t1, t2;
int i;
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 41e01b145c4..594ef47f0a6 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early)
return 1;
}
-extern unsigned int __cpuinitdata maxcpus;
-
/*
* The Visual Workstation is Intel MP compliant in the hardware
* sense, but it doesn't have a BIOS(-configuration table).
@@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve)
ncpus = CO_CPU_MAX;
}
- if (ncpus > maxcpus)
- ncpus = maxcpus;
+ if (ncpus > setup_max_cpus)
+ ncpus = setup_max_cpus;
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922..6ca515d6db5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
#include <asm/timer.h>
#include <asm/vmi_time.h>
#include <asm/kmap_types.h>
+#include <asm/setup.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
if (vmi_ops.set_linear_mapping)
- vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
+ vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
}
/*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index cdb2363697d..af5bdad8460 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -209,3 +209,11 @@ SECTIONS
DWARF_DEBUG
}
+
+#ifdef CONFIG_KEXEC
+/* Link time checks */
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif