aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-08-14 14:58:01 +0200
committerIngo Molnar <mingo@elte.hu>2008-08-14 14:58:01 +0200
commit51ca3c679194e7435c25b8e77b0a73c597e41ae9 (patch)
treea681dca369607ab0f371d5246b0f75140b860a8a /arch/x86
parentb55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 (diff)
parentb635acec48bcaa9183fcbf4e3955616b0d4119b5 (diff)
Merge branch 'linus' into x86/core
Conflicts: arch/x86/kernel/genapic_64.c include/asm-x86/kvm_host.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/apic_32.c14
-rw-r--r--arch/x86/kernel/cpu/bugs.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c112
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h3
-rw-r--r--arch/x86/kernel/io_apic_32.c6
-rw-r--r--arch/x86/kernel/io_apic_64.c25
-rw-r--r--arch/x86/kernel/mpparse.c11
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/setup.c35
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/vmi_32.c3
-rw-r--r--arch/x86/kvm/mmu.c100
-rw-r--r--arch/x86/kvm/paging_tmpl.h12
-rw-r--r--arch/x86/kvm/x86.c24
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S3
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/pgtable.c3
22 files changed, 296 insertions, 105 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 608a12ff483..53ab3687873 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,7 +22,6 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_IOREMAP_PROT
- select HAVE_GET_USER_PAGES_FAST
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_KRETPROBES
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 9e341c9d941..84318edad8f 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1705,15 +1705,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
{
- if (strcmp("debug", str) == 0)
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "debug") == 0)
apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
+ else if (strcmp(arg, "verbose") == 0)
apic_verbosity = APIC_VERBOSE;
- return 1;
+
+ return 0;
}
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e8..c8e315f1aa8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
*/
static void __init check_fpu(void)
{
+ s32 fdiv_bug;
+
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
+
+ boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596..efae3b22a0f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+ tristate "VIA C7 Enhanced PowerSaver"
select CPU_FREQ_TABLE
- depends on X86_32 && EXPERIMENTAL
+ depends on X86_32
help
This adds the CPUFreq driver for VIA C7 processors.
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f56..e4a4bf870e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
* It is important that the frequencies
* are listed in ascending order here!
*/
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
{1000, 0x02, 0x18},
{2000, 0x02, 0x10},
{4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c45ca6d4dce..4e7271999a7 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
-
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
return data[pstate].frequency;
}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
wrmsr(MSR_FIDVID_CTL, lo, hi);
}
-
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
@@ -740,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
{
- if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+ data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+ data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+ data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+ data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+ acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+ if (!acpi_perf_data)
+ return -ENODEV;
+
+ if (acpi_processor_preregister_performance(acpi_perf_data))
+ return -ENODEV;
+ else
+ preregister_valid = 1;
+ return 0;
}
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val;
+ int cpu = 0;
- if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+ data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+ if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
return -EIO;
}
/* verify the data contained in the ACPI structures */
- if (data->acpi_data.state_count <= 1) {
+ if (data->acpi_data->state_count <= 1) {
dprintk("No ACPI P-States\n");
goto err_out;
}
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
+ data->acpi_data->control_register.space_id,
+ data->acpi_data->status_register.space_id);
goto err_out;
}
/* fill in data->powernow_table */
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data.state_count + 1)), GFP_KERNEL);
+ * (data->acpi_data->state_count + 1)), GFP_KERNEL);
if (!powernow_table) {
dprintk("powernow_table memory alloc failure\n");
goto err_out;
@@ -790,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
+ powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data->state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
- data->numps = data->acpi_data.state_count;
+ data->numps = data->acpi_data->state_count;
if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -803,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ /* determine affinity, from ACPI if available */
+ if (preregister_valid) {
+ if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+ (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+ data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+ else
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ } else {
+ /* best guess from family if not */
+ if (cpu_family == CPU_HW_PSTATE)
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ else
+ data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+ }
+
return 0;
err_out_mem:
kfree(powernow_table);
err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data.state_count = 0;
+ data->acpi_data->state_count = 0;
return -ENODEV;
}
@@ -824,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 index;
- index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -843,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
powernow_table[i].index = index;
- powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+ powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
}
return 0;
}
@@ -852,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
{
int i;
int cntlofreq = 0;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+ fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
} else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ fid = data->acpi_data->states[i].control & FID_MASK;
+ vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -902,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
cntlofreq = i;
}
- if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
powernow_table[i].frequency,
- (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+ (unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -915,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
- if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ if (data->acpi_data->state_count)
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
}
#else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -1104,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
- cpumask_t oldmask;
+ cpumask_t oldmask = CPU_MASK_ALL;
int rc;
if (!cpu_online(pol->cpu))
@@ -1177,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed_ptr(current, &oldmask);
- if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = cpumask_of_cpu(pol->cpu);
- else
- pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+ pol->cpus = data->starting_core_affinity;
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1303,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
}
if (supported_cpus == num_online_cpus()) {
+ powernow_k8_cpu_preinit_acpi();
printk(KERN_INFO PFX "Found %d %s "
"processors (%d cpu cores) (" VERSION ")\n",
num_online_nodes(),
@@ -1319,6 +1349,10 @@ static void __exit powernowk8_exit(void)
dprintk("exit\n");
cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+ free_percpu(acpi_perf_data);
+#endif
}
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d9..a62612cd4be 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance acpi_data;
+ struct acpi_processor_performance *acpi_data;
#endif
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
cpumask_t *available_cores;
+ cpumask_t starting_core_affinity;
};
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 72ba06314c7..26ea3ea3fb3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -58,7 +58,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
int timer_through_8259 __initdata;
@@ -1210,10 +1210,6 @@ static int assign_irq_vector(int irq)
return vector;
}
-void setup_vector_irq(int cpu)
-{
-}
-
static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index b9950dae59b..e63282e7886 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -103,7 +103,7 @@ int timer_through_8259 __initdata;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -770,6 +770,19 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
+
static int __assign_irq_vector(int irq, cpumask_t mask)
{
/*
@@ -875,7 +888,7 @@ static void __clear_irq_vector(int irq)
cpus_clear(cfg->domain);
}
-static void __setup_vector_irq(int cpu)
+void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
@@ -898,14 +911,6 @@ static void __setup_vector_irq(int cpu)
}
}
-void setup_vector_irq(int cpu)
-{
- spin_lock(&vector_lock);
- __setup_vector_irq(smp_processor_id());
- spin_unlock(&vector_lock);
-}
-
-
static struct irq_chip ioapic_chip;
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e362c6ab4d3..e5d23675bb7 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
- printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
{
- printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -697,7 +697,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
+ bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b67a4b1d4ea..02d19328525 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
* Function for kdump case. Get the tce tables from first kernel
* by reading the contents of the base adress register of calgary iommu
*/
-static void get_tce_space_from_tar()
+static void get_tce_space_from_tar(void)
{
int bus;
void __iomem *target;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8dbffb846de..87d4d6964ec 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
pci_swiotlb_init();
}
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+ return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
#endif
/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 792b87853a7..59f07e14d08 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -604,6 +604,14 @@ void __init setup_arch(char **cmdline_p)
early_cpu_init();
early_ioremap_init();
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+ /*
+ * Must be before kernel pagetables are setup
+ * or fixmap area is touched.
+ */
+ vmi_init();
+#endif
+
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
edid_info = boot_params.edid_info;
@@ -790,10 +798,6 @@ void __init setup_arch(char **cmdline_p)
initmem_init(0, max_pfn);
-#ifdef CONFIG_X86_64
- dma32_reserve_bootmem();
-#endif
-
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -808,20 +812,21 @@ void __init setup_arch(char **cmdline_p)
#endif
reserve_crashkernel();
+#ifdef CONFIG_X86_64
+ /*
+ * dma32_reserve_bootmem() allocates bootmem which may conflict
+ * with the crashkernel command line, so do that after
+ * reserve_crashkernel()
+ */
+ dma32_reserve_bootmem();
+#endif
+
reserve_ibft_region();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be after max_low_pfn is determined, and before kernel
- * pagetables are setup.
- */
- vmi_init();
-#endif
-
paravirt_pagetable_setup_start(swapper_pg_dir);
paging_init();
paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -858,12 +863,6 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
- if (def_to_bigsmp)
- printk(KERN_WARNING "More than 8 CPUs detected and "
- "CONFIG_X86_PC cannot handle it.\nUse "
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-#endif
kvm_guest_init();
e820_reserve_resources();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 626618bf2f8..04f78ab51b4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -327,12 +327,16 @@ static void __cpuinit start_secondary(void *unused)
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
+ *
+ * We need to hold vector_lock so there the set of online cpus
+ * does not change while we are assigning vectors to cpus. Holding
+ * this lock ensures we don't half assign or remove an irq from a cpu.
*/
ipi_call_lock_irq();
-#ifdef CONFIG_X86_IO_APIC
- setup_vector_irq(smp_processor_id());
-#endif
+ lock_vector_lock();
+ __setup_vector_irq(smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_vector_lock();
ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -983,7 +987,17 @@ int __cpuinit native_cpu_up(unsigned int cpu)
flush_tlb_all();
low_mappings = 1;
+#ifdef CONFIG_X86_PC
+ if (def_to_bigsmp && apicid > 8) {
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+ err = -1;
+ } else
+ err = do_boot_cpu(apicid, cpu);
+#else
err = do_boot_cpu(apicid, cpu);
+#endif
zap_low_mappings();
low_mappings = 0;
@@ -1336,7 +1350,9 @@ int __cpu_disable(void)
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
+ lock_vector_lock();
remove_cpu_from_maps(cpu);
+ unlock_vector_lock();
fixup_irqs(cpu_online_map);
return 0;
}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 45c27c4e2a6..61531d5c950 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
#include <asm/timer.h>
#include <asm/vmi_time.h>
#include <asm/kmap_types.h>
+#include <asm/setup.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
if (vmi_ops.set_linear_mapping)
- vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
+ vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
}
/*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2fa231923cf..0bfe2bd305e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
account_shadowed(kvm, gfn);
}
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+ u64 *spte;
+ int need_tlb_flush = 0;
+
+ while ((spte = rmap_next(kvm, rmapp, NULL))) {
+ BUG_ON(!(*spte & PT_PRESENT_MASK));
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+ rmap_remove(kvm, spte);
+ set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+ need_tlb_flush = 1;
+ }
+ return need_tlb_flush;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+{
+ int i;
+ int retval = 0;
+
+ /*
+ * If mmap_sem isn't taken, we can look the memslots with only
+ * the mmu_lock by skipping over the slots with userspace_addr == 0.
+ */
+ for (i = 0; i < kvm->nmemslots; i++) {
+ struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ /* mmu_lock protects userspace_addr */
+ if (!start)
+ continue;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+ retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+ retval |= handler(kvm,
+ &memslot->lpage_info[
+ gfn_offset /
+ KVM_PAGES_PER_HPAGE].rmap_pde);
+ }
+ }
+
+ return retval;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+ u64 *spte;
+ int young = 0;
+
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ int _young;
+ u64 _spte = *spte;
+ BUG_ON(!(_spte & PT_PRESENT_MASK));
+ _young = _spte & PT_ACCESSED_MASK;
+ if (_young) {
+ young = 1;
+ clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+ }
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+ return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
#ifdef MMU_DEBUG
static int is_empty_shadow_page(u64 *spt)
{
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
int r;
int largepage = 0;
pfn_t pfn;
+ unsigned long mmu_seq;
down_read(&current->mm->mmap_sem);
if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
largepage = 1;
}
+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ /* implicit mb(), we'll read before PT lock is unlocked */
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
}
spin_lock(&vcpu->kvm->mmu_lock);
+ if (mmu_notifier_retry(vcpu, mmu_seq))
+ goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
PT32E_ROOT_LEVEL);
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
return r;
+
+out_unlock:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ return 0;
}
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
int r;
int largepage = 0;
gfn_t gfn = gpa >> PAGE_SHIFT;
+ unsigned long mmu_seq;
ASSERT(vcpu);
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
largepage = 1;
}
+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ /* implicit mb(), we'll read before PT lock is unlocked */
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
if (is_error_pfn(pfn)) {
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
+ if (mmu_notifier_retry(vcpu, mmu_seq))
+ goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
+
+out_unlock:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ return 0;
}
static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
vcpu->arch.update_pte.largepage = 1;
}
+ vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ /* implicit mb(), we'll read before PT lock is unlocked */
pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220bae..f72ac1fa35f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
pfn = vcpu->arch.update_pte.pfn;
if (is_error_pfn(pfn))
return;
+ if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
+ return;
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
int r;
pfn_t pfn;
int largepage = 0;
+ unsigned long mmu_seq;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
largepage = 1;
}
}
+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ /* implicit mb(), we'll read before PT lock is unlocked */
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
up_read(&current->mm->mmap_sem);
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
}
spin_lock(&vcpu->kvm->mmu_lock);
+ if (mmu_notifier_retry(vcpu, mmu_seq))
+ goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
spin_unlock(&vcpu->kvm->mmu_lock);
return write_pt;
+
+out_unlock:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kvm_release_pfn_clean(pfn);
+ return 0;
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5916191420c..0d682fc6aeb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
+ case KVM_CAP_SYNC_MMU:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
goto out;
down_write(&kvm->slots_lock);
+ spin_lock(&kvm->mmu_lock);
p = &kvm->arch.aliases[alias->slot];
p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
break;
kvm->arch.naliases = n;
+ spin_unlock(&kvm->mmu_lock);
kvm_mmu_zap_all(kvm);
up_write(&kvm->slots_lock);
@@ -3972,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
*/
if (!user_alloc) {
if (npages && !old.rmap) {
+ unsigned long userspace_addr;
+
down_write(&current->mm->mmap_sem);
- memslot->userspace_addr = do_mmap(NULL, 0,
- npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS,
- 0);
+ userspace_addr = do_mmap(NULL, 0,
+ npages * PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ 0);
up_write(&current->mm->mmap_sem);
- if (IS_ERR((void *)memslot->userspace_addr))
- return PTR_ERR((void *)memslot->userspace_addr);
+ if (IS_ERR((void *)userspace_addr))
+ return PTR_ERR((void *)userspace_addr);
+
+ /* set userspace_addr atomically for kvm_hva_to_rmapp */
+ spin_lock(&kvm->mmu_lock);
+ memslot->userspace_addr = userspace_addr;
+ spin_unlock(&kvm->mmu_lock);
} else {
if (!old.user_alloc && old.rmap) {
int ret;
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dfdf428975c..f118c110af3 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -52,7 +52,7 @@
jnz 100b
102:
.section .fixup,"ax"
-103: addl %r8d,%edx /* ecx is zerorest also */
+103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 40e0e309d27..cb0c112386f 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -32,7 +32,7 @@
jnz 100b
102:
.section .fixup,"ax"
-103: addl %r8d,%edx /* ecx is zerorest also */
+103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
@@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache)
jmp 60f
50: movl %ecx,%edx
60: sfence
- movl %r8d,%ecx
jmp copy_user_handle_tail
.previous
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 2977ea37791..dfb932dcf13 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,6 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o
+ pat.o pgtable.o gup.o
-obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
obj-$(CONFIG_X86_32) += pgtable_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef..d50302774fe 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
unsigned long addr;
int i;
+ if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
+ return;
+
pud = pud_offset(pgd, 0);
for (addr = i = 0; i < PREALLOCATED_PMDS;