From 1de5b0854623d30d01d72cd4ea323eb5f39d1f16 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:18 +0000 Subject: x86: HPET: convert WARN_ON to WARN_ON_ONCE It is possible to flood the console with call traces if the WARN_ON condition is true because of the frequency with which this function is called. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 77017e834cf..f10f9461a43 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 89d77a1eb60be916d85d9394bedbfa2037af89c5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:20 +0000 Subject: x86: HPET: read from HPET_Tn_CMP() not HPET_T0_CMP In hpet_next_event() we check that the value we just wrote to HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if timer is anything other than timer 0, is likely to fail. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f10f9461a43..cfe6aa56f71 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 5ceb1a04187553e08c6ab60d30cee7c454ee139a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 22:23:13 +0000 Subject: x86: HPET: enter hpet_interrupt_handler with interrupts disabled Some functions that may be called from this handler require that interrupts are disabled. Also, combining IRQF_DISABLED and IRQF_SHARED does not reliably disable interrupts in a handler, so remove IRQF_SHARED from the irq flags (this irq is not shared anyway). Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Cc: "Will Newton" Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cfe6aa56f71..067d8de913f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) { if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) return -1; disable_irq(dev->irq); -- cgit v1.2.3 From 4694516d1987303dd83bfd0efdd36fa5b65d701b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 10 Nov 2008 21:52:47 +0100 Subject: x86: Make NUMA on 32-bit depend on BROKEN While investigating the failure of hibernation on 32-bit x86 with CONFIG_NUMA set, as described in this message http://marc.info/?l=linux-kernel&m=122634118116226&w=4 I asked some people for help and I was told that it wasn't really worth the effort, because CONFIG_NUMA was generally broken on 32-bit x86 systems and it shouldn't be used in such configs. For this reason, make CONFIG_NUMA depend on BROKEN instead of EXPERIMENTAL on x86-32. Signed-off-by: Rafael J. Wysocki Cc: Andi Kleen Cc: Pavel Machek Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4cf0ab13d18..93224b56918 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT config NUMA bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help -- cgit v1.2.3 From c41ef344de212bd918f7765af21b5008628c03e0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 28 Oct 2008 18:16:58 -0200 Subject: KVM: MMU: increase per-vcpu rmap cache alloc size The page fault path can use two rmap_desc structures, if: - walk_addr's dirty pte update allocates one rmap_desc. - mmu_lock is dropped, sptes are zapped resulting in rmap_desc being freed. - fetch->mmu_set_spte allocates another rmap_desc. Increase to 4 for safety. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a5e64881d9..f1983d9477c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 1); + rmap_desc_cache, 4); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); -- cgit v1.2.3 From a29a2af378f3f6362b68e126e2541c8bde885ead Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Wed, 29 Oct 2008 14:13:39 -0700 Subject: x86: KVM guest: fix section mismatch warning in kvmclock.c WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch in reference from the function kvm_setup_secondary_clock() to the function .devinit.text:setup_secondary_APIC_clock() The function kvm_setup_secondary_clock() references the function __devinit setup_secondary_APIC_clock(). This is often because kvm_setup_secondary_clock lacks a __devinit annotation or the annotation of setup_secondary_APIC_clock is wrong. Signed-off-by: Md.Rakib H. Mullick Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 774ac499156..1c9cc431ea4 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void kvm_setup_secondary_clock(void) +static void __devinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.3 From ca93e992fdfdc6569ac2845d7560eeb5de4a4e0b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 Nov 2008 11:25:17 +0200 Subject: KVM: Require the PCI subsystem PCI device assignment makes calls to pci code, so require it to be built into the kernel. Signed-off-by: Avi Kivity --- arch/x86/kvm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ce3251ce550..b81125f0bde 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,8 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + # for device assignment: + depends on PCI select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES -- cgit v1.2.3 From 928d4bf747e9c290b690ff515d8f81e8ee226d97 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 6 Nov 2008 14:55:45 +0800 Subject: KVM: VMX: Set IGMT bit in EPT entry There is a potential issue that, when guest using pagetable without vmexit when EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory, which would be inconsistent with host side and would cause host MCE due to inconsistent cache attribute. The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default memory type to protect host (notice that all memory mapped by KVM should be WB). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- arch/x86/kvm/vmx.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2643b430d83..d06b4dc0e2e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3564,7 +3564,8 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | + VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 3e010d21fdd..ec5edc339da 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul -- cgit v1.2.3 From e17d1dc0863767bab8fde4ba9be92c7f79e7fe50 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 11 Nov 2008 13:09:36 +0200 Subject: KVM: Fix pit memory leak if unable to allocate irq source id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-By: Daniel Marjamäki Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 8772dc94682..59ebd37ad79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) mutex_lock(&kvm->lock); pit->irq_source_id = kvm_request_irq_source_id(kvm); mutex_unlock(&kvm->lock); - if (pit->irq_source_id < 0) + if (pit->irq_source_id < 0) { + kfree(pit); return NULL; + } mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); -- cgit v1.2.3 From 32836259ff25ce97010569706cd33ba94de81d62 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 16:17:52 -0700 Subject: ACPI: pci_link: remove acpi_irq_balance_set() interface This removes the acpi_irq_balance_set() interface from the PCI interrupt link driver. x86 used acpi_irq_balance_set() to tell the PCI interrupt link driver to configure links to minimize IRQ sharing. But the link driver can easily figure out whether to turn on IRQ balancing based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of that external interface. It's better for the driver to figure this out at init-time. If we set it externally via the x86 code, the interface reduces modularity, and we depend on the fact that acpi_process_madt() happens before we process the kernel command line. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - arch/x86/kernel/acpi/boot.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d676d8ecde..9830681446a 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void) acpi_pci_disabled = 1; acpi_noirq_set(); } -extern int acpi_irq_balance_set(char *str); /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c1f76abae9..4c51a2f8fd3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void) error = acpi_parse_madt_ioapic_entries(); if (!error) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_irq_balance_set(NULL); acpi_ioapic = 1; smp_found_config = 1; -- cgit v1.2.3 From 52168e60f7d86d83124903098ac8c2dba93cd1c4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 14 Nov 2008 13:47:31 +0000 Subject: Revert "x86: blacklist DMAR on Intel G31/G33 chipsets" This reverts commit e51af6630848406fc97adbd71443818cdcda297b, which was wrongly hoovered up and submitted about a month after a better fix had already been merged. The better fix is commit cbda1ba898647aeb4ee770b803c922f595e97731 ("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do this blacklisting based on the DMI identification for the offending motherboard, since sometimes this chipset (or at least a chipset with the same PCI ID) apparently _does_ actually have an IOMMU. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- arch/x86/include/asm/iommu.h | 1 - arch/x86/kernel/early-quirks.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index e4a552d4446..0b500c5b644 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,7 +6,6 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -extern int dmar_disabled; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3ce029ffaa5..1b894b72c0f 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -#ifdef CONFIG_DMAR -static void __init intel_g33_dmar(int num, int slot, int func) -{ - struct acpi_table_header *dmar_tbl; - acpi_status status; - - status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); - if (ACPI_SUCCESS(status)) { - printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); - dmar_disabled = 1; - } -} -#endif - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, -#ifdef CONFIG_DMAR - { PCI_VENDOR_ID_INTEL, 0x29c0, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, -#endif {} }; -- cgit v1.2.3 From e5e1f606ecbf67e52ebe2df5d14f8b94ec6544d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:07:17 +0100 Subject: AMD IOMMU: add parameter to disable device isolation Impact: add a new AMD IOMMU kernel command line parameter Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0cdcda35a05..838a2e1d5bb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1213,6 +1213,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "share", 5) == 0) + amd_iommu_isolate = 0; if (strncmp(str, "fullflush", 11) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From 3ce1f93c6d53c3f91c3846cf66b018276c8ac2e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:09:20 +0100 Subject: AMD IOMMU: enable device isolation per default Impact: makes device isolation the default for AMD IOMMU Some device drivers showed double-free bugs of DMA memory while testing them with AMD IOMMU. If all devices share the same protection domain this can lead to data corruption and data loss. Prevent this by putting each device into its own protection domain per default. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 838a2e1d5bb..595edd2befc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate; /* if 1, device isolation is enabled */ +int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the -- cgit v1.2.3 From 695b5676c727d80921a2dc8737d5b3322222db85 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:16:43 +0100 Subject: AMD IOMMU: fix fullflush comparison length Impact: fix comparison length for 'fullflush' Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 595edd2befc..30ae2701b3d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1215,7 +1215,7 @@ static int __init parse_amd_iommu_options(char *str) amd_iommu_isolate = 1; if (strncmp(str, "share", 5) == 0) amd_iommu_isolate = 0; - if (strncmp(str, "fullflush", 11) == 0) + if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From 8501c45cc32c311ae755a2d5ac8c4a5f04908d42 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 19:11:46 +0100 Subject: AMD IOMMU: check for next_bit also in unmapped area Impact: fix possible use of stale IO/TLB entries Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 331b318304e..e4899e0e878 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -537,7 +537,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); - if (address + pages >= dom->next_bit) + if (address >= dom->next_bit) dom->need_flush = true; } -- cgit v1.2.3