From a1676072558854b95336c8f7db76b0504e909a0a Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Thu, 15 May 2008 14:40:14 -0400 Subject: PCI: Correct last two HP entries in the bfsort whitelist Greetings. There is a code flaw in the bfsort whitelist, where there are redundant entries for the same two HP systems, DL385 G2 and DL585 G2. This patch replaces those redundant entries with the correct ones. The correct entries are for large-volume systems, the DL360 and DL380. ----------------------------------------------------------------------- commit ec69f0374c3b0ad7ea991b0e9ac00377acfe5b1a Author: Tony Camuso Date: Wed May 14 07:09:28 2008 -0400 Replace Redundant Whitelist Entries with the Correct Ones The ProLiant DL585 G2 and the DL585 G2 are entered reundantly in the dmi_system_id table. What should have been there are the DL360 and DL380. This patch simply replaces the redundant entries with the correct entries. arch/x86/pci/common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) Signed-off-by: Tony Camuso Signed-off-by: Pat Schoeller Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 6e64aaf00d1..940185ecaed 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -328,18 +328,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { #endif { .callback = set_bf_sort, - .ident = "HP ProLiant DL385 G2", + .ident = "HP ProLiant DL360", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"), }, }, { .callback = set_bf_sort, - .ident = "HP ProLiant DL585 G2", + .ident = "HP ProLiant DL380", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"), }, }, {} -- cgit v1.2.3 From 4efeb4dd3c0bf534e431a8e7c72d0afbd4cd24aa Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: PCI: use dev_to_node in pci_call_probe to make sure get one online node. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/pci/pci-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 72cf61ed8f9..e1637bd82b8 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -181,7 +181,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, any need to change it. */ struct mempolicy *oldpol; cpumask_t oldmask = current->cpus_allowed; - int node = pcibus_to_node(dev->bus); + int node = dev_to_node(&dev->dev); if (node >= 0) { node_to_cpumask_ptr(nodecpumask, node); -- cgit v1.2.3 From 75b19b790bec3ebffbf513405b27500e22270cbc Mon Sep 17 00:00:00 2001 From: Bertram Felgenhauer Date: Fri, 30 May 2008 03:20:05 +0200 Subject: pci, x86: add workaround for bug in ASUS A7V600 BIOS (rev 1005) This BIOS claims the VIA 8237 south bridge to be compatible with VIA 586, which it is not. Without this patch, I get the following warning while booting, among others, | PCI: Using IRQ router VIA [1106/3227] at 0000:00:11.0 | ------------[ cut here ]------------ | WARNING: at arch/x86/pci/irq.c:265 pirq_via586_get+0x4a/0x60() | Modules linked in: | Pid: 1, comm: swapper Not tainted 2.6.26-rc4-00015-g1ec7d99 #1 | [] warn_on_slowpath+0x54/0x70 | [] ? vt_console_print+0x210/0x2b0 | [] ? vt_console_print+0x0/0x2b0 | [] ? __call_console_drivers+0x43/0x60 | [] ? _call_console_drivers+0x52/0x80 | [] ? release_console_sem+0x1c9/0x200 | [] ? raw_pci_read+0x41/0x70 | [] ? pci_read+0x2f/0x40 | [] pirq_via586_get+0x4a/0x60 | [] ? pirq_via586_get+0x0/0x60 | [] pcibios_lookup_irq+0x15d/0x430 | [] pcibios_irq_init+0x17a/0x3e0 | [] ? kernel_init+0x0/0x250 | [] kernel_init+0x73/0x250 | [] ? pcibios_irq_init+0x0/0x3e0 | [] ? schedule_tail+0x10/0x40 | [] ? ret_from_fork+0x6/0x1c | [] ? kernel_init+0x0/0x250 | [] ? kernel_init+0x0/0x250 | [] kernel_thread_helper+0x7/0x1c | ======================= | ---[ end trace 4eaa2a86a8e2da22 ]--- and IRQ trouble later, | irq 10: nobody cared (try booting with the "irqpoll" option) Now that's an VIA 8237 chip, so pirq_via586_get shouldn't be called at all; adding this workaround to via_router_probe() fixes the problem for me. Amazingly I have a 2.6.23.8 kernel that somehow works fine ... I'll never understand why. Signed-off-by: Bertram Felgenhauer Acked-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/pci/irq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0908fca901b..ca8df9c260b 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -621,6 +621,13 @@ static __init int via_router_probe(struct irq_router *r, */ device = PCI_DEVICE_ID_VIA_8235; break; + case PCI_DEVICE_ID_VIA_8237: + /** + * Asus a7v600 bios wrongly reports 8237 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8237; + break; } } -- cgit v1.2.3 From db9f600b96c16bb3c7f094e294fbdd370226ad86 Mon Sep 17 00:00:00 2001 From: Miquel van Smoorenburg Date: Wed, 28 May 2008 10:31:25 +0200 Subject: x86: pci-dma.c: use __GFP_NO_OOM instead of __GFP_NORETRY On Wed, 2008-05-28 at 04:47 +0200, Andi Kleen wrote: > > So... why not just remove the setting of __GFP_NORETRY? Why is it > > wrong to oom-kill things in this case? > > When the 16MB zone overflows (which can be common in some workloads) > calling the OOM killer is pretty useless because it has barely any > real user data [only exception would be the "only 16MB" case Alan > mentioned]. Killing random processes in this case is bad. > > I think for 16MB __GFP_NORETRY is ok because there should be > nothing freeable in there so looping is useless. Only exception would be the > "only 16MB total" case again but I'm not sure 2.6 supports that at all > on x86. > > On the other hand d_a_c() does more allocations than just 16MB, especially > on 64bit and the other zones need different strategies. Okay, so how about this then ? Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c5ef1af8e79..069e843f0b9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -397,9 +397,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dev->dma_mask == NULL) return NULL; - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; - #ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of @@ -410,7 +407,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, #endif again: - page = dma_alloc_pages(dev, gfp, get_order(size)); + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ + page = dma_alloc_pages(dev, + (gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size)); if (page == NULL) return NULL; -- cgit v1.2.3 From f529626a86d61897862aa1bbbb4537773209238e Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 29 May 2008 00:30:21 -0700 Subject: suspend-vs-iommu: prevent suspend if we could not resume iommu/gart support misses suspend/resume code, which can do bad stuff, including memory corruption on resume. Prevent system suspend in case we would be unable to resume. Signed-off-by: Pavel Machek Tested-by: Patrick Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695..aa8ec928caa 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static int gart_resume(struct sys_device *dev) +{ + return 0; +} + +static int gart_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class gart_sysdev_class = { + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, + +}; + +static struct sys_device device_gart = { + .id = 0, + .cls = &gart_sysdev_class, +}; + /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. @@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i; + int i, error; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } + + error = sysdev_class_register(&gart_sysdev_class); + if (!error) + error = sysdev_register(&device_gart); + if (error) + panic("Could not register gart_sysdev -- would corrupt data on next suspend"); flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", -- cgit v1.2.3 From b7f09ae583c49d28b2796d2fa5893dcf822e3a10 Mon Sep 17 00:00:00 2001 From: Miquel van Smoorenburg Date: Thu, 5 Jun 2008 18:14:44 +0200 Subject: x86, pci-dma.c: don't always add __GFP_NORETRY to gfp Currently arch/x86/kernel/pci-dma.c always adds __GFP_NORETRY to the allocation flags, because it wants to be reasonably sure not to deadlock when calling alloc_pages(). But really that should only be done in two cases: - when allocating memory in the lower 16 MB DMA zone. If there's no free memory there, waiting or OOM killing is of no use - when optimistically trying an allocation in the DMA32 zone when dma_mask < DMA_32BIT_MASK hoping that the allocation happens to fall within the limits of the dma_mask Also blindly adding __GFP_NORETRY to the the gfp variable might not be a good idea since we then also use it when calling dma_ops->alloc_coherent(). Clearing it might also not be a good idea, dma_alloc_coherent()'s caller might have set it on purpose. The gfp variable should not be clobbered. [ mingo@elte.hu: converted to delta patch ontop of previous version. ] Signed-off-by: Miquel van Smoorenburg Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 069e843f0b9..dc00a1331ac 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page; unsigned long dma_mask = 0; dma_addr_t bus; + int noretry = 0; /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -397,19 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dev->dma_mask == NULL) return NULL; + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ + if (gfp & __GFP_DMA) + noretry = 1; + #ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { gfp |= GFP_DMA32; + if (dma_mask < DMA_32BIT_MASK) + noretry = 1; + } #endif again: - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ page = dma_alloc_pages(dev, - (gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size)); + noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); if (page == NULL) return NULL; -- cgit v1.2.3 From 45aec1ae72fc592f231e9e73ed9ed4d10cfbc0b5 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/filesystems/sysfs-pci.txt | 1 + drivers/pci/pci-sysfs.c | 84 ++++++++++++++++++++++++--------- include/linux/pci.h | 1 + 3 files changed, 64 insertions(+), 22 deletions(-) diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 5daa2aaec2c..68ef48839c0 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -36,6 +36,7 @@ files, each with their own function. local_cpus nearby CPU mask (cpumask, ro) resource PCI resource host addresses (ascii, ro) resource0..N PCI resource N, if present (binary, mmap) + resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) rom PCI ROM resource, if present (binary, ro) subsystem_device PCI subsystem device (ascii, ro) subsystem_vendor PCI subsystem vendor (ascii, ro) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 271d41cc05a..9ec7d3977a8 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -489,13 +489,14 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, * @kobj: kobject for mapping * @attr: struct bin_attribute for the file being mapped * @vma: struct vm_area_struct passed into the mmap + * @write_combine: 1 for write_combine mapping * * Use the regular PCI mapping routines to map a PCI resource into userspace. * FIXME: write combining? maybe automatic for prefetchable regions? */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, - struct vm_area_struct *vma) + struct vm_area_struct *vma, int write_combine) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -518,7 +519,21 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; - return pci_mmap_page_range(pdev, vma, mmap_type, 0); + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); +} + +static int +pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +static int +pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); } /** @@ -541,9 +556,46 @@ pci_remove_resource_files(struct pci_dev *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); kfree(res_attr); } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } } } +static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) +{ + /* allocate attribute structure, piggyback attribute name */ + int name_len = write_combine ? 13 : 10; + struct bin_attribute *res_attr; + int retval; + + res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC); + if (res_attr) { + char *res_attr_name = (char *)(res_attr + 1); + + if (write_combine) { + pdev->res_attr_wc[num] = res_attr; + sprintf(res_attr_name, "resource%d_wc", num); + res_attr->mmap = pci_mmap_resource_wc; + } else { + pdev->res_attr[num] = res_attr; + sprintf(res_attr_name, "resource%d", num); + res_attr->mmap = pci_mmap_resource_uc; + } + res_attr->attr.name = res_attr_name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = pci_resource_len(pdev, num); + res_attr->private = &pdev->resource[num]; + retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); + } else + retval = -ENOMEM; + + return retval; +} + /** * pci_create_resource_files - create resource files in sysfs for @dev * @dev: dev in question @@ -557,31 +609,19 @@ static int pci_create_resource_files(struct pci_dev *pdev) /* Expose the PCI resources from this device as files */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - struct bin_attribute *res_attr; /* skip empty resources */ if (!pci_resource_len(pdev, i)) continue; - /* allocate attribute structure, piggyback attribute name */ - res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC); - if (res_attr) { - char *res_attr_name = (char *)(res_attr + 1); - - pdev->res_attr[i] = res_attr; - sprintf(res_attr_name, "resource%d", i); - res_attr->attr.name = res_attr_name; - res_attr->attr.mode = S_IRUSR | S_IWUSR; - res_attr->size = pci_resource_len(pdev, i); - res_attr->mmap = pci_mmap_resource; - res_attr->private = &pdev->resource[i]; - retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) { - pci_remove_resource_files(pdev); - return retval; - } - } else { - return -ENOMEM; + retval = pci_create_attr(pdev, i, 0); + /* for prefetchable resources, create a WC mappable file */ + if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH) + retval = pci_create_attr(pdev, i, 1); + + if (retval) { + pci_remove_resource_files(pdev); + return retval; } } return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e..d18b1dd49fa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v1.2.3 From 81d5575a48f49f494289a1299a32e4e5e41fbf40 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 12 Jun 2008 13:51:46 -0700 Subject: PCI: fixup write combine comment in pci_mmap_resource Now that we can actually do write combining properly, there's no need to have the FIXME. Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9ec7d3977a8..6f3c7446c32 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -492,7 +492,6 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, * @write_combine: 1 for write_combine mapping * * Use the regular PCI mapping routines to map a PCI resource into userspace. - * FIXME: write combining? maybe automatic for prefetchable regions? */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, -- cgit v1.2.3