From 4c923d4714821cf32ff115bb9c91867dff711972 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 2 Oct 2009 11:01:24 -0700 Subject: iommu: Allocate dma-remapping structures using numa locality info Allocate dma-remapping structures using numa locality info. On platforms having remapping hardware units span different nodes, this enables optimized dma-remapping transalation structures access by remapping hardware. Signed-off-by: Suresh Siddha Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b1e97e68250..c8311364622 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -277,6 +277,7 @@ static int hw_pass_through = 1; struct dmar_domain { int id; /* domain id */ + int nid; /* node id */ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ struct list_head devices; /* all devices' list */ @@ -400,15 +401,18 @@ static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) } -static inline void *alloc_pgtable_page(void) +static inline void *alloc_pgtable_page(int node) { unsigned int flags; - void *vaddr; + struct page *page; + void *vaddr = NULL; /* trying to avoid low memory issues */ flags = current->flags & PF_MEMALLOC; current->flags |= PF_MEMALLOC; - vaddr = (void *)get_zeroed_page(GFP_ATOMIC); + page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + if (page) + vaddr = page_address(page); current->flags &= (~PF_MEMALLOC | flags); return vaddr; } @@ -589,7 +593,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, root = &iommu->root_entry[bus]; context = get_context_addr_from_root(root); if (!context) { - context = (struct context_entry *)alloc_pgtable_page(); + context = (struct context_entry *) + alloc_pgtable_page(iommu->node); if (!context) { spin_unlock_irqrestore(&iommu->lock, flags); return NULL; @@ -732,7 +737,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(); + tmp_page = alloc_pgtable_page(domain->nid); if (!tmp_page) return NULL; @@ -868,7 +873,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) struct root_entry *root; unsigned long flags; - root = (struct root_entry *)alloc_pgtable_page(); + root = (struct root_entry *)alloc_pgtable_page(iommu->node); if (!root) return -ENOMEM; @@ -1263,6 +1268,7 @@ static struct dmar_domain *alloc_domain(void) if (!domain) return NULL; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = 0; @@ -1420,9 +1426,10 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_count = 1; + domain->nid = iommu->node; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); @@ -1577,6 +1584,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, spin_lock_irqsave(&domain->iommu_lock, flags); if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; domain_update_iommu_cap(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags); @@ -3416,6 +3425,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) return NULL; domain->id = vm_domid++; + domain->nid = -1; memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; @@ -3442,9 +3452,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_coherency = 0; domain->iommu_snooping = 0; domain->max_addr = 0; + domain->nid = -1; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); -- cgit v1.2.3 From 5595b528b49a702c0428c0762bab60999648254c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 Dec 2009 09:21:55 +0000 Subject: intel-iommu: Check for an RMRR which ends before it starts. Some HP BIOSes report an RMRR region (a region which needs a 1:1 mapping in the IOMMU for a given device) which has an end address lower than its start address. Detect that and warn, rather than triggering the BUG() in dma_pte_clear_range(). Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index cb5cae3e020..83cabdc118d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2000,6 +2000,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); + if (end < start) { + WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + if (end >> agaw_to_width(domain->agaw)) { WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", -- cgit v1.2.3 From 44cd613c0e4cd93079ea2a93aa06649d8ca0830a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 2 Dec 2009 10:18:30 +0000 Subject: intel-iommu: Fix oops with intel_iommu=igfx_off The hotplug notifier will call find_domain() to see if the device in question has been assigned an IOMMU domain. However, this should never be called for devices with a "dummy" domain, such as graphics devices when intel_iommu=igfx_off is set and the corresponding IOMMU isn't even initialised. If you do that, it'll oops as it dereferences the (-1) pointer. The notifier function should check iommu_no_mapping() for the device before doing anything else. Cc: stable@kernel.org Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 83cabdc118d..e3e84cac0e9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3247,6 +3247,9 @@ static int device_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; + if (iommu_no_mapping(dev)) + return 0; + domain = find_domain(pdev); if (!domain) return 0; -- cgit v1.2.3 From 1672af1164d3d50ba8908014fd34cc0b58afdc1e Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 2 Dec 2009 12:06:34 -0800 Subject: intel-iommu: ignore page table validation in pass through mode We are seeing a bug when booting w/ iommu=pt with current upstream (bisect blames 19943b0e30b05d42e494ae6fef78156ebc8c637e "intel-iommu: Unify hardware and software passthrough support). The issue is specific to this loop during identity map initialization of each device: domain_context_mapping_one(si_domain, ..., CONTEXT_TT_PASS_THROUGH) ... /* Skip top levels of page tables for * iommu which has less agaw than default. */ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) { <------ failing here spin_unlock_irqrestore(&iommu->lock, flags); return -ENOMEM; } This box has 2 iommu's in it. The catchall iommu has MGAW == 48, and SAGAW == 4. The other iommu has MGAW == 39, SAGAW == 2. The device that's failing the above pgd test is the only device connected to the non-catchall iommu, which has a smaller address width than the domain default. This test is not necessary since the context is in PT mode and the ASR is ignored. Thanks to Don Dutile for discovering and debugging this one. Cc: stable@kernel.org Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e3e84cac0e9..46607953533 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1530,12 +1530,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, /* Skip top levels of page tables for * iommu which has less agaw than default. + * Unnecessary for PT mode. */ - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) { - spin_unlock_irqrestore(&iommu->lock, flags); - return -ENOMEM; + if (translation != CONTEXT_TT_PASS_THROUGH) { + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } } } } -- cgit v1.2.3 From 354bb65e6e0df0aaae0e5b1ea33948d8e0b61418 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 17 Nov 2009 16:21:09 +0900 Subject: Revert "Intel IOMMU: Avoid memory allocation failures in dma map api calls" commit eb3fa7cb51 said Intel IOMMU Intel IOMMU driver needs memory during DMA map calls to setup its internal page tables and for other data structures. As we all know that these DMA map calls are mostly called in the interrupt context or with the spinlock held by the upper level drivers(network/storage drivers), so in order to avoid any memory allocation failure due to low memory issues, this patch makes memory allocation by temporarily setting PF_MEMALLOC flags for the current task before making memory allocation calls. We evaluated mempools as a backup when kmem_cache_alloc() fails and found that mempools are really not useful here because 1) We don't know for sure how much to reserve in advance 2) And mempools are not useful for GFP_ATOMIC case (as we call memory alloc functions with GFP_ATOMIC) (akpm: point 2 is wrong...) The above description doesn't justify to waste system emergency memory at all. Non MM subsystem must not use PF_MEMALLOC. Memory reclaim need few memory, anyone must not prevent it. Otherwise the system cause mysterious hang-up and/or OOM Killer invokation. Plus, akpm already pointed out what we should do. Then, this patch revert it. Cc: Keshavamurthy Anil S Signed-off-by: KOSAKI Motohiro Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 46607953533..4e1dd40f18e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -387,33 +387,14 @@ static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static struct kmem_cache *iommu_iova_cache; -static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep) -{ - unsigned int flags; - void *vaddr; - - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC); - current->flags &= (~PF_MEMALLOC | flags); - return vaddr; -} - - static inline void *alloc_pgtable_page(int node) { - unsigned int flags; struct page *page; void *vaddr = NULL; - /* trying to avoid low memory issues */ - flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); if (page) vaddr = page_address(page); - current->flags &= (~PF_MEMALLOC | flags); return vaddr; } @@ -424,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr) static inline void *alloc_domain_mem(void) { - return iommu_kmem_cache_alloc(iommu_domain_cache); + return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); } static void free_domain_mem(void *vaddr) @@ -434,7 +415,7 @@ static void free_domain_mem(void *vaddr) static inline void * alloc_devinfo_mem(void) { - return iommu_kmem_cache_alloc(iommu_devinfo_cache); + return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); } static inline void free_devinfo_mem(void *vaddr) @@ -444,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr) struct iova *alloc_iova_mem(void) { - return iommu_kmem_cache_alloc(iommu_iova_cache); + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); } void free_iova_mem(struct iova *iova) -- cgit v1.2.3