From 7e25a2422987a37729706b18583d177966919d2a Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 25 Jun 2009 18:52:05 -0700 Subject: intel-iommu: fix Identity Mapping to be arch independent Drop the e820 scanning and use existing function for finding valid RAM regions to add to 1:1 mapping. Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e53eacd75c8..420afa88728 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -39,7 +39,6 @@ #include #include #include -#include #include "pci.h" #define ROOT_SIZE VTD_PAGE_SIZE @@ -1908,7 +1907,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address + 1); } -#ifdef CONFIG_DMAR_GFX_WA struct iommu_prepare_data { struct pci_dev *pdev; int ret; @@ -1943,6 +1941,7 @@ static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) return data.ret; } +#ifdef CONFIG_DMAR_GFX_WA static void __init iommu_prepare_gfx_mapping(void) { struct pci_dev *pdev = NULL; @@ -2081,7 +2080,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, static int iommu_prepare_static_identity_mapping(void) { - int i; struct pci_dev *pdev = NULL; int ret; @@ -2091,17 +2089,10 @@ static int iommu_prepare_static_identity_mapping(void) printk(KERN_INFO "IOMMU: Setting identity map:\n"); for_each_pci_dev(pdev) { - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (ei->type == E820_RAM) { - ret = iommu_prepare_identity_map(pdev, - ei->addr, ei->addr + ei->size); - if (ret) { - printk(KERN_INFO "1:1 mapping to one domain failed.\n"); - return -EFAULT; - } - } + ret = iommu_prepare_with_active_regions(pdev); + if (ret) { + printk(KERN_INFO "1:1 mapping to one domain failed.\n"); + return -EFAULT; } ret = domain_add_dev_info(si_domain, pdev); if (ret) -- cgit v1.2.3 From bf92df30df909710c498d05620e2df1be1ef779b Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 29 Jun 2009 11:31:45 +0800 Subject: intel-iommu: Only avoid flushing device IOTLB for domain ID 0 in caching mode In caching mode, domain ID 0 is reserved for non-present to present mapping flush. Device IOTLB doesn't need to be flushed in this case. Previously we were avoiding the flush for domain zero, even if the IOMMU wasn't in caching mode and domain zero wasn't special. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 420afa88728..3cad7006ed8 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1054,7 +1054,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, else iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH); - if (did) + + /* + * In caching mode, domain ID 0 is reserved for non-present to present + * mapping flush. Device IOTLB doesn't need to be flushed in this case. + */ + if (!cap_caching_mode(iommu->cap) || did) iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } -- cgit v1.2.3 From b213203e475212a69ad6fedfb73464087e317148 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 26 Jun 2009 18:50:28 +0100 Subject: intel-iommu: Create new iommu_domain_identity_map() function We'll want to do this to a _domain_ (the si_domain) rather than a PCI device. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 62 +++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3cad7006ed8..3a4f347e2f8 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1849,25 +1849,12 @@ error: static int iommu_identity_mapping; -static int iommu_prepare_identity_map(struct pci_dev *pdev, - unsigned long long start, - unsigned long long end) +static int iommu_domain_identity_map(struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; unsigned long size; unsigned long long base; - int ret; - - printk(KERN_INFO - "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); - if (iommu_identity_mapping) - domain = si_domain; - else - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; /* The address might not be aligned */ base = start & PAGE_MASK; @@ -1876,31 +1863,54 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, if (!reserve_iova(&domain->iovad, IOVA_PFN(base), IOVA_PFN(base + size) - 1)) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); - ret = -ENOMEM; - goto error; + return -ENOMEM; } - pr_debug("Mapping reserved region %lx@%llx for %s\n", - size, base, pci_name(pdev)); + pr_debug("Mapping reserved region %lx@%llx for domain %d\n", + size, base, domain->id); /* * RMRR range might have overlap with physical memory range, * clear it first */ dma_pte_clear_range(domain, base, base + size); - ret = domain_page_mapping(domain, base, base, size, - DMA_PTE_READ|DMA_PTE_WRITE); + return domain_page_mapping(domain, base, base, size, + DMA_PTE_READ|DMA_PTE_WRITE); +} + +static int iommu_prepare_identity_map(struct pci_dev *pdev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + + if (iommu_identity_mapping) + domain = si_domain; + else + /* page table init */ + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = iommu_domain_identity_map(domain, start, end); if (ret) goto error; /* context entry init */ ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); - if (!ret) - return 0; -error: + if (ret) + goto error; + + return 0; + + error: domain_exit(domain); return ret; - } static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, -- cgit v1.2.3 From c7ab48d2acaf959e4d59c3f55d12fdb7ca9afd7c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 26 Jun 2009 19:10:36 +0100 Subject: intel-iommu: Clean up identity mapping code, remove CONFIG_DMAR_GFX_WA There's no need for the GFX workaround now we have 'iommu=pt' for the cases where people really care about performance. There's no need to have a special case for just one type of device. This also speeds up the iommu=pt path and reduces memory usage by setting up the si_domain _once_ and then using it for all devices, rather than giving each device its own private page tables. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 107 ++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 75 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3a4f347e2f8..fc121967cb5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1889,11 +1889,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); - if (iommu_identity_mapping) - domain = si_domain; - else - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -1922,64 +1918,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address + 1); } -struct iommu_prepare_data { - struct pci_dev *pdev; - int ret; -}; - -static int __init iommu_prepare_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct iommu_prepare_data *data; - - data = (struct iommu_prepare_data *)datax; - - data->ret = iommu_prepare_identity_map(data->pdev, - start_pfn<ret; - -} - -static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) -{ - int nid; - struct iommu_prepare_data data; - - data.pdev = pdev; - data.ret = 0; - - for_each_online_node(nid) { - work_with_active_regions(nid, iommu_prepare_work_fn, &data); - if (data.ret) - return data.ret; - } - return data.ret; -} - -#ifdef CONFIG_DMAR_GFX_WA -static void __init iommu_prepare_gfx_mapping(void) -{ - struct pci_dev *pdev = NULL; - int ret; - - for_each_pci_dev(pdev) { - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || - !IS_GFX_DEVICE(pdev)) - continue; - printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", - pci_name(pdev)); - ret = iommu_prepare_with_active_regions(pdev); - if (ret) - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); - } -} -#else /* !CONFIG_DMAR_GFX_WA */ -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif - #ifdef CONFIG_DMAR_FLOPPY_WA static inline void iommu_prepare_isa(void) { @@ -1990,12 +1928,12 @@ static inline void iommu_prepare_isa(void) if (!pdev) return; - printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); + printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " - "floppy might not work\n"); + printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " + "floppy might not work\n"); } #else @@ -2023,16 +1961,30 @@ static int __init init_context_pass_through(void) } static int md_domain_init(struct dmar_domain *domain, int guest_width); + +static int __init si_domain_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + int *ret = datax; + + *ret = iommu_domain_identity_map(si_domain, + (uint64_t)start_pfn << PAGE_SHIFT, + (uint64_t)end_pfn << PAGE_SHIFT); + return *ret; + +} + static int si_domain_init(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 0; + int nid, ret = 0; si_domain = alloc_domain(); if (!si_domain) return -EFAULT; + pr_debug("Identity mapping domain is domain %d\n", si_domain->id); for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); @@ -2049,6 +2001,12 @@ static int si_domain_init(void) si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + for_each_online_node(nid) { + work_with_active_regions(nid, si_domain_work_fn, &ret); + if (ret) + return ret; + } + return 0; } @@ -2102,13 +2060,14 @@ static int iommu_prepare_static_identity_mapping(void) if (ret) return -EFAULT; - printk(KERN_INFO "IOMMU: Setting identity map:\n"); for_each_pci_dev(pdev) { - ret = iommu_prepare_with_active_regions(pdev); - if (ret) { - printk(KERN_INFO "1:1 mapping to one domain failed.\n"); - return -EFAULT; - } + printk(KERN_INFO "IOMMU: identity mapping for device %s\n", + pci_name(pdev)); + + ret = domain_context_mapping(si_domain, pdev, + CONTEXT_TT_MULTI_LEVEL); + if (ret) + return ret; ret = domain_add_dev_info(si_domain, pdev); if (ret) return ret; @@ -2299,8 +2258,6 @@ int __init init_dmars(void) } } - iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); } -- cgit v1.2.3 From dd4e831960e4f0214480fa96a53ca9bb7dd04927 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 16:21:20 +0100 Subject: intel-iommu: Change dma_set_pte_addr() to dma_set_pte_pfn() Add some helpers for converting between VT-d and normal system pfns, since system pages can be larger than VT-d pages. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc121967cb5..852f40a913d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -65,6 +65,26 @@ #define PHYSICAL_PAGE_MASK PAGE_MASK #endif +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. */ +static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) +{ + return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); +} + +static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} + /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; @@ -207,9 +227,9 @@ static inline u64 dma_pte_addr(struct dma_pte *pte) return (pte->val & VTD_PAGE_MASK); } -static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) +static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) { - pte->val |= (addr & VTD_PAGE_MASK); + pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; } static inline bool dma_pte_present(struct dma_pte *pte) @@ -702,7 +722,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) return NULL; } domain_flush_cache(domain, tmp_page, PAGE_SIZE); - dma_set_pte_addr(pte, virt_to_phys(tmp_page)); + dma_set_pte_pfn(pte, virt_to_dma_pfn(tmp_page)); /* * high level table always sets r/w, last level page * table control read/write @@ -1648,7 +1668,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); + dma_set_pte_pfn(pte, start_pfn); dma_set_pte_prot(pte, prot); if (prot & DMA_PTE_SNP) dma_set_pte_snp(pte); -- cgit v1.2.3 From 77dfa56c94d2855a25ff552b74980a5538e129f8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 16:40:08 +0100 Subject: intel-iommu: Change address_level_offset() to pfn_level_offset() We're shifting the inputs for now, but that'll change... Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 852f40a913d..529c1c13048 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -671,9 +671,9 @@ static inline unsigned int level_to_offset_bits(int level) return (12 + (level - 1) * LEVEL_STRIDE); } -static inline int address_level_offset(u64 addr, int level) +static inline int pfn_level_offset(unsigned long pfn, int level) { - return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); + return (pfn >> (level_to_offset_bits(level) - 12)) & LEVEL_MASK; } static inline u64 level_mask(int level) @@ -708,7 +708,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) while (level > 0) { void *tmp_page; - offset = address_level_offset(addr, level); + offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, level); pte = &parent[offset]; if (level == 1) break; @@ -749,7 +749,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, parent = domain->pgd; while (level <= total) { - offset = address_level_offset(addr, total); + offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, total); pte = &parent[offset]; if (level == total) return pte; -- cgit v1.2.3 From 90dcfb5eb2fd427b16135a14f176a6902750b6b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 17:14:59 +0100 Subject: intel-iommu: Change dma_addr_level_pte() to dma_pfn_level_pte() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 529c1c13048..edd39d348a9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -740,8 +740,9 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) } /* return address's pte at specific level */ -static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, - int level) +static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, + unsigned long pfn, + int level) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -749,7 +750,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, parent = domain->pgd; while (level <= total) { - offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, total); + offset = pfn_level_offset(pfn, total); pte = &parent[offset]; if (level == total) return pte; @@ -768,7 +769,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) struct dma_pte *pte = NULL; /* get last level pte */ - pte = dma_addr_level_pte(domain, addr, 1); + pte = dma_pfn_level_pte(domain, addr >> VTD_PAGE_SHIFT, 1); if (pte) { dma_clear_pte(pte); @@ -817,7 +818,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; while (tmp < end) { - pte = dma_addr_level_pte(domain, tmp, level); + pte = dma_pfn_level_pte(domain, tmp >> VTD_PAGE_SHIFT, + level); if (pte) { free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); -- cgit v1.2.3 From a75f7cf94f01717c5103138319b96752ee2a2be9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 17:44:39 +0100 Subject: intel-iommu: Make dma_pte_clear_one() take pfn not address Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index edd39d348a9..40eae2097ac 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -764,12 +764,12 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, } /* clear one page's page table */ -static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) +static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) { struct dma_pte *pte = NULL; /* get last level pte */ - pte = dma_pfn_level_pte(domain, addr >> VTD_PAGE_SHIFT, 1); + pte = dma_pfn_level_pte(domain, pfn, 1); if (pte) { dma_clear_pte(pte); @@ -792,7 +792,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) /* we don't need lock here, nobody else touches the iova range */ while (npages--) { - dma_pte_clear_one(domain, start); + dma_pte_clear_one(domain, start >> VTD_PAGE_SHIFT); start += VTD_PAGE_SIZE; } } -- cgit v1.2.3 From 66eae8469e4e4ba6f4ca7ef82103c78f6d645583 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 19:00:32 +0100 Subject: intel-iommu: Don't just mask out too-big physical addresses; BUG() instead Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 40eae2097ac..ad367f53a2b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -700,8 +700,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) unsigned long flags; BUG_ON(!domain->pgd); - - addr &= (((u64)1) << addr_width) - 1; + BUG_ON(addr >> addr_width); parent = domain->pgd; spin_lock_irqsave(&domain->mapping_lock, flags); @@ -783,8 +782,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) int addr_width = agaw_to_width(domain->agaw); int npages; - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; + BUG_ON(start >> addr_width); + BUG_ON((end-1) >> addr_width); + /* in case it's partial page */ start &= PAGE_MASK; end = PAGE_ALIGN(end); @@ -807,8 +807,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int level; u64 tmp; - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; + BUG_ON(start >> addr_width); + BUG_ON(end >> addr_width); /* we don't need lock here, nobody else touches the iova range */ level = 2; @@ -1654,7 +1654,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, int index; int addr_width = agaw_to_width(domain->agaw); - hpa &= (((u64)1) << addr_width) - 1; + BUG_ON(hpa >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; -- cgit v1.2.3 From 04b18e65dd5a3e544f07f4bcfa8fb52704a1833b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 19:15:01 +0100 Subject: intel-iommu: Make dma_pte_clear_range() use pfns Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ad367f53a2b..d4217f73715 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -779,21 +779,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) /* clear last level pte, a tlb flush should be followed */ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) { - int addr_width = agaw_to_width(domain->agaw); - int npages; - - BUG_ON(start >> addr_width); - BUG_ON((end-1) >> addr_width); + unsigned long start_pfn = IOVA_PFN(start); + unsigned long end_pfn = IOVA_PFN(end-1); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - /* in case it's partial page */ - start &= PAGE_MASK; - end = PAGE_ALIGN(end); - npages = (end - start) / VTD_PAGE_SIZE; + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && end_pfn >> addr_width); - /* we don't need lock here, nobody else touches the iova range */ - while (npages--) { - dma_pte_clear_one(domain, start >> VTD_PAGE_SHIFT); - start += VTD_PAGE_SIZE; + /* we don't need lock here; nobody else touches the iova range */ + while (start_pfn <= end_pfn) { + dma_pte_clear_one(domain, start_pfn); + start_pfn++; } } -- cgit v1.2.3 From 595badf5d65d50300319e6178e6df005ea501f70 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 22:09:11 +0100 Subject: intel-iommu: Make dma_pte_clear_range() take pfns as argument Noting that this is now an _inclusive_ range. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d4217f73715..ff8b7ce4a01 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -56,6 +56,7 @@ #define MAX_AGAW_WIDTH 64 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) +#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) @@ -777,17 +778,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) } /* clear last level pte, a tlb flush should be followed */ -static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) +static void dma_pte_clear_range(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) { - unsigned long start_pfn = IOVA_PFN(start); - unsigned long end_pfn = IOVA_PFN(end-1); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && end_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here; nobody else touches the iova range */ - while (start_pfn <= end_pfn) { + while (start_pfn <= last_pfn) { dma_pte_clear_one(domain, start_pfn); start_pfn++; } @@ -1424,7 +1425,7 @@ static void domain_exit(struct dmar_domain *domain) end = end & (~PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ dma_pte_free_pagetable(domain, 0, end); @@ -1890,7 +1891,8 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, * RMRR range might have overlap with physical memory range, * clear it first */ - dma_pte_clear_range(domain, base, base + size); + dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, + (base + size - 1) >> VTD_PAGE_SHIFT); return domain_page_mapping(domain, base, base, size, DMA_PTE_READ|DMA_PTE_WRITE); @@ -2618,7 +2620,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, pci_name(pdev), size, (unsigned long long)start_addr); /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + size - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { @@ -2710,7 +2713,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, start_addr = iova->pfn_lo << PAGE_SHIFT; /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + size - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); @@ -2792,8 +2796,9 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne size, prot); if (ret) { /* clear the page */ - dma_pte_clear_range(domain, start_addr, - start_addr + offset); + dma_pte_clear_range(domain, + start_addr >> VTD_PAGE_SHIFT, + (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + offset); @@ -3382,7 +3387,7 @@ static void vm_domain_exit(struct dmar_domain *domain) end = end & (~VTD_PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ dma_pte_free_pagetable(domain, 0, end); @@ -3526,7 +3531,8 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, /* The address might not be aligned */ base = iova & VTD_PAGE_MASK; size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(dmar_domain, base, base + size); + dma_pte_clear_range(dmar_domain, base >> VTD_PAGE_SHIFT, + (base + size - 1) >> VTD_PAGE_SHIFT); if (dmar_domain->max_addr == base + size) dmar_domain->max_addr = base; -- cgit v1.2.3 From 6660c63a79a639b86e3a709e25a8c4fc3ab24770 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 22:41:00 +0100 Subject: intel-iommu: Make dma_pte_free_pagetable() use pfns Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ff8b7ce4a01..1526864a9d6 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -669,27 +669,27 @@ static inline int width_to_agaw(int width) static inline unsigned int level_to_offset_bits(int level) { - return (12 + (level - 1) * LEVEL_STRIDE); + return (level - 1) * LEVEL_STRIDE; } static inline int pfn_level_offset(unsigned long pfn, int level) { - return (pfn >> (level_to_offset_bits(level) - 12)) & LEVEL_MASK; + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; } -static inline u64 level_mask(int level) +static inline unsigned long level_mask(int level) { - return ((u64)-1 << level_to_offset_bits(level)); + return -1UL << level_to_offset_bits(level); } -static inline u64 level_size(int level) +static inline unsigned long level_size(int level) { - return ((u64)1 << level_to_offset_bits(level)); + return 1UL << level_to_offset_bits(level); } -static inline u64 align_to_level(u64 addr, int level) +static inline unsigned long align_to_level(unsigned long pfn, int level) { - return ((addr + level_size(level) - 1) & level_mask(level)); + return (pfn + level_size(level) - 1) & level_mask(level); } static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) @@ -798,25 +798,29 @@ static void dma_pte_clear_range(struct dmar_domain *domain, static void dma_pte_free_pagetable(struct dmar_domain *domain, u64 start, u64 end) { - int addr_width = agaw_to_width(domain->agaw); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned long start_pfn = start >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (end-1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; int total = agaw_to_level(domain->agaw); int level; - u64 tmp; + unsigned long tmp; - BUG_ON(start >> addr_width); - BUG_ON(end >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here, nobody else touches the iova range */ level = 2; while (level <= total) { - tmp = align_to_level(start, level); - if (tmp >= end || (tmp + level_size(level) > end)) + tmp = align_to_level(start_pfn, level); + + /* Only clear this pte/pmd if we're asked to clear its + _whole_ range */ + if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp < end) { - pte = dma_pfn_level_pte(domain, tmp >> VTD_PAGE_SHIFT, - level); + while (tmp <= last_pfn) { + pte = dma_pfn_level_pte(domain, tmp, level); if (pte) { free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); @@ -828,7 +832,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, level++; } /* free pgd */ - if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { + if (start == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); domain->pgd = NULL; } -- cgit v1.2.3 From d794dc9b302c2781c571c10dedb8094e223d31b8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 00:27:49 +0100 Subject: intel-iommu: Make dma_pte_free_pagetable() take pfns as argument With some cleanup of intel_unmap_page(), intel_unmap_sg() and vm_domain_exit() to no longer play with 64-bit addresses. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 68 +++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1526864a9d6..fc593adb049 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -796,11 +796,10 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* free page table pages. last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, - u64 start, u64 end) + unsigned long start_pfn, + unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long start_pfn = start >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (end-1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; int total = agaw_to_level(domain->agaw); int level; @@ -832,7 +831,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, level++; } /* free pgd */ - if (start == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); domain->pgd = NULL; } @@ -1416,7 +1415,6 @@ static void domain_exit(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - u64 end; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1425,14 +1423,12 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); for_each_active_iommu(iommu, drhd) if (test_bit(iommu->seq_id, &domain->iommu_bmp)) @@ -2601,7 +2597,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; @@ -2617,20 +2613,22 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, if (!iova) return; - start_addr = iova->pfn_lo << PAGE_SHIFT; - size = aligned_size((u64)dev_addr, size); + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - pr_debug("Device %s unmapping: %zx@%llx\n", - pci_name(pdev), size, (unsigned long long)start_addr); + pr_debug("Device %s unmapping: pfn %lx-%lx\n", + pci_name(pdev), start_pfn, last_pfn); /* clear the whole page */ - dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_free_pagetable(domain, start_pfn, last_pfn); + if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, domain->id, + start_pfn << VTD_PAGE_SHIFT, + last_pfn - start_pfn + 1); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2688,14 +2686,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; - size_t size = 0; - phys_addr_t addr; - struct scatterlist *sg; struct intel_iommu *iommu; if (iommu_no_mapping(pdev)) @@ -2709,21 +2703,19 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); if (!iova) return; - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } - start_addr = iova->pfn_lo << PAGE_SHIFT; + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; /* clear the whole page */ - dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, domain->id, + start_pfn << VTD_PAGE_SHIFT, + (last_pfn - start_pfn + 1)); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2804,8 +2796,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_addr >> VTD_PAGE_SHIFT, (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, - start_addr + offset); + dma_pte_free_pagetable(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); return 0; @@ -3378,8 +3370,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) static void vm_domain_exit(struct dmar_domain *domain) { - u64 end; - /* Domain 0 is reserved, so dont process it */ if (!domain) return; @@ -3387,14 +3377,12 @@ static void vm_domain_exit(struct dmar_domain *domain) vm_domain_remove_all_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~VTD_PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); iommu_free_vm_domain(domain); free_domain_mem(domain); -- cgit v1.2.3 From 163cc52ccd2cc5c5ae4e1c886f6fde8547feed2a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 00:51:17 +0100 Subject: intel-iommu: Clean up intel_iommu_unmap_range() Use unaligned address for domain->max_addr. That algorithm isn't ideal anyway -- we should probably just look at the last iova in the tree. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc593adb049..21dc7731186 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3491,7 +3491,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) prot |= DMA_PTE_SNP; - max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); + max_addr = iova + size; if (dmar_domain->max_addr < max_addr) { int min_agaw; u64 end; @@ -3518,16 +3518,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - dma_addr_t base; - /* The address might not be aligned */ - base = iova & VTD_PAGE_MASK; - size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(dmar_domain, base >> VTD_PAGE_SHIFT, - (base + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, + (iova + size - 1) >> VTD_PAGE_SHIFT); - if (dmar_domain->max_addr == base + size) - dmar_domain->max_addr = base; + if (dmar_domain->max_addr == iova + size) + dmar_domain->max_addr = iova; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, -- cgit v1.2.3 From b026fd28ea23af24a3eea6e5be3f3d0193a8e87d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 10:37:25 +0100 Subject: intel-iommu: Change addr_to_dma_pte() to pfn_to_dma_pte() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 21dc7731186..dfbabd151a9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -692,23 +692,24 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } -static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) +static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + unsigned long pfn) { - int addr_width = agaw_to_width(domain->agaw); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; unsigned long flags; BUG_ON(!domain->pgd); - BUG_ON(addr >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; spin_lock_irqsave(&domain->mapping_lock, flags); while (level > 0) { void *tmp_page; - offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, level); + offset = pfn_level_offset(pfn, level); pte = &parent[offset]; if (level == 1) break; @@ -1660,7 +1661,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; index = 0; while (start_pfn < end_pfn) { - pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); + pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else @@ -3533,7 +3534,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = addr_to_dma_pte(dmar_domain, iova); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); if (pte) phys = dma_pte_addr(pte); -- cgit v1.2.3 From 1c5a46ed49e37f56f8aa9000bb1c2ac59670c372 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 10:53:37 +0100 Subject: intel-iommu: Clean up address handling in domain_page_mapping() No more masking and alignment; just use pfns. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dfbabd151a9..f08d7865fe0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1647,20 +1647,18 @@ static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot) { - u64 start_pfn, end_pfn; + unsigned long start_pfn = hpa >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; - int index; - int addr_width = agaw_to_width(domain->agaw); + int index = 0; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - BUG_ON(hpa >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - iova &= PAGE_MASK; - start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; - end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; - index = 0; - while (start_pfn < end_pfn) { + + while (start_pfn <= last_pfn) { pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); if (!pte) return -ENOMEM; -- cgit v1.2.3 From 61df744314079e8cb8cdec75f517cf0e704e41ef Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 11:55:58 +0100 Subject: intel-iommu: Introduce domain_pfn_mapping() ... and use it in the trivial cases; the other callers want individual (and bisectable) attention, since I screwed them up the first time... Make the BUG_ON() happen on too-large virtual address rather than physical address, too. That's the one we care about. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f08d7865fe0..7540ef91d5f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1643,40 +1643,48 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } -static int -domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) { - unsigned long start_pfn = hpa >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; - int index = 0; int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - while (start_pfn <= last_pfn) { - pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); + while (nr_pages--) { + pte = pfn_to_dma_pte(domain, iov_pfn); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_pfn(pte, start_pfn); + dma_set_pte_pfn(pte, phys_pfn); dma_set_pte_prot(pte, prot); if (prot & DMA_PTE_SNP) dma_set_pte_snp(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - start_pfn++; - index++; + iov_pfn++; + phys_pfn++; } return 0; } +static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + unsigned long first_pfn = hpa >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; + + return domain_pfn_mapping(domain, iova >> VTD_PAGE_SHIFT, first_pfn, + last_pfn - first_pfn + 1, prot); + +} + static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { if (!iommu) @@ -1893,8 +1901,10 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, (base + size - 1) >> VTD_PAGE_SHIFT); - return domain_page_mapping(domain, base, base, size, - DMA_PTE_READ|DMA_PTE_WRITE); + return domain_pfn_mapping(domain, base >> VTD_PAGE_SHIFT, + base >> VTD_PAGE_SHIFT, + size >> VTD_PAGE_SHIFT, + DMA_PTE_READ|DMA_PTE_WRITE); } static int iommu_prepare_identity_map(struct pci_dev *pdev, -- cgit v1.2.3 From 0ab36de274ab094c3992b50c9c48c5c89072ec94 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:01:43 +0100 Subject: intel-iommu: Use domain_pfn_mapping() in __intel_map_single() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7540ef91d5f..dccd0a7b7a5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2477,14 +2477,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, return 0; iommu = domain_get_iommu(domain); - size = aligned_size((u64)paddr, size); + size = aligned_size(paddr, size) >> VTD_PAGE_SHIFT; - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); if (!iova) goto error; - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; - /* * Check if DMAR supports zero-length reads on write only * mappings.. @@ -2500,20 +2498,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * might have two guest_addr mapping to the same host paddr, but this * is not a big problem */ - ret = domain_page_mapping(domain, start_paddr, - ((u64)paddr) & PHYSICAL_PAGE_MASK, - size, prot); + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), + paddr >> VTD_PAGE_SHIFT, size, prot); if (ret) goto error; + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_paddr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, start_paddr, size); else iommu_flush_write_buffer(iommu); - return start_paddr + ((u64)paddr & (~PAGE_MASK)); + return start_paddr + (paddr & (~PAGE_MASK)); error: if (iova) -- cgit v1.2.3 From ad05122162b67f64d5a1c6d35e001f7a88619b88 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:22:28 +0100 Subject: intel-iommu: Use domain_pfn_mapping() in intel_iommu_map_range() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dccd0a7b7a5..a490b39ca3d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3516,8 +3516,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain, } dmar_domain->max_addr = max_addr; } - - ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); + /* Round up size to next multiple of PAGE_SIZE, if it and + the low bits of hpa would take us onto the next page */ + size = aligned_size(hpa, size) >> VTD_PAGE_SHIFT; + ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); return ret; } -- cgit v1.2.3 From b536d24d212c994a7d98469ea3a8891573d45fd4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:49:31 +0100 Subject: intel-iommu: Clean up intel_map_sg(), remove domain_page_mapping() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 54 +++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 35 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a490b39ca3d..bc49b121c66 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1674,17 +1674,6 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } -static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) -{ - unsigned long first_pfn = hpa >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; - - return domain_pfn_mapping(domain, iova >> VTD_PAGE_SHIFT, first_pfn, - last_pfn - first_pfn + 1, prot); - -} - static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { if (!iommu) @@ -2745,17 +2734,16 @@ static int intel_nontranslate_map_sg(struct device *hddev, static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t addr; int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; size_t size = 0; int prot = 0; - size_t offset = 0; + size_t offset_pfn = 0; struct iova *iova = NULL; int ret; struct scatterlist *sg; - unsigned long start_addr; + unsigned long start_vpfn; struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); @@ -2768,10 +2756,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne iommu = domain_get_iommu(domain); - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } + for_each_sg(sglist, sg, nelems, i) + size += aligned_size(sg->offset, sg->length); iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { @@ -2789,36 +2775,34 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_addr = iova->pfn_lo << PAGE_SHIFT; - offset = 0; + start_vpfn = mm_to_dma_pfn(iova->pfn_lo); + offset_pfn = 0; for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size = aligned_size((u64)addr, sg->length); - ret = domain_page_mapping(domain, start_addr + offset, - ((u64)addr) & PHYSICAL_PAGE_MASK, - size, prot); + int nr_pages = aligned_size(sg->offset, sg->length) >> VTD_PAGE_SHIFT; + ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, + page_to_dma_pfn(sg_page(sg)), + nr_pages, prot); if (ret) { /* clear the page */ - dma_pte_clear_range(domain, - start_addr >> VTD_PAGE_SHIFT, - (start_addr + offset - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, start_vpfn, + start_vpfn + offset_pfn); /* free page tables */ - dma_pte_free_pagetable(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + offset - 1) >> VTD_PAGE_SHIFT); + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + offset_pfn); /* free iova */ __free_iova(&domain->iovad, iova); return 0; } - sg->dma_address = start_addr + offset + - ((u64)addr & (~PAGE_MASK)); + sg->dma_address = ((dma_addr_t)(start_vpfn + offset_pfn) + << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; - offset += size; + offset_pfn += nr_pages; } /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_addr, - offset >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn << VTD_PAGE_SHIFT, + offset_pfn); else iommu_flush_write_buffer(iommu); -- cgit v1.2.3 From 88cb6a7424d9465faf6caaaadff5af0766c93991 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 15:03:06 +0100 Subject: intel-iommu: Change aligned_size() to aligned_nrpages() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bc49b121c66..22add36fd73 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2320,11 +2320,13 @@ error: return ret; } -static inline u64 aligned_size(u64 host_addr, size_t size) +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) { - u64 addr; - addr = (host_addr & (~PAGE_MASK)) + size; - return PAGE_ALIGN(addr); + host_addr &= ~PAGE_MASK; + host_addr += size + PAGE_SIZE - 1; + + return host_addr >> VTD_PAGE_SHIFT; } struct iova * @@ -2466,7 +2468,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, return 0; iommu = domain_get_iommu(domain); - size = aligned_size(paddr, size) >> VTD_PAGE_SHIFT; + size = aligned_nrpages(paddr, size); iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); if (!iova) @@ -2757,9 +2759,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne iommu = domain_get_iommu(domain); for_each_sg(sglist, sg, nelems, i) - size += aligned_size(sg->offset, sg->length); + size += aligned_nrpages(sg->offset, sg->length); - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, + pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; @@ -2778,7 +2781,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_vpfn = mm_to_dma_pfn(iova->pfn_lo); offset_pfn = 0; for_each_sg(sglist, sg, nelems, i) { - int nr_pages = aligned_size(sg->offset, sg->length) >> VTD_PAGE_SHIFT; + int nr_pages = aligned_nrpages(sg->offset, sg->length); ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, page_to_dma_pfn(sg_page(sg)), nr_pages, prot); @@ -3502,7 +3505,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, } /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ - size = aligned_size(hpa, size) >> VTD_PAGE_SHIFT; + size = aligned_nrpages(hpa, size); ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, hpa >> VTD_PAGE_SHIFT, size, prot); return ret; -- cgit v1.2.3 From 03d6a2461ab1704c171ce21081c5022378ef7a91 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 15:33:46 +0100 Subject: intel-iommu: Make iommu_flush_iotlb_psi() take pfn as argument Most of its callers are having to shift for themselves anyway, so we might as well do it in iommu_flush_iotlb_psi(). Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 22add36fd73..6afe44cb681 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1058,11 +1058,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages) + unsigned long pfn, unsigned int pages) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); /* @@ -2494,15 +2494,15 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (ret) goto error; - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; - /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_paddr, size); + iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); else iommu_flush_write_buffer(iommu); - return start_paddr + (paddr & (~PAGE_MASK)); + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + start_paddr += paddr & ~PAGE_MASK; + return start_paddr; error: if (iova) @@ -2624,8 +2624,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, dma_pte_free_pagetable(domain, start_pfn, last_pfn); if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, - start_pfn << VTD_PAGE_SHIFT, + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, last_pfn - start_pfn + 1); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2711,8 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, - start_pfn << VTD_PAGE_SHIFT, + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, (last_pfn - start_pfn + 1)); /* free iova */ @@ -2804,8 +2802,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_vpfn << VTD_PAGE_SHIFT, - offset_pfn); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); else iommu_flush_write_buffer(iommu); -- cgit v1.2.3 From 1a4a45516d7a57de0691352d899d7008f2e090d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 16:00:42 +0100 Subject: intel-iommu: Remove last use of PHYSICAL_PAGE_MASK, for reserving PCI BARs This is fairly broken anyway -- it doesn't take hotplug into account. We should probably be checking page_is_ram() instead. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6afe44cb681..a55f5fb06b1 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -62,9 +62,6 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) -#ifndef PHYSICAL_PAGE_MASK -#define PHYSICAL_PAGE_MASK PAGE_MASK -#endif /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ @@ -1307,7 +1304,6 @@ static void dmar_init_reserved_ranges(void) struct pci_dev *pdev = NULL; struct iova *iova; int i; - u64 addr, size; init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); @@ -1330,12 +1326,9 @@ static void dmar_init_reserved_ranges(void) r = &pdev->resource[i]; if (!r->flags || !(r->flags & IORESOURCE_MEM)) continue; - addr = r->start; - addr &= PHYSICAL_PAGE_MASK; - size = r->end - addr; - size = PAGE_ALIGN(size); - iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), - IOVA_PFN(size + addr) - 1); + iova = reserve_iova(&reserved_iova_list, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); if (!iova) printk(KERN_ERR "Reserve iova failed\n"); } -- cgit v1.2.3 From c5395d5c4a82159889cb650de93b591ea51d8c56 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 16:35:56 +0100 Subject: intel-iommu: Clean up iommu_domain_identity_map() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a55f5fb06b1..c5caf7d63a0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1861,31 +1861,25 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) { - unsigned long size; - unsigned long long base; + unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; + unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - /* The address might not be aligned */ - base = start & PAGE_MASK; - size = end - base; - size = PAGE_ALIGN(size); - if (!reserve_iova(&domain->iovad, IOVA_PFN(base), - IOVA_PFN(base + size) - 1)) { + if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), + dma_to_mm_pfn(last_vpfn))) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); return -ENOMEM; } - pr_debug("Mapping reserved region %lx@%llx for domain %d\n", - size, base, domain->id); + pr_debug("Mapping reserved region %llx-%llx for domain %d\n", + start, end, domain->id); /* * RMRR range might have overlap with physical memory range, * clear it first */ - dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, - (base + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, first_vpfn, last_vpfn); - return domain_pfn_mapping(domain, base >> VTD_PAGE_SHIFT, - base >> VTD_PAGE_SHIFT, - size >> VTD_PAGE_SHIFT, + return domain_pfn_mapping(domain, first_vpfn, first_vpfn, + last_vpfn - first_vpfn + 1, DMA_PTE_READ|DMA_PTE_WRITE); } -- cgit v1.2.3 From 310a5ab93cb4ce29367238f682affd9ac352f4d0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 18:52:20 +0100 Subject: intel-iommu: Performance improvement for dma_pte_clear_range() It's a bit silly to repeatedly call domain_flush_cache() for each PTE individually, as we clear it. Instead, batch them up and flush a whole range at a time. We might as well refrain from recalculating the PTE address from scratch each time round the loop too. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5caf7d63a0..ba7e37f7111 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -761,34 +761,33 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, return NULL; } -/* clear one page's page table */ -static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) -{ - struct dma_pte *pte = NULL; - - /* get last level pte */ - pte = dma_pfn_level_pte(domain, pfn, 1); - - if (pte) { - dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); - } -} - /* clear last level pte, a tlb flush should be followed */ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here; nobody else touches the iova range */ while (start_pfn <= last_pfn) { - dma_pte_clear_one(domain, start_pfn); - start_pfn++; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + if (!pte) { + start_pfn = align_to_level(start_pfn + 1, 2); + continue; + } + while (start_pfn <= last_pfn && + (unsigned long)pte >> VTD_PAGE_SHIFT == + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + dma_clear_pte(pte); + start_pfn++; + pte++; + } + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); } } -- cgit v1.2.3 From 6f6a00e40aa3fdd3b29c30e3ef1fc9690506bc03 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 20:38:49 +0100 Subject: intel-iommu: Performance improvement for domain_pfn_mapping() As with dma_pte_clear_range(), don't keep flushing a single PTE at a time. And also micro-optimise the setting of PTE values rather than using the helper functions to do all the masking. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ba7e37f7111..f8074236bcc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1639,7 +1639,7 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *pte; + struct dma_pte *first_pte = NULL, *pte = NULL; int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1647,19 +1647,27 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + while (nr_pages--) { - pte = pfn_to_dma_pte(domain, iov_pfn); - if (!pte) - return -ENOMEM; + if (!pte) { + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + if (!pte) + return -ENOMEM; + } /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_pfn(pte, phys_pfn); - dma_set_pte_prot(pte, prot); - if (prot & DMA_PTE_SNP) - dma_set_pte_snp(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + pte->val = (phys_pfn << VTD_PAGE_SHIFT) | prot; + pte++; + if (!nr_pages || + (unsigned long)pte >> VTD_PAGE_SHIFT != + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } iov_pfn++; phys_pfn++; } -- cgit v1.2.3 From 875764de6f0ddb23d270c29357d5a339232a0488 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 21:20:51 +0100 Subject: intel-iommu: Simplify __intel_alloc_iova() There's no need for the separate iommu_alloc_iova() function, and certainly not for it to be global. Remove the underscores while we're at it. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 49 +++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 31 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f8074236bcc..11a23201445 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2323,43 +2323,31 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return host_addr >> VTD_PAGE_SHIFT; } -struct iova * -iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) -{ - struct iova *piova; - - /* Make sure it's in range */ - end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); - if (!size || (IOVA_START_ADDR + size > end)) - return NULL; - - piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT, IOVA_PFN(end), 1); - return piova; -} - -static struct iova * -__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, - size_t size, u64 dma_mask) +static struct iova *intel_alloc_iova(struct device *dev, + struct dmar_domain *domain, + unsigned long nrpages, uint64_t dma_mask) { struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; - if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) - iova = iommu_alloc_iova(domain, size, dma_mask); - else { + /* Restrict dma_mask to the width that the iommu can handle */ + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + + if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { /* * First try to allocate an io virtual address in * DMA_BIT_MASK(32) and if that fails then try allocating * from higher range */ - iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); - if (!iova) - iova = iommu_alloc_iova(domain, size, dma_mask); - } - - if (!iova) { - printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); + iova = alloc_iova(&domain->iovad, nrpages, + IOVA_PFN(DMA_BIT_MASK(32)), 1); + if (iova) + return iova; + } + iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); + if (unlikely(!iova)) { + printk(KERN_ERR "Allocating %ld-page iova for %s failed", + nrpages, pci_name(pdev)); return NULL; } @@ -2464,7 +2452,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) goto error; @@ -2753,8 +2741,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; -- cgit v1.2.3 From 0d07348931daef854aca8c834a89f1a99ba4ff2b Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 24 Jun 2009 12:08:27 +0900 Subject: PCI MSI: Return if alloc_msi_entry for MSI-X failed In current code it continues setup even if alloc_msi_entry() for MSI-X is failed due to lack of memory. It means arch_setup_msi_irqs() might be called with msi_desc entries less than its argument nvec. At least x86's arch_setup_msi_irqs() uses list_for_each_entry() for dev->msi_list that suspected to have entries same numbers as nvec, and it doesn't check the number of allocated vectors and passed arg nvec. Therefore it will result in success of pci_enable_msix(), with less vectors allocated than requested. This patch fixes the error route to return -ENOMEM, instead of continuing the setup (proposed by Matthew Wilcox). Note that there is no iounmap in msi_free_irqs() if no msi_disc is allocated. Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d9f06fbfa0b..628c14150d4 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -439,8 +439,14 @@ static int msix_capability_init(struct pci_dev *dev, for (i = 0; i < nvec; i++) { entry = alloc_msi_entry(dev); - if (!entry) - break; + if (!entry) { + if (!i) + iounmap(base); + else + msi_free_irqs(dev); + /* No enough memory. Don't try again */ + return -ENOMEM; + } j = entries[i].entry; entry->msi_attrib.is_msix = 1; -- cgit v1.2.3 From 50e5628a4ac465a52f0d4ca6567343be029731a0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:40 -0700 Subject: PCI ECRC: Remove unnecessary semicolons Acked-by: Andrew Patterson Signed-off-by: Joe Perches Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/ecrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c index ece97df4df6..a928d8ab6bd 100644 --- a/drivers/pci/pcie/aer/ecrc.c +++ b/drivers/pci/pcie/aer/ecrc.c @@ -106,7 +106,7 @@ void pcie_set_ecrc_checking(struct pci_dev *dev) disable_ecrc_checking(dev); break; case ECRC_POLICY_ON: - enable_ecrc_checking(dev);; + enable_ecrc_checking(dev); break; default: return; -- cgit v1.2.3 From 654b75e044119bf8e7d773bce41ea039281bbfbe Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 26 Jun 2009 14:04:46 +0800 Subject: PCI: check if bus has a proper bridge device before triggering SBR For devices attached to the root bus, we can't trigger Secondary Bus Reset because there is no bridge device associated with the bus. So need to check bus->self again NULL first before using it. Reviewed-by: Kenji Kaneshige Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6c93af5ced1..d5d6f5667d8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2171,7 +2171,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) u16 ctrl; struct pci_dev *pdev; - if (dev->subordinate) + if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) -- cgit v1.2.3 From 503998ca4a295f7da233689850ba4b9d13cf41e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 24 Jun 2009 09:18:14 -0700 Subject: PCI: fix kernel-doc warnings Add documentation for missing parameters in PCI hotplug code. Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pci_hotplug_core.c | 2 ++ drivers/pci/slot.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 844580489d4..5c5043f239c 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -555,6 +555,8 @@ static struct hotplug_slot *get_slot_from_name (const char *name) * @slot: pointer to the &struct hotplug_slot to register * @devnr: device number * @name: name registered with kobject core + * @owner: caller module owner + * @mod_name: caller module name * * Registers a hotplug slot with the pci hotplug subsystem, which will allow * userspace interaction to the slot. diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index eddb0748b0e..8c02b6c53bd 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -311,7 +311,7 @@ EXPORT_SYMBOL_GPL(pci_destroy_slot); #include /** * pci_hp_create_link - create symbolic link to the hotplug driver module. - * @slot: struct pci_slot + * @pci_slot: struct pci_slot * * Helper function for pci_hotplug_core.c to create symbolic link to * the hotplug driver module. @@ -334,7 +334,7 @@ EXPORT_SYMBOL_GPL(pci_hp_create_module_link); /** * pci_hp_remove_link - remove symbolic link to the hotplug driver module. - * @slot: struct pci_slot + * @pci_slot: struct pci_slot * * Helper function for pci_hotplug_core.c to remove symbolic link to * the hotplug driver module. -- cgit v1.2.3 From 7a661c6f1082693a7e9627e9ad2d1546a9337fdc Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 24 Jun 2009 16:02:27 +0100 Subject: PCI: More PATA quirks for not entering D3 The ALi loses some state if it goes into D3. Unfortunately even with the chipset documents I can't figure out how to restore some bits of it. The VIA one saves/restores apparently fine but the ACPI _GTM methods break on some platforms if we do this and this causes cable misdetections. These are both effectively regressions as historically nothing matched the devices and then decided not to bind to them. Nowdays something is binding to all sorts of devices and a result they get dumped into D3. Signed-off-by: Alan Cox Acked-by: Jeff Garzik Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 56552d74abe..06b96562396 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1058,6 +1058,11 @@ static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); +/* ALi loses some register settings that we cannot then restore */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, quirk_no_ata_d3); +/* VIA comes back fine but we need to keep it alive or ACPI GTM failures + occur when mode detecting */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_no_ata_d3); /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. -- cgit v1.2.3 From 2c21fd4b333e4c780a46edcd6d1e85bfc6cdf371 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 23 Jun 2009 17:40:04 +0900 Subject: PCI MSI: shorten PCI_MSIX_ENTRY_* symbol names These names are too long! Drop _OFFSET to save some bytes/lines. Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 18 ++++++++---------- drivers/pci/msi.h | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 628c14150d4..a088fc6f583 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -151,7 +151,7 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + PCI_MSIX_ENTRY_VECTOR_CTRL; mask_bits &= ~1; mask_bits |= flag; writel(mask_bits, desc->mask_base + offset); @@ -188,9 +188,9 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) void __iomem *base = entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA); } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -225,11 +225,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) base = entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); + writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA); } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -493,7 +491,7 @@ static int msix_capability_init(struct pci_dev *dev, set_irq_msi(entry->irq, entry); j = entries[i].entry; entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + PCI_MSIX_ENTRY_VECTOR_CTRL); msix_mask_irq(entry, 1); i++; } diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index a0662842550..de27c1cb5a2 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -6,11 +6,11 @@ #ifndef MSI_H #define MSI_H -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 -#define PCI_MSIX_ENTRY_DATA_OFFSET 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 #define msi_control_reg(base) (base + PCI_MSI_FLAGS) #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) -- cgit v1.2.3 From 7ba1930db02fc3118165338ef4e562869f575583 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 23 Jun 2009 17:39:27 +0900 Subject: PCI MSI: Unmask MSI if setup failed The initial state of mask register of MSI is unmasked. We set it masked before calling arch_setup_msi_irqs(). If arch_setup_msi_irq() fails, it is better to restore the state of the mask register. Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a088fc6f583..9ab4fe8f20a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -383,6 +383,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) /* Configure MSI capability structure */ ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { + msi_mask_irq(entry, mask, ~mask); msi_free_irqs(dev); return ret; } -- cgit v1.2.3 From 12abb8ba8444f7c9b355bbdd44a6d0839f7a41b6 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 24 Jun 2009 12:08:09 +0900 Subject: PCI MSI: Fix restoration of MSI/MSI-X mask states in suspend/resume There are 2 problems on mask states in suspend/resume. [1]: It is better to restore the mask states of MSI/MSI-X to initial states (MSI is unmasked, MSI-X is masked) when we release the device. The pci_msi_shutdown() does the restoration of mask states for MSI, while the msi_free_irqs() does it for MSI-X. In other words, in the "disable" path both of MSI and MSI-X are handled, but in the "shutdown" path only MSI is handled. MSI: pci_disable_msi() => pci_msi_shutdown() [ mask states for MSI restored ] => msi_set_enable(dev, pos, 0); => msi_free_irqs() MSI-X: pci_disable_msix() => pci_msix_shutdown() => msix_set_enable(dev, 0); => msix_free_all_irqs => msi_free_irqs() [ mask states for MSI-X restored ] This patch moves the masking for MSI-X from msi_free_irqs() to pci_msix_shutdown(). This change has some positive side effects: - It prevents OS from touching mask states before reading preserved bits in the register, which can be happen if msi_free_irqs() is called from error path in msix_capability_init(). - It also prevents touching the register after turning off MSI-X in "disable" path, which can be a problem on some devices. [2]: We have cache of the mask state in msi_desc, which is automatically updated when msi/msix_mask_irq() is called. This cached states are used for the resume. But since what need to be restored in the resume is the states before the shutdown on the suspend, calling msi/msix_mask_irq() from pci_msi/msix_shutdown() is not appropriate. This patch introduces __msi/msix_mask_irq() that do mask as same as msi/msix_mask_irq() but does not update cached state, for use in pci_msi/msix_shutdown(). [updated: get rid of msi/msix_mask_irq_nocache() (proposed by Matthew Wilcox)] Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9ab4fe8f20a..d986afb7032 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -127,17 +127,23 @@ static inline __attribute_const__ u32 msi_enabled_mask(u16 control) * reliably as devices without an INTx disable bit will then generate a * level IRQ which will never be cleared. */ -static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { u32 mask_bits = desc->masked; if (!desc->msi_attrib.maskbit) - return; + return 0; mask_bits &= ~mask; mask_bits |= flag; pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); - desc->masked = mask_bits; + + return mask_bits; +} + +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +{ + desc->masked = __msi_mask_irq(desc, mask, flag); } /* @@ -147,7 +153,7 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) * file. This saves a few milliseconds when initialising devices with lots * of MSI-X interrupts. */ -static void msix_mask_irq(struct msi_desc *desc, u32 flag) +static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + @@ -155,7 +161,13 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) mask_bits &= ~1; mask_bits |= flag; writel(mask_bits, desc->mask_base + offset); - desc->masked = mask_bits; + + return mask_bits; +} + +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + desc->masked = __msix_mask_irq(desc, flag); } static void msi_set_mask_bit(unsigned irq, u32 flag) @@ -616,9 +628,11 @@ void pci_msi_shutdown(struct pci_dev *dev) pci_intx_for_msi(dev, 1); dev->msi_enabled = 0; + /* Return the device with MSI unmasked as initial states */ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &ctrl); mask = msi_capable_mask(ctrl); - msi_mask_irq(desc, mask, ~mask); + /* Keep cached state to be restored */ + __msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ dev->irq = desc->msi_attrib.default_irq; @@ -658,7 +672,6 @@ static int msi_free_irqs(struct pci_dev* dev) list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { if (entry->msi_attrib.is_msix) { - msix_mask_irq(entry, 1); if (list_is_last(&entry->list, &dev->msi_list)) iounmap(entry->mask_base); } @@ -746,9 +759,17 @@ static void msix_free_all_irqs(struct pci_dev *dev) void pci_msix_shutdown(struct pci_dev* dev) { + struct msi_desc *entry; + if (!pci_msi_enable || !dev || !dev->msix_enabled) return; + /* Return the device with MSI-X masked as initial states */ + list_for_each_entry(entry, &dev->msi_list, list) { + /* Keep cached states to be restored */ + __msix_mask_irq(entry, 1); + } + msix_set_enable(dev, 0); pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; -- cgit v1.2.3 From e1605495c716ef4eebdb7606bcd1b593f28e2837 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 11:17:38 +0100 Subject: intel-iommu: Introduce domain_sg_mapping() to speed up intel_map_sg() Instead of calling domain_pfn_mapping() repeatedly with single or small numbers of pages, just pass the sglist in. It can optimise the number of cache flushes like domain_pfn_mapping() does, and gives a huge speedup for large scatterlists. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 83 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 21 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 11a23201445..28bd5f2d78f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1635,6 +1635,56 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } +static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + uint64_t pteval; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned long sg_res = 0; + + BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); + + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) + return -EINVAL; + + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + + while (nr_pages--) { + if (!sg_res) { + sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; + sg->dma_length = sg->length; + pteval = page_to_phys(sg_page(sg)) | prot; + } + if (!pte) { + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + if (!pte) + return -ENOMEM; + } + /* We don't need lock here, nobody else + * touches the iova range + */ + BUG_ON(dma_pte_addr(pte)); + pte->val = pteval; + pte++; + if (!nr_pages || + (unsigned long)pte >> VTD_PAGE_SHIFT != + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + iov_pfn++; + pteval += VTD_PAGE_SIZE; + sg_res--; + if (!sg_res) + sg = sg_next(sg); + } + return 0; +} + static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -2758,27 +2808,18 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne prot |= DMA_PTE_WRITE; start_vpfn = mm_to_dma_pfn(iova->pfn_lo); - offset_pfn = 0; - for_each_sg(sglist, sg, nelems, i) { - int nr_pages = aligned_nrpages(sg->offset, sg->length); - ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, - page_to_dma_pfn(sg_page(sg)), - nr_pages, prot); - if (ret) { - /* clear the page */ - dma_pte_clear_range(domain, start_vpfn, - start_vpfn + offset_pfn); - /* free page tables */ - dma_pte_free_pagetable(domain, start_vpfn, - start_vpfn + offset_pfn); - /* free iova */ - __free_iova(&domain->iovad, iova); - return 0; - } - sg->dma_address = ((dma_addr_t)(start_vpfn + offset_pfn) - << VTD_PAGE_SHIFT) + sg->offset; - sg->dma_length = sg->length; - offset_pfn += nr_pages; + + ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); + if (unlikely(ret)) { + /* clear the page */ + dma_pte_clear_range(domain, start_vpfn, + start_vpfn + size - 1); + /* free page tables */ + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + size - 1); + /* free iova */ + __free_iova(&domain->iovad, iova); + return 0; } /* it's a non-present to present mapping. Only flush if caching mode */ -- cgit v1.2.3 From 9051aa0268dc1c3e42cd79a802b0af1f2bfcadae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 12:30:54 +0100 Subject: intel-iommu: Combine domain_pfn_mapping() and domain_sg_mapping() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 62 +++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 40 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 28bd5f2d78f..14308533b1c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1635,14 +1635,14 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } -static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - struct scatterlist *sg, unsigned long nr_pages, - int prot) +static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) { struct dma_pte *first_pte = NULL, *pte = NULL; - uint64_t pteval; + phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long sg_res = 0; + unsigned long sg_res; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1651,6 +1651,13 @@ static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + if (sg) + sg_res = 0; + else { + sg_res = nr_pages + 1; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + } + while (nr_pages--) { if (!sg_res) { sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; @@ -1685,43 +1692,18 @@ static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } -static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, - int prot) +static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - - BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); - - if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) - return -EINVAL; - - prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); +} - while (nr_pages--) { - if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); - if (!pte) - return -ENOMEM; - } - /* We don't need lock here, nobody else - * touches the iova range - */ - BUG_ON(dma_pte_addr(pte)); - pte->val = (phys_pfn << VTD_PAGE_SHIFT) | prot; - pte++; - if (!nr_pages || - (unsigned long)pte >> VTD_PAGE_SHIFT != - (unsigned long)first_pte >> VTD_PAGE_SHIFT) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - pte = NULL; - } - iov_pfn++; - phys_pfn++; - } - return 0; +static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); } static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) -- cgit v1.2.3 From 1bf20f0dc5629032ddd07617139d9fbca66c1642 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 22:06:43 +0100 Subject: intel-iommu: dump mappings but don't die on pte already set Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 14308533b1c..40ce5a03f18 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1673,7 +1673,16 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova range */ - BUG_ON(dma_pte_addr(pte)); + if (unlikely(dma_pte_addr(pte))) { + static int dumps = 5; + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx)\n", + iov_pfn, pte->val); + if (dumps) { + dumps--; + debug_dma_dump_mappings(NULL); + } + WARN_ON(1); + } pte->val = pteval; pte++; if (!nr_pages || -- cgit v1.2.3 From 3d7b0e4154b4963d6bd39991ec8eaa09caeb3994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jun 2009 03:38:09 +0100 Subject: intel-iommu: Don't free too much in dma_pte_free_pagetable() The loop condition was wrong -- we should free a PMD only if its _entire_ range is within the range we're intending to clear. The early-termination condition was right, but not the loop. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 40ce5a03f18..35bdd2a06ca 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -815,7 +815,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp <= last_pfn) { + while (tmp + level_size(level) - 1 <= last_pfn) { pte = dma_pfn_level_pte(domain, tmp, level); if (pte) { free_pgtable_page( -- cgit v1.2.3 From f3a0a52fff4dbfdea2dccc908d00c038481d888e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jun 2009 03:40:07 +0100 Subject: intel-iommu: Performance improvement for dma_pte_free_pagetable() As with other functions, batch the CPU data cache flushes and don't keep recalculating PTE addresses. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 35bdd2a06ca..ec7e032d5ab 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -797,7 +797,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *pte; + struct dma_pte *first_pte, *pte; int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; @@ -805,25 +805,32 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); - /* we don't need lock here, nobody else touches the iova range */ + /* We don't need lock here; nobody else touches the iova range */ level = 2; while (level <= total) { tmp = align_to_level(start_pfn, level); - /* Only clear this pte/pmd if we're asked to clear its - _whole_ range */ + /* If we can't even clear one PTE at this level, we're done */ if (tmp + level_size(level) - 1 > last_pfn) return; while (tmp + level_size(level) - 1 <= last_pfn) { - pte = dma_pfn_level_pte(domain, tmp, level); - if (pte) { - free_pgtable_page( - phys_to_virt(dma_pte_addr(pte))); + first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + if (!pte) { + tmp = align_to_level(tmp + 1, level + 1); + continue; + } + while (tmp + level_size(level) - 1 <= last_pfn && + (unsigned long)pte >> VTD_PAGE_SHIFT == + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + pte++; + tmp += level_size(level); } - tmp += level_size(level); + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + } level++; } -- cgit v1.2.3 From a15a519ed6e5e644f5a33c213c00b0c1d3cfe683 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 18:49:06 +0100 Subject: Fix iommu address space allocation This fixes kernel.org bug #13584. The IOVA code attempted to optimise the insertion of new ranges into the rbtree, with the unfortunate result that some ranges just didn't get inserted into the tree at all. Then those ranges would be handed out more than once, and things kind of go downhill from there. Introduced after 2.6.25 by ddf02886cbe665d67ca750750196ea5bf524b10b ("PCI: iova RB tree setup tweak"). Signed-off-by: David Woodhouse Cc: mark gross Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/pci/iova.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 2287116e982..46dd440e231 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -1,9 +1,19 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2009, Intel Corporation. * - * This file is released under the GPLv2. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. * - * Copyright (C) 2006-2008 Intel Corporation * Author: Anil S Keshavamurthy */ @@ -123,7 +133,15 @@ move_left: /* Insert the new_iova into domain rbtree by holding writer lock */ /* Add new node and rebalance tree. */ { - struct rb_node **entry = &((prev)), *parent = NULL; + struct rb_node **entry, *parent = NULL; + + /* If we have 'prev', it's a valid place to start the + insertion. Otherwise, start from the root. */ + if (prev) + entry = &prev; + else + entry = &iovad->rbroot.rb_node; + /* Figure out where to put new node */ while (*entry) { struct iova *this = container_of(*entry, -- cgit v1.2.3 From c85994e4771025ef2a66533eb1a4c6c2217b9cda Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 19:21:24 +0100 Subject: intel-iommu: Ensure that PTE writes are 64-bit atomic, even on i386 Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ec7e032d5ab..eea1006c860 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -222,7 +222,12 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) static inline u64 dma_pte_addr(struct dma_pte *pte) { - return (pte->val & VTD_PAGE_MASK); +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif } static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) @@ -712,6 +717,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, break; if (!dma_pte_present(pte)) { + uint64_t pteval; + tmp_page = alloc_pgtable_page(); if (!tmp_page) { @@ -719,15 +726,15 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, flags); return NULL; } - domain_flush_cache(domain, tmp_page, PAGE_SIZE); - dma_set_pte_pfn(pte, virt_to_dma_pfn(tmp_page)); - /* - * high level table always sets r/w, last level page - * table control read/write - */ - dma_set_pte_readable(pte); - dma_set_pte_writable(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); + pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (cmpxchg64(&pte->val, 0ULL, pteval)) { + /* Someone else set it while we were thinking; use theirs. */ + free_pgtable_page(tmp_page); + } else { + dma_pte_addr(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + } } parent = phys_to_virt(dma_pte_addr(pte)); level--; @@ -1666,6 +1673,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } while (nr_pages--) { + uint64_t tmp; + if (!sg_res) { sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; @@ -1680,17 +1689,17 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova range */ - if (unlikely(dma_pte_addr(pte))) { + tmp = cmpxchg64(&pte->val, 0ULL, pteval); + if (tmp) { static int dumps = 5; - printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx)\n", - iov_pfn, pte->val); + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", + iov_pfn, tmp, (unsigned long long)pteval); if (dumps) { dumps--; debug_dma_dump_mappings(NULL); } WARN_ON(1); } - pte->val = pteval; pte++; if (!nr_pages || (unsigned long)pte >> VTD_PAGE_SHIFT != -- cgit v1.2.3 From 206a73c102fc480ba072a9388bc2142c303113aa Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 19:30:28 +0100 Subject: intel-iommu: Kill superfluous mapping_lock Since we're using cmpxchg64() anyway (because that's the only way to do an atomic 64-bit store on i386), we might as well ditch the extra locking and just use cmpxchg64() to ensure that we don't add the page twice. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index eea1006c860..02223e2e27d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -267,7 +267,6 @@ struct dmar_domain { struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ int gaw; /* max guest address width */ /* adjusted guest address width, 0 is level 2 30-bit */ @@ -701,13 +700,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; - unsigned long flags; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; - spin_lock_irqsave(&domain->mapping_lock, flags); while (level > 0) { void *tmp_page; @@ -721,11 +718,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, tmp_page = alloc_pgtable_page(); - if (!tmp_page) { - spin_unlock_irqrestore(&domain->mapping_lock, - flags); + if (!tmp_page) return NULL; - } + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (cmpxchg64(&pte->val, 0ULL, pteval)) { @@ -740,7 +735,6 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, level--; } - spin_unlock_irqrestore(&domain->mapping_lock, flags); return pte; } @@ -1375,7 +1369,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) unsigned long sagaw; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->mapping_lock); spin_lock_init(&domain->iommu_lock); domain_reserve_special_ranges(domain); @@ -3336,7 +3329,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) int adjust_width; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->mapping_lock); spin_lock_init(&domain->iommu_lock); domain_reserve_special_ranges(domain); -- cgit v1.2.3 From 85b98276f2ffa66b25ae6328b00bfadfd74b74e7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 19:27:53 +0100 Subject: intel-iommu: Warn about unmatched unmap requests This would have found the bug in i386 pci_unmap_addr() a long time ago. We shouldn't just silently return without doing anything. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 02223e2e27d..2bbc3fc88dc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2640,7 +2640,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, iommu = domain_get_iommu(domain); iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); - if (!iova) + if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", + (unsigned long long)dev_addr)) return; start_pfn = mm_to_dma_pfn(iova->pfn_lo); @@ -2730,7 +2731,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, iommu = domain_get_iommu(domain); iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); - if (!iova) + if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", + (unsigned long long)sglist[0].dma_address)) return; start_pfn = mm_to_dma_pfn(iova->pfn_lo); -- cgit v1.2.3 From 7766a3fb905f0b078b05f5d6a6be8df4c64b9f51 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 20:27:03 +0100 Subject: intel-iommu: Use cmpxchg64_local() for setting PTEs Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2bbc3fc88dc..2c1b2babfdc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1682,7 +1682,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova range */ - tmp = cmpxchg64(&pte->val, 0ULL, pteval); + tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); if (tmp) { static int dumps = 5; printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", -- cgit v1.2.3 From 46b952a3c3a94afa339bd4961a4f3d1482436599 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 1 Jul 2009 14:24:30 -0700 Subject: PCI: Fix IRQ swizzling for ARI-enabled devices For many purposes, including interrupt-swizzling, devices with ARI enabled behave as if they have one device (number 0) and 256 functions. This probably hasn't bitten us in practice because all ARI devices I've seen are also IOV devices, and IOV devices are required to use MSI. This isn't guaranteed, and there are legitimate reasons to use ARI without IOV, and hence potentially use pin-based interrupts. Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d5d6f5667d8..dbd0f947f49 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1517,11 +1517,20 @@ void pci_enable_ari(struct pci_dev *dev) * * Perform INTx swizzling for a device behind one level of bridge. This is * required by section 9.1 of the PCI-to-PCI bridge specification for devices - * behind bridges on add-in cards. + * behind bridges on add-in cards. For devices with ARI enabled, the slot + * number is always 0 (see the Implementation Note in section 2.2.8.1 of + * the PCI Express Base Specification, Revision 2.1) */ u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin) { - return (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1; + int slot; + + if (pci_ari_enabled(dev->bus)) + slot = 0; + else + slot = PCI_SLOT(dev->devfn); + + return (((pin - 1) + slot) % 4) + 1; } int -- cgit v1.2.3 From 75e6bf9638992dfc0fec9c3ca10444c8e0d6a638 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 2 Jul 2009 11:21:16 +0100 Subject: intel-iommu: Introduce first_pte_in_page() to simplify PTE-setting loops On Wed, 2009-07-01 at 16:59 -0700, Linus Torvalds wrote: > I also _really_ hate how you do > > (unsigned long)pte >> VTD_PAGE_SHIFT == > (unsigned long)first_pte >> VTD_PAGE_SHIFT Kill this, in favour of just looking to see if the incremented pte pointer has 'wrapped' onto the next page. Which means we have to check it _after_ incrementing it, not before. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2c1b2babfdc..dcf0295a9b6 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -240,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. @@ -780,13 +785,12 @@ static void dma_pte_clear_range(struct dmar_domain *domain, start_pfn = align_to_level(start_pfn + 1, 2); continue; } - while (start_pfn <= last_pfn && - (unsigned long)pte >> VTD_PAGE_SHIFT == - (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + do { dma_clear_pte(pte); start_pfn++; pte++; - } + } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); + domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); } @@ -821,14 +825,14 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, tmp = align_to_level(tmp + 1, level + 1); continue; } - while (tmp + level_size(level) - 1 <= last_pfn && - (unsigned long)pte >> VTD_PAGE_SHIFT == - (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + do { free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); dma_clear_pte(pte); pte++; tmp += level_size(level); - } + } while (!first_pte_in_page(pte) && + tmp + level_size(level) - 1 <= last_pfn); + domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); @@ -1694,9 +1698,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, WARN_ON(1); } pte++; - if (!nr_pages || - (unsigned long)pte >> VTD_PAGE_SHIFT != - (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + if (!nr_pages || first_pte_in_page(pte)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; -- cgit v1.2.3 From 6a43e574c5af7d9bd084992b1c9c3cdbc3b6c0e9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 2 Jul 2009 12:02:34 +0100 Subject: intel-iommu: Don't keep freeing page zero in dma_pte_free_pagetable() Check dma_pte_present() and only free the page if there _is_ one. Kind of surprising that there was no warning about this. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dcf0295a9b6..53075424a43 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -826,8 +826,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, continue; } do { - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); + if (dma_pte_present(pte)) { + free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); + dma_clear_pte(pte); + } pte++; tmp += level_size(level); } while (!first_pte_in_page(pte) && -- cgit v1.2.3 From 5a5e02a614e59db7536cd11029e6674adc41b191 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 09:35:44 +0100 Subject: intel-iommu: Fix dma vs. mm page confusion with aligned_nrpages() The aligned_nrpages() function rounds up to the next VM page, but returns its result as a number of DMA pages. Purely theoretical except on IA64, which doesn't boot with VT-d right now anyway. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 53075424a43..ad85e95d2dc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2368,15 +2368,15 @@ error: return ret; } +/* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { host_addr &= ~PAGE_MASK; - host_addr += size + PAGE_SIZE - 1; - - return host_addr >> VTD_PAGE_SHIFT; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* This takes a number of _MM_ pages, not VTD pages */ static struct iova *intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) @@ -2506,7 +2506,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), + pdev->dma_mask); if (!iova) goto error; @@ -2797,7 +2798,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), + pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; -- cgit v1.2.3 From 1e4c64c46d413de84cc0b786bd6a9b555ba7d111 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 10:40:38 +0100 Subject: intel-iommu: Don't set identity mapping for bypassed graphics devices We should check iommu_dummy() _first_, because that means it's attached to an iommu that we've just disabled completely. At the moment, we might try to put the device into the identity mapping domain. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ad85e95d2dc..3e3910127fc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2447,8 +2447,11 @@ static int iommu_no_mapping(struct pci_dev *pdev) { int found; + if (iommu_dummy(pdev)) + return 1; + if (!iommu_identity_mapping) - return iommu_dummy(pdev); + return 0; found = identity_mapping(pdev); if (found) { @@ -2480,7 +2483,7 @@ static int iommu_no_mapping(struct pci_dev *pdev) } } - return iommu_dummy(pdev); + return 0; } static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, -- cgit v1.2.3 From 1b7bc0a1618b4de1e6f55c6d95b790f4ab6fcd9e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 10:49:46 +0100 Subject: intel-iommu: Fix reattaching of devices to identity mapping domain When we reattach a device to the si_domain (because it's been removed from a VM), we weren't calling domain_context_mapping() to actually tell the hardware about that. We should really put the call to domain_context_mapping() into domain_add_dev_info() -- we never call the latter without also doing the former, and we can keep the error paths simple that way. But that's a cleanup which can wait for 2.6.32 now. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3e3910127fc..73a5c71dd37 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2475,6 +2475,9 @@ static int iommu_no_mapping(struct pci_dev *pdev) if (pdev->dma_mask > DMA_BIT_MASK(32)) { int ret; ret = domain_add_dev_info(si_domain, pdev); + if (ret) + return 0; + ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", pci_name(pdev)); -- cgit v1.2.3 From 40e4aa34324bff3938a900014254f88943d05e15 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 10:55:41 +0100 Subject: intel-iommu: Add iommu_should_identity_map() function We do this twice, and it's about to get more complicated. This makes the code slightly clearer about what it's doing, too. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 73a5c71dd37..ae5ccdf8b19 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2442,6 +2442,11 @@ static int iommu_dummy(struct pci_dev *pdev) return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } +static int iommu_should_identity_map(struct pci_dev *pdev) +{ + return pdev->dma_mask > DMA_BIT_MASK(32); +} + /* Check if the pdev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct pci_dev *pdev) { @@ -2455,7 +2460,7 @@ static int iommu_no_mapping(struct pci_dev *pdev) found = identity_mapping(pdev); if (found) { - if (pdev->dma_mask > DMA_BIT_MASK(32)) + if (iommu_should_identity_map(pdev)) return 1; else { /* @@ -2472,7 +2477,7 @@ static int iommu_no_mapping(struct pci_dev *pdev) * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (pdev->dma_mask > DMA_BIT_MASK(32)) { + if (iommu_should_identity_map(pdev)) { int ret; ret = domain_add_dev_info(si_domain, pdev); if (ret) -- cgit v1.2.3 From 62edf5dc4a524e4cb525e6020b955a1ad593d9ba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 10:59:46 +0100 Subject: intel-iommu: Restore DMAR_BROKEN_GFX_WA option for broken graphics drivers We need to give people a little more time to fix the broken drivers. Re-introduce this, but tied in properly with the 'iommu=pt' support this time. Change the config option name and make it default to 'no' too. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ae5ccdf8b19..5ee8305257e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2127,16 +2127,18 @@ static int iommu_prepare_static_identity_mapping(void) return -EFAULT; for_each_pci_dev(pdev) { - printk(KERN_INFO "IOMMU: identity mapping for device %s\n", - pci_name(pdev)); + if (iommu_identity_mapping == 1 || IS_GFX_DEVICE(pdev)) { + printk(KERN_INFO "IOMMU: identity mapping for device %s\n", + pci_name(pdev)); - ret = domain_context_mapping(si_domain, pdev, - CONTEXT_TT_MULTI_LEVEL); - if (ret) - return ret; - ret = domain_add_dev_info(si_domain, pdev); - if (ret) - return ret; + ret = domain_context_mapping(si_domain, pdev, + CONTEXT_TT_MULTI_LEVEL); + if (ret) + return ret; + ret = domain_add_dev_info(si_domain, pdev); + if (ret) + return ret; + } } return 0; @@ -2291,6 +2293,10 @@ int __init init_dmars(void) * identity mapping if iommu_identity_mapping is set. */ if (!iommu_pass_through) { +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + if (!iommu_identity_mapping) + iommu_identity_mapping = 2; +#endif if (iommu_identity_mapping) iommu_prepare_static_identity_mapping(); /* @@ -2444,7 +2450,10 @@ static int iommu_dummy(struct pci_dev *pdev) static int iommu_should_identity_map(struct pci_dev *pdev) { - return pdev->dma_mask > DMA_BIT_MASK(32); + if (iommu_identity_mapping == 2) + return IS_GFX_DEVICE(pdev); + else + return pdev->dma_mask > DMA_BIT_MASK(32); } /* Check if the pdev needs to go through non-identity map and unmap process.*/ -- cgit v1.2.3 From 736768325efcbee7b0861d62670d01a54c2d158b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 14:08:36 +0100 Subject: intel-iommu: No mapping for non-PCI devices This should fix kernel.org bug #11821, where the dcdbas driver makes up a platform device and then uses dma_alloc_coherent() on it, in an attempt to get memory < 4GiB. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5ee8305257e..fa052dd8991 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2457,10 +2457,15 @@ static int iommu_should_identity_map(struct pci_dev *pdev) } /* Check if the pdev needs to go through non-identity map and unmap process.*/ -static int iommu_no_mapping(struct pci_dev *pdev) +static int iommu_no_mapping(struct device *dev) { + struct pci_dev *pdev; int found; + if (unlikely(dev->bus != &pci_bus_type)) + return 1; + + pdev = to_pci_dev(dev); if (iommu_dummy(pdev)) return 1; @@ -2516,7 +2521,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(pdev)) + if (iommu_no_mapping(hwdev)) return paddr; domain = get_valid_domain_for_dev(pdev); @@ -2656,7 +2661,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, struct iova *iova; struct intel_iommu *iommu; - if (iommu_no_mapping(pdev)) + if (iommu_no_mapping(dev)) return; domain = find_domain(pdev); @@ -2747,7 +2752,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, struct iova *iova; struct intel_iommu *iommu; - if (iommu_no_mapping(pdev)) + if (iommu_no_mapping(hwdev)) return; domain = find_domain(pdev); @@ -2806,7 +2811,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(pdev)) + if (iommu_no_mapping(hwdev)) return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); domain = get_valid_domain_for_dev(pdev); -- cgit v1.2.3 From 6941af2810c6fc970b88f7c0d52ba4e286acbee5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 18:24:27 +0100 Subject: intel-iommu: Use iommu_should_identity_map() at startup time too. At boot time, the dma_mask won't have been set on any devices, so we assume that all devices will be 64-bit capable (and thus get a 1:1 map). Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fa052dd8991..f9fc4f3bfa3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2117,6 +2117,17 @@ static int domain_add_dev_info(struct dmar_domain *domain, return 0; } +static int iommu_should_identity_map(struct pci_dev *pdev, int startup) +{ + if (iommu_identity_mapping == 2) + return IS_GFX_DEVICE(pdev); + + if (!startup) + return pdev->dma_mask > DMA_BIT_MASK(32); + + return 1; +} + static int iommu_prepare_static_identity_mapping(void) { struct pci_dev *pdev = NULL; @@ -2127,7 +2138,7 @@ static int iommu_prepare_static_identity_mapping(void) return -EFAULT; for_each_pci_dev(pdev) { - if (iommu_identity_mapping == 1 || IS_GFX_DEVICE(pdev)) { + if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: identity mapping for device %s\n", pci_name(pdev)); @@ -2448,14 +2459,6 @@ static int iommu_dummy(struct pci_dev *pdev) return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } -static int iommu_should_identity_map(struct pci_dev *pdev) -{ - if (iommu_identity_mapping == 2) - return IS_GFX_DEVICE(pdev); - else - return pdev->dma_mask > DMA_BIT_MASK(32); -} - /* Check if the pdev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { @@ -2474,7 +2477,7 @@ static int iommu_no_mapping(struct device *dev) found = identity_mapping(pdev); if (found) { - if (iommu_should_identity_map(pdev)) + if (iommu_should_identity_map(pdev, 0)) return 1; else { /* @@ -2491,7 +2494,7 @@ static int iommu_no_mapping(struct device *dev) * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (iommu_should_identity_map(pdev)) { + if (iommu_should_identity_map(pdev, 0)) { int ret; ret = domain_add_dev_info(si_domain, pdev); if (ret) -- cgit v1.2.3 From 3dfc813d94bba2046c6aed216e0fd69ac93a8e03 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 4 Jul 2009 19:11:08 +0100 Subject: intel-iommu: Don't use identity mapping for PCI devices behind bridges Our current strategy for pass-through mode is to put all devices into the 1:1 domain at startup (which is before we know what their dma_mask will be), and only _later_ take them out of that domain, if it turns out that they really can't address all of memory. However, when there are a bunch of PCI devices behind a bridge, they all end up with the same source-id on their DMA transactions, and hence in the same IOMMU domain. This means that we _can't_ easily move them from the 1:1 domain into their own domain at runtime, because there might be DMA in-flight from their siblings. So we have to adjust our pass-through strategy: For PCI devices not on the root bus, and for the bridges which will take responsibility for their transactions, we have to start up _out_ of the 1:1 domain, just in case. This fixes the BUG() we see when we have 32-bit-capable devices behind a PCI-PCI bridge, and use the software identity mapping. It does mean that we might end up using 'normal' mapping mode for some devices which could actually live with the faster 1:1 mapping -- but this is only for PCI devices behind bridges, which presumably aren't the devices for which people are most concerned about performance. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f9fc4f3bfa3..360fb67a30d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2122,6 +2122,36 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) if (iommu_identity_mapping == 2) return IS_GFX_DEVICE(pdev); + /* + * We want to start off with all devices in the 1:1 domain, and + * take them out later if we find they can't access all of memory. + * + * However, we can't do this for PCI devices behind bridges, + * because all PCI devices behind the same bridge will end up + * with the same source-id on their transactions. + * + * Practically speaking, we can't change things around for these + * devices at run-time, because we can't be sure there'll be no + * DMA transactions in flight for any of their siblings. + * + * So PCI devices (unless they're on the root bus) as well as + * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of + * the 1:1 domain, just in _case_ one of their siblings turns out + * not to be able to map all of memory. + */ + if (!pdev->is_pcie) { + if (!pci_is_root_bus(pdev->bus)) + return 0; + if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) + return 0; + } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + + /* + * At boot time, we don't yet know if devices will be 64-bit capable. + * Assume that they will -- if they turn out not to be, then we can + * take them out of the 1:1 domain later. + */ if (!startup) return pdev->dma_mask > DMA_BIT_MASK(32); -- cgit v1.2.3 From 4b99d3527041bec1abe9903263a3a1b7d2ba0c4a Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 8 Jul 2009 11:52:52 +0100 Subject: intel-iommu: Fix intel_iommu_unmap_range() with size 0 After some API change, intel_iommu_unmap_range() introduced a assumption that parameter size != 0, otherwise the dma_pte_clean_range() would have a overflowed argument. But the user like KVM don't have this assumption before, then some BUG() triggered. Fix it by ignoring size = 0. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- drivers/pci/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 360fb67a30d..ebc9b8dca88 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3600,6 +3600,9 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, { struct dmar_domain *dmar_domain = domain->priv; + if (!size) + return; + dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, (iova + size - 1) >> VTD_PAGE_SHIFT); -- cgit v1.2.3 From 405f55712dfe464b3240d7816cc4fe4174831be2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 11 Jul 2009 22:08:37 +0400 Subject: headers: smp_lock.h redux * Remove smp_lock.h from files which don't need it (including some headers!) * Add smp_lock.h to files which do need it * Make smp_lock.h include conditional in hardirq.h It's needed only for one kernel_locked() usage which is under CONFIG_PREEMPT This will make hardirq.h inclusion cheaper for every PREEMPT=n config (which includes allmodconfig/allyesconfig, BTW) Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- drivers/pci/hotplug/cpci_hotplug_core.c | 1 - drivers/pci/hotplug/cpqphp_ctrl.c | 1 - drivers/pci/hotplug/cpqphp_sysfs.c | 1 + drivers/pci/hotplug/pciehp_ctrl.c | 1 - drivers/pci/syscall.c | 1 - 5 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index a5b9f6ae507..d703e73fffa 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 2fa47af992a..0ff689afa75 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8450f4a6568..e6089bdb6e5 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "cpqphp.h" diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index ff4034502d2..8aab8edf123 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include "../pci.h" diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index ec22284eed3..e1c1ec54089 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include "pci.h" -- cgit v1.2.3 From 79896cf42f6a96d7e14f2dc3473443d68d74031d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Aug 2009 14:04:19 -0700 Subject: Make pci_claim_resource() use request_resource() rather than insert_resource() This function has traditionally used "insert_resource()", because before commit cebd78a8c5 ("Fix pci_claim_resource") it used to just insert the resource into whatever root resource tree that was indicated by "pcibios_select_root()". So there Matthew fixed it to actually look up the proper parent resource, which means that now it's actively wrong to then traverse the resource tree any more: we already know exactly where the new resource should go. And when we then did commit a76117dfd6 ("x86: Use pci_claim_resource"), which changed the x86 PCI code from the open-coded pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { to using if (pci_claim_resource(dev, idx) < 0) { that "insert_resource()" now suddenly became a problem, and causes a regression covered by http://bugzilla.kernel.org/show_bug.cgi?id=13891 which this fixes. Reported-and-tested-by: Rafael J. Wysocki Cc: Matthew Wilcox Cc: Andrew Patterson Cc: Linux PCI Signed-off-by: Linus Torvalds --- drivers/pci/setup-res.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index b711fb7181e..1898c7b4790 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -100,16 +100,16 @@ int pci_claim_resource(struct pci_dev *dev, int resource) { struct resource *res = &dev->resource[resource]; struct resource *root; - char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; int err; root = pci_find_parent_resource(dev, res); err = -EINVAL; if (root != NULL) - err = insert_resource(root, res); + err = request_resource(root, res); if (err) { + const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", resource, root ? "address space collision on" : -- cgit v1.2.3 From f532959b77e5e567c84c914cb7c7b07d2582448b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 4 Aug 2009 15:09:37 -0700 Subject: intel-iommu: Correct sglist size calculation. In domain_sg_mapping(), use aligned_nrpages() instead of hand-coded rounding code for calculating the size of each sg elem. This means that on IA64 we correctly round up to the MM page size, not just to the VT-d page size. Also remove the incorrect mm_to_dma_pfn() when intel_map_sg() calls domain_sg_mapping() -- the 'size' variable is in VT-d pages already. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ebc9b8dca88..11b317a78b4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1648,6 +1648,14 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } +/* Returns a number of VTD pages, but aligned to MM page size */ +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) +{ + host_addr &= ~PAGE_MASK; + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1675,7 +1683,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, uint64_t tmp; if (!sg_res) { - sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; + sg_res = aligned_nrpages(sg->offset, sg->length); sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; @@ -2415,14 +2423,6 @@ error: return ret; } -/* Returns a number of VTD pages, but aligned to MM page size */ -static inline unsigned long aligned_nrpages(unsigned long host_addr, - size_t size) -{ - host_addr &= ~PAGE_MASK; - return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; -} - /* This takes a number of _MM_ pages, not VTD pages */ static struct iova *intel_alloc_iova(struct device *dev, struct dmar_domain *domain, @@ -2875,7 +2875,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_vpfn = mm_to_dma_pfn(iova->pfn_lo); - ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); + ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { /* clear the page */ dma_pte_clear_range(domain, start_vpfn, -- cgit v1.2.3 From 33041ec049d39a6e0463c7edc7b6f631d24559e3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 4 Aug 2009 15:10:59 -0700 Subject: intel-iommu: Mask physical address to correct page size in intel_map_single() The physical address passed to domain_pfn_mapping() should be rounded down to the start of the MM page, not the VT-d page. This issue causes kernel panic on PAGE_SIZE>VTD_PAGE_SIZE platforms e.g. ia64 platforms. Signed-off-by: Fenghua Yu Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 11b317a78b4..af7ff9b5aed 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2551,6 +2551,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, int prot = 0; int ret; struct intel_iommu *iommu; + unsigned long paddr_pfn = paddr >> PAGE_SHIFT; BUG_ON(dir == DMA_NONE); @@ -2585,7 +2586,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * is not a big problem */ ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), - paddr >> VTD_PAGE_SHIFT, size, prot); + mm_to_dma_pfn(paddr_pfn), size, prot); if (ret) goto error; -- cgit v1.2.3 From c5b1525533c484238015c48c78f86d49a1bfb86b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 6 Aug 2009 13:31:56 +0800 Subject: intel-iommu: Fix enabling snooping feature by mistake Two defects work together result in KVM device passthrough randomly can't work: 1. iommu_snooping is not initialized to zero when vm_iommu_init() called. So it is possible to get a random value. 2. One line added by commit 2c2e2c38("IOMMU Identity Mapping Support") change the code path, let it bypass domain_update_iommu_cap(), as well as missing the increment of domain iommu reference count. The latter is also likely to cause a leak of domains on repeated VMM assignment and deassignment. Signed-off-by: Sheng Yang Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index af7ff9b5aed..2314ad7ee5f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1505,7 +1505,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, } set_bit(num, iommu->domain_ids); - set_bit(iommu->seq_id, &domain->iommu_bmp); iommu->domains[num] = domain; id = num; } @@ -3409,6 +3408,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; + domain->iommu_snooping = 0; domain->max_addr = 0; /* always allocate the top pgd */ -- cgit v1.2.3