From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:21 -0500 Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes Add a hwdev argument that is needed on some architectures in order to access a per-device offset that is taken into account when producing a physical address (also needed to get from bus address to virtual address because the physical address is an intermediate step). Also make swiotlb_bus_to_virt weak so architectures can override it. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 34f12e9996e..887388a1c57 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } -- cgit v1.2.3 From 7e05575c422d45f393c2d9b5900e97a30bf69bea Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 14 Apr 2009 12:12:29 +0900 Subject: x86: calgary: remove IOMMU_DEBUG CONFIG_IOMMU_DEBUG has depends on CONFIG_GART_IOMMU: config IOMMU_DEBUG bool "Enable IOMMU debugging" depends on GART_IOMMU && DEBUG_KERNEL depends on X86_64 So it's not useful to have CONFIG_IOMMU_DEBUG in Calgary IOMMU code, which does the extra checking of the bitmap space management. And Calgary uses the iommu helper for the bitmap space management now so it would be better to have the extra checking feature in the iommu helper rather than Calgary code (if necessary). Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: Joerg Roedel Cc: alexisb@us.ibm.com LKML-Reference: <20090414120827G.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 54 ++-------------------------------------- 1 file changed, 2 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 755c21e906f..971a3bec47a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = { static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; -/* enable this to stress test the chip's TCE cache */ -#ifdef CONFIG_IOMMU_DEBUG -static int debugging = 1; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - unsigned long idx = start; - - BUG_ON(start >= end); - - while (idx < end) { - if (!!test_bit(idx, bitmap) != expected) - return idx; - ++idx; - } - - /* all bits have the expected value */ - return ~0UL; -} -#else /* debugging is disabled */ -static int debugging; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - return ~0UL; -} - -#endif /* CONFIG_IOMMU_DEBUG */ - static inline int translation_enabled(struct iommu_table *tbl) { /* only PHBs with translation enabled have an IOMMU table */ @@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, { unsigned long index; unsigned long end; - unsigned long badbit; unsigned long flags; index = start_addr >> PAGE_SHIFT; @@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 0, index, end); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: entry already allocated at " - "0x%lx tbl %p dma 0x%lx npages %u\n", - badbit, tbl, start_addr, npages); - } - iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { unsigned long entry; - unsigned long badbit; unsigned long badend; unsigned long flags; @@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: bit is off at 0x%lx " - "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", - badbit, tbl, dma_addr, entry, npages); - } - iommu_area_free(tbl->it_map, entry, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -1488,9 +1439,8 @@ void __init detect_calgary(void) iommu_detected = 1; calgary_detected = 1; printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " - "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, - debugging ? "enabled" : "disabled"); + printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", + specified_table_size); /* swiotlb for devices that aren't behind the Calgary. */ if (max_pfn > MAX_DMA32_PFN) -- cgit v1.2.3 From 19c1a6f5764d787113fa323ffb18be7991208f82 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 14 Apr 2009 09:43:19 +0900 Subject: x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when IOMMU entries is full, which might be useful to find a bad driver that eats IOMMU entries. DMA_API_DEBUG provides the similar feature, debug_dma_dump_mappings, and it's better than GART's IOMMU_LEAK feature. GART's IOMMU_LEAK feature doesn't say who uses IOMMU entries so it's hard to find a bad driver. This patch reimplements the GART's IOMMU_LEAK feature by using DMA_API_DEBUG. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Cc: Andrew Morton LKML-Reference: <1239669799-23579-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 3 +-- arch/x86/kernel/pci-gart_64.c | 45 ++++++++----------------------------------- 2 files changed, 9 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d8359e73317..5865712d105 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -161,8 +161,7 @@ config IOMMU_DEBUG config IOMMU_LEAK bool "IOMMU leak tracing" - depends on DEBUG_KERNEL - depends on IOMMU_DEBUG + depends on IOMMU_DEBUG && DMA_API_DEBUG ---help--- Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b284b58c035..1e8920d98f7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -144,48 +144,21 @@ static void flush_gart(void) } #ifdef CONFIG_IOMMU_LEAK - -#define SET_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0);\ - } while (0) - -#define CLEAR_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; \ - } while (0) - /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; static void dump_leak(void) { - int i; static int dump; - if (dump || !iommu_leak_tab) + if (dump) return; dump = 1; - show_stack(NULL, NULL); - /* Very crude. dump some from the end of the table too */ - printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", - iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i += 2) { - printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], - 0); - printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk(KERN_DEBUG "\n"); + show_stack(NULL, NULL); + debug_dma_dump_mappings(NULL); } -#else -# define SET_LEAK(x) -# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) @@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - SET_LEAK(iommu_page + i); phys_mem += PAGE_SIZE; } return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); @@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - CLEAR_LEAK(iommu_page + i); } free_iommu(iommu_page, npages); } @@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } @@ -801,11 +771,12 @@ void __init gart_iommu_init(void) #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - get_order(iommu_pages*sizeof(void *))); - if (!iommu_leak_tab) + int ret; + + ret = dma_debug_resize_entries(iommu_pages); + if (ret) printk(KERN_DEBUG - "PCI-DMA: Cannot allocate leak trace area\n"); + "PCI-DMA: Cannot trace all the entries\n"); } #endif -- cgit v1.2.3 From fefda117ddb324b872312f1f061230e627c9f5ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 May 2009 12:21:42 +0200 Subject: amd-iommu: add amd_iommu_dump parameter This kernel parameter will be useful to get some AMD IOMMU related information in dmesg that is not necessary for the default user but may be helpful in debug situations. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 6 ++++++ arch/x86/kernel/amd_iommu_init.c | 10 ++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b..89dfb3793ed 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -194,6 +194,12 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +extern bool amd_iommu_dump; +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ + } while(0); /* * This structure contains generic data for IOMMU protection domains diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902da..57fb7a7cb6e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -115,6 +115,8 @@ struct ivmd_header { u64 range_length; } __attribute__((packed)); +bool amd_iommu_dump; + static int __initdata amd_iommu_detected; u16 amd_iommu_last_bdf; /* largest PCI device id we have @@ -1211,6 +1213,13 @@ void __init amd_iommu_detect(void) * ****************************************************************************/ +static int __init parse_amd_iommu_dump(char *str) +{ + amd_iommu_dump = true; + + return 1; +} + static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { @@ -1235,5 +1244,6 @@ static int __init parse_amd_iommu_size_options(char *str) return 1; } +__setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); __setup("amd_iommu_size=", parse_amd_iommu_size_options); -- cgit v1.2.3 From 9c72041f719e2864d4208a89341c36b316dbf893 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 May 2009 13:53:57 +0200 Subject: amd-iommu: add dump for iommus described in ivrs table Add information about IOMMU devices described in the IVRS ACPI table to the kernel log if amd_iommu_dump was specified on the kernel command line. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 57fb7a7cb6e..28165902ae2 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -748,6 +748,15 @@ static int __init init_iommu_all(struct acpi_table_header *table) h = (struct ivhd_header *)p; switch (*p) { case ACPI_IVHD_TYPE: + + DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " + "seg: %d flags: %01x info %04x\n", + PCI_BUS(h->devid), PCI_SLOT(h->devid), + PCI_FUNC(h->devid), h->cap_ptr, + h->pci_seg, h->flags, h->info); + DUMP_printk(" mmio-addr: %016llx\n", + h->mmio_phys); + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); if (iommu == NULL) return -ENOMEM; -- cgit v1.2.3 From 42a698f40a0946f5517308411b9e003ae031414d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 May 2009 15:41:28 +0200 Subject: amd-iommu: print ivhd information to dmesg when requested Add information about devices belonging to an IOMMU as described in the IVRS ACPI table to the kernel log if amd_iommu_dump was specified on the kernel command line. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 73 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 28165902ae2..fe3e6453cbf 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -598,32 +598,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, p += sizeof(struct ivhd_header); end += h->length; + while (p < end) { e = (struct ivhd_entry *)p; switch (e->type) { case IVHD_DEV_ALL: + + DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" + " last device %02x:%02x.%x flags: %02x\n", + PCI_BUS(iommu->first_device), + PCI_SLOT(iommu->first_device), + PCI_FUNC(iommu->first_device), + PCI_BUS(iommu->last_device), + PCI_SLOT(iommu->last_device), + PCI_FUNC(iommu->last_device), + e->flags); + for (dev_i = iommu->first_device; dev_i <= iommu->last_device; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: + + DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + "flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + devid = e->devid; set_dev_entry_from_acpi(iommu, devid, e->flags, 0); break; case IVHD_DEV_SELECT_RANGE_START: + + DUMP_printk(" DEV_SELECT_RANGE_START\t " + "devid: %02x:%02x.%x flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + devid_start = e->devid; flags = e->flags; ext_flags = 0; alias = false; break; case IVHD_DEV_ALIAS: + + DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + "flags: %02x devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + devid = e->devid; devid_to = e->ext >> 8; set_dev_entry_from_acpi(iommu, devid, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: + + DUMP_printk(" DEV_ALIAS_RANGE\t\t " + "devid: %02x:%02x.%x flags: %02x " + "devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + devid_start = e->devid; flags = e->flags; devid_to = e->ext >> 8; @@ -631,17 +682,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, alias = true; break; case IVHD_DEV_EXT_SELECT: + + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + "flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + devid = e->devid; set_dev_entry_from_acpi(iommu, devid, e->flags, e->ext); break; case IVHD_DEV_EXT_SELECT_RANGE: + + DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " + "%02x:%02x.%x flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + devid_start = e->devid; flags = e->flags; ext_flags = e->ext; alias = false; break; case IVHD_DEV_RANGE_END: + + DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) -- cgit v1.2.3 From 02acc43a294098c2a4cd22cf24e9c988644f9f7f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 20 May 2009 16:24:21 +0200 Subject: amd-iommu: print ivmd information to dmesg when requested Add information about device memory mapping requirements for the IOMMU as described in the IVRS ACPI table to the kernel log if amd_iommu_dump was specified on the kernel command line. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fe3e6453cbf..b90a78cfdcb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -983,6 +983,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) static int __init init_unity_map_range(struct ivmd_header *m) { struct unity_map_entry *e = 0; + char *s; e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) @@ -991,13 +992,16 @@ static int __init init_unity_map_range(struct ivmd_header *m) switch (m->type) { default: case ACPI_IVMD_TYPE: + s = "IVMD_TYPEi\t\t\t"; e->devid_start = e->devid_end = m->devid; break; case ACPI_IVMD_TYPE_ALL: + s = "IVMD_TYPE_ALL\t\t"; e->devid_start = 0; e->devid_end = amd_iommu_last_bdf; break; case ACPI_IVMD_TYPE_RANGE: + s = "IVMD_TYPE_RANGE\t\t"; e->devid_start = m->devid; e->devid_end = m->aux; break; @@ -1006,6 +1010,13 @@ static int __init init_unity_map_range(struct ivmd_header *m) e->address_end = e->address_start + PAGE_ALIGN(m->range_length); e->prot = m->flags >> 1; + DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" + " range_start: %016llx range_end: %016llx flags: %x\n", s, + PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), + PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), + PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), + e->address_start, e->address_end, m->flags); + list_add_tail(&e->list, &amd_iommu_unity_map); return 0; -- cgit v1.2.3 From b3b99ef8b4f80f3f093a72110e7697c2281ae45d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:02:48 +0200 Subject: amd-iommu: move protection domain printk to dump code This information is only helpful for debugging. Don't print it anymore unless explicitly requested. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad5..33565990164 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1009,8 +1009,9 @@ static int device_change_notifier(struct notifier_block *nb, if (!dma_domain) dma_domain = iommu->default_dom; attach_device(iommu, &dma_domain->domain, devid); - printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device %s\n", dma_domain->domain.id, dev_name(dev)); + DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain " + "%d for device %s\n", + dma_domain->domain.id, dev_name(dev)); break; case BUS_NOTIFY_UNBIND_DRIVER: if (!domain) @@ -1133,8 +1134,9 @@ static int get_device_resources(struct device *dev, dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; attach_device(*iommu, *domain, *bdf); - printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device %s\n", (*domain)->id, dev_name(dev)); + DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain " + "%d for device %s\n", + (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) -- cgit v1.2.3 From 2be69c79e9a46a554fc3ff57d886e65e7a73eb72 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:15:49 +0200 Subject: x86/iommu: add IOMMU_STRESS Kconfig entry This Kconfig option is intended to enable various code paths or parameters in IOMMU implementations to stress test the code and/or the hardware. This can also be done by disabling optimizations in the code when this option is switched on. Signed-off-by: Joerg Roedel Cc: David Woodhouse Cc: FUJITA Tomonori --- arch/x86/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 5865712d105..33fac6bbe1c 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -159,6 +159,14 @@ config IOMMU_DEBUG options. See Documentation/x86_64/boot-options.txt for more details. +config IOMMU_STRESS + bool "Enable IOMMU stress-test mode" + ---help--- + This option disables various optimizations in IOMMU related + code to do real stress testing of the IOMMU code. This option + will cause a performance drop and should only be enabled for + testing. + config IOMMU_LEAK bool "IOMMU leak tracing" depends on IOMMU_DEBUG && DMA_API_DEBUG -- cgit v1.2.3 From 2e8b569614b89c9b1b85cba37db36daeeeff744e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:44:03 +0200 Subject: amd-iommu: disable device isolation with CONFIG_IOMMU_STRESS With device isolation disabled we can test better for race conditions in dma_ops related code. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b90a78cfdcb..66941129e9c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -124,8 +124,14 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ + +#ifdef CONFIG_IOMMU_STRESS +bool amd_iommu_isolate = false; +#else bool amd_iommu_isolate = true; /* if true, device isolation is enabled */ +#endif + bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the -- cgit v1.2.3 From 421f909c803d1c397f6c66b75653f238696c39ee Mon Sep 17 00:00:00 2001 From: Neil Turton Date: Thu, 14 May 2009 14:00:35 +0100 Subject: amd-iommu: fix an off-by-one error in the AMD IOMMU driver. The variable amd_iommu_last_bdf holds the maximum bdf of any device controlled by an IOMMU, so the number of device entries needed is amd_iommu_last_bdf+1. The function tbl_size used amd_iommu_last_bdf instead. This would be a problem if the last device were a large enough power of 2. [ Impact: fix amd_iommu_last_bdf off-by-one error ] Signed-off-by: Neil Turton Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902da..35fc9654c7a 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -175,7 +175,7 @@ static inline void update_last_devid(u16 devid) static inline unsigned long tbl_size(int entry_size) { unsigned shift = PAGE_SHIFT + - get_order(amd_iommu_last_bdf * entry_size); + get_order(((int)amd_iommu_last_bdf + 1) * entry_size); return 1UL << shift; } -- cgit v1.2.3 From 7455aab1f95f6464c5af3fbdee28744e73f38564 Mon Sep 17 00:00:00 2001 From: Neil Turton Date: Thu, 14 May 2009 14:08:11 +0100 Subject: amd-iommu: fix the handling of device aliases in the AMD IOMMU driver. The devid parameter to set_dev_entry_from_acpi is the requester ID rather than the device ID since it is used to index the IOMMU device table. The handling of IVHD_DEV_ALIAS used to pass the device ID. This patch fixes it to pass the requester ID. [ Impact: fix setting the wrong req-id in acpi-table parsing ] Signed-off-by: Neil Turton Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 35fc9654c7a..53f93db54c4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -618,7 +618,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALIAS: devid = e->devid; devid_to = e->ext >> 8; - set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: -- cgit v1.2.3 From 0bc252f430d6a3ac7836d40f00d0ae020593b11b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:48:05 +0200 Subject: amd-iommu: make sure only ivmd entries are parsed The bug never triggered. But it should be fixed to protect against broken ACPI tables in the future. [ Impact: protect against broken ivrs acpi table ] Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 53f93db54c4..a3a2b98bb39 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -906,6 +906,8 @@ static int __init init_unity_map_range(struct ivmd_header *m) switch (m->type) { default: + kfree(e); + return 0; case ACPI_IVMD_TYPE: e->devid_start = e->devid_end = m->devid; break; -- cgit v1.2.3 From c1eee67b2d8464781f5868a34168df61e40e85a6 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 21 May 2009 00:56:58 -0700 Subject: amd iommu: properly detach from protection domain on ->remove Some drivers may use the dma api during ->remove which will cause a protection domain to get reattached to a device. Delay the detach until after the driver is completely unbound. [ joro: added a little merge helper ] [ Impact: fix too early device<->domain removal ] Signed-off-by: Chris Wright Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad5..d6898833c36 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -57,6 +57,10 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, static struct dma_ops_domain *find_protection_domain(u16 devid); +#ifndef BUS_NOTIFY_UNBOUND_DRIVER +#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 +#endif + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1012,7 +1016,7 @@ static int device_change_notifier(struct notifier_block *nb, printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " "device %s\n", dma_domain->domain.id, dev_name(dev)); break; - case BUS_NOTIFY_UNBIND_DRIVER: + case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; detach_device(domain, devid); -- cgit v1.2.3 From 3bd221724adb9d642270df0e78b0105fb61e4a1c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 15:06:20 +0200 Subject: amd-iommu: introduce for_each_iommu* macros This patch introduces the for_each_iommu and for_each_iommu_safe macros to simplify the developers life when having to iterate over all AMD IOMMUs in the system. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 8 ++++++++ arch/x86/kernel/amd_iommu.c | 8 ++++---- arch/x86/kernel/amd_iommu_init.c | 8 ++++---- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b..cf5ef172cfc 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,6 +195,14 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +/* + * Make iterating over all IOMMUs easier + */ +#define for_each_iommu(iommu) \ + list_for_each_entry((iommu), &amd_iommu_list, list) +#define for_each_iommu_safe(iommu, next) \ + list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad5..d9e9dc141a1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -213,7 +213,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) { struct amd_iommu *iommu; - list_for_each_entry(iommu, &amd_iommu_list, list) + for_each_iommu(iommu) iommu_poll_events(iommu); return IRQ_HANDLED; @@ -440,7 +440,7 @@ static void iommu_flush_domain(u16 domid) __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, domid, 1, 1); - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { spin_lock_irqsave(&iommu->lock, flags); __iommu_queue_command(iommu, &cmd); __iommu_completion_wait(iommu); @@ -1672,7 +1672,7 @@ int __init amd_iommu_init_dma_ops(void) * found in the system. Devices not assigned to any other * protection domain will be assigned to the default one. */ - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { iommu->default_dom = dma_ops_domain_alloc(iommu, order); if (iommu->default_dom == NULL) return -ENOMEM; @@ -1710,7 +1710,7 @@ int __init amd_iommu_init_dma_ops(void) free_domains: - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { if (iommu->default_dom) dma_ops_domain_free(iommu->default_dom); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902da..675a4b642f7 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -679,7 +679,7 @@ static void __init free_iommu_all(void) { struct amd_iommu *iommu, *next; - list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + for_each_iommu_safe(iommu, next) { list_del(&iommu->list); free_iommu_one(iommu); kfree(iommu); @@ -779,7 +779,7 @@ static int __init iommu_setup_msix(struct amd_iommu *iommu) struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ int nvec = 0, i; - list_for_each_entry(curr, &amd_iommu_list, list) { + for_each_iommu(curr) { if (curr->dev == iommu->dev) { entries[nvec].entry = curr->evt_msi_num; entries[nvec].vector = 0; @@ -818,7 +818,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) int r; struct amd_iommu *curr; - list_for_each_entry(curr, &amd_iommu_list, list) { + for_each_iommu(curr) { if (curr->dev == iommu->dev) curr->int_enabled = true; } @@ -971,7 +971,7 @@ static void __init enable_iommus(void) { struct amd_iommu *iommu; - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); iommu_enable_event_logging(iommu); -- cgit v1.2.3 From 58492e128892e3b55f1a6ef0cf3c3ab4ce7cc214 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 18:41:16 +0200 Subject: amd-iommu: consolidate hardware initialization to one function This patch restructures the AMD IOMMU initialization code to initialize all hardware registers with one single function call. This is helpful for suspend/resume support. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 50 +++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 675a4b642f7..74f4f1fea93 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -252,13 +252,6 @@ static void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } -/* Function to enable IOMMU event logging and event interrupts */ -static void __init iommu_enable_event_logging(struct amd_iommu *iommu) -{ - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); -} - /* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. @@ -413,25 +406,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) { u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(CMD_BUFFER_SIZE)); - u64 entry; if (cmd_buf == NULL) return NULL; iommu->cmd_buf_size = CMD_BUFFER_SIZE; - entry = (u64)virt_to_phys(cmd_buf); + return cmd_buf; +} + +/* + * This function writes the command buffer address to the hardware and + * enables it. + */ +static void iommu_enable_command_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->cmd_buf == NULL); + + entry = (u64)virt_to_phys(iommu->cmd_buf); entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); + &entry, sizeof(entry)); /* set head and tail to zero manually */ writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); - - return cmd_buf; } static void __init free_command_buffer(struct amd_iommu *iommu) @@ -443,20 +447,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu) /* allocates the memory where the IOMMU will log its events to */ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) { - u64 entry; iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(EVT_BUFFER_SIZE)); if (iommu->evt_buf == NULL) return NULL; + return iommu->evt_buf; +} + +static void iommu_enable_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->evt_buf == NULL); + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, &entry, sizeof(entry)); - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); } static void __init free_event_buffer(struct amd_iommu *iommu) @@ -710,7 +721,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; - iommu_set_device_table(iommu); iommu->cmd_buf = alloc_command_buffer(iommu); if (!iommu->cmd_buf) return -ENOMEM; @@ -837,6 +847,8 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) return 1; } + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + return 0; } @@ -972,9 +984,11 @@ static void __init enable_iommus(void) struct amd_iommu *iommu; for_each_iommu(iommu) { + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); - iommu_enable_event_logging(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From fab6afa30954a0684ef8ac1d9a606e74a6215ab6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 18:46:34 +0200 Subject: amd-iommu: drop pointless iommu-loop in msi setup code It is not necessary to loop again over all IOMMUs in this code. So drop the loop. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 74f4f1fea93..cc99f609223 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -826,13 +826,6 @@ out_free: static int __init iommu_setup_msi(struct amd_iommu *iommu) { int r; - struct amd_iommu *curr; - - for_each_iommu(curr) { - if (curr->dev == iommu->dev) - curr->int_enabled = true; - } - if (pci_enable_msi(iommu->dev)) return 1; @@ -847,6 +840,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) return 1; } + iommu->int_enabled = true; iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); return 0; -- cgit v1.2.3 From d91cecdd796c27df46339e80ed436a980c56fcad Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 18:51:00 +0200 Subject: amd-iommu: remove support for msi-x Current hardware uses msi instead of msi-x so this code it not necessary and can not be tested. The best thing is to drop this code. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 44 +--------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index cc99f609223..feee475e626 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -783,46 +783,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msix(struct amd_iommu *iommu) -{ - struct amd_iommu *curr; - struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ - int nvec = 0, i; - - for_each_iommu(curr) { - if (curr->dev == iommu->dev) { - entries[nvec].entry = curr->evt_msi_num; - entries[nvec].vector = 0; - curr->int_enabled = true; - nvec++; - } - } - - if (pci_enable_msix(iommu->dev, entries, nvec)) { - pci_disable_msix(iommu->dev); - return 1; - } - - for (i = 0; i < nvec; ++i) { - int r = request_irq(entries->vector, amd_iommu_int_handler, - IRQF_SAMPLE_RANDOM, - "AMD IOMMU", - NULL); - if (r) - goto out_free; - } - - return 0; - -out_free: - for (i -= 1; i >= 0; --i) - free_irq(entries->vector, NULL); - - pci_disable_msix(iommu->dev); - - return 1; -} - static int __init iommu_setup_msi(struct amd_iommu *iommu) { int r; @@ -851,9 +811,7 @@ static int __init iommu_init_msi(struct amd_iommu *iommu) if (iommu->int_enabled) return 0; - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) - return iommu_setup_msix(iommu); - else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) return iommu_setup_msi(iommu); return 1; -- cgit v1.2.3 From 92ac4320af6ed4294c2c221dd4ccbfd9026a3aa7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 May 2009 19:06:27 +0200 Subject: amd-iommu: add function to disable all iommus This function is required for suspend/resume support with AMD IOMMU enabled. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index feee475e626..ed10c0f5ff7 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -252,6 +252,11 @@ static void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +static void iommu_disable(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_IOMMU_EN); +} + /* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. @@ -945,6 +950,14 @@ static void __init enable_iommus(void) } } +static void disable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_disable(iommu); +} + /* * Suspend/Resume support * disable suspend until real resume implemented -- cgit v1.2.3 From bfd1be1857e5a3385bf146e02e6dc3dd4241bec1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 May 2009 15:33:57 +0200 Subject: amd-iommu: add function to flush tlb for all domains This function is required for suspend/resume support with AMD IOMMU enabled. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/kernel/amd_iommu.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index f712344329b..1750e1f85d3 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -27,6 +27,7 @@ extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d9e9dc141a1..826ad079efc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -449,6 +449,17 @@ static void iommu_flush_domain(u16 domid) } } +void amd_iommu_flush_all_domains(void) +{ + int i; + + for (i = 1; i < MAX_DOMAIN_ID; ++i) { + if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + continue; + iommu_flush_domain(i); + } +} + /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v1.2.3 From 7d7a110c6127b7fc683dc6d764555f2dbd22b054 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 May 2009 15:48:10 +0200 Subject: amd-iommu: add function to flush tlb for all devices This function is required for suspend/resume support with AMD IOMMU enabled. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/kernel/amd_iommu.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 1750e1f85d3..262e0282004 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -28,6 +28,7 @@ extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 826ad079efc..92b0e1881e0 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -460,6 +460,24 @@ void amd_iommu_flush_all_domains(void) } } +void amd_iommu_flush_all_devices(void) +{ + struct amd_iommu *iommu; + int i; + + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (amd_iommu_pd_table[i] == NULL) + continue; + + iommu = amd_iommu_rlookup_table[i]; + if (!iommu) + continue; + + iommu_queue_inv_dev_entry(iommu, i); + iommu_completion_wait(iommu); + } +} + /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v1.2.3 From 05f92db9f47f852ff48bbed1b063b8ab8ad00285 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 09:52:46 +0200 Subject: amd_iommu: un __init functions required for suspend/resume This patch makes sure that no function required for suspend/resume of AMD IOMMU driver is thrown away after boot. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index ed10c0f5ff7..330896ba6a9 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -193,7 +193,7 @@ static inline unsigned long tbl_size(int entry_size) * This function set the exclusion range in the IOMMU. DMA accesses to the * exclusion range are passed through untranslated */ -static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) +static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; @@ -225,7 +225,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) } /* Generic functions to enable/disable certain features of the IOMMU. */ -static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; @@ -244,7 +244,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -static void __init iommu_enable(struct amd_iommu *iommu) +static void iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", dev_name(&iommu->dev->dev), iommu->cap_ptr); @@ -811,7 +811,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) return 0; } -static int __init iommu_init_msi(struct amd_iommu *iommu) +static int iommu_init_msi(struct amd_iommu *iommu) { if (iommu->int_enabled) return 0; @@ -936,7 +936,7 @@ static void init_device_table(void) * This function finally enables all IOMMUs found in the system after * they have been initialized */ -static void __init enable_iommus(void) +static void enable_iommus(void) { struct amd_iommu *iommu; -- cgit v1.2.3 From 736501ee000757082a4f0832826ae1eda7ea106e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 09:56:12 +0200 Subject: amd-iommu: implement suspend/resume This patch puts everything together and enables suspend/resume support in the AMD IOMMU driver. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 330896ba6a9..4ca8fbfb68d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -965,12 +965,31 @@ static void disable_iommus(void) static int amd_iommu_resume(struct sys_device *dev) { + /* + * Disable IOMMUs before reprogramming the hardware registers. + * IOMMU is still enabled from the resume kernel. + */ + disable_iommus(); + + /* re-load the hardware */ + enable_iommus(); + + /* + * we have to flush after the IOMMUs are enabled because a + * disabled IOMMU will never execute the commands we send + */ + amd_iommu_flush_all_domains(); + amd_iommu_flush_all_devices(); + return 0; } static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) { - return -EINVAL; + /* disable IOMMUs to go out of the way for BIOS */ + disable_iommus(); + + return 0; } static struct sysdev_class amd_iommu_sysdev_class = { -- cgit v1.2.3 From c3239567a20e90e3026ac5453d5267506ef7b030 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 10:56:44 +0200 Subject: amd-iommu: introduce aperture_range structure This is a preperation for extended address allocator. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 32 +++++++++++++++-------- arch/x86/kernel/amd_iommu.c | 46 ++++++++++++++++------------------ 2 files changed, 43 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b..4c64c9bc683 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,6 +195,8 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +#define APERTURE_RANGE_SIZE (128 * 1024 * 1024) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -209,6 +211,24 @@ struct protection_domain { void *priv; /* private data */ }; +/* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; +}; + /* * Data container for a dma_ops specific protection domain */ @@ -224,16 +244,8 @@ struct dma_ops_domain { /* address we start to search for free addresses */ unsigned long next_bit; - /* address allocation bitmap */ - unsigned long *bitmap; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 **pte_pages; + /* address space relevant data */ + struct aperture_range aperture; /* This will be set to true when TLB needs to be flushed */ bool need_flush; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad5..62acd09cd19 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -595,7 +595,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, * as allocated in the aperture */ if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); + __set_bit(addr >> PAGE_SHIFT, + dma_dom->aperture.bitmap); } return 0; @@ -656,11 +657,12 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, dom->need_flush = true; } - address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, - 0 , boundary_size, align_mask); + address = iommu_area_alloc(dom->aperture.bitmap, limit, dom->next_bit, + pages, 0 , boundary_size, align_mask); if (address == -1) { - address = iommu_area_alloc(dom->bitmap, limit, 0, pages, - 0, boundary_size, align_mask); + address = iommu_area_alloc(dom->aperture.bitmap, limit, 0, + pages, 0, boundary_size, + align_mask); dom->need_flush = true; } @@ -685,7 +687,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, unsigned int pages) { address >>= PAGE_SHIFT; - iommu_area_free(dom->bitmap, address, pages); + iommu_area_free(dom->aperture.bitmap, address, pages); if (address >= dom->next_bit) dom->need_flush = true; @@ -741,7 +743,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, if (start_page + pages > last_page) pages = last_page - start_page; - iommu_area_reserve(dom->bitmap, start_page, pages); + iommu_area_reserve(dom->aperture.bitmap, start_page, pages); } static void free_pagetable(struct protection_domain *domain) @@ -785,9 +787,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) free_pagetable(&dom->domain); - kfree(dom->pte_pages); - - kfree(dom->bitmap); + free_page((unsigned long)dom->aperture.bitmap); kfree(dom); } @@ -826,16 +826,15 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; - dma_dom->aperture_size = (1ULL << order); - dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), - GFP_KERNEL); - if (!dma_dom->bitmap) + dma_dom->aperture_size = APERTURE_RANGE_SIZE; + dma_dom->aperture.bitmap = (void *)get_zeroed_page(GFP_KERNEL); + if (!dma_dom->aperture.bitmap) goto free_dma_dom; /* * mark the first page as allocated so we never return 0 as * a valid dma-address. So we can use 0 as error value */ - dma_dom->bitmap[0] = 1; + dma_dom->aperture.bitmap[0] = 1; dma_dom->next_bit = 0; dma_dom->need_flush = false; @@ -854,13 +853,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, /* * At the last step, build the page tables so we don't need to * allocate page table pages in the dma_ops mapping/unmapping - * path. + * path for the first 128MB of dma address space. */ num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); - dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), - GFP_KERNEL); - if (!dma_dom->pte_pages) - goto free_dma_dom; l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); if (l2_pde == NULL) @@ -869,10 +864,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); for (i = 0; i < num_pte_pages; ++i) { - dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!dma_dom->pte_pages[i]) + u64 **pte_page = &dma_dom->aperture.pte_pages[i]; + *pte_page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!*pte_page) goto free_dma_dom; - address = virt_to_phys(dma_dom->pte_pages[i]); + address = virt_to_phys(*pte_page); l2_pde[i] = IOMMU_L1_PDE(address); } @@ -1159,7 +1155,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, paddr &= PAGE_MASK; - pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1192,7 +1188,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); - pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); WARN_ON(!*pte); -- cgit v1.2.3 From 8bda3092bcfa68f786d94549ae026e8db1eff041 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 12:02:46 +0200 Subject: amd-iommu: move page table allocation code to seperate function This patch makes page table allocation usable for dma_ops code. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 86 ++++++++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 62acd09cd19..ded79f7747c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,7 +55,9 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); static struct dma_ops_domain *find_protection_domain(u16 devid); - +static u64* alloc_pte(struct protection_domain *dom, + unsigned long address, u64 + **pte_page, gfp_t gfp); #ifdef CONFIG_AMD_IOMMU_STATS @@ -468,7 +470,7 @@ static int iommu_map_page(struct protection_domain *dom, unsigned long phys_addr, int prot) { - u64 __pte, *pte, *page; + u64 __pte, *pte; bus_addr = PAGE_ALIGN(bus_addr); phys_addr = PAGE_ALIGN(phys_addr); @@ -477,27 +479,7 @@ static int iommu_map_page(struct protection_domain *dom, if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) return -EINVAL; - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - *pte = IOMMU_L2_PDE(virt_to_phys(page)); - } - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - *pte = IOMMU_L1_PDE(virt_to_phys(page)); - } - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; + pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); if (IOMMU_PTE_PRESENT(*pte)) return -EBUSY; @@ -1139,6 +1121,61 @@ static int get_device_resources(struct device *dev, return 1; } +/* + * If the pte_page is not yet allocated this function is called + */ +static u64* alloc_pte(struct protection_domain *dom, + unsigned long address, u64 **pte_page, gfp_t gfp) +{ + u64 *pte, *page; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = IOMMU_L2_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = IOMMU_L1_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page) + *pte_page = pte; + + pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + + return pte; +} + +/* + * This function fetches the PTE for a given address in the aperture + */ +static u64* dma_ops_get_pte(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture = &dom->aperture; + u64 *pte, *pte_page; + + pte = aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + if (!pte) { + pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); + aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)] = pte_page; + } + + return pte; +} + /* * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. @@ -1155,8 +1192,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, paddr &= PAGE_MASK; - pte = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)]; - pte += IOMMU_PTE_L0_INDEX(address); + pte = dma_ops_get_pte(dom, address); __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; -- cgit v1.2.3 From 53812c115cda1f660b286c939669154a56976f6b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 May 2009 12:17:38 +0200 Subject: amd-iommu: handle page table allocation failures in dma_ops code The code will be required when the aperture size increases dynamically in the extended address allocator. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ded79f7747c..a467addb44b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1193,6 +1193,8 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, paddr &= PAGE_MASK; pte = dma_ops_get_pte(dom, address); + if (!pte) + return bad_dma_address; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1248,7 +1250,7 @@ static dma_addr_t __map_single(struct device *dev, u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; - dma_addr_t address, start; + dma_addr_t address, start, ret; unsigned int pages; unsigned long align_mask = 0; int i; @@ -1271,7 +1273,10 @@ static dma_addr_t __map_single(struct device *dev, start = address; for (i = 0; i < pages; ++i) { - dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + if (ret == bad_dma_address) + goto out_unmap; + paddr += PAGE_SIZE; start += PAGE_SIZE; } @@ -1287,6 +1292,17 @@ static dma_addr_t __map_single(struct device *dev, out: return address; + +out_unmap: + + for (--i; i >= 0; --i) { + start -= PAGE_SIZE; + dma_ops_domain_unmap(iommu, dma_dom, start); + } + + dma_ops_free_addresses(dma_dom, address, pages); + + return bad_dma_address; } /* -- cgit v1.2.3 From 384de72910a7bf96a02a6d8023fe9e16d872beb2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 May 2009 12:30:05 +0200 Subject: amd-iommu: make address allocator aware of multiple aperture ranges This patch changes the AMD IOMMU address allocator to allow up to 32 aperture ranges per dma_ops domain. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 11 ++- arch/x86/kernel/amd_iommu.c | 138 ++++++++++++++++++++++++--------- 2 files changed, 110 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4c64c9bc683..eca912931a8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -195,7 +195,12 @@ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ -#define APERTURE_RANGE_SIZE (128 * 1024 * 1024) +#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ +#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) +#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) +#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ +#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) +#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) /* * This structure contains generic data for IOMMU protection domains @@ -227,6 +232,8 @@ struct aperture_range { * just calculate its address in constant time. */ u64 *pte_pages[64]; + + unsigned long offset; }; /* @@ -245,7 +252,7 @@ struct dma_ops_domain { unsigned long next_bit; /* address space relevant data */ - struct aperture_range aperture; + struct aperture_range *aperture[APERTURE_MAX_RANGES]; /* This will be set to true when TLB needs to be flushed */ bool need_flush; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a467addb44b..794163ae97b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -578,7 +578,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, */ if (addr < dma_dom->aperture_size) __set_bit(addr >> PAGE_SHIFT, - dma_dom->aperture.bitmap); + dma_dom->aperture[0]->bitmap); } return 0; @@ -615,43 +615,74 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, ****************************************************************************/ /* - * The address allocator core function. + * The address allocator core functions. * * called with domain->lock held */ + +static unsigned long dma_ops_area_alloc(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask, + unsigned long start) +{ + unsigned long next_bit = dom->next_bit % APERTURE_RANGE_PAGES; + int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i = start >> APERTURE_RANGE_SHIFT; + unsigned long boundary_size; + unsigned long address = -1; + unsigned long limit; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + + for (;i < max_index; ++i) { + unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; + + if (dom->aperture[i]->offset >= dma_mask) + break; + + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, + dma_mask >> PAGE_SHIFT); + + address = iommu_area_alloc(dom->aperture[i]->bitmap, + limit, next_bit, pages, 0, + boundary_size, align_mask); + if (address != -1) { + address = dom->aperture[i]->offset + + (address << PAGE_SHIFT); + dom->next_bit = (address >> PAGE_SHIFT) + pages; + break; + } + + next_bit = 0; + } + + return address; +} + static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, unsigned long align_mask, u64 dma_mask) { - unsigned long limit; unsigned long address; - unsigned long boundary_size; + unsigned long start = dom->next_bit << PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, - dma_mask >> PAGE_SHIFT); - if (dom->next_bit >= limit) { - dom->next_bit = 0; - dom->need_flush = true; - } + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, start); - address = iommu_area_alloc(dom->aperture.bitmap, limit, dom->next_bit, - pages, 0 , boundary_size, align_mask); if (address == -1) { - address = iommu_area_alloc(dom->aperture.bitmap, limit, 0, - pages, 0, boundary_size, - align_mask); + dom->next_bit = 0; + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, 0); dom->need_flush = true; } - if (likely(address != -1)) { - dom->next_bit = address + pages; - address <<= PAGE_SHIFT; - } else + if (unlikely(address == -1)) address = bad_dma_address; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -668,11 +699,17 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, unsigned long address, unsigned int pages) { - address >>= PAGE_SHIFT; - iommu_area_free(dom->aperture.bitmap, address, pages); + unsigned i = address >> APERTURE_RANGE_SHIFT; + struct aperture_range *range = dom->aperture[i]; + + BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - if (address >= dom->next_bit) + if ((address >> PAGE_SHIFT) >= dom->next_bit) dom->need_flush = true; + + address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + iommu_area_free(range->bitmap, address, pages); + } /**************************************************************************** @@ -720,12 +757,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages) { - unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; if (start_page + pages > last_page) pages = last_page - start_page; - iommu_area_reserve(dom->aperture.bitmap, start_page, pages); + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } } static void free_pagetable(struct protection_domain *domain) @@ -764,12 +805,19 @@ static void free_pagetable(struct protection_domain *domain) */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { + int i; + if (!dom) return; free_pagetable(&dom->domain); - free_page((unsigned long)dom->aperture.bitmap); + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { + if (!dom->aperture[i]) + continue; + free_page((unsigned long)dom->aperture[i]->bitmap); + kfree(dom->aperture[i]); + } kfree(dom); } @@ -797,6 +845,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (!dma_dom) return NULL; + dma_dom->aperture[0] = kzalloc(sizeof(struct aperture_range), + GFP_KERNEL); + if (!dma_dom->aperture[0]) + goto free_dma_dom; + spin_lock_init(&dma_dom->domain.lock); dma_dom->domain.id = domain_id_alloc(); @@ -809,14 +862,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (!dma_dom->domain.pt_root) goto free_dma_dom; dma_dom->aperture_size = APERTURE_RANGE_SIZE; - dma_dom->aperture.bitmap = (void *)get_zeroed_page(GFP_KERNEL); - if (!dma_dom->aperture.bitmap) + dma_dom->aperture[0]->bitmap = (void *)get_zeroed_page(GFP_KERNEL); + if (!dma_dom->aperture[0]->bitmap) goto free_dma_dom; /* * mark the first page as allocated so we never return 0 as * a valid dma-address. So we can use 0 as error value */ - dma_dom->aperture.bitmap[0] = 1; + dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->next_bit = 0; dma_dom->need_flush = false; @@ -846,7 +899,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); for (i = 0; i < num_pte_pages; ++i) { - u64 **pte_page = &dma_dom->aperture.pte_pages[i]; + u64 **pte_page = &dma_dom->aperture[0]->pte_pages[i]; *pte_page = (u64 *)get_zeroed_page(GFP_KERNEL); if (!*pte_page) goto free_dma_dom; @@ -1164,14 +1217,19 @@ static u64* alloc_pte(struct protection_domain *dom, static u64* dma_ops_get_pte(struct dma_ops_domain *dom, unsigned long address) { - struct aperture_range *aperture = &dom->aperture; + struct aperture_range *aperture; u64 *pte, *pte_page; - pte = aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return NULL; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; if (!pte) { pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); - aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)] = pte_page; - } + aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; + } else + pte += IOMMU_PTE_L0_INDEX(address); return pte; } @@ -1219,14 +1277,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address) { + struct aperture_range *aperture; u64 *pte; if (address >= dom->aperture_size) return; - WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) + return; - pte = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); WARN_ON(!*pte); -- cgit v1.2.3 From 803b8cb4d9a93b90c67aba2aab7f2c54d595b5b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 May 2009 15:32:48 +0200 Subject: amd-iommu: change dma_dom->next_bit to dma_dom->next_address Simplify the code a little bit by using the same unit for all address space related state in the dma_ops domain structure. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 +- arch/x86/kernel/amd_iommu.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index eca912931a8..4ff4cf1f080 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -249,7 +249,7 @@ struct dma_ops_domain { unsigned long aperture_size; /* address we start to search for free addresses */ - unsigned long next_bit; + unsigned long next_address; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 794163ae97b..c1a08b9119c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -627,13 +627,15 @@ static unsigned long dma_ops_area_alloc(struct device *dev, u64 dma_mask, unsigned long start) { - unsigned long next_bit = dom->next_bit % APERTURE_RANGE_PAGES; + unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; int i = start >> APERTURE_RANGE_SHIFT; unsigned long boundary_size; unsigned long address = -1; unsigned long limit; + next_bit >>= PAGE_SHIFT; + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; @@ -652,7 +654,7 @@ static unsigned long dma_ops_area_alloc(struct device *dev, if (address != -1) { address = dom->aperture[i]->offset + (address << PAGE_SHIFT); - dom->next_bit = (address >> PAGE_SHIFT) + pages; + dom->next_address = address + (pages << PAGE_SHIFT); break; } @@ -669,14 +671,12 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, u64 dma_mask) { unsigned long address; - unsigned long start = dom->next_bit << PAGE_SHIFT; - address = dma_ops_area_alloc(dev, dom, pages, align_mask, - dma_mask, start); + dma_mask, dom->next_address); if (address == -1) { - dom->next_bit = 0; + dom->next_address = 0; address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, 0); dom->need_flush = true; @@ -704,10 +704,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - if ((address >> PAGE_SHIFT) >= dom->next_bit) + if (address >= dom->next_address) dom->need_flush = true; address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + iommu_area_free(range->bitmap, address, pages); } @@ -870,7 +871,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, * a valid dma-address. So we can use 0 as error value */ dma_dom->aperture[0]->bitmap[0] = 1; - dma_dom->next_bit = 0; + dma_dom->next_address = 0; dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; -- cgit v1.2.3 From 9cabe89b99773e682538a8809abc7d4000c77083 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 18 May 2009 16:38:55 +0200 Subject: amd-iommu: move aperture_range allocation code to seperate function This patch prepares the dynamic increasement of dma_ops domain apertures. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 95 ++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c1a08b9119c..8ff02ee69e8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -620,6 +620,59 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * called with domain->lock held */ +/* + * This function is used to add a new aperture range to an existing + * aperture in case of dma_ops domain allocation or address allocation + * failure. + */ +static int alloc_new_range(struct dma_ops_domain *dma_dom, + bool populate, gfp_t gfp) +{ + int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + + if (index >= APERTURE_MAX_RANGES) + return -ENOMEM; + + dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); + if (!dma_dom->aperture[index]) + return -ENOMEM; + + dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); + if (!dma_dom->aperture[index]->bitmap) + goto out_free; + + dma_dom->aperture[index]->offset = dma_dom->aperture_size; + + if (populate) { + unsigned long address = dma_dom->aperture_size; + int i, num_ptes = APERTURE_RANGE_PAGES / 512; + u64 *pte, *pte_page; + + for (i = 0; i < num_ptes; ++i) { + pte = alloc_pte(&dma_dom->domain, address, + &pte_page, gfp); + if (!pte) + goto out_free; + + dma_dom->aperture[index]->pte_pages[i] = pte_page; + + address += APERTURE_RANGE_SIZE / 64; + } + } + + dma_dom->aperture_size += APERTURE_RANGE_SIZE; + + return 0; + +out_free: + free_page((unsigned long)dma_dom->aperture[index]->bitmap); + + kfree(dma_dom->aperture[index]); + dma_dom->aperture[index] = NULL; + + return -ENOMEM; +} + static unsigned long dma_ops_area_alloc(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, @@ -832,9 +885,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, unsigned order) { struct dma_ops_domain *dma_dom; - unsigned i, num_pte_pages; - u64 *l2_pde; - u64 address; /* * Currently the DMA aperture must be between 32 MB and 1GB in size @@ -846,11 +896,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (!dma_dom) return NULL; - dma_dom->aperture[0] = kzalloc(sizeof(struct aperture_range), - GFP_KERNEL); - if (!dma_dom->aperture[0]) - goto free_dma_dom; - spin_lock_init(&dma_dom->domain.lock); dma_dom->domain.id = domain_id_alloc(); @@ -862,10 +907,13 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; - dma_dom->aperture_size = APERTURE_RANGE_SIZE; - dma_dom->aperture[0]->bitmap = (void *)get_zeroed_page(GFP_KERNEL); - if (!dma_dom->aperture[0]->bitmap) + + dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; + + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) goto free_dma_dom; + /* * mark the first page as allocated so we never return 0 as * a valid dma-address. So we can use 0 as error value @@ -873,9 +921,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->next_address = 0; - dma_dom->need_flush = false; - dma_dom->target_dev = 0xffff; - /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { @@ -886,28 +931,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_ops_reserve_addresses(dma_dom, startpage, pages); } - /* - * At the last step, build the page tables so we don't need to - * allocate page table pages in the dma_ops mapping/unmapping - * path for the first 128MB of dma address space. - */ - num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); - - l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); - if (l2_pde == NULL) - goto free_dma_dom; - - dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); - - for (i = 0; i < num_pte_pages; ++i) { - u64 **pte_page = &dma_dom->aperture[0]->pte_pages[i]; - *pte_page = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!*pte_page) - goto free_dma_dom; - address = virt_to_phys(*pte_page); - l2_pde[i] = IOMMU_L1_PDE(address); - } - return dma_dom; free_dma_dom: -- cgit v1.2.3 From 00cd122ae5e5e7c60cce2af3c35b190d4c3f2d0d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 May 2009 09:52:40 +0200 Subject: amd-iommu: handle exlusion ranges and unity mappings in alloc_new_range This patch makes sure no reserved addresses are allocated in an dma_ops domain when the aperture is increased dynamically. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 71 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8ff02ee69e8..59ee1b94a7c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -58,6 +58,9 @@ static struct dma_ops_domain *find_protection_domain(u16 devid); static u64* alloc_pte(struct protection_domain *dom, unsigned long address, u64 **pte_page, gfp_t gfp); +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages); #ifdef CONFIG_AMD_IOMMU_STATS @@ -620,15 +623,43 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * called with domain->lock held */ +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64* fetch_pte(struct protection_domain *domain, + unsigned long address) +{ + u64 *pte; + + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + + return pte; +} + /* * This function is used to add a new aperture range to an existing * aperture in case of dma_ops domain allocation or address allocation * failure. */ -static int alloc_new_range(struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i; if (index >= APERTURE_MAX_RANGES) return -ENOMEM; @@ -662,6 +693,33 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, dma_dom->aperture_size += APERTURE_RANGE_SIZE; + /* Intialize the exclusion range if necessary */ + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset && + iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + + /* + * Check for areas already mapped as present in the new aperture + * range and mark those pages as reserved in the allocator. Such + * mappings may already exist as a result of requested unity + * mappings for devices. + */ + for (i = dma_dom->aperture[index]->offset; + i < dma_dom->aperture_size; + i += PAGE_SIZE) { + u64 *pte = fetch_pte(&dma_dom->domain, i); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + continue; + + dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); + } + return 0; out_free: @@ -911,7 +969,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; - if (alloc_new_range(dma_dom, true, GFP_KERNEL)) + if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) goto free_dma_dom; /* @@ -921,15 +979,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->next_address = 0; - /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } return dma_dom; -- cgit v1.2.3 From 11b83888ae729457b5cfb936dbd498481f6408df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 May 2009 10:23:15 +0200 Subject: amd-iommu: enlarge the aperture dynamically By dynamically increasing the aperture the extended allocator is now ready for use. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 59ee1b94a7c..d129d8feba0 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1403,10 +1403,26 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; +retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) - goto out; + if (unlikely(address == bad_dma_address)) { + /* + * setting next_address here will let the address + * allocator only scan the new allocated range in the + * first run. This is a small optimization. + */ + dma_dom->next_address = dma_dom->aperture_size; + + if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + goto out; + + /* + * aperture was sucessfully enlarged by 128 MB, try + * allocation again + */ + goto retry; + } start = address; for (i = 0; i < pages; ++i) { -- cgit v1.2.3 From d9cfed925448f097ec7faab80d903eb7e5f99712 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 May 2009 12:16:29 +0200 Subject: amd-iommu: remove amd_iommu_size kernel parameter This parameter is not longer necessary when aperture increases dynamically. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 18 ++++-------------- arch/x86/kernel/amd_iommu_init.c | 15 --------------- 2 files changed, 4 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d129d8feba0..31d56c36010 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -939,17 +939,10 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, - unsigned order) +static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) { struct dma_ops_domain *dma_dom; - /* - * Currently the DMA aperture must be between 32 MB and 1GB in size - */ - if ((order < 25) || (order > 30)) - return NULL; - dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) return NULL; @@ -1087,7 +1080,6 @@ static int device_change_notifier(struct notifier_block *nb, struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; unsigned long flags; if (devid > amd_iommu_last_bdf) @@ -1126,7 +1118,7 @@ static int device_change_notifier(struct notifier_block *nb, dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu, order); + dma_domain = dma_ops_domain_alloc(iommu); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -1826,7 +1818,6 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { @@ -1839,7 +1830,7 @@ static void prealloc_protection_domains(void) iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - dma_dom = dma_ops_domain_alloc(iommu, order); + dma_dom = dma_ops_domain_alloc(iommu); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); @@ -1865,7 +1856,6 @@ static struct dma_map_ops amd_iommu_dma_ops = { int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; int ret; /* @@ -1874,7 +1864,7 @@ int __init amd_iommu_init_dma_ops(void) * protection domain will be assigned to the default one. */ list_for_each_entry(iommu, &amd_iommu_list, list) { - iommu->default_dom = dma_ops_domain_alloc(iommu, order); + iommu->default_dom = dma_ops_domain_alloc(iommu); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902da..762a4eefec9 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -121,7 +121,6 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ bool amd_iommu_isolate = true; /* if true, device isolation is enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ @@ -1137,9 +1136,6 @@ int __init amd_iommu_init(void) enable_iommus(); - printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", - (1 << (amd_iommu_aperture_order-20))); - printk(KERN_INFO "AMD IOMMU: device isolation "); if (amd_iommu_isolate) printk("enabled\n"); @@ -1225,15 +1221,4 @@ static int __init parse_amd_iommu_options(char *str) return 1; } -static int __init parse_amd_iommu_size_options(char *str) -{ - unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); - - if ((order > 24) && (order < 31)) - amd_iommu_aperture_order = order; - - return 1; -} - __setup("amd_iommu=", parse_amd_iommu_options); -__setup("amd_iommu_size=", parse_amd_iommu_size_options); -- cgit v1.2.3 From fe16f088a88fb73161bba8784375c829f7e87b54 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:27:53 +0200 Subject: amd-iommu: disable round-robin allocator for CONFIG_IOMMU_STRESS Disabling the round-robin allocator results in reusing the same dma-addresses again very fast. This is a good test if the iotlb flushing is working correctly. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 31d56c36010..543822b39a8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -783,6 +783,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, { unsigned long address; +#ifdef CONFIG_IOMMU_STRESS + dom->next_address = 0; + dom->need_flush = true; +#endif + address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, dom->next_address); -- cgit v1.2.3 From f5e9705c6429d24dee832b2edd7f4848d432ea03 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:31:53 +0200 Subject: amd-iommu: don't preallocate page tables with CONFIG_IOMMU_STRESS This forces testing of on-demand page table allocation code. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 543822b39a8..33434c497a6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -661,6 +661,10 @@ static int alloc_new_range(struct amd_iommu *iommu, int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; int i; +#ifdef CONFIG_IOMMU_STRESS + populate = false; +#endif + if (index >= APERTURE_MAX_RANGES) return -ENOMEM; -- cgit v1.2.3 From 47bccd6bb2b866449d3ecf2ba350ac1c7473b2b8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 May 2009 12:40:54 +0200 Subject: amd-iommu: don't free dma adresses below 512MB with CONFIG_IOMMU_STRESS This will test the automatic aperture enlargement code. This is important because only very few devices will ever trigger this code path. So force it under CONFIG_IOMMU_STRESS. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 33434c497a6..04ff5ec4ac0 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -824,6 +824,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); +#ifdef CONFIG_IOMMU_STRESS + if (i < 4) + return; +#endif + if (address >= dom->next_address) dom->need_flush = true; -- cgit v1.2.3 From 367d04c4ec02dad34d80452e32e3370db7fb6fee Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 28 May 2009 09:54:48 +0200 Subject: amd_iommu: fix lock imbalance In alloc_coherent there is an omitted unlock on the path where mapping fails. Add the unlock. [ Impact: fix lock imbalance in alloc_coherent ] Signed-off-by: Jiri Slaby Cc: Joerg Roedel Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d6898833c36..9f89bb645b3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1541,8 +1541,10 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) + if (*dma_addr == bad_dma_address) { + spin_unlock_irqrestore(&domain->lock, flags); goto out_free; + } iommu_completion_wait(iommu); -- cgit v1.2.3 From fe2245c905631a3a353504fc04388ce3dfaf9d9e Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Sun, 5 Jul 2009 15:50:52 -0500 Subject: x86: enable GART-IOMMU only after setting up protection methods The current code to set up the GART as an IOMMU enables GART translations before it removes the aperture from the kernel memory map, sets the GART PTEs to UC, sets up the guard and scratch pages, or does a wbinvd(). This leaves the possibility of cache aliasing open and can cause system crashes. Re-order the code so as to enable the GART translations only after all safeguards are in place and the tlb has been flushed. AMD has tested this patch on both Istanbul systems and 1st generation Opteron systems with APG enabled and seen no adverse effects. Istanbul systems with HT Assist enabled sometimes see MCE errors due to cache artifacts with the unmodified code. Signed-off-by: Mark Langsdorf Cc: Cc: Joerg Roedel Cc: akpm@linux-foundation.org Cc: jbarnes@virtuousgeek.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1e8920d98f7..cfd9f906389 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -658,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) agp_gatt_table = gatt; - enable_gart_translations(); - error = sysdev_class_register(&gart_sysdev_class); if (!error) error = sysdev_register(&device_gart); @@ -816,6 +814,14 @@ void __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); + + /* + * Now all caches are flushed and we can safely enable + * GART hardware. Doing it early leaves the possibility + * of stale cache entries that can lead to GART PTE + * errors. + */ + enable_gart_translations(); /* * Try to workaround a bug (thanks to BenH): -- cgit v1.2.3 From 29150078d7a1758df8c7a6cd2ec066ac65e1fab9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Jun 2009 10:54:18 +0200 Subject: amd-iommu: remove BUS_NOTIFY_BOUND_DRIVER handling Handling this event causes device assignment in KVM to fail because the device gets re-attached as soon as the pci-stub registers as the driver for the device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8510e90ebfe..81872604eb7 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1145,17 +1145,6 @@ static int device_change_notifier(struct notifier_block *nb, "to a non-dma-ops domain\n", dev_name(dev)); switch (action) { - case BUS_NOTIFY_BOUND_DRIVER: - if (domain) - goto out; - dma_domain = find_protection_domain(devid); - if (!dma_domain) - dma_domain = iommu->default_dom; - attach_device(iommu, &dma_domain->domain, devid); - DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain " - "%d for device %s\n", - dma_domain->domain.id, dev_name(dev)); - break; case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; -- cgit v1.2.3 From 71ff3bca2f70264effe8cbbdd5bc10cf6be5f2f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Jun 2009 13:47:33 -0700 Subject: amd-iommu: detach device explicitly before attaching it to a new domain This fixes a bug with a device that could not be assigned to a KVM guest because it is still assigned to a dma_ops protection domain. [chrisw: simply remove WARN_ON(), will always fire since dev->driver will be pci-sub] Signed-off-by: Chris Wright Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 81872604eb7..772e91088e4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2073,7 +2073,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, old_domain = domain_for_device(devid); if (old_domain) - return -EBUSY; + detach_device(old_domain, devid); attach_device(iommu, domain, devid); -- cgit v1.2.3 From e9a22a13c71986851e931bdfa054f68839ff8576 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Jun 2009 12:00:37 +0200 Subject: amd-iommu: remove unnecessary "AMD IOMMU: " prefix That prefix is already included in the DUMP_printk macro. So there is no need to repeat it in the format string. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 772e91088e4..1c60554537c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1266,9 +1266,8 @@ static int get_device_resources(struct device *dev, dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; attach_device(*iommu, *domain, *bdf); - DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain " - "%d for device %s\n", - (*domain)->id, dev_name(dev)); + DUMP_printk("Using protection domain %d for device %s\n", + (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) -- cgit v1.2.3