aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h55
-rw-r--r--arch/x86/kernel/amd_iommu.c490
-rw-r--r--arch/x86/kernel/amd_iommu_init.c273
-rw-r--r--arch/x86/kernel/pci-calgary_64.c54
-rw-r--r--arch/x86/kernel/pci-gart_64.c55
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
8 files changed, 622 insertions, 320 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e73317..33fac6bbe1c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,10 +159,17 @@ config IOMMU_DEBUG
options. See Documentation/x86_64/boot-options.txt for more
details.
+config IOMMU_STRESS
+ bool "Enable IOMMU stress-test mode"
+ ---help---
+ This option disables various optimizations in IOMMU related
+ code to do real stress testing of the IOMMU code. This option
+ will cause a performance drop and should only be enabled for
+ testing.
+
config IOMMU_LEAK
bool "IOMMU leak tracing"
- depends on DEBUG_KERNEL
- depends on IOMMU_DEBUG
+ depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344329b..262e0282004 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
extern int amd_iommu_init_dma_ops(void);
extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9d22b..0c878caaa0a 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,27 @@
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...) \
+ do { \
+ if (amd_iommu_dump) \
+ printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
+ } while(0);
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+ list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+ list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT 27 /* 128 MB */
+#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
/*
* This structure contains generic data for IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
};
/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+ /* address allocation bitmap */
+ unsigned long *bitmap;
+
+ /*
+ * Array of PTE pages for the aperture. In this array we save all the
+ * leaf pages of the domain page table used for the aperture. This way
+ * we don't need to walk the page table to find a specific PTE. We can
+ * just calculate its address in constant time.
+ */
+ u64 *pte_pages[64];
+
+ unsigned long offset;
+};
+
+/*
* Data container for a dma_ops specific protection domain
*/
struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
unsigned long aperture_size;
/* address we start to search for free addresses */
- unsigned long next_bit;
-
- /* address allocation bitmap */
- unsigned long *bitmap;
+ unsigned long next_address;
- /*
- * Array of PTE pages for the aperture. In this array we save all the
- * leaf pages of the domain page table used for the aperture. This way
- * we don't need to walk the page table to find a specific PTE. We can
- * just calculate its address in constant time.
- */
- u64 **pte_pages;
+ /* address space relevant data */
+ struct aperture_range *aperture[APERTURE_MAX_RANGES];
/* This will be set to true when TLB needs to be flushed */
bool need_flush;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99dad5..2c63d874813 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,16 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+ unsigned long address, u64
+ **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+ unsigned long start_page,
+ unsigned int pages);
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
#ifdef CONFIG_AMD_IOMMU_STATS
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
{
struct amd_iommu *iommu;
- list_for_each_entry(iommu, &amd_iommu_list, list)
+ for_each_iommu(iommu)
iommu_poll_events(iommu);
return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
}
}
+void amd_iommu_flush_all_domains(void)
+{
+ int i;
+
+ for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+ if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+ continue;
+ iommu_flush_domain(i);
+ }
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+ struct amd_iommu *iommu;
+ int i;
+
+ for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+ if (amd_iommu_pd_table[i] == NULL)
+ continue;
+
+ iommu = amd_iommu_rlookup_table[i];
+ if (!iommu)
+ continue;
+
+ iommu_queue_inv_dev_entry(iommu, i);
+ iommu_completion_wait(iommu);
+ }
+}
+
/****************************************************************************
*
* The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
unsigned long phys_addr,
int prot)
{
- u64 __pte, *pte, *page;
+ u64 __pte, *pte;
bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
return -EINVAL;
- pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- *pte = IOMMU_L2_PDE(virt_to_phys(page));
- }
-
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
- if (!IOMMU_PTE_PRESENT(*pte)) {
- page = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- *pte = IOMMU_L1_PDE(virt_to_phys(page));
- }
-
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+ pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
* as allocated in the aperture
*/
if (addr < dma_dom->aperture_size)
- __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+ __set_bit(addr >> PAGE_SHIFT,
+ dma_dom->aperture[0]->bitmap);
}
return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
****************************************************************************/
/*
- * The address allocator core function.
+ * The address allocator core functions.
*
* called with domain->lock held
*/
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+ unsigned long address)
+{
+ u64 *pte;
+
+ pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+ return pte;
+}
+
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+ struct dma_ops_domain *dma_dom,
+ bool populate, gfp_t gfp)
+{
+ int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+ populate = false;
+#endif
+
+ if (index >= APERTURE_MAX_RANGES)
+ return -ENOMEM;
+
+ dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+ if (!dma_dom->aperture[index])
+ return -ENOMEM;
+
+ dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+ if (!dma_dom->aperture[index]->bitmap)
+ goto out_free;
+
+ dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+ if (populate) {
+ unsigned long address = dma_dom->aperture_size;
+ int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+ u64 *pte, *pte_page;
+
+ for (i = 0; i < num_ptes; ++i) {
+ pte = alloc_pte(&dma_dom->domain, address,
+ &pte_page, gfp);
+ if (!pte)
+ goto out_free;
+
+ dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+ address += APERTURE_RANGE_SIZE / 64;
+ }
+ }
+
+ dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+ /* Intialize the exclusion range if necessary */
+ if (iommu->exclusion_start &&
+ iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+ iommu->exclusion_start < dma_dom->aperture_size) {
+ unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length,
+ PAGE_SIZE);
+ dma_ops_reserve_addresses(dma_dom, startpage, pages);
+ }
+
+ /*
+ * Check for areas already mapped as present in the new aperture
+ * range and mark those pages as reserved in the allocator. Such
+ * mappings may already exist as a result of requested unity
+ * mappings for devices.
+ */
+ for (i = dma_dom->aperture[index]->offset;
+ i < dma_dom->aperture_size;
+ i += PAGE_SIZE) {
+ u64 *pte = fetch_pte(&dma_dom->domain, i);
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ continue;
+
+ dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+ }
+
+ return 0;
+
+out_free:
+ free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+ kfree(dma_dom->aperture[index]);
+ dma_dom->aperture[index] = NULL;
+
+ return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+ struct dma_ops_domain *dom,
+ unsigned int pages,
+ unsigned long align_mask,
+ u64 dma_mask,
+ unsigned long start)
+{
+ unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
+ int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+ int i = start >> APERTURE_RANGE_SHIFT;
+ unsigned long boundary_size;
+ unsigned long address = -1;
+ unsigned long limit;
+
+ next_bit >>= PAGE_SHIFT;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ for (;i < max_index; ++i) {
+ unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+ if (dom->aperture[i]->offset >= dma_mask)
+ break;
+
+ limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+ dma_mask >> PAGE_SHIFT);
+
+ address = iommu_area_alloc(dom->aperture[i]->bitmap,
+ limit, next_bit, pages, 0,
+ boundary_size, align_mask);
+ if (address != -1) {
+ address = dom->aperture[i]->offset +
+ (address << PAGE_SHIFT);
+ dom->next_address = address + (pages << PAGE_SHIFT);
+ break;
+ }
+
+ next_bit = 0;
+ }
+
+ return address;
+}
+
static unsigned long dma_ops_alloc_addresses(struct device *dev,
struct dma_ops_domain *dom,
unsigned int pages,
unsigned long align_mask,
u64 dma_mask)
{
- unsigned long limit;
unsigned long address;
- unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
- limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
- dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+ dom->next_address = 0;
+ dom->need_flush = true;
+#endif
- if (dom->next_bit >= limit) {
- dom->next_bit = 0;
- dom->need_flush = true;
- }
+ address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+ dma_mask, dom->next_address);
- address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
- 0 , boundary_size, align_mask);
if (address == -1) {
- address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
- 0, boundary_size, align_mask);
+ dom->next_address = 0;
+ address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+ dma_mask, 0);
dom->need_flush = true;
}
- if (likely(address != -1)) {
- dom->next_bit = address + pages;
- address <<= PAGE_SHIFT;
- } else
+ if (unlikely(address == -1))
address = bad_dma_address;
WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
unsigned long address,
unsigned int pages)
{
- address >>= PAGE_SHIFT;
- iommu_area_free(dom->bitmap, address, pages);
+ unsigned i = address >> APERTURE_RANGE_SHIFT;
+ struct aperture_range *range = dom->aperture[i];
+
+ BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+ if (i < 4)
+ return;
+#endif
- if (address >= dom->next_bit)
+ if (address >= dom->next_address)
dom->need_flush = true;
+
+ address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+ iommu_area_free(range->bitmap, address, pages);
+
}
/****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages)
{
- unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+ unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
if (start_page + pages > last_page)
pages = last_page - start_page;
- iommu_area_reserve(dom->bitmap, start_page, pages);
+ for (i = start_page; i < start_page + pages; ++i) {
+ int index = i / APERTURE_RANGE_PAGES;
+ int page = i % APERTURE_RANGE_PAGES;
+ __set_bit(page, dom->aperture[index]->bitmap);
+ }
}
static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
*/
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
+ int i;
+
if (!dom)
return;
free_pagetable(&dom->domain);
- kfree(dom->pte_pages);
-
- kfree(dom->bitmap);
+ for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+ if (!dom->aperture[i])
+ continue;
+ free_page((unsigned long)dom->aperture[i]->bitmap);
+ kfree(dom->aperture[i]);
+ }
kfree(dom);
}
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
* It also intializes the page table and the address allocator data
* structures required for the dma_ops interface
*/
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
- unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
{
struct dma_ops_domain *dma_dom;
- unsigned i, num_pte_pages;
- u64 *l2_pde;
- u64 address;
-
- /*
- * Currently the DMA aperture must be between 32 MB and 1GB in size
- */
- if ((order < 25) || (order > 30))
- return NULL;
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
- dma_dom->aperture_size = (1ULL << order);
- dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
- GFP_KERNEL);
- if (!dma_dom->bitmap)
- goto free_dma_dom;
- /*
- * mark the first page as allocated so we never return 0 as
- * a valid dma-address. So we can use 0 as error value
- */
- dma_dom->bitmap[0] = 1;
- dma_dom->next_bit = 0;
dma_dom->need_flush = false;
dma_dom->target_dev = 0xffff;
- /* Intialize the exclusion range if necessary */
- if (iommu->exclusion_start &&
- iommu->exclusion_start < dma_dom->aperture_size) {
- unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = iommu_num_pages(iommu->exclusion_start,
- iommu->exclusion_length,
- PAGE_SIZE);
- dma_ops_reserve_addresses(dma_dom, startpage, pages);
- }
+ if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+ goto free_dma_dom;
/*
- * At the last step, build the page tables so we don't need to
- * allocate page table pages in the dma_ops mapping/unmapping
- * path.
+ * mark the first page as allocated so we never return 0 as
+ * a valid dma-address. So we can use 0 as error value
*/
- num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
- dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
- GFP_KERNEL);
- if (!dma_dom->pte_pages)
- goto free_dma_dom;
-
- l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (l2_pde == NULL)
- goto free_dma_dom;
+ dma_dom->aperture[0]->bitmap[0] = 1;
+ dma_dom->next_address = 0;
- dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
- for (i = 0; i < num_pte_pages; ++i) {
- dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
- if (!dma_dom->pte_pages[i])
- goto free_dma_dom;
- address = virt_to_phys(dma_dom->pte_pages[i]);
- l2_pde[i] = IOMMU_L1_PDE(address);
- }
return dma_dom;
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
unsigned long flags;
if (devid > amd_iommu_last_bdf)
@@ -1009,10 +1152,11 @@ static int device_change_notifier(struct notifier_block *nb,
if (!dma_domain)
dma_domain = iommu->default_dom;
attach_device(iommu, &dma_domain->domain, devid);
- printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
- "device %s\n", dma_domain->domain.id, dev_name(dev));
+ DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+ "%d for device %s\n",
+ dma_domain->domain.id, dev_name(dev));
break;
- case BUS_NOTIFY_UNBIND_DRIVER:
+ case BUS_NOTIFY_UNBOUND_DRIVER:
if (!domain)
goto out;
detach_device(domain, devid);
@@ -1022,7 +1166,7 @@ static int device_change_notifier(struct notifier_block *nb,
dma_domain = find_protection_domain(devid);
if (dma_domain)
goto out;
- dma_domain = dma_ops_domain_alloc(iommu, order);
+ dma_domain = dma_ops_domain_alloc(iommu);
if (!dma_domain)
goto out;
dma_domain->target_dev = devid;
@@ -1133,8 +1277,9 @@ static int get_device_resources(struct device *dev,
dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
attach_device(*iommu, *domain, *bdf);
- printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
- "device %s\n", (*domain)->id, dev_name(dev));
+ DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+ "%d for device %s\n",
+ (*domain)->id, dev_name(dev));
}
if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1289,66 @@ static int get_device_resources(struct device *dev,
}
/*
+ * If the pte_page is not yet allocated this function is called
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+ unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+ u64 *pte, *page;
+
+ pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = IOMMU_L2_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ page = (u64 *)get_zeroed_page(gfp);
+ if (!page)
+ return NULL;
+ *pte = IOMMU_L1_PDE(virt_to_phys(page));
+ }
+
+ pte = IOMMU_PTE_PAGE(*pte);
+
+ if (pte_page)
+ *pte_page = pte;
+
+ pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+ return pte;
+}
+
+/*
+ * This function fetches the PTE for a given address in the aperture
+ */
+static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
+ unsigned long address)
+{
+ struct aperture_range *aperture;
+ u64 *pte, *pte_page;
+
+ aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+ if (!aperture)
+ return NULL;
+
+ pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+ if (!pte) {
+ pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+ aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+ } else
+ pte += IOMMU_PTE_L0_INDEX(address);
+
+ return pte;
+}
+
+/*
* This is the generic map function. It maps one 4kb page at paddr to
* the given address in the DMA address space for the domain.
*/
@@ -1159,8 +1364,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
paddr &= PAGE_MASK;
- pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
- pte += IOMMU_PTE_L0_INDEX(address);
+ pte = dma_ops_get_pte(dom, address);
+ if (!pte)
+ return bad_dma_address;
__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
@@ -1185,14 +1391,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
struct dma_ops_domain *dom,
unsigned long address)
{
+ struct aperture_range *aperture;
u64 *pte;
if (address >= dom->aperture_size)
return;
- WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+ aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+ if (!aperture)
+ return;
+
+ pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+ if (!pte)
+ return;
- pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
WARN_ON(!*pte);
@@ -1216,7 +1428,7 @@ static dma_addr_t __map_single(struct device *dev,
u64 dma_mask)
{
dma_addr_t offset = paddr & ~PAGE_MASK;
- dma_addr_t address, start;
+ dma_addr_t address, start, ret;
unsigned int pages;
unsigned long align_mask = 0;
int i;
@@ -1232,14 +1444,33 @@ static dma_addr_t __map_single(struct device *dev,
if (align)
align_mask = (1UL << get_order(size)) - 1;
+retry:
address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
dma_mask);
- if (unlikely(address == bad_dma_address))
- goto out;
+ if (unlikely(address == bad_dma_address)) {
+ /*
+ * setting next_address here will let the address
+ * allocator only scan the new allocated range in the
+ * first run. This is a small optimization.
+ */
+ dma_dom->next_address = dma_dom->aperture_size;
+
+ if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+ goto out;
+
+ /*
+ * aperture was sucessfully enlarged by 128 MB, try
+ * allocation again
+ */
+ goto retry;
+ }
start = address;
for (i = 0; i < pages; ++i) {
- dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+ ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+ if (ret == bad_dma_address)
+ goto out_unmap;
+
paddr += PAGE_SIZE;
start += PAGE_SIZE;
}
@@ -1255,6 +1486,17 @@ static dma_addr_t __map_single(struct device *dev,
out:
return address;
+
+out_unmap:
+
+ for (--i; i >= 0; --i) {
+ start -= PAGE_SIZE;
+ dma_ops_domain_unmap(iommu, dma_dom, start);
+ }
+
+ dma_ops_free_addresses(dma_dom, address, pages);
+
+ return bad_dma_address;
}
/*
@@ -1625,7 +1867,6 @@ static void prealloc_protection_domains(void)
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1879,7 @@ static void prealloc_protection_domains(void)
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
continue;
- dma_dom = dma_ops_domain_alloc(iommu, order);
+ dma_dom = dma_ops_domain_alloc(iommu);
if (!dma_dom)
continue;
init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1905,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
int __init amd_iommu_init_dma_ops(void)
{
struct amd_iommu *iommu;
- int order = amd_iommu_aperture_order;
int ret;
/*
@@ -1672,8 +1912,8 @@ int __init amd_iommu_init_dma_ops(void)
* found in the system. Devices not assigned to any other
* protection domain will be assigned to the default one.
*/
- list_for_each_entry(iommu, &amd_iommu_list, list) {
- iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+ for_each_iommu(iommu) {
+ iommu->default_dom = dma_ops_domain_alloc(iommu);
if (iommu->default_dom == NULL)
return -ENOMEM;
iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1950,7 @@ int __init amd_iommu_init_dma_ops(void)
free_domains:
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
if (iommu->default_dom)
dma_ops_domain_free(iommu->default_dom);
}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be0902da..238989ec077 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,15 +115,21 @@ struct ivmd_header {
u64 range_length;
} __attribute__((packed));
+bool amd_iommu_dump;
+
static int __initdata amd_iommu_detected;
u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
bool amd_iommu_isolate = true; /* if true, device isolation is
enabled */
+#endif
+
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
static inline unsigned long tbl_size(int entry_size)
{
unsigned shift = PAGE_SHIFT +
- get_order(amd_iommu_last_bdf * entry_size);
+ get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
return 1UL << shift;
}
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
* This function set the exclusion range in the IOMMU. DMA accesses to the
* exclusion range are passed through untranslated
*/
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
u64 start = iommu->exclusion_start & PAGE_MASK;
u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
}
/* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
}
/* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void iommu_disable(struct amd_iommu *iommu)
{
- iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
- iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+ iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
}
/*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
{
u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(CMD_BUFFER_SIZE));
- u64 entry;
if (cmd_buf == NULL)
return NULL;
iommu->cmd_buf_size = CMD_BUFFER_SIZE;
- entry = (u64)virt_to_phys(cmd_buf);
+ return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ BUG_ON(iommu->cmd_buf == NULL);
+
+ entry = (u64)virt_to_phys(iommu->cmd_buf);
entry |= MMIO_CMD_SIZE_512;
+
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
- &entry, sizeof(entry));
+ &entry, sizeof(entry));
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
- return cmd_buf;
}
static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
/* allocates the memory where the IOMMU will log its events to */
static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
{
- u64 entry;
iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(EVT_BUFFER_SIZE));
if (iommu->evt_buf == NULL)
return NULL;
+ return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ BUG_ON(iommu->evt_buf == NULL);
+
entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
&entry, sizeof(entry));
- iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
- return iommu->evt_buf;
+ iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
}
static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
p += sizeof(struct ivhd_header);
end += h->length;
+
while (p < end) {
e = (struct ivhd_entry *)p;
switch (e->type) {
case IVHD_DEV_ALL:
+
+ DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+ " last device %02x:%02x.%x flags: %02x\n",
+ PCI_BUS(iommu->first_device),
+ PCI_SLOT(iommu->first_device),
+ PCI_FUNC(iommu->first_device),
+ PCI_BUS(iommu->last_device),
+ PCI_SLOT(iommu->last_device),
+ PCI_FUNC(iommu->last_device),
+ e->flags);
+
for (dev_i = iommu->first_device;
dev_i <= iommu->last_device; ++dev_i)
set_dev_entry_from_acpi(iommu, dev_i,
e->flags, 0);
break;
case IVHD_DEV_SELECT:
+
+ DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+ "flags: %02x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags);
+
devid = e->devid;
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
break;
case IVHD_DEV_SELECT_RANGE_START:
+
+ DUMP_printk(" DEV_SELECT_RANGE_START\t "
+ "devid: %02x:%02x.%x flags: %02x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags);
+
devid_start = e->devid;
flags = e->flags;
ext_flags = 0;
alias = false;
break;
case IVHD_DEV_ALIAS:
+
+ DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+ "flags: %02x devid_to: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags,
+ PCI_BUS(e->ext >> 8),
+ PCI_SLOT(e->ext >> 8),
+ PCI_FUNC(e->ext >> 8));
+
devid = e->devid;
devid_to = e->ext >> 8;
- set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+ set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
amd_iommu_alias_table[devid] = devid_to;
break;
case IVHD_DEV_ALIAS_RANGE:
+
+ DUMP_printk(" DEV_ALIAS_RANGE\t\t "
+ "devid: %02x:%02x.%x flags: %02x "
+ "devid_to: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags,
+ PCI_BUS(e->ext >> 8),
+ PCI_SLOT(e->ext >> 8),
+ PCI_FUNC(e->ext >> 8));
+
devid_start = e->devid;
flags = e->flags;
devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
alias = true;
break;
case IVHD_DEV_EXT_SELECT:
+
+ DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+ "flags: %02x ext: %08x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags, e->ext);
+
devid = e->devid;
set_dev_entry_from_acpi(iommu, devid, e->flags,
e->ext);
break;
case IVHD_DEV_EXT_SELECT_RANGE:
+
+ DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
+ "%02x:%02x.%x flags: %02x ext: %08x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid),
+ e->flags, e->ext);
+
devid_start = e->devid;
flags = e->flags;
ext_flags = e->ext;
alias = false;
break;
case IVHD_DEV_RANGE_END:
+
+ DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+ PCI_BUS(e->devid),
+ PCI_SLOT(e->devid),
+ PCI_FUNC(e->devid));
+
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
{
struct amd_iommu *iommu, *next;
- list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+ for_each_iommu_safe(iommu, next) {
list_del(&iommu->list);
free_iommu_one(iommu);
kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
- iommu_set_device_table(iommu);
iommu->cmd_buf = alloc_command_buffer(iommu);
if (!iommu->cmd_buf)
return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
h = (struct ivhd_header *)p;
switch (*p) {
case ACPI_IVHD_TYPE:
+
+ DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+ "seg: %d flags: %01x info %04x\n",
+ PCI_BUS(h->devid), PCI_SLOT(h->devid),
+ PCI_FUNC(h->devid), h->cap_ptr,
+ h->pci_seg, h->flags, h->info);
+ DUMP_printk(" mmio-addr: %016llx\n",
+ h->mmio_phys);
+
iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
if (iommu == NULL)
return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
*
****************************************************************************/
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
- struct amd_iommu *curr;
- struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
- int nvec = 0, i;
-
- list_for_each_entry(curr, &amd_iommu_list, list) {
- if (curr->dev == iommu->dev) {
- entries[nvec].entry = curr->evt_msi_num;
- entries[nvec].vector = 0;
- curr->int_enabled = true;
- nvec++;
- }
- }
-
- if (pci_enable_msix(iommu->dev, entries, nvec)) {
- pci_disable_msix(iommu->dev);
- return 1;
- }
-
- for (i = 0; i < nvec; ++i) {
- int r = request_irq(entries->vector, amd_iommu_int_handler,
- IRQF_SAMPLE_RANDOM,
- "AMD IOMMU",
- NULL);
- if (r)
- goto out_free;
- }
-
- return 0;
-
-out_free:
- for (i -= 1; i >= 0; --i)
- free_irq(entries->vector, NULL);
-
- pci_disable_msix(iommu->dev);
-
- return 1;
-}
-
static int __init iommu_setup_msi(struct amd_iommu *iommu)
{
int r;
- struct amd_iommu *curr;
-
- list_for_each_entry(curr, &amd_iommu_list, list) {
- if (curr->dev == iommu->dev)
- curr->int_enabled = true;
- }
-
if (pci_enable_msi(iommu->dev))
return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
return 1;
}
+ iommu->int_enabled = true;
+ iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
return 0;
}
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
{
if (iommu->int_enabled)
return 0;
- if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
- return iommu_setup_msix(iommu);
- else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+ if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
return iommu_setup_msi(iommu);
return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
static int __init init_unity_map_range(struct ivmd_header *m)
{
struct unity_map_entry *e = 0;
+ char *s;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
switch (m->type) {
default:
+ kfree(e);
+ return 0;
case ACPI_IVMD_TYPE:
+ s = "IVMD_TYPEi\t\t\t";
e->devid_start = e->devid_end = m->devid;
break;
case ACPI_IVMD_TYPE_ALL:
+ s = "IVMD_TYPE_ALL\t\t";
e->devid_start = 0;
e->devid_end = amd_iommu_last_bdf;
break;
case ACPI_IVMD_TYPE_RANGE:
+ s = "IVMD_TYPE_RANGE\t\t";
e->devid_start = m->devid;
e->devid_end = m->aux;
break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
+ DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+ " range_start: %016llx range_end: %016llx flags: %x\n", s,
+ PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+ PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+ PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+ e->address_start, e->address_end, m->flags);
+
list_add_tail(&e->list, &amd_iommu_unity_map);
return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
* This function finally enables all IOMMUs found in the system after
* they have been initialized
*/
-static void __init enable_iommus(void)
+static void enable_iommus(void)
{
struct amd_iommu *iommu;
- list_for_each_entry(iommu, &amd_iommu_list, list) {
+ for_each_iommu(iommu) {
+ iommu_set_device_table(iommu);
+ iommu_enable_command_buffer(iommu);
+ iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
- iommu_enable_event_logging(iommu);
iommu_enable(iommu);
}
}
+static void disable_iommus(void)
+{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu)
+ iommu_disable(iommu);
+}
+
/*
* Suspend/Resume support
* disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev)
{
+ /*
+ * Disable IOMMUs before reprogramming the hardware registers.
+ * IOMMU is still enabled from the resume kernel.
+ */
+ disable_iommus();
+
+ /* re-load the hardware */
+ enable_iommus();
+
+ /*
+ * we have to flush after the IOMMUs are enabled because a
+ * disabled IOMMU will never execute the commands we send
+ */
+ amd_iommu_flush_all_domains();
+ amd_iommu_flush_all_devices();
+
return 0;
}
static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
{
- return -EINVAL;
+ /* disable IOMMUs to go out of the way for BIOS */
+ disable_iommus();
+
+ return 0;
}
static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
enable_iommus();
- printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
- (1 << (amd_iommu_aperture_order-20)));
-
printk(KERN_INFO "AMD IOMMU: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
*
****************************************************************************/
+static int __init parse_amd_iommu_dump(char *str)
+{
+ amd_iommu_dump = true;
+
+ return 1;
+}
+
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
return 1;
}
-static int __init parse_amd_iommu_size_options(char *str)
-{
- unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
- if ((order > 24) && (order < 31))
- amd_iommu_aperture_order = order;
-
- return 1;
-}
-
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
__setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 755c21e906f..971a3bec47a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-static int debugging = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
- int expected, unsigned long start, unsigned long end)
-{
- unsigned long idx = start;
-
- BUG_ON(start >= end);
-
- while (idx < end) {
- if (!!test_bit(idx, bitmap) != expected)
- return idx;
- ++idx;
- }
-
- /* all bits have the expected value */
- return ~0UL;
-}
-#else /* debugging is disabled */
-static int debugging;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
- int expected, unsigned long start, unsigned long end)
-{
- return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
static inline int translation_enabled(struct iommu_table *tbl)
{
/* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
{
unsigned long index;
unsigned long end;
- unsigned long badbit;
unsigned long flags;
index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_lock_irqsave(&tbl->it_lock, flags);
- badbit = verify_bit_range(tbl->it_map, 0, index, end);
- if (badbit != ~0UL) {
- if (printk_ratelimit())
- printk(KERN_ERR "Calgary: entry already allocated at "
- "0x%lx tbl %p dma 0x%lx npages %u\n",
- badbit, tbl, start_addr, npages);
- }
-
iommu_area_reserve(tbl->it_map, index, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long entry;
- unsigned long badbit;
unsigned long badend;
unsigned long flags;
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
spin_lock_irqsave(&tbl->it_lock, flags);
- badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
- if (badbit != ~0UL) {
- if (printk_ratelimit())
- printk(KERN_ERR "Calgary: bit is off at 0x%lx "
- "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
- badbit, tbl, dma_addr, entry, npages);
- }
-
iommu_area_free(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
iommu_detected = 1;
calgary_detected = 1;
printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
- printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
- "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
- debugging ? "enabled" : "disabled");
+ printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
+ specified_table_size);
/* swiotlb for devices that aren't behind the Calgary. */
if (max_pfn > MAX_DMA32_PFN)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58c035..cfd9f906389 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
}
#ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = __builtin_return_address(0);\
- } while (0)
-
-#define CLEAR_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = NULL; \
- } while (0)
-
/* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
static int leak_trace;
static int iommu_leak_pages = 20;
static void dump_leak(void)
{
- int i;
static int dump;
- if (dump || !iommu_leak_tab)
+ if (dump)
return;
dump = 1;
- show_stack(NULL, NULL);
- /* Very crude. dump some from the end of the table too */
- printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
- iommu_leak_pages);
- for (i = 0; i < iommu_leak_pages; i += 2) {
- printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
- 0);
- printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
- }
- printk(KERN_DEBUG "\n");
+ show_stack(NULL, NULL);
+ debug_dma_dump_mappings(NULL);
}
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
#endif
static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
- SET_LEAK(iommu_page + i);
phys_mem += PAGE_SIZE;
}
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
- CLEAR_LEAK(iommu_page + i);
}
free_iommu(iommu_page, npages);
}
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
- SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
agp_gatt_table = gatt;
- enable_gart_translations();
-
error = sysdev_class_register(&gart_sysdev_class);
if (!error)
error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(iommu_pages*sizeof(void *)));
- if (!iommu_leak_tab)
+ int ret;
+
+ ret = dma_debug_resize_entries(iommu_pages);
+ if (ret)
printk(KERN_DEBUG
- "PCI-DMA: Cannot allocate leak trace area\n");
+ "PCI-DMA: Cannot trace all the entries\n");
}
#endif
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
* the pages as Not-Present:
*/
wbinvd();
+
+ /*
+ * Now all caches are flushed and we can safely enable
+ * GART hardware. Doing it early leaves the possibility
+ * of stale cache entries that can lead to GART PTE
+ * errors.
+ */
+ enable_gart_translations();
/*
* Try to workaround a bug (thanks to BenH):
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a3853e26..a1712f2b50f 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
return paddr;
}
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
{
return baddr;
}