From 4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 24 Apr 2009 17:30:20 -0700 Subject: Intel IOMMU Pass Through Support The patch adds kernel parameter intel_iommu=pt to set up pass through mode in context mapping entry. This disables DMAR in linux kernel; but KVM still runs on VT-d and interrupt remapping still works. In this mode, kernel uses swiotlb for DMA API functions but other VT-d functionalities are enabled for KVM. KVM always uses multi level translation page table in VT-d. By default, pass though mode is disabled in kernel. This is useful when people don't want to enable VT-d DMAR in kernel but still want to use KVM and interrupt remapping for reasons like DMAR performance concern or debug purpose. Signed-off-by: Fenghua Yu Acked-by: Weidong Han Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index fa3a11365ec..d3d86b749ee 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -515,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) u32 ver; static int iommu_allocated = 0; int agaw = 0; + int msagaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -535,12 +536,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", + "Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto error; + } + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + printk(KERN_ERR + "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto error; } #endif iommu->agaw = agaw; + iommu->msagaw = msagaw; /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), -- cgit v1.2.3 From 4c25a2c1b90bf785fc2e2f0f0c74a80b3e070d39 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 17:16:06 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. context flushing. It really doesn't make a lot of sense to have some of the logic to handle caching vs. non-caching mode duplicated in qi_flush_context() and __iommu_flush_context(), while the return value indicates whether the caller should take other action which depends on the same thing. Especially since qi_flush_context() thought it was returning something entirely different anyway. This patch makes qi_flush_context() and __iommu_flush_context() both return void, removes the 'non_present_entry_flush' argument and makes the only call site which _set_ that argument to 1 do the right thing. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index d3d86b749ee..10a071ba323 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -723,23 +723,16 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } -int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush) +void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type) { struct qi_desc desc; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, -- cgit v1.2.3 From 1f0ef2aa18802a8ce7eb5a5164aaaf4d59073801 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 19:58:49 +0100 Subject: intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing. As we just did for context cache flushing, clean up the logic around whether we need to flush the iotlb or just the write-buffer, depending on caching mode. Fix the same bug in qi_flush_iotlb() that qi_flush_context() had -- it isn't supposed to be returning an error; it's supposed to be returning a flag which triggers a write-buffer flush. Remove some superfluous conditional write-buffer flushes which could never have happened because they weren't for non-present-to-present mapping changes anyway. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 10a071ba323..df6af0d4ec0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -735,22 +735,14 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, qi_submit_sync(&desc, iommu); } -int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush) +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { u8 dw = 0, dr = 0; struct qi_desc desc; int ih = 0; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - if (cap_write_drain(iommu->cap)) dw = 1; @@ -762,7 +754,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } /* -- cgit v1.2.3 From c416daa98a584596df21ee2c26fac6579ee58f57 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 10 May 2009 20:30:58 +0100 Subject: intel-iommu: Tidy up iommu->gcmd handling Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index df6af0d4ec0..faf77a00caf 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -784,7 +784,6 @@ void dmar_disable_qi(struct intel_iommu *iommu) cpu_relax(); iommu->gcmd &= ~DMA_GCMD_QIE; - writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, @@ -798,7 +797,7 @@ end: */ static void __dmar_enable_qi(struct intel_iommu *iommu) { - u32 cmd, sts; + u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; @@ -812,9 +811,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); - cmd = iommu->gcmd | DMA_GCMD_QIE; iommu->gcmd |= DMA_GCMD_QIE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); -- cgit v1.2.3 From dd7264355a203c3456dbba04db471947d3b55e7e Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 13 May 2009 15:55:52 -0700 Subject: intel-iommu: dmar_set_interrupt return error value dmar_set_interrupt feigns success when arch_setup_dmar_msi fails, return error value. Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index faf77a00caf..f23460a5d10 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1088,7 +1088,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) set_irq_data(irq, NULL); iommu->irq = 0; destroy_irq(irq); - return 0; + return ret; } ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); -- cgit v1.2.3 From aa5d2b515b6fca5f8a56eac84f7fa0a68c1ce9b7 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:34 +0800 Subject: VT-d: parse ATSR in DMA Remapping Reporting Structure Parse the Root Port ATS Capability Reporting Structure in the DMA Remapping Reporting Structure ACPI table. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 6 deletions(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f23460a5d10..6d7f9619b8a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -267,6 +267,84 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } + +static LIST_HEAD(dmar_atsr_units); + +static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = kzalloc(sizeof(*atsru), GFP_KERNEL); + if (!atsru) + return -ENOMEM; + + atsru->hdr = hdr; + atsru->include_all = atsr->flags & 0x1; + + list_add(&atsru->list, &dmar_atsr_units); + + return 0; +} + +static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +{ + int rc; + struct acpi_dmar_atsr *atsr; + + if (atsru->include_all) + return 0; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + rc = dmar_parse_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt, &atsru->devices, + atsr->segment); + if (rc || !atsru->devices_cnt) { + list_del(&atsru->list); + kfree(atsru); + } + + return rc; +} + +int dmar_find_matched_atsr_unit(struct pci_dev *dev) +{ + int i; + struct pci_bus *bus; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment == pci_domain_nr(dev->bus)) + goto found; + } + + return 0; + +found: + for (bus = dev->bus; bus; bus = bus->parent) { + struct pci_dev *bridge = bus->self; + + if (!bridge || !bridge->is_pcie || + bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + + if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { + for (i = 0; i < atsru->devices_cnt; i++) + if (atsru->devices[i] == bridge) + return 1; + break; + } + } + + if (atsru->include_all) + return 1; + + return 0; +} #endif static void __init @@ -274,22 +352,28 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { struct acpi_dmar_hardware_unit *drhd; struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_atsr *atsr; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: - drhd = (struct acpi_dmar_hardware_unit *)header; + drhd = container_of(header, struct acpi_dmar_hardware_unit, + header); printk (KERN_INFO PREFIX - "DRHD (flags: 0x%08x)base: 0x%016Lx\n", - drhd->flags, (unsigned long long)drhd->address); + "DRHD base: %#016Lx flags: %#x\n", + (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: - rmrr = (struct acpi_dmar_reserved_memory *)header; - + rmrr = container_of(header, struct acpi_dmar_reserved_memory, + header); printk (KERN_INFO PREFIX - "RMRR base: 0x%016Lx end: 0x%016Lx\n", + "RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; + case ACPI_DMAR_TYPE_ATSR: + atsr = container_of(header, struct acpi_dmar_atsr, header); + printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + break; } } @@ -361,6 +445,11 @@ parse_dmar_table(void) case ACPI_DMAR_TYPE_RESERVED_MEMORY: #ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); +#endif + break; + case ACPI_DMAR_TYPE_ATSR: +#ifdef CONFIG_DMAR + ret = dmar_parse_one_atsr(entry_header); #endif break; default: @@ -431,11 +520,19 @@ int __init dmar_dev_scope_init(void) #ifdef CONFIG_DMAR { struct dmar_rmrr_unit *rmrr, *rmrr_n; + struct dmar_atsr_unit *atsr, *atsr_n; + list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { ret = rmrr_parse_dev(rmrr); if (ret) return ret; } + + list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { + ret = atsr_parse_dev(atsr); + if (ret) + return ret; + } } #endif @@ -468,6 +565,9 @@ int __init dmar_table_init(void) #ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); + + if (list_empty(&dmar_atsr_units)) + printk(KERN_INFO PREFIX "No ATSR found\n"); #endif #ifdef CONFIG_INTR_REMAP -- cgit v1.2.3 From 6ba6c3a4cacfd68bf970e3e04e2ff0d66fa0f695 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:35 +0800 Subject: VT-d: add device IOTLB invalidation support Support device IOTLB invalidation to flush the translation cached in the Endpoint. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 8 deletions(-) (limited to 'drivers/pci/dmar.c') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 6d7f9619b8a..7b287cb38b7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -699,7 +699,8 @@ void free_iommu(struct intel_iommu *iommu) */ static inline void reclaim_free_desc(struct q_inval *qi) { - while (qi->desc_status[qi->free_tail] == QI_DONE) { + while (qi->desc_status[qi->free_tail] == QI_DONE || + qi->desc_status[qi->free_tail] == QI_ABORT) { qi->desc_status[qi->free_tail] = QI_FREE; qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; qi->free_cnt++; @@ -709,10 +710,13 @@ static inline void reclaim_free_desc(struct q_inval *qi) static int qi_check_fault(struct intel_iommu *iommu, int index) { u32 fault; - int head; + int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + fault = readl(iommu->reg + DMAR_FSTS_REG); /* @@ -722,7 +726,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> 4) == index) { + if ((head >> DMAR_IQ_SHIFT) == index) { + printk(KERN_ERR "VT-d detected invalid descriptor: " + "low=%llx, high=%llx\n", + (unsigned long long)qi->desc[index].low, + (unsigned long long)qi->desc[index].high); memcpy(&qi->desc[index], &qi->desc[wait_index], sizeof(struct qi_desc)); __iommu_flush_cache(iommu, &qi->desc[index], @@ -732,6 +740,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) } } + /* + * If ITE happens, all pending wait_desc commands are aborted. + * No new descriptors are fetched until the ITE is cleared. + */ + if (fault & DMA_FSTS_ITE) { + head = readl(iommu->reg + DMAR_IQH_REG); + head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head |= 1; + tail = readl(iommu->reg + DMAR_IQT_REG); + tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + + do { + if (qi->desc_status[head] == QI_IN_USE) + qi->desc_status[head] = QI_ABORT; + head = (head - 2 + QI_LENGTH) % QI_LENGTH; + } while (head != tail); + + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + } + + if (fault & DMA_FSTS_ICE) + writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + return 0; } @@ -741,7 +775,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { - int rc = 0; + int rc; struct q_inval *qi = iommu->qi; struct qi_desc *hw, wait_desc; int wait_index, index; @@ -752,6 +786,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) hw = qi->desc; +restart: + rc = 0; + spin_lock_irqsave(&qi->q_lock, flags); while (qi->free_cnt < 3) { spin_unlock_irqrestore(&qi->q_lock, flags); @@ -782,7 +819,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * update the HW tail register indicating the presence of * new descriptors. */ - writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -794,18 +831,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) */ rc = qi_check_fault(iommu, index); if (rc) - goto out; + break; spin_unlock(&qi->q_lock); cpu_relax(); spin_lock(&qi->q_lock); } -out: - qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; + + qi->desc_status[index] = QI_DONE; reclaim_free_desc(qi); spin_unlock_irqrestore(&qi->q_lock, flags); + if (rc == -EAGAIN) + goto restart; + return rc; } @@ -857,6 +897,27 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, qi_submit_sync(&desc, iommu); } +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask) +{ + struct qi_desc desc; + + if (mask) { + BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else + desc.high = QI_DEV_IOTLB_ADDR(addr); + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE; + + qi_submit_sync(&desc, iommu); +} + /* * Disable Queued Invalidation interface. */ -- cgit v1.2.3