From 52fbf214c70b28615d1a7ec4573cd3544e815b0c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 28 Jun 2025 18:03:51 +0800 Subject: [PATCH 01/19] iommu/vt-d: Assign devtlb cache tag on ATS enablement ANBZ: #32087 commit 25b1b75bbaf96331750fb01302825069657b2ff8 upstream. Commit <4f1492efb495> ("iommu/vt-d: Revert ATS timing change to fix boot failure") placed the enabling of ATS in the probe_finalize callback. This occurs after the default domain attachment, which is when the ATS cache tag is assigned. Consequently, the device TLB cache tag is missed when the domain is attached, leading to the device TLB not being invalidated in the iommu_unmap paths. Fix this by assigning the CACHE_TAG_DEVTLB cache tag when ATS is enabled. Fixes: 4f1492efb495 ("iommu/vt-d: Revert ATS timing change to fix boot failure") Cc: stable@vger.kernel.org Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Tested-by: Shuicheng Lin Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250625050135.3129955-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20250628100351.3198955-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/cache.c | 5 ++--- drivers/iommu/intel/iommu.c | 11 ++++++++++- drivers/iommu/intel/iommu.h | 2 ++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..47692cbfaabd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. 
*/ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 42a620720b19..041aaf33ca80 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3841,8 +3841,17 @@ static void intel_iommu_probe_finalize(struct device *dev) !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } iommu_enable_pci_pri(info); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 5167ce689bdf..e645dce31dcb 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1289,6 +1289,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, -- Gitee From ac18283923d88eefc4a4c2dfaf63d7189c618028 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 14 Jul 2025 12:50:19 +0800 Subject: [PATCH 02/19] iommu/vt-d: Optimize iotlb_sync_map for non-caching/non-RWBF modes ANBZ: #32087 commit 12724ce3fe1a3d8f30d56e48b4f272d8860d1970 upstream. 
The iotlb_sync_map iommu ops allows drivers to perform necessary cache flushes when new mappings are established. For the Intel iommu driver, this callback specifically serves two purposes: - To flush caches when a second-stage page table is attached to a device whose iommu is operating in caching mode (CAP_REG.CM==1). - To explicitly flush internal write buffers to ensure updates to memory- resident remapping structures are visible to hardware (CAP_REG.RWBF==1). However, in scenarios where neither caching mode nor the RWBF flag is active, the cache_tag_flush_range_np() helper, which is called in the iotlb_sync_map path, effectively becomes a no-op. Despite being a no-op, cache_tag_flush_range_np() involves iterating through all cache tags of the iommu's attached to the domain, protected by a spinlock. This unnecessary execution path introduces overhead, leading to a measurable I/O performance regression. On systems with NVMes under the same bridge, performance was observed to drop from approximately ~6150 MiB/s down to ~4985 MiB/s. Introduce a flag in the dmar_domain structure. This flag will only be set when iotlb_sync_map is required (i.e., when CM or RWBF is set). The cache_tag_flush_range_np() is called only for domains where this flag is set. This flag, once set, is immutable, given that there won't be mixed configurations in real-world scenarios where some IOMMUs in a system operate in caching mode while others do not. Theoretically, the immutability of this flag does not impact functionality. 
Reported-by: Ioanna Alifieraki Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2115738 Link: https://lore.kernel.org/r/20250701171154.52435-1-ioanna-maria.alifieraki@canonical.com Fixes: 129dab6e1286 ("iommu/vt-d: Use cache_tag_flush_range_np() in iotlb_sync_map") Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250703031545.3378602-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20250714045028.958850-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 19 ++++++++++++++++++- drivers/iommu/intel/iommu.h | 3 +++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 041aaf33ca80..d0da477205e7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1796,6 +1796,18 @@ static int domain_setup_first_level(struct intel_iommu *iommu, (pgd_t *)pgd, flags, old); } +static bool domain_need_iotlb_sync_map(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + if (cap_caching_mode(iommu->cap) && !domain->use_first_level) + return true; + + if (rwbf_quirk || cap_rwbf(iommu->cap)) + return true; + + return false; +} + static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -1833,6 +1845,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; + domain->iotlb_sync_map |= domain_need_iotlb_sync_map(domain, iommu); + return 0; out_block_translation: @@ -4015,7 +4029,10 @@ static bool risky_device(struct pci_dev *pdev) static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + if (dmar_domain->iotlb_sync_map) + cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); return 
0; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index e645dce31dcb..5e51927ba017 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -612,6 +612,9 @@ struct dmar_domain { u8 has_mappings:1; /* Has mappings configured through * iommu_map() interface. */ + u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write + * buffer when creating mappings. + */ u8 dirty_tracking:1; /* Dirty tracking is enabled */ u8 nested_parent:1; /* Has other domains nested on it */ -- Gitee From adfc1788bdb472ce860783716a4a90fb3d5d9bbf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 12:50:22 +0800 Subject: [PATCH 03/19] iommu/vt-d: Do not wipe out the page table NID when devices detach ANBZ: #32087 commit 5c3687d5789cfff8d285a2c76bceb47f145bf01f upstream. The NID is used to control which NUMA node memory for the page table is allocated from. It should be a permanent property of the page table when it was allocated and not change during attach/detach of devices.
Reviewed-by: Wei Wang Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-dbbe6f7e7ae3+124ffe-vtd_prep_jgg@nvidia.com Signed-off-by: Lu Baolu Fixes: 7c204426b818 ("iommu/vt-d: Add domain_alloc_paging support") Link: https://lore.kernel.org/r/20250714045028.958850-6-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d0da477205e7..158dd6c30080 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1391,7 +1391,6 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) if (--info->refcnt == 0) { ida_free(&iommu->domain_ida, info->did); xa_erase(&domain->iommu_array, iommu->seq_id); - domain->nid = NUMA_NO_NODE; kfree(info); } } -- Gitee From dbf387e7ea2d1de72ef769b215e72ac4ffb58445 Mon Sep 17 00:00:00 2001 From: Ethan Milon Date: Mon, 14 Jul 2025 12:50:27 +0800 Subject: [PATCH 04/19] iommu/vt-d: Fix missing PASID in dev TLB flush with cache_tag_flush_all ANBZ: #32087 commit 3141153816bf4f0257747bd4dda176d38f1a9a49 upstream. The function cache_tag_flush_all() was originally implemented with incorrect device TLB invalidation logic that does not handle PASID, in commit c4d27ffaa8eb ("iommu/vt-d: Add cache tag invalidation helpers") This causes regressions where full address space TLB invalidations occur with a PASID attached, such as during transparent hugepage unmapping in SVA configurations or when calling iommu_flush_iotlb_all(). In these cases, the device receives a TLB invalidation that lacks PASID. This incorrect logic was later extracted into cache_tag_flush_devtlb_all(), in commit 3297d047cd7f ("iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching") The fix replaces the call to cache_tag_flush_devtlb_all() with cache_tag_flush_devtlb_psi(), which properly handles PASID. 
Fixes: 4f609dbff51b ("iommu/vt-d: Use cache helpers in arch_invalidate_secondary_tlbs") Fixes: 4e589a53685c ("iommu/vt-d: Use cache_tag_flush_all() in flush_iotlb_all") Signed-off-by: Ethan Milon Link: https://lore.kernel.org/r/20250708214821.30967-1-ethan.milon@eviden.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20250714045028.958850-11-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Guixin Liu --- drivers/iommu/intel/cache.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 47692cbfaabd..c8b79de84d3f 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -422,22 +422,6 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_ domain->qi_batch); } -static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) -{ - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); - if (info->dtlb_extra_inval) - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); -} - /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. 
@@ -508,7 +492,7 @@ void cache_tag_flush_all(struct dmar_domain *domain) break; case CACHE_TAG_DEVTLB: case CACHE_TAG_NESTING_DEVTLB: - cache_tag_flush_devtlb_all(domain, tag); + cache_tag_flush_devtlb_psi(domain, tag, 0, MAX_AGAW_PFN_WIDTH); break; } -- Gitee From 6bab8bf5b9f00ba1bfe2a425963761e574b844bd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 23 Jul 2025 15:20:45 +0800 Subject: [PATCH 05/19] iommu/vt-d: Fix UAF on sva unbind with pending IOPFs ANBZ: #32087 commit f0b9d31c6edd50a6207489cd1bd4ddac814b9cd2 upstream. Commit 17fce9d2336d ("iommu/vt-d: Put iopf enablement in domain attach path") disables IOPF on device by removing the device from its IOMMU's IOPF queue when the last IOPF-capable domain is detached from the device. Unfortunately, it did this in a wrong place where there are still pending IOPFs. As a result, a use-after-free error is potentially triggered and eventually a kernel panic with a kernel trace similar to the following: refcount_t: underflow; use-after-free. WARNING: CPU: 3 PID: 313 at lib/refcount.c:28 refcount_warn_saturate+0xd8/0xe0 Workqueue: iopf_queue/dmar0-iopfq iommu_sva_handle_iopf Call Trace: iopf_free_group+0xe/0x20 process_one_work+0x197/0x3d0 worker_thread+0x23a/0x350 ? rescuer_thread+0x4a0/0x4a0 kthread+0xf8/0x230 ? finish_task_switch.isra.0+0x81/0x260 ? kthreads_online_cpu+0x110/0x110 ? kthreads_online_cpu+0x110/0x110 ret_from_fork+0x13b/0x170 ? kthreads_online_cpu+0x110/0x110 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- The intel_pasid_tear_down_entry() function is responsible for blocking hardware from generating new page faults and flushing all in-flight ones. Therefore, moving iopf_for_domain_remove() after this function should resolve this. 
Fixes: 17fce9d2336d ("iommu/vt-d: Put iopf enablement in domain attach path") Reported-by: Ethan Milon Closes: https://lore.kernel.org/r/e8b37f3e-8539-40d4-8993-43a1f3ffe5aa@eviden.com Suggested-by: Ethan Milon Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20250723072045.1853328-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 158dd6c30080..067fae71c157 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4077,8 +4077,8 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); - iopf_for_domain_remove(old, dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); + iopf_for_domain_remove(old, dev); domain_remove_dev_pasid(old, dev, pasid); return 0; -- Gitee From d5f33f619727fdd8037b57b946b4800c0e46ad49 Mon Sep 17 00:00:00 2001 From: Eugene Koira Date: Wed, 3 Sep 2025 13:53:29 +0800 Subject: [PATCH 06/19] iommu/vt-d: Fix __domain_mapping()'s usage of switch_to_super_page() ANBZ: #32087 commit dce043c07ca1ac19cfbe2844a6dc71e35c322353 upstream. switch_to_super_page() assumes the memory range it's working on is aligned to the target large page level. Unfortunately, __domain_mapping() doesn't take this into account when using it, and will pass unaligned ranges ultimately freeing a PTE range larger than expected. 
Take for example a mapping with the following iov_pfn range [0x3fe400, 0x4c0600), which should be backed by the following mappings: iov_pfn [0x3fe400, 0x3fffff] covered by 2MiB pages iov_pfn [0x400000, 0x4bffff] covered by 1GiB pages iov_pfn [0x4c0000, 0x4c05ff] covered by 2MiB pages Under this circumstance, __domain_mapping() will pass [0x400000, 0x4c05ff] to switch_to_super_page() at a 1 GiB granularity, which will in turn free PTEs all the way to iov_pfn 0x4fffff. Mitigate this by rounding down the iov_pfn range passed to switch_to_super_page() in __domain_mapping() to the target large page level. Additionally add range alignment checks to switch_to_super_page. Fixes: 9906b9352a35 ("iommu/vt-d: Avoid duplicate removing in __domain_mapping()") Signed-off-by: Eugene Koira Cc: stable@vger.kernel.org Reviewed-by: Nicolas Saenz Julienne Reviewed-by: David Woodhouse Link: https://lore.kernel.org/r/20250826143816.38686-1-eugkoira@amazon.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 067fae71c157..7e237bf8a116 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1587,6 +1587,10 @@ static void switch_to_super_page(struct dmar_domain *domain, unsigned long lvl_pages = lvl_to_nr_pages(level); struct dma_pte *pte = NULL; + if (WARN_ON(!IS_ALIGNED(start_pfn, lvl_pages) || + !IS_ALIGNED(end_pfn + 1, lvl_pages))) + return; + while (start_pfn <= end_pfn) { if (!pte) pte = pfn_to_dma_pte(domain, start_pfn, &level, @@ -1662,7 +1666,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long pages_to_remove; pteval |= DMA_PTE_LARGE_PAGE; - pages_to_remove = min_t(unsigned long, nr_pages, + pages_to_remove = min_t(unsigned long, + round_down(nr_pages, lvl_pages), nr_pte_to_next_page(pte) * lvl_pages); end_pfn = iov_pfn + 
pages_to_remove - 1; switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl); -- Gitee From 49ca934d790b72185019c570989919ce01f7ef39 Mon Sep 17 00:00:00 2001 From: "Vineeth Pillai (Google)" Date: Thu, 18 Sep 2025 13:01:59 +0800 Subject: [PATCH 07/19] iommu/vt-d: debugfs: Fix legacy mode page table dump logic ANBZ: #32087 commit fbe6070c73badca726e4ff7877320e6c62339917 upstream. In legacy mode, SSPTPTR is ignored if TT is not 00b or 01b. SSPTPTR may be uninitialized or zero in that case and may cause oops like: Oops: general protection fault, probably for non-canonical address 0xf00087d3f000f000: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 786 Comm: cat Not tainted 6.16.0 #191 PREEMPT(voluntary) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014 RIP: 0010:pgtable_walk_level+0x98/0x150 RSP: 0018:ffffc90000f279c0 EFLAGS: 00010206 RAX: 0000000040000000 RBX: ffffc90000f27ab0 RCX: 000000000000001e RDX: 0000000000000003 RSI: f00087d3f000f000 RDI: f00087d3f0010000 RBP: ffffc90000f27a00 R08: ffffc90000f27a98 R09: 0000000000000002 R10: 0000000000000000 R11: 0000000000000000 R12: f00087d3f000f000 R13: 0000000000000000 R14: 0000000040000000 R15: ffffc90000f27a98 FS: 0000764566dcb740(0000) GS:ffff8881f812c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000764566d44000 CR3: 0000000109d81003 CR4: 0000000000772ef0 PKRU: 55555554 Call Trace: pgtable_walk_level+0x88/0x150 domain_translation_struct_show.isra.0+0x2d9/0x300 dev_domain_translation_struct_show+0x20/0x40 seq_read_iter+0x12d/0x490 ... Avoid walking the page table if TT is not 00b or 01b.
Fixes: 2b437e804566 ("iommu/vt-d: debugfs: Support dumping a specified page table") Signed-off-by: Vineeth Pillai (Google) Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250814163153.634680-1-vineeth@bitbyteword.org Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/debugfs.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index affbf4a1558d..5aa7f46a420b 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -435,8 +435,21 @@ static int domain_translation_struct_show(struct seq_file *m, } pgd &= VTD_PAGE_MASK; } else { /* legacy mode */ - pgd = context->lo & VTD_PAGE_MASK; - agaw = context->hi & 7; + u8 tt = (u8)(context->lo & GENMASK_ULL(3, 2)) >> 2; + + /* + * According to Translation Type(TT), + * get the page table pointer(SSPTPTR). + */ + switch (tt) { + case CONTEXT_TT_MULTI_LEVEL: + case CONTEXT_TT_DEV_IOTLB: + pgd = context->lo & VTD_PAGE_MASK; + agaw = context->hi & 7; + break; + default: + goto iommu_unlock; + } } seq_printf(m, "Device %04x:%02x:%02x.%x ", -- Gitee From 679b9ae48942ee000ef36d353d61c6e90e07ab1b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 18 Sep 2025 13:02:02 +0800 Subject: [PATCH 08/19] iommu/vt-d: PRS isn't usable if PDS isn't supported ANBZ: #32087 commit 5ef7e24c742038a5d8c626fdc0e3a21834358341 upstream. The specification, Section 7.10, "Software Steps to Drain Page Requests & Responses," requires software to submit an Invalidation Wait Descriptor (inv_wait_dsc) with the Page-request Drain (PD=1) flag set, along with the Invalidation Wait Completion Status Write flag (SW=1). It then waits for the Invalidation Wait Descriptor's completion. 
However, the PD field in the Invalidation Wait Descriptor is optional, as stated in Section 6.5.2.9, "Invalidation Wait Descriptor": "Page-request Drain (PD): Remapping hardware implementations reporting Page-request draining as not supported (PDS = 0 in ECAP_REG) treat this field as reserved." This implies that if the IOMMU doesn't support the PDS capability, software can't drain page requests and group responses as expected. Do not enable PCI/PRI if the IOMMU doesn't support PDS. Reported-by: Joel Granados Closes: https://lore.kernel.org/r/20250909-jag-pds-v1-1-ad8cba0e494e@kernel.org Fixes: 66ac4db36f4c ("iommu/vt-d: Add page request draining support") Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20250915062946.120196-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7e237bf8a116..20cbed108715 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3804,7 +3804,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } if (info->ats_supported && ecap_prs(iommu->ecap) && - pci_pri_supported(pdev)) + ecap_pds(iommu->ecap) && pci_pri_supported(pdev)) info->pri_supported = 1; } } -- Gitee From f4fea36469fc0bd3005f1e64fbec98988230ca61 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 18 Sep 2025 13:02:04 +0800 Subject: [PATCH 09/19] iommu/vt-d: debugfs: Avoid dumping context command register ANBZ: #32087 commit 5bd5ab53e7b8c87908341accb3ad8da555d6b778 upstream. The register-based cache invalidation interface is in the process of being replaced by the queued invalidation interface. The VT-d architecture allows hardware implementations with a queued invalidation interface to not implement the registers used for cache invalidation. 
Currently, the debugfs interface dumps the Context Command Register unconditionally, which is not reasonable. Remove it to avoid potential access to non-present registers. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20250917025051.143853-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 5aa7f46a420b..4f9cb72e442d 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -62,7 +62,6 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(CAP), IOMMU_REGSET_ENTRY(ECAP), IOMMU_REGSET_ENTRY(RTADDR), - IOMMU_REGSET_ENTRY(CCMD), IOMMU_REGSET_ENTRY(AFLOG), IOMMU_REGSET_ENTRY(PHMBASE), IOMMU_REGSET_ENTRY(PHMLIMIT), -- Gitee From 8b7ed914aae77b494285d1eb1dd3370bf246d4e2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 26 Sep 2025 10:41:30 +0800 Subject: [PATCH 10/19] iommu/vt-d: Disallow dirty tracking if incoherent page walk ANBZ: #32087 commit 57f55048e564dedd8a4546d018e29d6bbfff0a7e upstream. Dirty page tracking relies on the IOMMU atomically updating the dirty bit in the paging-structure entry. For this operation to succeed, the paging- structure memory must be coherent between the IOMMU and the CPU. In other words, if the iommu page walk is incoherent, dirty page tracking doesn't work. The Intel VT-d specification, Section 3.10 "Snoop Behavior" states: "Remapping hardware encountering the need to atomically update A/EA/D bits in a paging-structure entry that is not snooped will result in a non- recoverable fault." To prevent an IOMMU from being incorrectly configured for dirty page tracking when it is operating in an incoherent mode, mark SSADS as supported only when both ecap_slads and ecap_smpwc are supported.
Fixes: f35f22cc760e ("iommu/vt-d: Access/Dirty bit support for SS domains") Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250924083447.123224-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 5e51927ba017..82f0f4bd8f7a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -541,7 +541,8 @@ enum { #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) #define ssads_supported(iommu) (sm_supported(iommu) && \ - ecap_slads((iommu)->ecap)) + ecap_slads((iommu)->ecap) && \ + ecap_smpwc(iommu->ecap)) #define nested_supported(iommu) (sm_supported(iommu) && \ ecap_nest((iommu)->ecap)) -- Gitee From b0852b261d0658fc8a2ef3b586f0dcdeef88d2cd Mon Sep 17 00:00:00 2001 From: "Vineeth Pillai (Google)" Date: Wed, 19 Nov 2025 13:16:12 +0800 Subject: [PATCH 11/19] iommu/vt-d: Set INTEL_IOMMU_FLOPPY_WA depend on BLK_DEV_FD ANBZ: #32087 commit cb3db5a39e2a6b6396df1780d39a250f649d2e3a upstream. INTEL_IOMMU_FLOPPY_WA workaround was introduced to create direct mappings for first 16MB for floppy devices as the floppy drivers were not using dma apis. We need not do this direct map if floppy driver is not enabled. INTEL_IOMMU_FLOPPY_WA is generally not a good idea. Iommu will be mapping pages in this address range while kernel would also be allocating from this range(mostly on memory stress). A misbehaving device using this domain will have access to the pages that the kernel might be actively using. We noticed this while running a test that was trying to figure out if any pages used by kernel is in iommu page tables. This patch reduces the scope of the above issue by disabling the workaround when floppy driver is not enabled. 
But we would still need to fix the floppy driver to use dma apis so that we need not do direct map without reserving the pages. Or the other option is to reserve this memory range in firmware so that kernel will not use the pages. Fixes: d850c2ee5fe2 ("iommu/vt-d: Expose ISA direct mapping region via iommu_get_resv_regions") Fixes: 49a0429e53f2 ("Intel IOMMU: Iommu floppy workaround") Signed-off-by: Vineeth Pillai (Google) Link: https://lore.kernel.org/r/20251002161625.1155133-1-vineeth@bitbyteword.org Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index d84eea30fa38..fa46b94667d5 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -66,7 +66,7 @@ config INTEL_IOMMU_DEFAULT_ON config INTEL_IOMMU_FLOPPY_WA def_bool y - depends on X86 + depends on X86 && BLK_DEV_FD help Floppy disk drivers are known to bypass DMA API calls thereby failing to work when IOMMU is enabled. This -- Gitee From b0714e9d4694e63b53382d17f9dc6dfd03866d04 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Wed, 19 Nov 2025 13:16:13 +0800 Subject: [PATCH 12/19] iommu/vt-d: Fix unused invalidation hint in qi_desc_iotlb ANBZ: #32087 commit 6b38a108eeb3936b21643191db535a35dd7c890b upstream. Invalidation hint (ih) in the function 'qi_desc_iotlb' is initialized to zero and never used. It is embedded in the 0th bit of the 'addr' parameter. Get the correct 'ih' value from there. 
Fixes: f701c9f36bcb ("iommu/vt-d: Factor out invalidation descriptor composition") Signed-off-by: Aashish Sharma Link: https://lore.kernel.org/r/20251009010903.1323979-1-aashish@aashishsharma.net Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 82f0f4bd8f7a..3160ee9600f3 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1101,7 +1101,7 @@ static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, struct qi_desc *desc) { u8 dw = 0, dr = 0; - int ih = 0; + int ih = addr & 1; if (cap_write_drain(iommu->cap)) dw = 1; -- Gitee From 42527cc9c8b743e9d322053d79318801f2ff5977 Mon Sep 17 00:00:00 2001 From: Jinhui Guo Date: Thu, 22 Jan 2026 09:48:50 +0800 Subject: [PATCH 13/19] iommu/vt-d: Skip dev-iotlb flush for inaccessible PCIe device without scalable mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #32087 commit 42662d19839f34735b718129ea200e3734b07e50 upstream. PCIe endpoints with ATS enabled and passed through to userspace (e.g., QEMU, DPDK) can hard-lock the host when their link drops, either by surprise removal or by a link fault. Commit 4fc82cd907ac ("iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected") adds pci_dev_is_disconnected() to devtlb_invalidation_with_pasid() so ATS invalidation is skipped only when the device is being safely removed, but it applies only when Intel IOMMU scalable mode is enabled. With scalable mode disabled or unsupported, a system hard-lock occurs when a PCIe endpoint's link drops because the Intel IOMMU waits indefinitely for an ATS invalidation that cannot complete. 
Call Trace: qi_submit_sync qi_flush_dev_iotlb __context_flush_dev_iotlb.part.0 domain_context_clear_one_cb pci_for_each_dma_alias device_block_translation blocking_domain_attach_dev iommu_deinit_device __iommu_group_remove_device iommu_release_device iommu_bus_notifier blocking_notifier_call_chain bus_notify device_del pci_remove_bus_device pci_stop_and_remove_bus_device pciehp_unconfigure_device pciehp_disable_slot pciehp_handle_presence_or_link_change pciehp_ist Commit 81e921fd3216 ("iommu/vt-d: Fix NULL domain on device release") adds intel_pasid_teardown_sm_context() to intel_iommu_release_device(), which calls qi_flush_dev_iotlb() and can also hard-lock the system when a PCIe endpoint's link drops. Call Trace: qi_submit_sync qi_flush_dev_iotlb __context_flush_dev_iotlb.part.0 intel_context_flush_no_pasid device_pasid_table_teardown pci_pasid_table_teardown pci_for_each_dma_alias intel_pasid_teardown_sm_context intel_iommu_release_device iommu_deinit_device __iommu_group_remove_device iommu_release_device iommu_bus_notifier blocking_notifier_call_chain bus_notify device_del pci_remove_bus_device pci_stop_and_remove_bus_device pciehp_unconfigure_device pciehp_disable_slot pciehp_handle_presence_or_link_change pciehp_ist Sometimes the endpoint loses connection without a link-down event (e.g., due to a link fault); killing the process (virsh destroy) then hard-locks the host. Call Trace: qi_submit_sync qi_flush_dev_iotlb __context_flush_dev_iotlb.part.0 domain_context_clear_one_cb pci_for_each_dma_alias device_block_translation blocking_domain_attach_dev __iommu_attach_device __iommu_device_set_domain __iommu_group_set_domain_internal iommu_detach_group vfio_iommu_type1_detach_group vfio_group_detach_container vfio_group_fops_release __fput pci_dev_is_disconnected() only covers safe-removal paths; pci_device_is_present() tests accessibility by reading vendor/device IDs and internally calls pci_dev_is_disconnected(). On a ConnectX-5 (8 GT/s, x2) this costs ~70 µs. 
Since __context_flush_dev_iotlb() is only called on {attach,release}_dev paths (not hot), add pci_device_is_present() there to skip inaccessible devices and avoid the hard-lock. Fixes: 37764b952e1b ("iommu/vt-d: Global devTLB flush when present context entry changed") Fixes: 81e921fd3216 ("iommu/vt-d: Fix NULL domain on device release") Cc: stable@vger.kernel.org Signed-off-by: Jinhui Guo Link: https://lore.kernel.org/r/20251211035946.2071-2-guojinhui.liam@bytedance.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/pasid.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ac67a056b6c8..b8f4657aa0f0 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -1105,6 +1105,14 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) if (!info->ats_enabled) return; + /* + * Skip dev-IOTLB flush for inaccessible PCIe devices to prevent the + * Intel IOMMU from waiting indefinitely for an ATS invalidation that + * cannot complete. + */ + if (!pci_device_is_present(to_pci_dev(info->dev))) + return; + qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn), info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH); -- Gitee From ab8fc41e69d2288f16a057c4eee3a59e2045e2d7 Mon Sep 17 00:00:00 2001 From: Jinhui Guo Date: Thu, 22 Jan 2026 09:48:51 +0800 Subject: [PATCH 14/19] iommu/vt-d: Flush dev-IOTLB only when PCIe device is accessible in scalable mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #32087 commit 10e60d87813989e20eac1f3eda30b3bae461e7f9 upstream. Commit 4fc82cd907ac ("iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected") relies on pci_dev_is_disconnected() to skip ATS invalidation for safely-removed devices, but it does not cover link-down caused by faults, which can still hard-lock the system. 
For example, if a VM fails to connect to the PCIe device, "virsh destroy" is executed to release resources and isolate the fault, but a hard-lockup occurs while releasing the group fd. Call Trace: qi_submit_sync qi_flush_dev_iotlb intel_pasid_tear_down_entry device_block_translation blocking_domain_attach_dev __iommu_attach_device __iommu_device_set_domain __iommu_group_set_domain_internal iommu_detach_group vfio_iommu_type1_detach_group vfio_group_detach_container vfio_group_fops_release __fput Although pci_device_is_present() is slower than pci_dev_is_disconnected(), it still takes only ~70 µs on a ConnectX-5 (8 GT/s, x2) and becomes even faster as PCIe speed and width increase. Besides, devtlb_invalidation_with_pasid() is called only in the paths below, which are far less frequent than memory map/unmap. 1. mm-struct release 2. {attach,release}_dev 3. set/remove PASID 4. dirty-tracking setup The gain in system stability far outweighs the negligible cost of using pci_device_is_present() instead of pci_dev_is_disconnected() to decide when to skip ATS invalidation, especially under GDR high-load conditions. 
Fixes: 4fc82cd907ac ("iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected") Cc: stable@vger.kernel.org Signed-off-by: Jinhui Guo Link: https://lore.kernel.org/r/20251211035946.2071-3-guojinhui.liam@bytedance.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/pasid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index b8f4657aa0f0..c87ed71d78c6 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -218,7 +218,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, if (!info || !info->ats_enabled) return; - if (pci_dev_is_disconnected(to_pci_dev(dev))) + if (!pci_device_is_present(to_pci_dev(dev))) return; sid = PCI_DEVID(info->bus, info->devfn); -- Gitee From 553ced8a7fe442495e0d1f69b6774d68fbd2cd2b Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Thu, 22 Jan 2026 09:48:52 +0800 Subject: [PATCH 15/19] iommu/vt-d: Flush cache for PASID table before using it ANBZ: #32087 commit 22d169bdd2849fe6bd18c2643742e1c02be6451c upstream. When writing the address of a freshly allocated zero-initialized PASID table to a PASID directory entry, do that after the CPU cache flush for this PASID table, not before it, to avoid the time window when this PASID table may be already used by non-coherent IOMMU hardware while its contents in RAM is still some random old data, not zero-initialized. 
Fixes: 194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency") Signed-off-by: Dmytro Maluka Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20251221123508.37495-1-dmaluka@chromium.org Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/pasid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c87ed71d78c6..dc0675727acb 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -153,6 +153,9 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) if (!entries) return NULL; + if (!ecap_coherent(info->iommu->ecap)) + clflush_cache_range(entries, VTD_PAGE_SIZE); + /* * The pasid directory table entry won't be freed after * allocation. No worry about the race with free and @@ -165,10 +168,8 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) iommu_free_pages(entries); goto retry; } - if (!ecap_coherent(info->iommu->ecap)) { - clflush_cache_range(entries, VTD_PAGE_SIZE); + if (!ecap_coherent(info->iommu->ecap)) clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); - } } return &entries[index]; -- Gitee From f59b518a0474fd22e397067e6b314dbebf7cbf1d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 22 Jan 2026 09:48:54 +0800 Subject: [PATCH 16/19] iommu/vt-d: Clear Present bit before tearing down PASID entry ANBZ: #32087 commit 75ed00055c059dedc47b5daaaa2f8a7a019138ff upstream. The Intel VT-d Scalable Mode PASID table entry consists of 512 bits (64 bytes). When tearing down an entry, the current implementation zeros the entire 64-byte structure immediately using multiple 64-bit writes. Since the IOMMU hardware may fetch these 64 bytes using multiple internal transactions (e.g., four 128-bit bursts), updating or zeroing the entire entry while it is active (P=1) risks a "torn" read. 
If a hardware fetch occurs simultaneously with the CPU zeroing the entry, the hardware could observe an inconsistent state, leading to unpredictable behavior or spurious faults. Follow the "Guidance to Software for Invalidations" in the VT-d spec (Section 6.5.3.3) by implementing the recommended ownership handshake: 1. Clear only the 'Present' (P) bit of the PASID entry. 2. Use a dma_wmb() to ensure the cleared bit is visible to hardware before proceeding. 3. Execute the required invalidation sequence (PASID cache, IOTLB, and Device-TLB flush) to ensure the hardware has released all cached references. 4. Only after the flushes are complete, zero out the remaining fields of the PASID entry. Also, add a dma_wmb() in pasid_set_present() to ensure that all other fields of the PASID entry are visible to the hardware before the Present bit is set. Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables") Signed-off-by: Lu Baolu Reviewed-by: Dmytro Maluka Reviewed-by: Samiullah Khawaja Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260120061816.2132558-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/pasid.c | 6 +++++- drivers/iommu/intel/pasid.h | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index dc0675727acb..93213c71e4c7 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -273,7 +273,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); - intel_pasid_clear_entry(dev, pasid, fault_ignore); + pasid_clear_present(pte); spin_unlock(&iommu->lock); if (!ecap_coherent(iommu->ecap)) @@ -287,6 +287,10 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); devtlb_invalidation_with_pasid(iommu, dev, 
pasid); + intel_pasid_clear_entry(dev, pasid, fault_ignore); + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + if (!fault_ignore) intel_iommu_drain_pasid_prq(dev, pasid); } diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index fd0fd1a0df84..28902b51a68f 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -233,9 +233,23 @@ static inline void pasid_set_wpe(struct pasid_entry *pe) */ static inline void pasid_set_present(struct pasid_entry *pe) { + dma_wmb(); pasid_set_bits(&pe->val[0], 1 << 0, 1); } +/* + * Clear the Present (P) bit (bit 0) of a scalable-mode PASID table entry. + * This initiates the transition of the entry's ownership from hardware + * to software. The caller is responsible for fulfilling the invalidation + * handshake recommended by the VT-d spec, Section 6.5.3.3 (Guidance to + * Software for Invalidations). + */ +static inline void pasid_clear_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 0); + dma_wmb(); +} + /* * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID * entry. -- Gitee From a66c3c1f037b4041fcd6fe6b9631e4c9127f4bd1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 22 Jan 2026 09:48:55 +0800 Subject: [PATCH 17/19] iommu/vt-d: Clear Present bit before tearing down context entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #32087 commit c1e4f1dccbe9d7656d1c6872ebeadb5992d0aaa2 upstream. When tearing down a context entry, the current implementation zeros the entire 128-bit entry using multiple 64-bit writes. This creates a window where the hardware can fetch a "torn" entry — where some fields are already zeroed while the 'Present' bit is still set — leading to unpredictable behavior or spurious faults. While x86 provides strong write ordering, the compiler may reorder writes to the two 64-bit halves of the context entry. 
Even without compiler reordering, the hardware fetch is not guaranteed to be atomic with respect to multiple CPU writes. Align with the "Guidance to Software for Invalidations" in the VT-d spec (Section 6.5.3.3) by implementing the recommended ownership handshake: 1. Clear only the 'Present' (P) bit of the context entry first to signal the transition of ownership from hardware to software. 2. Use dma_wmb() to ensure the cleared bit is visible to the IOMMU. 3. Perform the required cache and context-cache invalidation to ensure hardware no longer has cached references to the entry. 4. Fully zero out the entry only after the invalidation is complete. Also, add a dma_wmb() to context_set_present() to ensure the entry is fully initialized before the 'Present' bit becomes visible. Fixes: ba39592764ed2 ("Intel IOMMU: Intel IOMMU driver") Reported-by: Dmytro Maluka Closes: https://lore.kernel.org/all/aTG7gc7I5wExai3S@google.com/ Signed-off-by: Lu Baolu Reviewed-by: Dmytro Maluka Reviewed-by: Samiullah Khawaja Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260120061816.2132558-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 4 +++- drivers/iommu/intel/iommu.h | 21 ++++++++++++++++++++- drivers/iommu/intel/pasid.c | 5 ++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 20cbed108715..d4d9d9602070 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1734,10 +1734,12 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 } did = context_domain_id(context); - context_clear_entry(context); + context_clear_present(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); intel_context_flush_no_pasid(info, context, did); + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); } int 
__domain_setup_first_level(struct intel_iommu *iommu, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 3160ee9600f3..1fb568402310 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -973,7 +973,26 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) static inline void context_set_present(struct context_entry *context) { - context->lo |= 1; + u64 val; + + dma_wmb(); + val = READ_ONCE(context->lo) | 1; + WRITE_ONCE(context->lo, val); +} + +/* + * Clear the Present (P) bit (bit 0) of a context table entry. This initiates + * the transition of the entry's ownership from hardware to software. The + * caller is responsible for fulfilling the invalidation handshake recommended + * by the VT-d spec, Section 6.5.3.3 (Guidance to Software for Invalidations). + */ +static inline void context_clear_present(struct context_entry *context) +{ + u64 val; + + val = READ_ONCE(context->lo) & GENMASK_ULL(63, 1); + WRITE_ONCE(context->lo, val); + dma_wmb(); } static inline void context_set_fault_enable(struct context_entry *context) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 93213c71e4c7..489c2a47e69c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -1027,7 +1027,7 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn) } if (context_copied(iommu, bus, devfn)) { - context_clear_entry(context); + context_clear_present(context); __iommu_flush_cache(iommu, context, sizeof(*context)); /* @@ -1047,6 +1047,9 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn) iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + /* * At this point, the device is supposed to finish reset at * its driver probe stage, so no in-flight DMA will exist, -- Gitee From 
fa519faed059996d9834d81fe8d6451cf92c3464 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Jul 2025 12:50:20 +0800 Subject: [PATCH 18/19] iommu/vt-d: Lift the __pa to domain_setup_first_level/intel_svm_set_dev_pasid() ANBZ: #32087 commit cd0d0e4e48d817215695e1cc9114c6f614fb629f upstream. Pass the phys_addr_t down through the call chain from the top instead of passing a pgd_t * KVA. This moves the __pa() into domain_setup_first_level() which is the first function to obtain the pgd from the IOMMU page table in this call chain. The SVA flow is also adjusted to get the pa of the mm->pgd. iommupt will move the __pa() into iommupt code; it never shares the KVA of the page table with the driver. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v3-dbbe6f7e7ae3+124ffe-vtd_prep_jgg@nvidia.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20250714045028.958850-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 15 +++++++-------- drivers/iommu/intel/iommu.h | 7 +++---- drivers/iommu/intel/pasid.c | 17 +++++++++-------- drivers/iommu/intel/pasid.h | 11 +++++------ drivers/iommu/intel/svm.c | 2 +- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d4d9d9602070..8c0edcfdfddf 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1742,15 +1742,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 __iommu_flush_cache(iommu, context, sizeof(*context)); } -int __domain_setup_first_level(struct intel_iommu *iommu, - struct device *dev, ioasid_t pasid, - u16 did, pgd_t *pgd, int flags, - struct iommu_domain *old) +int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, + ioasid_t pasid, u16 did, phys_addr_t fsptptr, + int flags, struct iommu_domain *old) { if (!old) - return intel_pasid_setup_first_level(iommu,
dev, pgd, - pasid, did, flags); - return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did, + return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, + did, flags); + return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did, iommu_domain_did(old, iommu), flags); } @@ -1799,7 +1798,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu, return __domain_setup_first_level(iommu, dev, pasid, domain_id_iommu(domain, iommu), - (pgd_t *)pgd, flags, old); + __pa(pgd), flags, old); } static bool domain_need_iotlb_sync_map(struct dmar_domain *domain, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1fb568402310..2ea175e62db3 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1275,10 +1275,9 @@ domain_add_dev_pasid(struct iommu_domain *domain, void domain_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -int __domain_setup_first_level(struct intel_iommu *iommu, - struct device *dev, ioasid_t pasid, - u16 did, pgd_t *pgd, int flags, - struct iommu_domain *old); +int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, + ioasid_t pasid, u16 did, phys_addr_t fsptptr, + int flags, struct iommu_domain *old); int dmar_ir_support(void); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 489c2a47e69c..7a64a55fb588 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -353,14 +353,15 @@ static void intel_pasid_flush_present(struct intel_iommu *iommu, */ static void pasid_pte_config_first_level(struct intel_iommu *iommu, struct pasid_entry *pte, - pgd_t *pgd, u16 did, int flags) + phys_addr_t fsptptr, u16 did, + int flags) { lockdep_assert_held(&iommu->lock); pasid_clear_entry(pte); /* Setup the first level page table pointer: */ - pasid_set_flptr(pte, (u64)__pa(pgd)); + pasid_set_flptr(pte, fsptptr); if (flags & PASID_FLAG_FL5LP) pasid_set_flpm(pte, 1); @@ -377,9 +378,9 @@ static 
void pasid_pte_config_first_level(struct intel_iommu *iommu, pasid_set_present(pte); } -int intel_pasid_setup_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, int flags) +int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, + phys_addr_t fsptptr, u32 pasid, u16 did, + int flags) { struct pasid_entry *pte; @@ -407,7 +408,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EBUSY; } - pasid_pte_config_first_level(iommu, pte, pgd, did, flags); + pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags); spin_unlock(&iommu->lock); @@ -417,7 +418,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } int intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, + struct device *dev, phys_addr_t fsptptr, u32 pasid, u16 did, u16 old_did, int flags) { @@ -435,7 +436,7 @@ int intel_pasid_replace_first_level(struct intel_iommu *iommu, return -EINVAL; } - pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags); + pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags); spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 28902b51a68f..637373995be8 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -302,9 +302,9 @@ extern unsigned int intel_pasid_max_id; int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); -int intel_pasid_setup_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, int flags); +int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, + phys_addr_t fsptptr, u32 pasid, u16 did, + int flags); int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); @@ 
-316,9 +316,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, u32 pasid, struct dmar_domain *domain); int intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, u16 old_did, - int flags); + struct device *dev, phys_addr_t fsptptr, + u32 pasid, u16 did, u16 old_did, int flags); int intel_pasid_replace_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u16 old_did, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index f3da596410b5..8c0bed36c587 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -171,7 +171,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = __domain_setup_first_level(iommu, dev, pasid, - FLPT_DEFAULT_DID, mm->pgd, + FLPT_DEFAULT_DID, __pa(mm->pgd), sflags, old); if (ret) goto out_unwind_iopf; -- Gitee From 65d8b0cd7ee2982a4b6239de36fab930bb7bab45 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 22 Jan 2026 09:48:56 +0800 Subject: [PATCH 19/19] iommu/vt-d: Fix race condition during PASID entry replacement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #32087 commit c3b1edea3791fa91ab7032faa90355913ad9451b upstream. The Intel VT-d PASID table entry is 512 bits (64 bytes). When replacing an active PASID entry (e.g., during domain replacement), the current implementation calculates a new entry on the stack and copies it to the table using a single structure assignment. 
struct pasid_entry *pte, new_pte; pte = intel_pasid_get_entry(dev, pasid); pasid_pte_config_first_level(iommu, &new_pte, ...); *pte = new_pte; Because the hardware may fetch the 512-bit PASID entry in multiple 128-bit chunks, updating the entire entry while it is active (Present bit set) risks a "torn" read. In this scenario, the IOMMU hardware could observe an inconsistent state — partially new data and partially old data — leading to unpredictable behavior or spurious faults. Fix this by removing the unsafe "replace" helpers and following the "clear-then-update" flow, which ensures the Present bit is cleared and the required invalidation handshake is completed before the new configuration is applied. Fixes: 7543ee63e811 ("iommu/vt-d: Add pasid replace helpers") Signed-off-by: Lu Baolu Reviewed-by: Samiullah Khawaja Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260120061816.2132558-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Guixin Liu --- drivers/iommu/intel/iommu.c | 29 +++--- drivers/iommu/intel/nested.c | 9 +- drivers/iommu/intel/pasid.c | 190 ----------------------------------- drivers/iommu/intel/pasid.h | 14 --- 4 files changed, 16 insertions(+), 226 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8c0edcfdfddf..966bfd10cf86 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1746,12 +1746,10 @@ int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, ioasid_t pasid, u16 did, phys_addr_t fsptptr, int flags, struct iommu_domain *old) { - if (!old) - return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, - did, flags); - return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did, - iommu_domain_did(old, iommu), - flags); + if (old) + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + + return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags); } static int domain_setup_second_level(struct 
intel_iommu *iommu, @@ -1759,23 +1757,20 @@ static int domain_setup_second_level(struct intel_iommu *iommu, struct device *dev, ioasid_t pasid, struct iommu_domain *old) { - if (!old) - return intel_pasid_setup_second_level(iommu, domain, - dev, pasid); - return intel_pasid_replace_second_level(iommu, domain, dev, - iommu_domain_did(old, iommu), - pasid); + if (old) + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + + return intel_pasid_setup_second_level(iommu, domain, dev, pasid); } static int domain_setup_passthrough(struct intel_iommu *iommu, struct device *dev, ioasid_t pasid, struct iommu_domain *old) { - if (!old) - return intel_pasid_setup_pass_through(iommu, dev, pasid); - return intel_pasid_replace_pass_through(iommu, dev, - iommu_domain_did(old, iommu), - pasid); + if (old) + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + + return intel_pasid_setup_pass_through(iommu, dev, pasid); } static int domain_setup_first_level(struct intel_iommu *iommu, diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index d6795cc6154b..518b76d59550 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -141,11 +141,10 @@ static int domain_setup_nested(struct intel_iommu *iommu, struct device *dev, ioasid_t pasid, struct iommu_domain *old) { - if (!old) - return intel_pasid_setup_nested(iommu, dev, pasid, domain); - return intel_pasid_replace_nested(iommu, dev, pasid, - iommu_domain_did(old, iommu), - domain); + if (old) + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + + return intel_pasid_setup_nested(iommu, dev, pasid, domain); } static int intel_nested_set_dev_pasid(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 7a64a55fb588..787897e61efa 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -417,50 +417,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, return 0; } -int 
intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, phys_addr_t fsptptr, - u32 pasid, u16 did, u16 old_did, - int flags) -{ - struct pasid_entry *pte, new_pte; - - if (!ecap_flts(iommu->ecap)) { - pr_err("No first level translation support on %s\n", - iommu->name); - return -EINVAL; - } - - if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) { - pr_err("No 5-level paging support for first-level on %s\n", - iommu->name); - return -EINVAL; - } - - pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags); - - spin_lock(&iommu->lock); - pte = intel_pasid_get_entry(dev, pasid); - if (!pte) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - - if (!pasid_pte_is_present(pte)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - WARN_ON(old_did != pasid_get_domain_id(pte)); - - *pte = new_pte; - spin_unlock(&iommu->lock); - - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); - intel_iommu_drain_pasid_prq(dev, pasid); - - return 0; -} - /* * Set up the scalable mode pasid entry for second only translation type. */ @@ -528,57 +484,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, return 0; } -int intel_pasid_replace_second_level(struct intel_iommu *iommu, - struct dmar_domain *domain, - struct device *dev, u16 old_did, - u32 pasid) -{ - struct pasid_entry *pte, new_pte; - struct dma_pte *pgd; - u64 pgd_val; - u16 did; - - /* - * If hardware advertises no support for second level - * translation, return directly. 
- */ - if (!ecap_slts(iommu->ecap)) { - pr_err("No second level translation support on %s\n", - iommu->name); - return -EINVAL; - } - - pgd = domain->pgd; - pgd_val = virt_to_phys(pgd); - did = domain_id_iommu(domain, iommu); - - pasid_pte_config_second_level(iommu, &new_pte, pgd_val, - domain->agaw, did, - domain->dirty_tracking); - - spin_lock(&iommu->lock); - pte = intel_pasid_get_entry(dev, pasid); - if (!pte) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - - if (!pasid_pte_is_present(pte)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - WARN_ON(old_did != pasid_get_domain_id(pte)); - - *pte = new_pte; - spin_unlock(&iommu->lock); - - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); - intel_iommu_drain_pasid_prq(dev, pasid); - - return 0; -} - /* * Set up dirty tracking on a second only or nested translation type. */ @@ -691,38 +596,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } -int intel_pasid_replace_pass_through(struct intel_iommu *iommu, - struct device *dev, u16 old_did, - u32 pasid) -{ - struct pasid_entry *pte, new_pte; - u16 did = FLPT_DEFAULT_DID; - - pasid_pte_config_pass_through(iommu, &new_pte, did); - - spin_lock(&iommu->lock); - pte = intel_pasid_get_entry(dev, pasid); - if (!pte) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - - if (!pasid_pte_is_present(pte)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - WARN_ON(old_did != pasid_get_domain_id(pte)); - - *pte = new_pte; - spin_unlock(&iommu->lock); - - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); - intel_iommu_drain_pasid_prq(dev, pasid); - - return 0; -} - /* * Set the page snoop control for a pasid entry which has been set up. 
*/ @@ -853,69 +726,6 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, return 0; } -int intel_pasid_replace_nested(struct intel_iommu *iommu, - struct device *dev, u32 pasid, - u16 old_did, struct dmar_domain *domain) -{ - struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg; - struct dmar_domain *s2_domain = domain->s2_domain; - u16 did = domain_id_iommu(domain, iommu); - struct pasid_entry *pte, new_pte; - - /* Address width should match the address width supported by hardware */ - switch (s1_cfg->addr_width) { - case ADDR_WIDTH_4LEVEL: - break; - case ADDR_WIDTH_5LEVEL: - if (!cap_fl5lp_support(iommu->cap)) { - dev_err_ratelimited(dev, - "5-level paging not supported\n"); - return -EINVAL; - } - break; - default: - dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n", - s1_cfg->addr_width); - return -EINVAL; - } - - if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) { - pr_err_ratelimited("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } - - if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) { - pr_err_ratelimited("No extended access flag support on %s\n", - iommu->name); - return -EINVAL; - } - - pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did); - - spin_lock(&iommu->lock); - pte = intel_pasid_get_entry(dev, pasid); - if (!pte) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - - if (!pasid_pte_is_present(pte)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - WARN_ON(old_did != pasid_get_domain_id(pte)); - - *pte = new_pte; - spin_unlock(&iommu->lock); - - intel_pasid_flush_present(iommu, dev, pasid, old_did, pte); - intel_iommu_drain_pasid_prq(dev, pasid); - - return 0; -} - /* * Interfaces to setup or teardown a pasid table to the scalable-mode * context table entry: diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 637373995be8..0296ca55ea5e 100644 --- a/drivers/iommu/intel/pasid.h +++ 
b/drivers/iommu/intel/pasid.h @@ -315,20 +315,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct device *dev, u32 pasid); int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, u32 pasid, struct dmar_domain *domain); -int intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, phys_addr_t fsptptr, - u32 pasid, u16 did, u16 old_did, int flags); -int intel_pasid_replace_second_level(struct intel_iommu *iommu, - struct dmar_domain *domain, - struct device *dev, u16 old_did, - u32 pasid); -int intel_pasid_replace_pass_through(struct intel_iommu *iommu, - struct device *dev, u16 old_did, - u32 pasid); -int intel_pasid_replace_nested(struct intel_iommu *iommu, - struct device *dev, u32 pasid, - u16 old_did, struct dmar_domain *domain); - void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore); -- Gitee