+Vipin
I believe this was introduced by 6.4 commit 5982a5392663 ("KVM: x86/mmu:
Use kvm_ad_enabled() to determine if TDP MMU SPTEs need wrprot").
I see two options to fix:
1. Allow PML to be enabled when L2 is running with EPT is disabled.
2. Fix the TDP MMU logic to write-protect role.guest_mode=1 SPTEs.
(1.) sounds more complicated and will require more testing. (2.) is
quite simple since an entire TDP MMU tree is either guest_mode=0 or
guest_mode=1.
Example fix (fixes syzbot repro but otherwise untested):
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6ae19b4ee5b1..eb6fb8d9c00c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1498,6 +1498,24 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
}
}
+static inline u64 tdp_mmu_dirty_bit(struct kvm_mmu_page *root, bool force_wrprot)
+{
+ if (force_wrprot || kvm_mmu_page_ad_need_write_protect(root) || !kvm_ad_enabled())
+ return PT_WRITABLE_MASK;
+
+ return shadow_dirty_mask;
+}
+
+static inline bool tdp_mmu_dirty_bit_invalid_for_spte(struct tdp_iter *iter, u64 dbit)
+{
+ /*
+ * The decision of whether to clear the D-bit or W-bit is made based on
+ * the root, as all TDP MMU SPTEs within a root should require the same
+ * modifications. This check ensures that is actually the case.
+ */
+ return dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter->old_spte);
+}
+
/*
* Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
* AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
@@ -1508,7 +1526,7 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
- u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+ u64 dbit = tdp_mmu_dirty_bit(root, false);
struct tdp_iter iter;
bool spte_set = false;
@@ -1523,8 +1541,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(tdp_mmu_dirty_bit_invalid_for_spte(&iter, dbit));
if (!(iter.old_spte & dbit))
continue;
@@ -1570,8 +1587,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
- u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ u64 dbit = tdp_mmu_dirty_bit(root, wrprot);
struct tdp_iter iter;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1583,8 +1599,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(tdp_mmu_dirty_bit_invalid_for_spte(&iter, dbit));
if (iter.level > PG_LEVEL_4K ||
!(mask & (1UL << (iter.gfn - gfn))))