3 files changed, 35 insertions, 14 deletions
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 20ede17202bf..28406aa1136d 100644
@@ -183,7 +183,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
u64 gen = kvm_memslots(vcpu->kvm)->generation;
- if (unlikely(gen & 1))
+ if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cf761ff58224..5e1cb74922b3 100644
@@ -48,6 +48,27 @@
#define KVM_MEMSLOT_INVALID (1UL << 16)
+ * Bit 0 of the memslot generation number is an "update in-progress flag",
+ * e.g. is temporarily set for the duration of install_new_memslots().
+ * This flag effectively creates a unique generation number that is used to
+ * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
+ * i.e. may (or may not) have come from the previous memslots generation.
+ * This is necessary because the actual memslots update is not atomic with
+ * respect to the generation number update. Updating the generation number
+ * first would allow a vCPU to cache a spte from the old memslots using the
+ * new generation number, and updating the generation number after switching
+ * to the new memslots would allow cache hits using the old generation number
+ * to reference the defunct memslots.
+ * This mechanism is used to prevent getting hits in KVM's caches while a
+ * memslot update is in-progress, and to prevent cache hits *after* updating
+ * the actual generation number against accesses that were inserted into the
+ * cache *before* the memslots were updated.
+#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(0)
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS 2
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d54f6578a849..0f1f1c7c7a36 100644
@@ -874,30 +874,30 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
int as_id, struct kvm_memslots *slots)
struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
- u64 gen;
+ u64 gen = old_memslots->generation;
- * Set the low bit in the generation, which disables SPTE caching
- * until the end of synchronize_srcu_expedited.
- WARN_ON(old_memslots->generation & 1);
- slots->generation = old_memslots->generation + 1;
+ WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+ slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
- * Increment the new memslot generation a second time. This prevents
- * vm exits that race with memslot updates from caching a memslot
- * generation that will (potentially) be valid forever.
+ * Increment the new memslot generation a second time, dropping the
+ * update in-progress flag and incrementing then generation based on
+ * the number of address spaces. This provides a unique and easily
+ * identifiable generation number while the memslots are in flux.
+ gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
* Generations must be unique even across address spaces. We do not need
* a global counter for that, instead the generation space is evenly split
* across address spaces. For example, with two address spaces, address
- * space 0 will use generations 0, 4, 8, ... while * address space 1 will
+ * space 0 will use generations 0, 4, 8, ... while address space 1 will
* use generations 2, 6, 10, 14, ...
- gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1;
+ gen += KVM_ADDRESS_SPACE_NUM * 2;