this series of patches contains bugfixes for the Nested SVM code and the
conversion of Nested SVM debugging to tracepoints. The fixes are:
1) A patch Alex already sent (1/10) but which was not yet
applied. It fixes a lost event_inj problem when we emulate
a vmrun and a vmexit without entering the guest in
the meantime.
2) The patches 2/10 and 3/10 fix a schedule() while atomic
bug in the Nested SVM code. The KVM interrupt injection code
runs with preemption and interrupts disabled. But the
enable_irq_window() function from SVM may emulate a #vmexit.
This emulation might sleep which causes the schedule() while
atomic() bug.
These fixes (patches 1 to 3) should also be considered for -stable
backporting.
The patches 4 to 9 convert the old printk based debugging for Nested
SVM to tracepoints. Patch 10 removes the nsvm_printk code. Please review
and/or consider to apply these changes.
Thanks,
Joerg
diffstat:
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm.c | 98 +++++++++++++++---------
arch/x86/kvm/trace.h | 165 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 23 ++++++
include/linux/kvm_host.h | 1 +
5 files changed, 252 insertions(+), 36 deletions(-)
shortlog:
Alexander Graf (1):
KVM: SVM: Notify nested hypervisor of lost event injections
Joerg Roedel (9):
KVM: X86: Add KVM_REQ_VMEXIT to trigger a nested #vmexit
KVM: SVM: Move nested INTR #vmexit into preemtible code
KVM: SVM: Add tracepoint for nested vmrun
KVM: SVM: Add tracepoint for nested #vmexit
KVM: SVM: Add tracepoint for injected #vmexit
KVM: SVM: Add tracepoint for #vmexit because intr pending
KVM: SVM: Add tracepoint for invlpga instruction
KVM: SVM: Add tracepoint for skinit instruction
KVM: SVM: Remove nsvm_printk debugging code
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
If event_inj is valid on a #vmexit the host CPU would write
the contents to exit_int_info, so the hypervisor knows that
the event wasn't injected.
We don't do this in nested SVM by now which is a bug and
fixed by this patch.
Signed-off-by: Alexander Graf <ag...@suse.de>
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 279a2ae..b6ce1a9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
+
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
+
nested_vmcb->control.tlb_ctl = 0;
nested_vmcb->control.event_inj = 0;
nested_vmcb->control.event_inj_err = 0;
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 18 ++++++++++++++++--
1 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b6ce1a9..7015680 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1379,8 +1379,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- if (nested_svm_exit_handled(svm)) {
- nsvm_printk("VMexit -> INTR\n");
+ if (svm->nested.intercept & 1ULL) {
+ /*
+ * The #vmexit can't be emulated here directly because this
+ * code path runs with irqs and preemption disabled and a
+ * #vmexit emulation might sleep. Only set the request bit for
+ * the #vmexit here.
+ */
+ set_bit(KVM_REQ_VMEXIT, &svm->vcpu.requests);
return 1;
}
@@ -2859,6 +2865,13 @@ static bool svm_gb_page_enable(void)
return true;
}
+static void svm_emulate_vmexit(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ nested_svm_vmexit(svm);
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -2923,6 +2936,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.exit_reasons_str = svm_exit_reasons_str,
.gb_page_enable = svm_gb_page_enable,
+ .emulate_vmexit = svm_emulate_vmexit,
};
static int __init svm_init(void)
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 36 ++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8de84be..e759732 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2355,6 +2355,12 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (is_nested(svm)) {
int vmexit;
+ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2,
+ svm->vmcb->control.exit_int_info,
+ svm->vmcb->control.exit_int_info_err);
+
nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
exit_code, svm->vmcb->control.exit_info_1,
svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index d63272c..a0b89c3 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -382,6 +382,42 @@ TRACE_EVENT(kvm_nested_vmrun,
__entry->npt ? "on" : "off")
);
+/*
+ * Tracepoint for #VMEXIT while nested
+ */
+TRACE_EVENT(kvm_nested_vmexit,
+ TP_PROTO(__u64 rip, __u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(rip, exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+ TP_printk("rip=0x%016llx reason=%s ext_inf1=0x%016llx "
+ "ext_inf2=0x%016llx ext_int=0x%08x ext_int_err=0x%08x\n",
+ __entry->rip,
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b51a824..416282e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4998,3 +4998,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
--
1.6.4.3
What if you keep this internal to SVM? Proceed to svm_vcpu_run and
return, do the emulation on the exit handler.
Then there's no need for the request bit (VMX does that, see
vmx_vcpu_run).
Yeah, right. This would be cleaner code. I will change it.
Joerg
this series of patches contains bugfixes for the Nested SVM code and the
conversion of Nested SVM debugging to tracepoints. The fixes are:
1) A patch Alex already sent (1/9) but which was not yet
applied. It fixes a lost event_inj problem when we emulate
a vmrun and a vmexit without entering the guest in
the meantime.
2) The patch 2/9 fixes a schedule() while atomic bug in the
Nested SVM code. The KVM interrupt injection code runs
with preemption and interrupts disabled. But the
enable_irq_window() function from SVM may emulate a
#vmexit. This emulation might sleep which causes the
schedule() while atomic() bug.
These fixes (patches 1 and 2) should also be considered for -stable
backporting.
The patches 3 to 8 convert the old printk based debugging for Nested SVM
to tracepoints. Patch 9 removes the nsvm_printk code. Please review
and/or consider to apply these changes.
Thanks,
Joerg
Changes to v1:
* Fixed typo on comment in patch
"KVM: SVM: Notify nested hypervisor of lost event injections"
* Made the fix for the schedule()-while-atomic bug out of the generic
code. It touches only SVM code now.
diffstat:
arch/x86/kvm/svm.c | 107 +++++++++++++++++++++-----------
arch/x86/kvm/trace.h | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 6 ++
3 files changed, 242 insertions(+), 36 deletions(-)
shortlog:
Alexander Graf (1):
KVM: SVM: Notify nested hypervisor of lost event injections
Joerg Roedel (8):
KVM: SVM: Move INTR vmexit out of atomic code
KVM: SVM: Add tracepoint for nested vmrun
KVM: SVM: Add tracepoint for nested #vmexit
KVM: SVM: Add tracepoint for injected #vmexit
KVM: SVM: Add tracepoint for #vmexit because intr pending
KVM: SVM: Add tracepoint for invlpga instruction
KVM: SVM: Add tracepoint for skinit instruction
KVM: SVM: Remove nsvm_printk debugging code
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 884bffc..907af3f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1726,6 +1726,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
/* nested_vmcb is our indicator if nested SVM is activated */
svm->nested.vmcb = svm->vmcb->save.rax;
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
+
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0d480e7..d63272c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -349,6 +349,39 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+/*
+ * Tracepoint for nested VMRUN
+ */
+TRACE_EVENT(kvm_nested_vmrun,
+ TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
+ __u32 event_inj, bool npt),
+ TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u64, vmcb )
+ __field( __u64, nested_rip )
+ __field( __u32, int_ctl )
+ __field( __u32, event_inj )
+ __field( bool, npt )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->vmcb = vmcb;
+ __entry->nested_rip = nested_rip;
+ __entry->int_ctl = int_ctl;
+ __entry->event_inj = event_inj;
+ __entry->npt = npt;
+ ),
+
+ TP_printk("rip=0x%016llx vmcb=0x%016llx nrip=0x%016llx int_ctl=0x%08x "
+ "event_inj=0x%08x npt=%s\n",
+ __entry->rip, __entry->vmcb, __entry->nested_rip,
+ __entry->int_ctl, __entry->event_inj,
+ __entry->npt ? "on" : "off")
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 11a6f2f..f1e44e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4980,3 +4980,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++-
1 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e372854..884bffc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -85,6 +85,9 @@ struct nested_state {
/* gpa pointers to the real vectors */
u64 vmcb_msrpm;
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -1379,7 +1382,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- if (nested_svm_exit_handled(svm)) {
+ if (svm->nested.intercept & 1ULL) {
+ /*
+ * The #vmexit can't be emulated here directly because this
+ * code path runs with irqs and preemption disabled. A
+ * #vmexit emulation might sleep. Only signal request for
+ * the #vmexit here.
+ */
+ svm->nested.exit_required = true;
nsvm_printk("VMexit -> INTR\n");
return 1;
}
@@ -2340,6 +2350,13 @@ static int handle_exit(struct kvm_vcpu *vcpu)
trace_kvm_exit(exit_code, svm->vmcb->save.rip);
+ if (unlikely(svm->nested.exit_required)) {
+ nested_svm_vmexit(svm);
+ svm->nested.exit_required = false;
+
+ return 1;
+ }
+
if (is_nested(svm)) {
int vmexit;
@@ -2615,6 +2632,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
u16 gs_selector;
u16 ldt_selector;
+ /*
+ * A vmexit emulation is required before the vcpu can be executed
+ * again.
+ */
+ if (unlikely(svm->nested.exit_required))
+ return;
+
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
--
1.6.4.3
It's better to pass only 'svm' as argument and have the tracepoint code
derive everything else, since (I think) argument setup is done
unconditionally, and only the actual trace_kvm call is patched out. It
may not work out due to where the trace code is compiled, but it's worth
trying.
--
error compiling committee.c: too many arguments to function
Hmm, struct vcpu_svm is defined in svm.c and local to that file. It is
not known in x86.c, where the tracepoints are compiled, or in svm.c
where trace.h is included. Is it worth moving the definition of
vcpu_svm into a (x86-)global header just for this tracepoint?
Joerg
I was talking about all svm tracepoints, but no, it isn't worth it.
Let's leave it till later.
--
error compiling committee.c: too many arguments to function
--
this series of patches contains bugfixes for the Nested SVM code and the
conversion of Nested SVM debugging to tracepoints. The fixes are:
1) A patch Alex already sent (1/9) but which was not yet
applied. It fixes a lost event_inj problem when we emulate a
vmrun and a vmexit without entering the guest in the
meantime.
2) The patch 2/9 fixes a schedule() while atomic bug in the
Nested SVM code. The KVM interrupt injection code runs with
preemption and interrupts disabled. But the
enable_irq_window() function from SVM may emulate a #vmexit.
This emulation might sleep which causes the schedule() while
atomic() bug.
These fixes (patches 1 and 2) should also be considered for -stable
backporting. The patches 3 to 8 convert the old printk based debugging
for Nested SVM to tracepoints. Patch 9 removes the nsvm_printk code.
Please review and/or consider to apply these changes.
Thanks,
Joerg
Changes to v2:
* Fixed typo in trace_printk message for invlpga
* Converted "key=value" strings into "key: value"
Changes to v1:
* Fixed typo on comment in patch
"KVM: SVM: Notify nested hypervisor of lost event injections"
* Made the fix for the schedule()-while-atomic bug out of the generic
code. It touches only SVM code now.
Diffstat:
arch/x86/kvm/svm.c | 107 +++++++++++++++++++++-----------
arch/x86/kvm/trace.h | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 6 ++
3 files changed, 242 insertions(+), 36 deletions(-)
Shortlog:
Alexander Graf (1):
KVM: SVM: Notify nested hypervisor of lost event injections
Joerg Roedel (8):
KVM: SVM: Move INTR vmexit out of atomic code
KVM: SVM: Add tracepoint for nested vmrun
KVM: SVM: Add tracepoint for nested #vmexit
KVM: SVM: Add tracepoint for injected #vmexit
KVM: SVM: Add tracepoint for #vmexit because intr pending
KVM: SVM: Add tracepoint for invlpga instruction
KVM: SVM: Add tracepoint for skinit instruction
KVM: SVM: Remove nsvm_printk debugging code
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/trace.h | 18 ++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 369eeb8..78a391c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1390,7 +1390,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
* the #vmexit here.
*/
svm->nested.exit_required = true;
- nsvm_printk("VMexit -> INTR\n");
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
return 1;
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4d6bb5e..3cc8f44 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -451,6 +451,24 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_nested_intr_vmexit,
+ TP_PROTO(__u64 rip),
+ TP_ARGS(rip),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip
+ ),
+
+ TP_printk("rip: 0x%016llx\n", __entry->rip)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f90d45..877f910 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4983,3 +4983,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 3 +++
arch/x86/kvm/trace.h | 23 +++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 78a391c..ba18fb7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1976,6 +1976,9 @@ static int invlpga_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
nsvm_printk("INVLPGA\n");
+ trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ vcpu->arch.regs[VCPU_REGS_RAX]);
+
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 3cc8f44..7e1f08e 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -469,6 +469,29 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip: 0x%016llx\n", __entry->rip)
);
+
+/*
+ * Tracepoint for the invlpga instruction
+ */
+TRACE_EVENT(kvm_invlpga,
+ TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_ARGS(rip, asid, address),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( int, asid )
+ __field( __u64, address )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->asid = asid;
+ __entry->address = address;
+ ),
+
+ TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
+ __entry->rip, __entry->asid, __entry->address)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 877f910..1153d92 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4984,3 +4984,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 36 ++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 907af3f..edf6e8b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2366,6 +2366,12 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (is_nested(svm)) {
int vmexit;
+ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2,
+ svm->vmcb->control.exit_int_info,
+ svm->vmcb->control.exit_int_info_err);
+
nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
exit_code, svm->vmcb->control.exit_info_1,
svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b5798e1..a7eb629 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -382,6 +382,42 @@ TRACE_EVENT(kvm_nested_vmrun,
__entry->npt ? "on" : "off")
);
+/*
+ * Tracepoint for #VMEXIT while nested
+ */
+TRACE_EVENT(kvm_nested_vmexit,
+ TP_PROTO(__u64 rip, __u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(rip, exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+ TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
+ "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+ __entry->rip,
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1e44e9..00c8b60 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4981,3 +4981,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++-
1 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e372854..884bffc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -85,6 +85,9 @@ struct nested_state {
/* gpa pointers to the real vectors */
u64 vmcb_msrpm;
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -1379,7 +1382,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- if (nested_svm_exit_handled(svm)) {
+ if (svm->nested.intercept & 1ULL) {
+ /*
+ * The #vmexit can't be emulated here directly because this
+ * code path runs with irqs and preemption disabled. A
+ * #vmexit emulation might sleep. Only signal request for
+ * the #vmexit here.
+ */
+ svm->nested.exit_required = true;
nsvm_printk("VMexit -> INTR\n");
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 34 ----------------------------------
1 files changed, 0 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8b9f6fb..69610c5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -53,15 +53,6 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -1540,14 +1531,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
if (svm->nested.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
if (vmexit == NESTED_EXIT_DONE) {
- nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
nested_svm_vmexit(svm);
}
@@ -1658,10 +1647,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
-
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1826,25 +1811,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
force_new_asid(&svm->vcpu);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -1913,8 +1887,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
static int vmrun_interception(struct vcpu_svm *svm)
{
- nsvm_printk("VMrun\n");
-
if (nested_svm_check_permissions(svm))
return 1;
@@ -1974,7 +1946,6 @@ static int clgi_interception(struct vcpu_svm *svm)
static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- nsvm_printk("INVLPGA\n");
trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -2389,10 +2360,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm->vmcb->control.exit_int_info,
svm->vmcb->control.exit_int_info_err);
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
-
vmexit = nested_svm_exit_special(svm);
if (vmexit == NESTED_EXIT_CONTINUE)
@@ -2539,7 +2506,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nsvm_printk("Trying to open IRQ window\n");
nested_svm_intr(svm);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index edf6e8b..369eeb8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1592,6 +1592,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
+
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
if (!nested_vmcb)
return 1;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a7eb629..4d6bb5e 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -418,6 +418,39 @@ TRACE_EVENT(kvm_nested_vmexit,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+/*
+ * Tracepoint for #VMEXIT reinjected to the guest
+ */
+TRACE_EVENT(kvm_nested_vmexit_inject,
+ TP_PROTO(__u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+
+ TP_printk("reason: %s ext_inf1: 0x%016llx "
+ "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c8b60..4f90d45 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4982,3 +4982,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
--
1.6.4.3
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 884bffc..907af3f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1726,6 +1726,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
/* nested_vmcb is our indicator if nested SVM is activated */
svm->nested.vmcb = svm->vmcb->save.rax;
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
+
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0d480e7..b5798e1 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -349,6 +349,39 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+/*
+ * Tracepoint for nested VMRUN
+ */
+TRACE_EVENT(kvm_nested_vmrun,
+ TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
+ __u32 event_inj, bool npt),
+ TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u64, vmcb )
+ __field( __u64, nested_rip )
+ __field( __u32, int_ctl )
+ __field( __u32, event_inj )
+ __field( bool, npt )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->vmcb = vmcb;
+ __entry->nested_rip = nested_rip;
+ __entry->int_ctl = int_ctl;
+ __entry->event_inj = event_inj;
+ __entry->npt = npt;
+ ),
+
+ TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
+ "event_inj: 0x%08x npt: %s\n",
+ __entry->rip, __entry->vmcb, __entry->nested_rip,
+ __entry->int_ctl, __entry->event_inj,
+ __entry->npt ? "on" : "off")
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 11a6f2f..f1e44e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4980,3 +4980,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
--
1.6.4.3