Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 10 +++++++++-
arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 32 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 563ddd3..5082558 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1984,6 +1984,14 @@ static int invlpga_interception(struct vcpu_svm *svm)
return 1;
}
+static int skinit_interception(struct vcpu_svm *svm)
+{
+ trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm)
{
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
@@ -2347,7 +2355,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
- [SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a6dcd2d..7948b49 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -492,6 +492,28 @@ TRACE_EVENT(kvm_invlpga,
TP_printk("rip=0x%016llx asid=%d adress=0x%016llx\n",
__entry->rip, __entry->asid, __entry->address)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_skinit,
+ TP_PROTO(__u64 rip, __u32 slb),
+ TP_ARGS(rip, slb),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, slb )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->slb = slb;
+ ),
+
+ TP_printk("rip=0x%016llx slb=0x%08x\n",
+ __entry->rip, __entry->slb)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18284e1..8043d76 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5002,3 +5002,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
--
1.6.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 17 +++++++++++++++++
include/linux/kvm_host.h | 1 +
3 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 179a919..50e5aa4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -524,6 +524,7 @@ struct kvm_x86_ops {
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
bool (*gb_page_enable)(void);
+ void (*emulate_vmexit)(struct kvm_vcpu *vcpu);
const struct trace_print_flags *exit_reasons_str;
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 11a6f2f..97e1d9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3610,6 +3610,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
+ if (test_and_clear_bit(KVM_REQ_VMEXIT, &vcpu->requests))
+ kvm_x86_ops->emulate_vmexit(vcpu);
}
preempt_disable();
@@ -3638,6 +3640,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
+ /*
+ * With nested KVM the enable_irq_window() function may cause an
+ * #vmexit if the vcpu is running in guest mode. A #vmexit may sleep
+ * and can't be executed at this stage. So we use the request field to
+ * tell KVM that a #vmexit has to be done before we can enter the guest
+ * again. The code below checks for this request.
+ */
+ if (vcpu->requests) {
+ local_irq_enable();
+ preempt_enable();
+ r = 1;
+ goto out;
+ }
+
+
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b985a29..245463f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
#define KVM_REQ_MMU_SYNC 7
#define KVM_REQ_KVMCLOCK_UPDATE 8
#define KVM_REQ_KICK 9
+#define KVM_REQ_VMEXIT 10
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 1 +
arch/x86/kvm/trace.h | 18 ++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4de75e3..a03dba1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1387,6 +1387,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
* the #vmexit here.
*/
set_bit(KVM_REQ_VMEXIT, &svm->vcpu.requests);
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
return 1;
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a00b235..20248a1 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -451,6 +451,24 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_nested_intr_vmexit,
+ TP_PROTO(__u64 rip),
+ TP_ARGS(rip),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip
+ ),
+
+ TP_printk("rip=0x%016llx\n", __entry->rip)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c7ea2d8..e0a8517 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5000,3 +5000,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 34 ----------------------------------
1 files changed, 0 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5082558..acdb8d8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -53,15 +53,6 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -1537,14 +1528,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
if (svm->nested.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
if (vmexit == NESTED_EXIT_DONE) {
- nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
nested_svm_vmexit(svm);
}
@@ -1655,10 +1644,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
-
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1823,25 +1808,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
force_new_asid(&svm->vcpu);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -1910,8 +1884,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
static int vmrun_interception(struct vcpu_svm *svm)
{
- nsvm_printk("VMrun\n");
-
if (nested_svm_check_permissions(svm))
return 1;
@@ -1971,7 +1943,6 @@ static int clgi_interception(struct vcpu_svm *svm)
static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- nsvm_printk("INVLPGA\n");
trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -2379,10 +2350,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm->vmcb->control.exit_int_info,
svm->vmcb->control.exit_int_info_err);
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
-
vmexit = nested_svm_exit_special(svm);
if (vmexit == NESTED_EXIT_CONTINUE)
@@ -2529,7 +2496,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nsvm_printk("Trying to open IRQ window\n");
nested_svm_intr(svm);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e759732..4de75e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1588,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
+
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
if (!nested_vmcb)
return 1;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a0b89c3..a00b235 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -418,6 +418,39 @@ TRACE_EVENT(kvm_nested_vmexit,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+/*
+ * Tracepoint for #VMEXIT reinjected to the guest
+ */
+TRACE_EVENT(kvm_nested_vmexit_inject,
+ TP_PROTO(__u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+
+ TP_printk("reason=%s ext_inf1=0x%016llx "
+ "ext_inf2=0x%016llx ext_int=0x%08x ext_int_err=0x%08x\n",
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 416282e..c7ea2d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4999,3 +4999,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 3 +++
arch/x86/kvm/trace.h | 23 +++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a03dba1..563ddd3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1973,6 +1973,9 @@ static int invlpga_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
nsvm_printk("INVLPGA\n");
+ trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ vcpu->arch.regs[VCPU_REGS_RAX]);
+
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 20248a1..a6dcd2d 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -469,6 +469,29 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip=0x%016llx\n", __entry->rip)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_invlpga,
+ TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_ARGS(rip, asid, address),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( int, asid )
+ __field( __u64, address )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->asid = asid;
+ __entry->address = address;
+ ),
+
+ TP_printk("rip=0x%016llx asid=%d adress=0x%016llx\n",
+ __entry->rip, __entry->asid, __entry->address)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e0a8517..18284e1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5001,3 +5001,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7015680..8de84be 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1722,6 +1722,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
/* nested_vmcb is our indicator if nested SVM is activated */
svm->nested.vmcb = svm->vmcb->save.rax;
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
+
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0d480e7..d63272c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -349,6 +349,39 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+/*
+ * Tracepoint for nested VMRUN
+ */
+TRACE_EVENT(kvm_nested_vmrun,
+ TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
+ __u32 event_inj, bool npt),
+ TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u64, vmcb )
+ __field( __u64, nested_rip )
+ __field( __u32, int_ctl )
+ __field( __u32, event_inj )
+ __field( bool, npt )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->vmcb = vmcb;
+ __entry->nested_rip = nested_rip;
+ __entry->int_ctl = int_ctl;
+ __entry->event_inj = event_inj;
+ __entry->npt = npt;
+ ),
+
+ TP_printk("rip=0x%016llx vmcb=0x%016llx nrip=0x%016llx int_ctl=0x%08x "
+ "event_inj=0x%08x npt=%s\n",
+ __entry->rip, __entry->vmcb, __entry->nested_rip,
+ __entry->int_ctl, __entry->event_inj,
+ __entry->npt ? "on" : "off")
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 97e1d9d..b51a824 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4997,3 +4997,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/trace.h | 18 ++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 369eeb8..78a391c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1390,7 +1390,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
* the #vmexit here.
*/
svm->nested.exit_required = true;
- nsvm_printk("VMexit -> INTR\n");
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
return 1;
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a00b235..20248a1 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -451,6 +451,24 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
__entry->exit_info1, __entry->exit_info2,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_nested_intr_vmexit,
+ TP_PROTO(__u64 rip),
+ TP_ARGS(rip),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip
+ ),
+
+ TP_printk("rip=0x%016llx\n", __entry->rip)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f90d45..877f910 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4983,3 +4983,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 34 ----------------------------------
1 files changed, 0 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8b9f6fb..69610c5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -53,15 +53,6 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -1540,14 +1531,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
if (svm->nested.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
if (vmexit == NESTED_EXIT_DONE) {
- nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
nested_svm_vmexit(svm);
}
@@ -1658,10 +1647,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
-
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1826,25 +1811,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
force_new_asid(&svm->vcpu);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -1913,8 +1887,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
static int vmrun_interception(struct vcpu_svm *svm)
{
- nsvm_printk("VMrun\n");
-
if (nested_svm_check_permissions(svm))
return 1;
@@ -1974,7 +1946,6 @@ static int clgi_interception(struct vcpu_svm *svm)
static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- nsvm_printk("INVLPGA\n");
trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -2389,10 +2360,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm->vmcb->control.exit_int_info,
svm->vmcb->control.exit_int_info_err);
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
-
vmexit = nested_svm_exit_special(svm);
if (vmexit == NESTED_EXIT_CONTINUE)
@@ -2539,7 +2506,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nsvm_printk("Trying to open IRQ window\n");
nested_svm_intr(svm);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 10 +++++++++-
arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 32 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ba18fb7..8b9f6fb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1987,6 +1987,14 @@ static int invlpga_interception(struct vcpu_svm *svm)
return 1;
}
+static int skinit_interception(struct vcpu_svm *svm)
+{
+ trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm)
{
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
@@ -2350,7 +2358,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
- [SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a6dcd2d..7948b49 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -492,6 +492,28 @@ TRACE_EVENT(kvm_invlpga,
TP_printk("rip=0x%016llx asid=%d adress=0x%016llx\n",
__entry->rip, __entry->asid, __entry->address)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_skinit,
+ TP_PROTO(__u64 rip, __u32 slb),
+ TP_ARGS(rip, slb),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, slb )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->slb = slb;
+ ),
+
+ TP_printk("rip=0x%016llx slb=0x%08x\n",
+ __entry->rip, __entry->slb)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1153d92..ed4622c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4985,3 +4985,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 3 +++
arch/x86/kvm/trace.h | 23 +++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 78a391c..ba18fb7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1976,6 +1976,9 @@ static int invlpga_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
nsvm_printk("INVLPGA\n");
+ trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ vcpu->arch.regs[VCPU_REGS_RAX]);
+
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 20248a1..a6dcd2d 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -469,6 +469,29 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
TP_printk("rip=0x%016llx\n", __entry->rip)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_invlpga,
+ TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_ARGS(rip, asid, address),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( int, asid )
+ __field( __u64, address )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->asid = asid;
+ __entry->address = address;
+ ),
+
+ TP_printk("rip=0x%016llx asid=%d adress=0x%016llx\n",
+ __entry->rip, __entry->asid, __entry->address)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 877f910..1153d92 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4984,3 +4984,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 36 ++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 907af3f..edf6e8b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2366,6 +2366,12 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (is_nested(svm)) {
int vmexit;
+ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2,
+ svm->vmcb->control.exit_int_info,
+ svm->vmcb->control.exit_int_info_err);
+
nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
exit_code, svm->vmcb->control.exit_info_1,
svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index d63272c..a0b89c3 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -382,6 +382,42 @@ TRACE_EVENT(kvm_nested_vmrun,
__entry->npt ? "on" : "off")
);
+/*
+ * Tracepoint for #VMEXIT while nested
+ */
+TRACE_EVENT(kvm_nested_vmexit,
+ TP_PROTO(__u64 rip, __u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(rip, exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+ TP_printk("rip=0x%016llx reason=%s ext_inf1=0x%016llx "
+ "ext_inf2=0x%016llx ext_int=0x%08x ext_int_err=0x%08x\n",
+ __entry->rip,
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1e44e9..00c8b60 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4981,3 +4981,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
If event_inj is valid on a #vmexit the host CPU would write
the contents to exit_int_info, so the hypervisor knows that
the event wasn't injected.
We don't do this in nested SVM by now which is a bug and
fixed by this patch.
Signed-off-by: Alexander Graf <ag...@suse.de>
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 279a2ae..e372854 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
+
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
+
nested_vmcb->control.tlb_ctl = 0;
nested_vmcb->control.event_inj = 0;
nested_vmcb->control.event_inj_err = 0;
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 6 ++++++
arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index edf6e8b..369eeb8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1592,6 +1592,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
+
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
if (!nested_vmcb)
return 1;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a0b89c3..a00b235 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -418,6 +418,39 @@ TRACE_EVENT(kvm_nested_vmexit,
__entry->exit_int_info, __entry->exit_int_info_err)
);
+/*
+ * Tracepoint for #VMEXIT reinjected to the guest
+ */
+TRACE_EVENT(kvm_nested_vmexit_inject,
+ TP_PROTO(__u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+
+ TP_printk("reason=%s ext_inf1=0x%016llx "
+ "ext_inf2=0x%016llx ext_int=0x%08x ext_int_err=0x%08x\n",
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c8b60..4f90d45 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4982,3 +4982,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
s/adress/address/.
Also, kvm tracepoints don't use '=' in TP_printk(), please keep it
consistent.
Yeah, I know, but I don't have any real comments to make.
--
error compiling committee.c: too many arguments to function
We need to start thinking about regression tests for these bugs. It
would be relatively easy to set up something with save->cr3 == cr3 (i.e.
no isolation, mmu virtualization, etc.).
--
error compiling committee.c: too many arguments to function
--
I had it with "key: value" formating first but decided to do it this way
because it simplifies automatic parsing of these trace events. With this
format a script can first split by spaces and get the key-value pairs by
splitting on the equal sign. This is also more robust against changes in
the format.
Joerg
Automatic parsing should use the binary transport (which simply has the
TP_struct things).
--
error compiling committee.c: too many arguments to function
--
Isn't it sufficient, for this case, to inject a nested interrupt when
the nested idt is not mapped?
Should be doable with a in-kernel regression test-suite module, I think.
Triggering such (race-condition like) test cases from userspace is
somewhat hard.
Joerg
Ok, in this case I'll change it.
Joerg
(from L1 to L0?)
Wow. Alex, how did you find this?
We can try to cause an interrupt using a signal from another thread, but
that's too difficult as the first test in a test suite.
No. The L1 guest needs to execute VMRUN with an interrupt to inject to
the L2 guest with event_inj. On that VMRUN instruction emulation an
interrupt becomes pending which causes an immediate #vmexit from L2 to
L2 again without even entering the L2 guest. The bug was that in this
case the event which the L1 tried to inject in the L2 was lost because
it was not copied to exit_int_info.
Joerg
Am 08.10.2009 um 18:38 schrieb Avi Kivity <a...@redhat.com>:
> On 10/08/2009 06:32 PM, Joerg Roedel wrote:
>> No. The L1 guest needs to execute VMRUN with an interrupt to inject
>> to
>> the L2 guest with event_inj. On that VMRUN instruction emulation an
>> interrupt becomes pending which causes an immediate #vmexit from L2
>> to
>> L2 again without even entering the L2 guest. The bug was that in this
>> case the event which the L1 tried to inject in the L2 was lost
>> because
>> it was not copied to exit_int_info.
>>
>
> (from L1 to L0?)
>
> Wow. Alex, how did you find this?
Hyper-V got stuck and I was trying to think of possible reasons
looking at the logs :-).
Fortunately this patch also seemed to make things work better with KVM
in KVM.
Doesn't really help with regression testing though...
Alex
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 10 +++++++++-
arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 32 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ba18fb7..8b9f6fb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1987,6 +1987,14 @@ static int invlpga_interception(struct vcpu_svm *svm)
return 1;
}
+static int skinit_interception(struct vcpu_svm *svm)
+{
+ trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm)
{
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
@@ -2350,7 +2358,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
- [SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 7e1f08e..816e044 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -492,6 +492,28 @@ TRACE_EVENT(kvm_invlpga,
TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
__entry->rip, __entry->asid, __entry->address)
);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_skinit,
+ TP_PROTO(__u64 rip, __u32 slb),
+ TP_ARGS(rip, slb),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, slb )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->slb = slb;
+ ),
+
+ TP_printk("rip: 0x%016llx slb: 0x%08x\n",
+ __entry->rip, __entry->slb)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1153d92..ed4622c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4985,3 +4985,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
--
1.6.4.3
If event_inj is valid on a #vmexit the host CPU would write
the contents to exit_int_info, so the hypervisor knows that
the event wasn't injected.
We don't do this in nested SVM by now which is a bug and
fixed by this patch.
Signed-off-by: Alexander Graf <ag...@suse.de>
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 279a2ae..e372854 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
+
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
+
nested_vmcb->control.tlb_ctl = 0;
nested_vmcb->control.event_inj = 0;
nested_vmcb->control.event_inj_err = 0;