Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/x86.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c24cb5..31d44c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1112,6 +1112,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
+ data &= ~(u64)0x100; /* ignore ignne emulation enable */
if (data != 0) {
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
--
1.7.0
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 217b8b0..b821b2f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1871,10 +1871,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
if (npt_enabled) {
svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
- } else {
+ } else
kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
- kvm_mmu_reset_context(&svm->vcpu);
- }
+
+ /* Guest paging mode is active - reset mmu */
+ kvm_mmu_reset_context(&svm->vcpu);
+
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 5 +++++
arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b048c2b..6b7590a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1838,6 +1838,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.event_inj,
nested_vmcb->control.nested_ctl);
+ trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
+ nested_vmcb->control.intercept_cr_write,
+ nested_vmcb->control.intercept_exceptions,
+ nested_vmcb->control.intercept);
+
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 12f8d2d..17b52cc 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -419,6 +419,28 @@ TRACE_EVENT(kvm_nested_vmrun,
__entry->npt ? "on" : "off")
);
+TRACE_EVENT(kvm_nested_intercepts,
+ TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept),
+ TP_ARGS(cr_read, cr_write, exceptions, intercept),
+
+ TP_STRUCT__entry(
+ __field( __u16, cr_read )
+ __field( __u16, cr_write )
+ __field( __u32, exceptions )
+ __field( __u64, intercept )
+ ),
+
+ TP_fast_assign(
+ __entry->cr_read = cr_read;
+ __entry->cr_write = cr_write;
+ __entry->exceptions = exceptions;
+ __entry->intercept = intercept;
+ ),
+
+ TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx",
+ __entry->cr_read, __entry->cr_write, __entry->exceptions,
+ __entry->intercept)
+);
/*
* Tracepoint for #VMEXIT while nested
*/
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7b436c8..2c24cb5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5924,3 +5924,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9759d9f..b048c2b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1832,7 +1832,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
if (!nested_vmcb)
return false;
- trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
nested_vmcb->save.rip,
nested_vmcb->control.int_ctl,
nested_vmcb->control.event_inj,
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 22 +++++++++++++++++++---
1 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b821b2f..9759d9f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1480,6 +1480,20 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
return true;
}
+/* This function returns true if it is safe to enable the nmi window */
+static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+{
+ if (!is_nested(svm))
+ return true;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
+ return true;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+ svm->nested.exit_required = true;
+
+ return false;
+}
static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
{
struct page *page;
@@ -2681,9 +2695,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
*/
- svm->nmi_singlestep = true;
- svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
- update_db_intercept(vcpu);
+ if (gif_set(svm) && nested_svm_nmi(svm)) {
+ svm->nmi_singlestep = true;
+ svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+ update_db_intercept(vcpu);
+ }
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
If we have the following situation:
1. Host KVM intercepts cr0 writes
2. Guest hypervisor intercepts only selective cr0 writes
Then we get a cr0 write intercept which is handled on the
host. But that intercept may actually be a selective cr0
intercept for the guest. This patch checks for this
condition and injects a selective cr0 intercept if needed.
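For illustration, a minimal sketch of the guest-hypervisor configuration
that creates this situation (hypothetical code, not part of this patch;
the field names follow the nested VMCB as KVM reads it):

	/* L1 leaves the plain cr0 write intercept clear ... */
	nested_vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
	/* ... but enables the selective cr0 write intercept */
	nested_vmcb->control.intercept |= (1ULL << INTERCEPT_SELECTIVE_CR0);

When L2 then writes cr0 and leaves TS and MP unchanged (for example it
only toggles CR0.CD), the host still takes its own cr0 write intercept.
The check added below rewrites the exit code to SVM_EXIT_CR0_SEL_WRITE
and asks nested_svm_exit_handled() whether the exit belongs to L1; if
so, the write is reflected to the guest hypervisor instead of being
handled by the host.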
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 21 +++++++++++++++++++++
1 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2450a7c..22654de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1037,6 +1037,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (is_nested(svm)) {
+ /*
+ * We are here because we run in nested mode, the host kvm
+ * intercepts cr0 writes but the l1 hypervisor does not.
+ * But the L1 hypervisor may intercept selective cr0 writes.
+ * This needs to be checked here.
+ */
+ unsigned long old, new;
+
+ /* Remove bits that would trigger a real cr0 write intercept */
+ old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
+ new = cr0 & SVM_CR0_SELECTIVE_MASK;
+
+ if (old == new) {
+ /* cr0 write with ts and mp unchanged */
+ svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+ if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+ return;
+ }
+ }
+
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 45 ++++++++++++++++++++++++++++++++++++---------
1 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ae2f211..eb25fea 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -754,6 +754,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->nested.hsave = page_address(hsave_page);
svm->nested.msrpm = page_address(nested_msrpm_pages);
+ svm_vcpu_init_msrpm(svm->nested.msrpm);
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
@@ -1824,20 +1825,46 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
- u32 *nested_msrpm;
- struct page *page;
+ /*
+ * This function merges the msr permission bitmaps of kvm and the
+ * nested vmcb. It is optimized in that it only merges the parts where
+ * the kvm msr permission bitmap may contain zero bits
+ */
+ static const u32 msrpm_offsets[] = {
+ 0x0000002c, /* SYSENTER_CS */
+
+ 0x00000038, /* LASTBRANCHFROMIP
+ LASTBRANCHTOIP
+ LASTINTFROMIP
+ LASTINTTOIP */
+
+ 0x00000820, /* STAR
+ LSTAR
+ CSTAR
+ SYSCALL_MASK */
+
+ 0x00000840, /* FS_BASE
+ GS_BASE
+ KERNEL_GS_BASE */
+
+ 0xffffffff, /* End of List */
+ };
int i;
- nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
- if (!nested_msrpm)
- return false;
+ for (i = 0; msrpm_offsets[i] != 0xffffffff; i++) {
+ u32 value, p;
+ u64 offset;
- for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
- svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+ offset = svm->nested.vmcb_msrpm + msrpm_offsets[i];
+ p = msrpm_offsets[i] / 4;
- svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+ if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
+ return false;
- nested_svm_unmap(page);
+ svm->nested.msrpm[p] = svm->msrpm[p] | value;
+ }
+
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
return true;
}
--
1.7.0
Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/svm.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 22654de..ae2f211 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1485,7 +1485,9 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
return false;
- svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
if (svm->nested.intercept & 1ULL) {
/*
> This patch optimizes the way the msrpm of the host and the
> guest are merged. The old code merged the 2 msrpm pages
> completely. This code needed to touch 24kb of memory for that
> operation. The optimized variant this patch introduces
> merges only the parts where the host msrpm may contain zero
> bits. This reduces the amount of memory which is touched to
> 48 bytes.
Nice catch!
Isn't there such a list around somewhere already? We really should only keep this list once throughout the whole code. If necessary, just create the list on the fly when bits get set in the msrpm.
Alex
No, the list is hardcoded in 3 functions (as parameter of
set_msr_interception). I am thinking about a variant that does this with
a single list. Probably I will create a list of MSRs and check for it in
set_msr_interception.
Joerg
Or, have set_msr_interception() create the list of offsets.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
> Or, have set_msr_interception() create the list of offsets.
Or better yet, put the list into the BSS, because it should be static anyway. From that, both the real MSRPM and the merging function can be fed.
Alex
Yup.
In fact, the real MSRPM can be generated by merging the list into an
empty bitmap.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
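
To make the single-list idea discussed above concrete, here is a rough
sketch (illustrative only; the names direct_access_msrs and
svm_msrpm_byte_offset are invented for this example and do not exist in
the code this thread patches). One static table describes every MSR for
which KVM may clear intercept bits; set_msr_interception() would accept
only MSRs from this table, the real MSRPM could be generated by merging
the table into an all-ones bitmap, and nested_svm_vmrun_msrpm() would
only need to look at the byte offsets derived from the same table:

	/* Sketch for svm.c; the MSR_* constants come from msr-index.h */
	static const u32 direct_access_msrs[] = {
		MSR_IA32_SYSENTER_CS,
		MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK,
		MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE,
		MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
		MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
	};

	/* Byte offset of an MSR's two permission bits inside the MSRPM. */
	static u32 svm_msrpm_byte_offset(u32 msr)
	{
		static const u32 range_start[] = { 0, 0xc0000000, 0xc0010000 };
		int i;

		for (i = 0; i < 3; i++) {
			if (msr < range_start[i] || msr >= range_start[i] + 0x2000)
				continue;
			/* 2 bits per MSR -> 4 MSRs per byte, 2048 bytes per range */
			return i * 2048 + (msr - range_start[i]) / 4;
		}

		return ~0U; /* MSR is not covered by the permission bitmap */
	}

With such a table the offsets used for the nested msrpm merge would not
have to be hardcoded a second time; they could be computed once at
module init from direct_access_msrs[].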