
[PATCH 07/11] KVM: SVM: Ignore write of hwcr.ignne


Joerg Roedel
Feb 24, 2010, 1:00:03 PM

Hyper-V, when running as a guest, wants to write this bit.
This patch ignores the write.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---
arch/x86/kvm/x86.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c24cb5..31d44c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1112,6 +1112,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 	case MSR_K7_HWCR:
 		data &= ~(u64)0x40;	/* ignore flush filter disable */
+		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
 		if (data != 0) {
 			pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
 				data);
--
1.7.0
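
For illustration, the masking in this hunk boils down to the following
standalone sketch (not kernel code; bit positions taken from the patch
comments):

#include <stdint.h>
#include <stdio.h>

/* HWCR bits as named in the patch comments */
#define HWCR_FLUSH_FILTER_DIS	((uint64_t)1 << 6)	/* 0x40  */
#define HWCR_IGNNE_EM		((uint64_t)1 << 8)	/* 0x100 */

/* Return the bits of an HWCR write that remain unhandled; anything
 * non-zero would still trigger the "unimplemented" warning. */
static uint64_t hwcr_unhandled_bits(uint64_t data)
{
	data &= ~HWCR_FLUSH_FILTER_DIS;	/* ignore flush filter disable */
	data &= ~HWCR_IGNNE_EM;		/* ignore ignne emulation enable */
	return data;
}

int main(void)
{
	/* A write of 0x100 (what Hyper-V does) is now silently accepted. */
	printf("0x%llx\n", (unsigned long long)hwcr_unhandled_bits(0x100));
	return 0;
}

With this, the Hyper-V write no longer hits the pr_unimpl() warning path.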



Joerg Roedel
Feb 24, 2010, 1:00:02 PM

Without resetting the MMU, the gva_to_gpa function will not
work reliably while the vcpu is running in nested context.
Do the reset in the npt_enabled path too, not only when
shadow paging is in use.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 217b8b0..b821b2f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1871,10 +1871,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	if (npt_enabled) {
 		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
 		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
-	} else {
+	} else
 		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
-		kvm_mmu_reset_context(&svm->vcpu);
-	}
+
+	/* Guest paging mode is active - reset mmu */
+	kvm_mmu_reset_context(&svm->vcpu);
+
 	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
 	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);

Joerg Roedel
Feb 24, 2010, 1:10:02 PM

This patch adds a tracepoint that reports the most
important intercept bitmasks of the nested vmcb.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 5 +++++
arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b048c2b..6b7590a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1838,6 +1838,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 			       nested_vmcb->control.event_inj,
 			       nested_vmcb->control.nested_ctl);
 
+	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
+				    nested_vmcb->control.intercept_cr_write,
+				    nested_vmcb->control.intercept_exceptions,
+				    nested_vmcb->control.intercept);
+
 	/* Clear internal status */
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 12f8d2d..17b52cc 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -419,6 +419,28 @@ TRACE_EVENT(kvm_nested_vmrun,
 		__entry->npt ? "on" : "off")
 );
 
+TRACE_EVENT(kvm_nested_intercepts,
+	    TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept),
+	    TP_ARGS(cr_read, cr_write, exceptions, intercept),
+
+	TP_STRUCT__entry(
+		__field(	__u16,	cr_read		)
+		__field(	__u16,	cr_write	)
+		__field(	__u32,	exceptions	)
+		__field(	__u64,	intercept	)
+	),
+
+	TP_fast_assign(
+		__entry->cr_read = cr_read;
+		__entry->cr_write = cr_write;
+		__entry->exceptions = exceptions;
+		__entry->intercept = intercept;
+	),
+
+	TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx",
+		  __entry->cr_read, __entry->cr_write, __entry->exceptions,
+		  __entry->intercept)
+);
 /*
  * Tracepoint for #VMEXIT while nested
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7b436c8..2c24cb5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5924,3 +5924,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
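
As a usage illustration, the intercept field printed by this tracepoint
can be decoded with a small userspace helper. This is only a sketch, not
part of the patch; the enum values mirror the start of the intercept
enum in arch/x86/include/asm/svm.h, and only the first few are shown:

#include <stdint.h>
#include <stdio.h>

/* First entries of the SVM intercept vector, as in asm/svm.h */
enum { INTERCEPT_INTR, INTERCEPT_NMI, INTERCEPT_SMI };

static void decode_intercept(uint64_t intercept)
{
	if (intercept & (1ULL << INTERCEPT_INTR))
		printf("INTR ");
	if (intercept & (1ULL << INTERCEPT_NMI))
		printf("NMI ");
	if (intercept & (1ULL << INTERCEPT_SMI))
		printf("SMI ");
	printf("\n");
}

int main(void)
{
	decode_intercept(0x3);	/* made-up sample value: prints "INTR NMI" */
	return 0;
}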

Joerg Roedel
Feb 24, 2010, 1:10:02 PM

A recent change broke tracing of the nested vmcb address:
it was always reported as 0. This patch fixes it.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9759d9f..b048c2b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1832,7 +1832,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	if (!nested_vmcb)
 		return false;
 
-	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
 			       nested_vmcb->save.rip,
 			       nested_vmcb->control.int_ctl,
 			       nested_vmcb->control.event_inj,

Joerg Roedel
Feb 24, 2010, 1:10:03 PM

This patch implements the NMI intercept checking for nested
svm: the NMI window is only opened if no nested guest is
running or if the L1 hypervisor does not intercept NMIs.
Otherwise a nested #VMEXIT(NMI) is queued for the L1
hypervisor; see the sketch after the patch.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 22 +++++++++++++++++++---
1 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b821b2f..9759d9f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1480,6 +1480,20 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 	return true;
 }
 
+/* This function returns true if it is safe to enable the nmi window */
+static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+{
+	if (!is_nested(svm))
+		return true;
+
+	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
+		return true;
+
+	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+	svm->nested.exit_required = true;
+
+	return false;
+}
 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
 {
 	struct page *page;
@@ -2681,9 +2695,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	 * Something prevents NMI from been injected. Single step over possible
 	 * problem (IRET or exception injection or interrupt shadow)
 	 */
-	svm->nmi_singlestep = true;
-	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
-	update_db_intercept(vcpu);
+	if (gif_set(svm) && nested_svm_nmi(svm)) {
+		svm->nmi_singlestep = true;
+		svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(vcpu);
+	}
 }

static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
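
The decision logic of nested_svm_nmi() can be tried in isolation with
this standalone sketch (helper name and boolean parameters are
hypothetical; they stand in for is_nested() and the INTERCEPT_NMI bit
above):

#include <stdbool.h>
#include <stdio.h>

/* May the NMI window be opened, or must a nested #VMEXIT(NMI)
 * be emulated for the L1 hypervisor first? */
static bool nmi_window_allowed(bool is_nested, bool l1_intercepts_nmi)
{
	if (!is_nested)
		return true;	/* no L1 hypervisor involved */
	if (!l1_intercepts_nmi)
		return true;	/* L1 does not care about NMIs */
	return false;		/* queue SVM_EXIT_NMI for L1 instead */
}

int main(void)
{
	printf("%d\n", nmi_window_allowed(true, true));		/* 0 */
	printf("%d\n", nmi_window_allowed(true, false));	/* 1 */
	return 0;
}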

Joerg Roedel
Feb 24, 2010, 1:10:03 PM

If we have the following situation with nested svm:

1. Host KVM intercepts cr0 writes
2. Guest hypervisor intercepts only selective cr0 writes

Then we get a cr0 write intercept which is handled on the
host. But that intercept may actually be a selective cr0
intercept for the guest. This patch checks for this
condition and injects a selective cr0 intercept if needed.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 21 +++++++++++++++++++++
1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2450a7c..22654de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1037,6 +1037,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	if (is_nested(svm)) {
+		/*
+		 * We are here because we run in nested mode, the host kvm
+		 * intercepts cr0 writes but the l1 hypervisor does not.
+		 * But the L1 hypervisor may intercept selective cr0 writes.
+		 * This needs to be checked here.
+		 */
+		unsigned long old, new;
+
+		/* Remove bits that would trigger a real cr0 write intercept */
+		old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
+		new = cr0 & SVM_CR0_SELECTIVE_MASK;
+
+		if (old == new) {
+			/* cr0 write with ts and mp unchanged */
+			svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+			if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+				return;
+		}
+	}
+
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
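
The masked comparison can be tried in isolation with this sketch (the
CR0 bit values are architectural; the kernel uses SVM_CR0_SELECTIVE_MASK
from asm/svm.h instead of the local define here):

#include <stdio.h>

#define X86_CR0_MP	(1UL << 1)
#define X86_CR0_TS	(1UL << 3)
#define SEL_MASK	(X86_CR0_TS | X86_CR0_MP)

/* Returns 1 when ts and mp are unchanged by the write, i.e. the case
 * in which the patch reflects a selective cr0 intercept to L1. */
static int ts_mp_unchanged(unsigned long old_cr0, unsigned long new_cr0)
{
	return (old_cr0 & SEL_MASK) == (new_cr0 & SEL_MASK);
}

int main(void)
{
	printf("%d\n", ts_mp_unchanged(0x1, 0x80000001));	/* 1: only PG changed */
	printf("%d\n", ts_mp_unchanged(0x9, 0x1));		/* 0: TS was cleared  */
	return 0;
}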

Joerg Roedel
Feb 24, 2010, 1:10:04 PM

On Wed, Feb 24, 2010 at 06:59:10PM +0100, Joerg Roedel wrote:
> This patch removes whitespace errors, fixes comment formats
> and most of checkpatch warnings. Now vim does not show
^^^^^^^^
This should actually read "errors". The warnings left in this file are
all 80-chars-per-line related. Two errors remain which don't make sense
to fix.

Joerg

Joerg Roedel
Feb 24, 2010, 1:10:03 PM

This patch optimizes the way the msrpm of the host and the
guest are merged. The old code merged the two msrpm pages
completely and therefore had to touch 24kb of memory for
that operation (two 8kb bitmaps read, one 8kb bitmap
written). The optimized variant this patch introduces
merges only the parts where the host msrpm may contain zero
bits. This reduces the amount of memory which is touched to
48 bytes (four 32-bit words, each read from the guest and
the host bitmap and written back).

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 45 ++++++++++++++++++++++++++++++++++++---------
1 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ae2f211..eb25fea 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -754,6 +754,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	svm->nested.hsave = page_address(hsave_page);
 
 	svm->nested.msrpm = page_address(nested_msrpm_pages);
+	svm_vcpu_init_msrpm(svm->nested.msrpm);
 
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
@@ -1824,20 +1825,46 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)

 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
-	u32 *nested_msrpm;
-	struct page *page;
+	/*
+	 * This function merges the msr permission bitmaps of kvm and the
+	 * nested vmcb. It is optimized in that it only merges the parts where
+	 * the kvm msr permission bitmap may contain zero bits
+	 */
+	static const u32 msrpm_offsets[] = {
+		0x0000002c,	/* SYSENTER_CS */
+
+		0x00000038,	/* LASTBRANCHFROMIP
+				   LASTBRANCHTOIP
+				   LASTINTFROMIP
+				   LASTINTTOIP */
+
+		0x00000820,	/* STAR
+				   LSTAR
+				   CSTAR
+				   SYSCALL_MASK */
+
+		0x00000840,	/* FS_BASE
+				   GS_BASE
+				   KERNEL_GS_BASE */
+
+		0xffffffff,	/* End of List */
+	};
 	int i;
 
-	nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
-	if (!nested_msrpm)
-		return false;
+	for (i = 0; msrpm_offsets[i] != 0xffffffff; i++) {
+		u32 value, p;
+		u64 offset;
+
-	for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
-		svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+		offset = svm->nested.vmcb_msrpm + msrpm_offsets[i];
+		p      = msrpm_offsets[i] / 4;
+
-	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+		if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
+			return false;
+
-	nested_svm_unmap(page);
+		svm->nested.msrpm[p] = svm->msrpm[p] | value;
+	}
+
+	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
 
 	return true;
 }
--
1.7.0
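
The merge rule itself is a plain bitwise OR, because a set bit in the
MSR permission map means "intercept this access". A minimal sketch of
the per-dword operation (the values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* OR-ing host and guest permission dwords intercepts an MSR access
 * whenever either the host or the L1 hypervisor wants it intercepted. */
static uint32_t merge_msrpm_dword(uint32_t host, uint32_t guest)
{
	return host | guest;
}

int main(void)
{
	printf("%08x\n", merge_msrpm_dword(0x00000000, 0x00000c00));
	return 0;
}

Doing this for the four offsets above means four such dwords are read
from the guest, four from the host bitmap, and four written back - the
48 bytes mentioned in the changelog.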

Joerg Roedel
Feb 24, 2010, 1:10:03 PM

When injecting a vmexit.intr into the nested hypervisor,
there might be leftover values in the exit_info fields.
Clear them so they do not confuse the nested hypervisor.

Signed-off-by: Joerg Roedel <joerg....@amd.com>
---

arch/x86/kvm/svm.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 22654de..ae2f211 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1485,7 +1485,9 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
 		return false;
 
-	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
+	svm->vmcb->control.exit_info_1 = 0;
+	svm->vmcb->control.exit_info_2 = 0;
 
 	if (svm->nested.intercept & 1ULL) {
 		/*

Alexander Graf
Feb 24, 2010, 2:30:01 PM

On 24.02.2010, at 18:59, Joerg Roedel wrote:

> This patch optimizes the way the msrpm of the host and the
> guest are merged. The old code merged the 2 msrpm pages
> completly. This code needed to touch 24kb of memory for that
> operation. The optimized variant this patch introduces
> merges only the parts where the host msrpm may contain zero
> bits. This reduces the amount of memory which is touched to
> 48 bytes.

Nice catch!

Isn't there such a list around somewhere already? We really should only keep this list once throughout the whole code. If necessary, just create the list on the fly when bits get set in the msrpm.

Alex

Joerg Roedel
Feb 24, 2010, 2:40:02 PM

On Wed, Feb 24, 2010 at 08:27:50PM +0100, Alexander Graf wrote:
> > + static const u32 msrpm_offsets[] = {
> > + 0x0000002c, /* SYSENTER_CS */
> > +
> > + 0x00000038, /* LASTBRANCHFROMIP
> > + LASTBRANCHTOIP
> > + LASTINTFROMIP
> > + LASTINTTOIP */
> > +
> > + 0x00000820, /* STAR
> > + LSTAR
> > + CSTAR
> > + SYSCALL_MASK */
> > +
> > + 0x00000840, /* FS_BASE
> > + GS_BASE
> > + KERNEL_GS_BASE */
> > +
> > + 0xffffffff, /* End of List */
>
> Isn't there such a list around somewhere already? We really should
> only keep this list once throughout the whole code. If necessary, just
> create the list on the fly when bits get set in the msrpm.

No, the list is hardcoded in 3 functions (as a parameter of
set_msr_interception). I am thinking about a variant that does this with
a single list. Probably I will create a list of MSRs and check for it in
set_msr_interception.

Joerg

Avi Kivity
Feb 24, 2010, 3:00:02 PM

On 02/24/2010 09:37 PM, Joerg Roedel wrote:
>
>> Isn't there such a list around somewhere already? We really should
>> only keep this list once throughout the whole code. If necessary, just
>> create the list on the fly when bits get set in the msrpm.
>>
> No, the list is hardcoded in 3 functions (as parameter of
> set_msr_interception). I think about a variant to do this with a single
> list. Probably I create a list of MSRs and check in
> set_msr_interceptionm for it.
>
>

Or, have set_msr_interception() create the list of offsets.


--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

Alexander Graf
Feb 24, 2010, 3:10:03 PM

On 24.02.2010, at 20:58, Avi Kivity wrote:

> On 02/24/2010 09:37 PM, Joerg Roedel wrote:
>>
>>> Isn't there such a list around somewhere already? We really should
>>> only keep this list once throughout the whole code. If necessary, just
>>> create the list on the fly when bits get set in the msrpm.
>>>
>> No, the list is hardcoded in 3 functions (as parameter of
>> set_msr_interception). I think about a variant to do this with a single
>> list. Probably I create a list of MSRs and check in
>> set_msr_interceptionm for it.
>>
>>
>
> Or, have set_msr_interception() create the list of offsets.

Or better yet, put the list into the BSS, since it should be static anyway. Both the real MSRPM and the merging function can then be fed from it.

Alex

Avi Kivity
Feb 24, 2010, 3:20:02 PM

On 02/24/2010 10:00 PM, Alexander Graf wrote:
> On 24.02.2010, at 20:58, Avi Kivity wrote:
>
>
>> On 02/24/2010 09:37 PM, Joerg Roedel wrote:
>>
>>>
>>>> Isn't there such a list around somewhere already? We really should
>>>> only keep this list once throughout the whole code. If necessary, just
>>>> create the list on the fly when bits get set in the msrpm.
>>>>
>>>>
>>> No, the list is hardcoded in 3 functions (as parameter of
>>> set_msr_interception). I think about a variant to do this with a single
>>> list. Probably I create a list of MSRs and check in
>>> set_msr_interceptionm for it.
>>>
>>>
>>>
>> Or, have set_msr_interception() create the list of offsets.
>>
> Or even better yet put the list into BSS because it should be static anyways. From that the real MSRPM and the merging function can be fed.
>

Yup.

In fact, the real MSRPM can be generated by merging the list into an
empty bitmap.
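
A rough sketch of how that could look (names and list contents are
illustrative only, building on the set_msr_interception() calls already
present in svm.c):

static const struct {
	u32 msr;
	int read, write;
} direct_access_msrs[] = {
	{ MSR_IA32_SYSENTER_CS,	1, 1 },
	{ MSR_STAR,		1, 1 },
	/* ... */
};

static void svm_vcpu_init_msrpm(u32 *msrpm)
{
	int i;

	/* start with everything intercepted ... */
	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));

	/* ... and punch holes for the direct-access MSRs */
	for (i = 0; i < ARRAY_SIZE(direct_access_msrs); i++)
		set_msr_interception(msrpm, direct_access_msrs[i].msr,
				     direct_access_msrs[i].read,
				     direct_access_msrs[i].write);
}

The same table could then also drive the offset list used by the
merging function.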

--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
