From: Himanshu Chauhan <hcha...@xvisor-x86.org>
o Refactored the code to handle vmexit in a modular fashion.
o Handle I/O port reads and writes.
o Coreboot can now switch to protected mode and initialize the RTC
and console successfully.
Signed-off-by: Himanshu Chauhan <hcha...@xvisor-x86.org>
---
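Reviewer note: with this refactor the vmexit path reduces to a dispatch
over exit reasons, one helper per reason. A condensed sketch of the new
shape (error paths and the real-mode linear-address validity check are
elided here; the full code is in intercept.c below):

	int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
	{
		switch (exit_reason) {
		case EXIT_REASON_EPT_VIOLATION:
			/* Real-mode faults are relocated against 0xFFFF0000;
			 * protected-mode faults map the guest physical directly. */
			if (guest_in_real_mode(context))
				return vmx_handle_guest_realmode_page_fault(context);
			return vmx_handle_guest_protected_mode_page_fault(context);
		case EXIT_REASON_IO_INSTRUCTION:
			return vmx_handle_io_instruction_exit(context);
		case EXIT_REASON_CR_ACCESS:
			return vmx_handle_crx_exit(context);
		default:
			return VMM_EFAIL; /* unhandled exit reason */
		}
	}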
.../x86/cpu/common/include/vm/vmx_intercept.h | 14 +
arch/x86/cpu/common/vm/vtx/ept.c | 16 +-
arch/x86/cpu/common/vm/vtx/intercept.c | 263 +++++++++++++-----
arch/x86/cpu/common/vm/vtx/vmx.c | 2 -
4 files changed, 220 insertions(+), 75 deletions(-)
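A note on the I/O exit decode: the handler reads the VMCS exit
qualification through the vmx_io_exit_qualification_t union. Assuming
the Intel SDM encoding (io_size 0 = 1 byte, 1 = 2 bytes, otherwise 4
bytes; direction 0 = OUT, 1 = IN), the size decode could be factored
into a small helper; vmx_guest_io_size() below is hypothetical and only
illustrates what the open-coded ternary in
vmx_handle_io_instruction_exit() computes:

	/* Hypothetical helper, for illustration only. */
	static inline u32 vmx_guest_io_size(vmx_io_exit_qualification_t ioe)
	{
		/* io_size: 0 -> 1 byte, 1 -> 2 bytes, else treated as 4 bytes */
		return (ioe.bits.io_size == 0) ? 1 :
		       (ioe.bits.io_size == 1) ? 2 : 4;
	}

OUT (direction == 0) forwards guest RAX to vmm_devemu_emulate_iowrite(),
IN (direction == 1) fills guest RAX from vmm_devemu_emulate_ioread(),
and writes to port 0x80 (the POST-code port) are only logged. On
success the handler advances GUEST_RIP past the trapped instruction.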
diff --git a/arch/x86/cpu/common/include/vm/vmx_intercept.h b/arch/x86/cpu/common/include/vm/vmx_intercept.h
index 1421076f..38539848 100644
--- a/arch/x86/cpu/common/include/vm/vmx_intercept.h
+++ b/arch/x86/cpu/common/include/vm/vmx_intercept.h
@@ -39,6 +39,20 @@ typedef union _er {
unsigned long r;
} exit_reason_t;
+typedef union vmx_crx_move_eq {
+ u64 val;
+ struct {
+ u32 cr_num:4;
+ u32 type:2;
+ u32 operand_type:1;
+ u32 res:1;
+ u32 reg:4;
+ u32 res1:4;
+ u32 lms_source:16;
+ u32 res2:32;
+ } bits;
+} vmx_crx_move_eq_t;
+
typedef union vmx_io_exit_qualification {
u64 val;
struct {
diff --git a/arch/x86/cpu/common/vm/vtx/ept.c b/arch/x86/cpu/common/vm/vtx/ept.c
index 3d74edf4..8dbe7bf5 100644
--- a/arch/x86/cpu/common/vm/vtx/ept.c
+++ b/arch/x86/cpu/common/vm/vtx/ept.c
@@ -70,19 +70,19 @@ invalidate_ept (int type, struct invept_desc *desc)
/* most modern CPUs will have this */
if (unlikely(type == INVEPT_ALL_CONTEXT
&& !cpu_has_vmx_ept_invept_all_context)) {
- VM_LOG(LVL_INFO, "EPT all context flush not supported\n");
+ VM_LOG(LVL_DEBUG, "EPT all context flush not supported\n");
return;
}
if (unlikely(type == INVEPT_SINGLE_CONTEXT
&& !cpu_has_vmx_ept_invept_single_context)) {
- VM_LOG(LVL_INFO, "EPT single context flush not supported\n");
+ VM_LOG(LVL_DEBUG, "EPT single context flush not supported\n");
return;
}
asm volatile("invept (%0), %1\n\t"
::"D"(type), "S"(desc)
:"memory", "cc");
} else {
- VM_LOG(LVL_INFO, "INVEPT instruction is not supported by CPU\n");
+ VM_LOG(LVL_DEBUG, "INVEPT instruction is not supported by CPU\n");
}
}
@@ -228,7 +228,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
pdpte->
pe.mt = 6; /* write-back memory type */
pdpte->pe.ign_pat = 1; /* ignore PAT type */
pdpte->pe.is_page = 1;
- VM_LOG(LVL_INFO, "New PDPT Entry: 0x%"PRIx64"\n", pdpte->val);
+ VM_LOG(LVL_DEBUG, "New PDPT Entry: 0x%"PRIx64"\n", pdpte->val);
rc = VMM_OK;
/* new entry. Invalidate EPT */
goto _invalidate_ept;
@@ -248,7 +248,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
pdpte->te.pd_base = EPT_PHYS_4KB_PFN(phys);
pdpte->val &= EPT_PROT_MASK;
pdpte->val |= pg_prot;
- VM_LOG(LVL_INFO, "New PD Page at 0x%"PRIx64" (Phys: 0x%"PRIx64")\n", virt, phys);
+ VM_LOG(LVL_DEBUG, "New PD Page at 0x%"PRIx64" (Phys: 0x%"PRIx64")\n", virt, phys);
} else { /* page is already allocated, a mapping in locality exists */
if (vmm_host_pa2va(e_phys, &virt) != VMM_OK) {
VM_LOG(LVL_ERR, "Couldn't map PDE physical 0x%"PRIx64" to virtual\n",
@@ -267,7 +267,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
decode_ept_entry(EPT_LEVEL_PDE, (void *)pde, &e_phys, &e_pg_prot);
if (pde->pe.is_page) {
- VM_LOG(LVL_INFO, "PDE is a 2MB Page!\n");
+ VM_LOG(LVL_DEBUG, "PDE is a 2MB Page!\n");
/* this is marked as 1GB page and new mapping wants otherwise
* then its a problem. Caller didn't free this mapping prior
* to calling this function */
@@ -318,7 +318,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
} else {
/* Ok. So this is PDE. Lets find PTE now. */
if (!e_pg_prot) { /* page for PTE is not currently set */
- VM_LOG(LVL_INFO, "Page protection bits not set in PTE page. Creating new one.\n");
+ VM_LOG(LVL_DEBUG, "Page protection bits not set in PTE page. Creating new one.\n");
virt = get_free_page_for_pagemap(context, &phys);
/* allocate a new PTE page */
if (!virt) {
@@ -395,7 +395,7 @@ int setup_ept(struct vcpu_hw_context *context)
eptp_t *eptp = (eptp_t *)&context->eptp;
virtual_addr_t pml4 = get_free_page_for_pagemap(context, &pml4_phys);
- VM_LOG(LVL_INFO, "%s: PML4 vaddr: 0x%016lx paddr: 0x%016lx\n",
+ VM_LOG(LVL_DEBUG, "%s: PML4 vaddr: 0x%016lx paddr: 0x%016lx\n",
__func__, pml4, pml4_phys);
if (!pml4) {
diff --git a/arch/x86/cpu/common/vm/vtx/intercept.c b/arch/x86/cpu/common/vm/vtx/intercept.c
index ab7f3e26..9b823358 100644
--- a/arch/x86/cpu/common/vm/vtx/intercept.c
+++ b/arch/x86/cpu/common/vm/vtx/intercept.c
@@ -40,30 +40,96 @@
#include <vm/vmx_intercept.h>
static inline
-int vmx_handle_guest_realmode_page_fault(struct vcpu_hw_context *context,
- physical_addr_t fault_gphys,
- physical_addr_t hphys_addr)
+int vmx_handle_guest_realmode_page_fault(struct vcpu_hw_context *context)
{
int rc;
+ u32 flags;
+ physical_addr_t hphys_addr;
+ physical_size_t availsz;
+ physical_addr_t fault_gphys;
+ struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
- fault_gphys &= PAGE_MASK;
- hphys_addr &= PAGE_MASK;
+ physical_addr_t gla = vmr(GUEST_LINEAR_ADDRESS);
+
+ VM_LOG(LVL_DEBUG, "[Real Mode] Faulting Address: 0x%"PRIx64"\n", gla);
+
+ fault_gphys = (0xFFFF0000ULL + gla);
+
+ VM_LOG(LVL_DEBUG, "(Real Mode) Looking for map from guest address: 0x%08lx\n",
+ (fault_gphys & PAGE_MASK));
+
+ rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
+ PAGE_SIZE, &hphys_addr, &availsz, &flags);
+ if (rc) {
+ VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx64"\n", fault_gphys);
+ goto guest_bad_fault;
+ }
- VM_LOG(LVL_DEBUG, "Handle Page Fault: gphys: 0x%"PRIx64" hphys: 0x%"PRIx64"\n",
- fault_gphys, hphys_addr);
+ if (availsz < PAGE_SIZE) {
+ VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
+ rc = VMM_EFAIL;
+ goto guest_bad_fault;
+ }
+
+ if (flags & (VMM_REGION_REAL | VMM_REGION_ALIAS)) {
+ VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx64" HP: 0x%"PRIx64" Size: %lu\n", gla, hphys_addr, availsz);
- rc = ept_create_pte_map(context, fault_gphys, hphys_addr, PAGE_SIZE,
- (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
- VM_LOG(LVL_DEBUG, "ept_create_pte_map returned with %d\n", rc);
+ gla &= PAGE_MASK;
+ hphys_addr &= PAGE_MASK;
+ VM_LOG(LVL_DEBUG, "Handle Page Fault: gphys: 0x%"PRIx64" hphys: 0x%"PRIx64"\n",
+ fault_gphys, hphys_addr);
+
+ rc = ept_create_pte_map(context, gla, hphys_addr, PAGE_SIZE,
+ (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
+ VM_LOG(LVL_DEBUG, "ept_create_pte_map returned with %d\n", rc);
+ } else
+ rc = VMM_EFAIL;
+
+ guest_bad_fault:
return rc;
}
+static inline
+int vmx_handle_guest_protected_mode_page_fault(struct vcpu_hw_context *context)
+{
+ physical_addr_t fault_gphys, hphys_addr;
+ physical_size_t availsz;
+ int rc;
+ u32 flags;
+ struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
+
+ fault_gphys = vmr(GUEST_LINEAR_ADDRESS);
+
+ VM_LOG(LVL_DEBUG, "(Protected Mode) Looking for map from guest address: 0x%08lx\n",
+ (fault_gphys & PAGE_MASK));
+
+ rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
+ PAGE_SIZE, &hphys_addr, &availsz, &flags);
+ if (rc) {
+ VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx64"\n", fault_gphys);
+ return VMM_EFAIL;
+ }
+
+ if (availsz < PAGE_SIZE) {
+ VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
+ return VMM_EFAIL;
+ }
+
+ fault_gphys &= PAGE_MASK;
+ hphys_addr &= PAGE_MASK;
+
+ VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx64" HP: 0x%"PRIx64" Size: %lu\n", fault_gphys, hphys_addr, availsz);
+
+ return ept_create_pte_map(context, fault_gphys, hphys_addr, PAGE_SIZE,
+ (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
+}
+
static inline
int guest_in_real_mode(struct vcpu_hw_context *context)
{
- if (VMX_GUEST_CR0(context) & X86_CR0_PG)
+ if (VMX_GUEST_CR0(context) & X86_CR0_PE)
return 0;
if (is_guest_address_translated(VMX_GUEST_EQ(context)))
@@ -72,73 +138,140 @@ int guest_in_real_mode(struct vcpu_hw_context *context)
return 1;
}
-int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
+static inline
+int vmx_handle_io_instruction_exit(struct vcpu_hw_context *context)
{
- u32 gla, flags;
- int rc;
- physical_addr_t hphys_addr;
- physical_size_t availsz;
- physical_addr_t fault_gphys;
- struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
vmx_io_exit_qualification_t ioe;
+ u32 wval, io_sz;
+
+ ioe.val = VMX_GUEST_EQ(context);
+ io_sz = (ioe.bits.io_size == 0 ? 1 : (ioe.bits.io_size == 1 ? 2 : 4));
+
+ if (ioe.bits.direction == 0) {
+ if (ioe.bits.port == 0x80) {
+ VM_LOG(LVL_DEBUG, "(0x%"PRIx64") CBDW: 0x%"PRIx64"\n",
+ VMX_GUEST_RIP(context), context->g_regs[GUEST_REGS_RAX]);
+ } else {
+ wval = (u32)context->g_regs[GUEST_REGS_RAX];
+
+ if (vmm_devemu_emulate_iowrite(context->assoc_vcpu, ioe.bits.port,
+ &wval, io_sz, VMM_DEVEMU_NATIVE_ENDIAN) != VMM_OK) {
+ vmm_printf("Failed to emulate OUT instruction in"
+ " guest.\n");
+ goto guest_bad_fault;
+ }
+ }
+ } else {
+ VM_LOG(LVL_DEBUG, "Read on IO Port: %d\n", ioe.bits.port);
+ if (vmm_devemu_emulate_ioread(context->assoc_vcpu, ioe.bits.port, &wval, io_sz,
+ VMM_DEVEMU_NATIVE_ENDIAN) != VMM_OK) {
+ vmm_printf("Failed to emulate IO instruction in "
+ "guest.\n");
+ goto guest_bad_fault;
+ }
+
+ context->g_regs[GUEST_REGS_RAX] = wval;
+ }
+
+ __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+
+ return VMM_OK;
+
+ guest_bad_fault:
+ return VMM_EFAIL;
+}
+
+static inline
+int vmx_handle_crx_exit(struct vcpu_hw_context *context)
+{
+ vmx_crx_move_eq_t crx_eq;
+
+ crx_eq.val = VMX_GUEST_EQ(context);
- if (unlikely(guest == NULL))
- vmm_panic("%s: NULL guest on vmexit\n", __func__);
+ if (crx_eq.bits.reg > GUEST_REGS_R15) {
+ VM_LOG(LVL_ERR, "Guest Move to CR0 with invalid reg %d\n", crx_eq.bits.reg);
+ goto guest_bad_fault;
+ }
+ /* Move to CRx */
+ if (crx_eq.bits.type == 0) {
+ switch(crx_eq.bits.cr_num) {
+ case 0:
+ __vmwrite(GUEST_CR0, (VMX_GUEST_CR0(context) | context->g_regs[crx_eq.bits.reg]));
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR0\n",
+ crx_eq.bits.reg, (VMX_GUEST_CR0(context) | context->g_regs[crx_eq.bits.reg]));
+ break;
+ case 3:
+ __vmwrite(GUEST_CR3, context->g_regs[crx_eq.bits.reg]);
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR3\n",
+ crx_eq.bits.reg, context->g_regs[crx_eq.bits.reg]);
+ break;
+ case 4:
+ __vmwrite(GUEST_CR4, context->g_regs[crx_eq.bits.reg]);
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR4\n",
+ crx_eq.bits.reg, context->g_regs[crx_eq.bits.reg]);
+ break;
+ default:
+ VM_LOG(LVL_ERR, "Guest trying to write to reserved CR%d\n", crx_eq.bits.cr_num);
+ goto guest_bad_fault;
+ }
+ } else if (crx_eq.bits.type == 1) { /* Move from CRx */
+ switch(crx_eq.bits.cr_num) {
+ case 0:
+ //context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR0);
+ //VM_LOG(LVL_DEBUG, "Moving CR3 to register %d\n",
+ // crx_eq.bits.reg);
+ break;
+ case 3:
+ context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR3);
+ VM_LOG(LVL_DEBUG, "Moving CR3 to register %d\n",
+ crx_eq.bits.reg);
+ break;
+ case 4:
+ context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR4);
+ VM_LOG(LVL_DEBUG, "Moving CR4 to register %d\n",
+ crx_eq.bits.reg);
+ break;
+ default:
+ VM_LOG(LVL_ERR, "Guest trying to write to reserved CR%d\n", crx_eq.bits.cr_num);
+ goto guest_bad_fault;
+ }
+ } else {
+ VM_LOG(LVL_ERR, "LMSW not supported yet\n");
+ goto guest_bad_fault;
+ }
+ __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+
+ return VMM_OK;
+
+ guest_bad_fault:
+ return VMM_EFAIL;
+
+}
+
+int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
+{
switch (exit_reason) {
case EXIT_REASON_EPT_VIOLATION:
- gla = vmr(GUEST_LINEAR_ADDRESS);
-
/* Guest in real mode */
if (guest_in_real_mode(context)) {
if (is_guest_linear_address_valid(VMX_GUEST_EQ(context))) {
- fault_gphys = (0xFFFF0000 + gla);
-
- VM_LOG(LVL_DEBUG, "(Real Mode) Looking for map from guest address: 0x%08lx\n",
- (fault_gphys & PAGE_MASK));
- rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
- PAGE_SIZE, &hphys_addr, &availsz, &flags);
- if (rc) {
- VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx32"\n", gla);
- goto guest_bad_fault;
- }
-
- if (availsz < PAGE_SIZE) {
- VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
- goto guest_bad_fault;
- }
-
- if (flags & (VMM_REGION_REAL | VMM_REGION_ALIAS)) {
- VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx32" HP: 0x%"PRIx64" Size: %lu\n", gla, hphys_addr, availsz);
- return vmx_handle_guest_realmode_page_fault(context, (gla & PAGE_MASK), (hphys_addr & PAGE_MASK));
- } else
- VM_LOG(LVL_ERR, "Unhandled guest fault region flags: 0x%"PRIx32"\n", flags);
+ return vmx_handle_guest_realmode_page_fault(context);
} else {
VM_LOG(LVL_ERR, "(Realmode pagefault) VMX reported invalid linear address.\n");
- goto guest_bad_fault;
+ return VMM_EFAIL;
}
- } else {
- VM_LOG(LVL_ERR, "Handle protected mode guest.\n");
- goto guest_bad_fault;
+ } else { /* Protected mode */
+ return vmx_handle_guest_protected_mode_page_fault(context);
}
break;
case EXIT_REASON_IO_INSTRUCTION:
- ioe.val = VMX_GUEST_EQ(context);
-
- if (ioe.bits.direction == 0) {
- if (ioe.bits.port == 0x80) {
- VM_LOG(LVL_INFO, "(0x%"PRIx64") CBDW: 0x%"PRIx64"\n",
- VMX_GUEST_RIP(context), context->g_regs[GUEST_REGS_RAX]);
- }
- } else {
- VM_LOG(LVL_ERR, "Read on IO Port: %d\n", ioe.bits.port);
- while(1);
- }
+ return vmx_handle_io_instruction_exit(context);
- __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+ case EXIT_REASON_CR_ACCESS:
+ return vmx_handle_crx_exit(context);
- return VMM_OK;
default:
goto guest_bad_fault;
}
@@ -172,14 +305,15 @@ void vmx_vcpu_exit(struct vcpu_hw_context *context)
break;
}
} else {
- VM_LOG(LVL_INFO, "VM Exit reason: %d\n", _exit_reason.bits.reason);
+ VM_LOG(LVL_DEBUG, "VM Exit reason: %d\n", _exit_reason.bits.reason);
VMX_GUEST_SAVE_EQ(context);
VMX_GUEST_SAVE_CR0(context);
VMX_GUEST_SAVE_RIP(context);
+ VM_LOG(LVL_DEBUG, "Guest RIP: 0x%"PRIx64"\n", VMX_GUEST_RIP(context));
if (vmx_handle_vmexit(context, _exit_reason.bits.reason) != VMM_OK) {
- VM_LOG(LVL_ERR, "Error handling VMExit\n");
+ VM_LOG(LVL_DEBUG, "Error handling VMExit (Reason: %d)\n", _exit_reason.bits.reason);
goto unhandled_vm_exit;
}
@@ -187,7 +321,6 @@ void vmx_vcpu_exit(struct vcpu_hw_context *context)
}
unhandled_vm_exit:
- VM_LOG(LVL_ERR, "Unhandled vmexit\n");
- while(1);
- //context->vcpu_emergency_shutdown(context);
+ VM_LOG(LVL_DEBUG, "Unhandled vmexit\n");
+ context->vcpu_emergency_shutdown(context);
}
diff --git a/arch/x86/cpu/common/vm/vtx/vmx.c b/arch/x86/cpu/common/vm/vtx/vmx.c
index 45513e45..b735c1d4 100644
--- a/arch/x86/cpu/common/vm/vtx/vmx.c
+++ b/arch/x86/cpu/common/vm/vtx/vmx.c
@@ -223,8 +223,6 @@ static int __vmcs_run(struct vcpu_hw_context *context, bool resume)
int rc = 0;
u64 ins_err = 0;
- VM_LOG(LVL_INFO, "Starting to %s guest...\n", (resume ? "resume" : "launch"));
-
__asm__ __volatile__("pushfq\n\t" /* Save flags */
"movq $vmx_return, %%rax\n\t"
"vmwrite %%rax, %%rbx\n\t"
--
2.27.0