From: Himanshu Chauhan <hcha...@xvisor-x86.org>
o Refactored the code to handle vmexit in a modular fashion.
o Handle I/O port reads and writes.
o Coreboot can now switch to protected mode and initialize the RTC
and console successfully.
Signed-off-by: Himanshu Chauhan <hcha...@xvisor-x86.org>
---
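Reviewer note: with this refactor the vmexit path reduces to a dispatch
over exit reasons, one helper per reason. A condensed sketch of the new
shape (error paths and the real-mode linear-address validity check are
elided here; the full code is in intercept.c below):

	int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
	{
		switch (exit_reason) {
		case EXIT_REASON_EPT_VIOLATION:
			/* Real-mode faults are relocated against 0xFFFF0000;
			 * protected-mode faults map the guest physical directly. */
			if (guest_in_real_mode(context))
				return vmx_handle_guest_realmode_page_fault(context);
			return vmx_handle_guest_protected_mode_page_fault(context);
		case EXIT_REASON_IO_INSTRUCTION:
			return vmx_handle_io_instruction_exit(context);
		case EXIT_REASON_CR_ACCESS:
			return vmx_handle_crx_exit(context);
		default:
			return VMM_EFAIL; /* unhandled exit reason */
		}
	}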
.../x86/cpu/common/include/vm/vmx_intercept.h | 14 +
arch/x86/cpu/common/vm/vtx/ept.c | 16 +-
arch/x86/cpu/common/vm/vtx/intercept.c | 263 +++++++++++++-----
arch/x86/cpu/common/vm/vtx/vmx.c | 2 -
4 files changed, 220 insertions(+), 75 deletions(-)
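A note on the I/O exit decode: the handler reads the VMCS exit
qualification through the vmx_io_exit_qualification_t union. Assuming
the Intel SDM encoding (io_size 0 = 1 byte, 1 = 2 bytes, otherwise 4
bytes; direction 0 = OUT, 1 = IN), the size decode could be factored
into a small helper; vmx_guest_io_size() below is hypothetical and only
illustrates what the open-coded ternary in
vmx_handle_io_instruction_exit() computes:

	/* Hypothetical helper, for illustration only. */
	static inline u32 vmx_guest_io_size(vmx_io_exit_qualification_t ioe)
	{
		/* io_size: 0 -> 1 byte, 1 -> 2 bytes, else treated as 4 bytes */
		return (ioe.bits.io_size == 0) ? 1 :
		       (ioe.bits.io_size == 1) ? 2 : 4;
	}

OUT (direction == 0) forwards guest RAX to vmm_devemu_emulate_iowrite(),
IN (direction == 1) fills guest RAX from vmm_devemu_emulate_ioread(),
and writes to port 0x80 (the POST-code port) are only logged. On
success the handler advances GUEST_RIP past the trapped instruction.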
diff --git a/arch/x86/cpu/common/include/vm/vmx_intercept.h b/arch/x86/cpu/common/include/vm/vmx_intercept.h
index 1421076f..38539848 100644
--- a/arch/x86/cpu/common/include/vm/vmx_intercept.h
+++ b/arch/x86/cpu/common/include/vm/vmx_intercept.h
@@ -39,6 +39,20 @@ typedef union _er {
unsigned long r;
} exit_reason_t;
+typedef union vmx_crx_move_eq {
+ u64 val;
+ struct {
+ u32 cr_num:4;
+ u32 type:2;
+ u32 operand_type:1;
+ u32 res:1;
+ u32 reg:4;
+ u32 res1:4;
+ u32 lms_source:16;
+ u32 res2:32;
+ } bits;
+} vmx_crx_move_eq_t;
+
typedef union vmx_io_exit_qualification {
u64 val;
struct {
diff --git a/arch/x86/cpu/common/vm/vtx/ept.c b/arch/x86/cpu/common/vm/vtx/ept.c
index 3d74edf4..8dbe7bf5 100644
--- a/arch/x86/cpu/common/vm/vtx/ept.c
+++ b/arch/x86/cpu/common/vm/vtx/ept.c
@@ -70,19 +70,19 @@ invalidate_ept (int type, struct invept_desc *desc)
/* most modern CPUs will have this */
if (unlikely(type == INVEPT_ALL_CONTEXT
&& !cpu_has_vmx_ept_invept_all_context)) {
- VM_LOG(LVL_INFO, "EPT all context flush not supported\n");
+ VM_LOG(LVL_DEBUG, "EPT all context flush not supported\n");
return;
}
if (unlikely(type == INVEPT_SINGLE_CONTEXT
&& !cpu_has_vmx_ept_invept_single_context)) {
- VM_LOG(LVL_INFO, "EPT single context flush not supported\n");
+ VM_LOG(LVL_DEBUG, "EPT single context flush not supported\n");
return;
}
asm volatile("invept (%0), %1\n\t"
::"D"(type), "S"(desc)
:"memory", "cc");
} else {
- VM_LOG(LVL_INFO, "INVEPT instruction is not supported by CPU\n");
+ VM_LOG(LVL_DEBUG, "INVEPT instruction is not supported by CPU\n");
}
}
@@ -228,7 +228,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
pdpte->
pe.mt = 6; /* write-back memory type */
pdpte->pe.ign_pat = 1; /* ignore PAT type */
pdpte->pe.is_page = 1;
- VM_LOG(LVL_INFO, "New PDPT Entry: 0x%"PRIx64"\n", pdpte->val);
+ VM_LOG(LVL_DEBUG, "New PDPT Entry: 0x%"PRIx64"\n", pdpte->val);
rc = VMM_OK;
/* new entry. Invalidate EPT */
goto _invalidate_ept;
@@ -248,7 +248,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
pdpte->te.pd_base = EPT_PHYS_4KB_PFN(phys);
pdpte->val &= EPT_PROT_MASK;
pdpte->val |= pg_prot;
- VM_LOG(LVL_INFO, "New PD Page at 0x%"PRIx64" (Phys: 0x%"PRIx64")\n", virt, phys);
+ VM_LOG(LVL_DEBUG, "New PD Page at 0x%"PRIx64" (Phys: 0x%"PRIx64")\n", virt, phys);
} else { /* page is already allocated, a mapping in locality exists */
if (vmm_host_pa2va(e_phys, &virt) != VMM_OK) {
VM_LOG(LVL_ERR, "Couldn't map PDE physical 0x%"PRIx64" to virtual\n",
@@ -267,7 +267,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
decode_ept_entry(EPT_LEVEL_PDE, (void *)pde, &e_phys, &e_pg_prot);
if (pde->pe.is_page) {
- VM_LOG(LVL_INFO, "PDE is a 2MB Page!\n");
+ VM_LOG(LVL_DEBUG, "PDE is a 2MB Page!\n");
/* this is marked as 1GB page and new mapping wants otherwise
* then its a problem. Caller didn't free this mapping prior
* to calling this function */
@@ -318,7 +318,7 @@ int ept_create_pte_map(struct vcpu_hw_context *context,
} else {
/* Ok. So this is PDE. Lets find PTE now. */
if (!e_pg_prot) { /* page for PTE is not currently set */
- VM_LOG(LVL_INFO, "Page protection bits not set in PTE page. Creating new one.\n");
+ VM_LOG(LVL_DEBUG, "Page protection bits not set in PTE page. Creating new one.\n");
virt = get_free_page_for_pagemap(context, &phys);
/* allocate a new PTE page */
if (!virt) {
@@ -395,7 +395,7 @@ int setup_ept(struct vcpu_hw_context *context)
eptp_t *eptp = (eptp_t *)&context->eptp;
virtual_addr_t pml4 = get_free_page_for_pagemap(context, &pml4_phys);
- VM_LOG(LVL_INFO, "%s: PML4 vaddr: 0x%016lx paddr: 0x%016lx\n",
+ VM_LOG(LVL_DEBUG, "%s: PML4 vaddr: 0x%016lx paddr: 0x%016lx\n",
__func__, pml4, pml4_phys);
if (!pml4) {
diff --git a/arch/x86/cpu/common/vm/vtx/intercept.c b/arch/x86/cpu/common/vm/vtx/intercept.c
index ab7f3e26..9b823358 100644
--- a/arch/x86/cpu/common/vm/vtx/intercept.c
+++ b/arch/x86/cpu/common/vm/vtx/intercept.c
@@ -40,30 +40,96 @@
#include <vm/vmx_intercept.h>
static inline
-int vmx_handle_guest_realmode_page_fault(struct vcpu_hw_context *context,
- physical_addr_t fault_gphys,
- physical_addr_t hphys_addr)
+int vmx_handle_guest_realmode_page_fault(struct vcpu_hw_context *context)
{
int rc;
+ u32 flags;
+ physical_addr_t hphys_addr;
+ physical_size_t availsz;
+ physical_addr_t fault_gphys;
+ struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
- fault_gphys &= PAGE_MASK;
- hphys_addr &= PAGE_MASK;
+ physical_addr_t gla = vmr(GUEST_LINEAR_ADDRESS);
+
+ VM_LOG(LVL_DEBUG, "[Real Mode] Faulting Address: 0x%"PRIx64"\n", gla);
+
+ fault_gphys = (0xFFFF0000ULL + gla);
+
+ VM_LOG(LVL_DEBUG, "(Real Mode) Looking for map from guest address: 0x%08lx\n",
+ (fault_gphys & PAGE_MASK));
+
+ rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
+ PAGE_SIZE, &hphys_addr, &availsz, &flags);
+ if (rc) {
+ VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx64"\n", fault_gphys);
+ goto guest_bad_fault;
+ }
- VM_LOG(LVL_DEBUG, "Handle Page Fault: gphys: 0x%"PRIx64" hphys: 0x%"PRIx64"\n",
- fault_gphys, hphys_addr);
+ if (availsz < PAGE_SIZE) {
+ VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
+ rc = VMM_EFAIL;
+ goto guest_bad_fault;
+ }
+
+ if (flags & (VMM_REGION_REAL | VMM_REGION_ALIAS)) {
+ VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx64" HP: 0x%"PRIx64" Size: %lu\n", gla, hphys_addr, availsz);
- rc = ept_create_pte_map(context, fault_gphys, hphys_addr, PAGE_SIZE,
- (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
- VM_LOG(LVL_DEBUG, "ept_create_pte_map returned with %d\n", rc);
+ gla &= PAGE_MASK;
+ hphys_addr &= PAGE_MASK;
+ VM_LOG(LVL_DEBUG, "Handle Page Fault: gphys: 0x%"PRIx64" hphys: 0x%"PRIx64"\n",
+ fault_gphys, hphys_addr);
+
+ rc = ept_create_pte_map(context, gla, hphys_addr, PAGE_SIZE,
+ (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
+ VM_LOG(LVL_DEBUG, "ept_create_pte_map returned with %d\n", rc);
+ } else
+ rc = VMM_EFAIL;
+
+ guest_bad_fault:
return rc;
}
+static inline
+int vmx_handle_guest_protected_mode_page_fault(struct vcpu_hw_context *context)
+{
+ physical_addr_t fault_gphys, hphys_addr;
+ physical_size_t availsz;
+ int rc;
+ u32 flags;
+ struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
+
+ fault_gphys = vmr(GUEST_LINEAR_ADDRESS);
+
+ VM_LOG(LVL_DEBUG, "(Protected Mode) Looking for map from guest address: 0x%08lx\n",
+ (fault_gphys & PAGE_MASK));
+
+ rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
+ PAGE_SIZE, &hphys_addr, &availsz, &flags);
+ if (rc) {
+ VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx64"\n", fault_gphys);
+ return VMM_EFAIL;
+ }
+
+ if (availsz < PAGE_SIZE) {
+ VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
+ return VMM_EFAIL;
+ }
+
+ fault_gphys &= PAGE_MASK;
+ hphys_addr &= PAGE_MASK;
+
+ VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx64" HP: 0x%"PRIx64" Size: %lu\n", fault_gphys, hphys_addr, availsz);
+
+ return ept_create_pte_map(context, fault_gphys, hphys_addr, PAGE_SIZE,
+ (EPT_PROT_READ | EPT_PROT_WRITE | EPT_PROT_EXEC_S));
+}
+
static inline
int guest_in_real_mode(struct vcpu_hw_context *context)
{
- if (VMX_GUEST_CR0(context) & X86_CR0_PG)
+ if (VMX_GUEST_CR0(context) & X86_CR0_PE)
return 0;
if (is_guest_address_translated(VMX_GUEST_EQ(context)))
@@ -72,73 +138,140 @@ int guest_in_real_mode(struct vcpu_hw_context *context)
return 1;
}
-int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
+static inline
+int vmx_handle_io_instruction_exit(struct vcpu_hw_context *context)
{
- u32 gla, flags;
- int rc;
- physical_addr_t hphys_addr;
- physical_size_t availsz;
- physical_addr_t fault_gphys;
- struct vmm_guest *guest = x86_vcpu_hw_context_guest(context);
vmx_io_exit_qualification_t ioe;
+ u32 wval, io_sz;
+
+ ioe.val = VMX_GUEST_EQ(context);
+ io_sz = (ioe.bits.io_size == 0 ? 1 : (ioe.bits.io_size == 1 ? 2 : 4));
+
+ if (ioe.bits.direction == 0) {
+ if (ioe.bits.port == 0x80) {
+ VM_LOG(LVL_DEBUG, "(0x%"PRIx64") CBDW: 0x%"PRIx64"\n",
+ VMX_GUEST_RIP(context), context->g_regs[GUEST_REGS_RAX]);
+ } else {
+ wval = (u32)context->g_regs[GUEST_REGS_RAX];
+
+ if (vmm_devemu_emulate_iowrite(context->assoc_vcpu, ioe.bits.port,
+ &wval, io_sz, VMM_DEVEMU_NATIVE_ENDIAN) != VMM_OK) {
+ vmm_printf("Failed to emulate OUT instruction in"
+ " guest.\n");
+ goto guest_bad_fault;
+ }
+ }
+ } else {
+ VM_LOG(LVL_DEBUG, "Read on IO Port: %d\n", ioe.bits.port);
+ if (vmm_devemu_emulate_ioread(context->assoc_vcpu, ioe.bits.port, &wval, io_sz,
+ VMM_DEVEMU_NATIVE_ENDIAN) != VMM_OK) {
+ vmm_printf("Failed to emulate IO instruction in "
+ "guest.\n");
+ goto guest_bad_fault;
+ }
+
+ context->g_regs[GUEST_REGS_RAX] = wval;
+ }
+
+ __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+
+ return VMM_OK;
+
+ guest_bad_fault:
+ return VMM_EFAIL;
+}
+
+static inline
+int vmx_handle_crx_exit(struct vcpu_hw_context *context)
+{
+ vmx_crx_move_eq_t crx_eq;
+
+ crx_eq.val = VMX_GUEST_EQ(context);
- if (unlikely(guest == NULL))
- vmm_panic("%s: NULL guest on vmexit\n", __func__);
+ if (crx_eq.bits.reg > GUEST_REGS_R15) {
+ VM_LOG(LVL_ERR, "Guest Move to CR0 with invalid reg %d\n", crx_eq.bits.reg);
+ goto guest_bad_fault;
+ }
+ /* Move to CRx */
+ if (crx_eq.bits.type == 0) {
+ switch(crx_eq.bits.cr_num) {
+ case 0:
+ __vmwrite(GUEST_CR0, (VMX_GUEST_CR0(context) | context->g_regs[crx_eq.bits.reg]));
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR0\n",
+ crx_eq.bits.reg, (VMX_GUEST_CR0(context) | context->g_regs[crx_eq.bits.reg]));
+ break;
+ case 3:
+ __vmwrite(GUEST_CR3, context->g_regs[crx_eq.bits.reg]);
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR3\n",
+ crx_eq.bits.reg, context->g_regs[crx_eq.bits.reg]);
+ break;
+ case 4:
+ __vmwrite(GUEST_CR4, context->g_regs[crx_eq.bits.reg]);
+ VM_LOG(LVL_DEBUG, "Moving %d register (value: 0x%"PRIx64") to CR4\n",
+ crx_eq.bits.reg, context->g_regs[crx_eq.bits.reg]);
+ break;
+ default:
+ VM_LOG(LVL_ERR, "Guest trying to write to reserved CR%d\n", crx_eq.bits.cr_num);
+ goto guest_bad_fault;
+ }
+ } else if (crx_eq.bits.type == 1) { /* Move from CRx */
+ switch(crx_eq.bits.cr_num) {
+ case 0:
+ //context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR0);
+ //VM_LOG(LVL_DEBUG, "Moving CR3 to register %d\n",
+ // crx_eq.bits.reg);
+ break;
+ case 3:
+ context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR3);
+ VM_LOG(LVL_DEBUG, "Moving CR3 to register %d\n",
+ crx_eq.bits.reg);
+ break;
+ case 4:
+ context->g_regs[crx_eq.bits.reg] = vmr(GUEST_CR4);
+ VM_LOG(LVL_DEBUG, "Moving CR4 to register %d\n",
+ crx_eq.bits.reg);
+ break;
+ default:
+ VM_LOG(LVL_ERR, "Guest trying to write to reserved CR%d\n", crx_eq.bits.cr_num);
+ goto guest_bad_fault;
+ }
+ } else {
+ VM_LOG(LVL_ERR, "LMSW not supported yet\n");
+ goto guest_bad_fault;
+ }
+ __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+
+ return VMM_OK;
+
+ guest_bad_fault:
+ return VMM_EFAIL;
+
+}
+
+int vmx_handle_vmexit(struct vcpu_hw_context *context, u32 exit_reason)
+{
switch (exit_reason) {
case EXIT_REASON_EPT_VIOLATION:
- gla = vmr(GUEST_LINEAR_ADDRESS);
-
/* Guest in real mode */
if (guest_in_real_mode(context)) {
if (is_guest_linear_address_valid(VMX_GUEST_EQ(context))) {
- fault_gphys = (0xFFFF0000 + gla);
-
- VM_LOG(LVL_DEBUG, "(Real Mode) Looking for map from guest address: 0x%08lx\n",
- (fault_gphys & PAGE_MASK));
- rc = vmm_guest_physical_map(guest, (fault_gphys & PAGE_MASK),
- PAGE_SIZE, &hphys_addr, &availsz, &flags);
- if (rc) {
- VM_LOG(LVL_ERR, "ERROR: No region mapped to guest physical 0x%"PRIx32"\n", gla);
- goto guest_bad_fault;
- }
-
- if (availsz < PAGE_SIZE) {
- VM_LOG(LVL_ERR, "ERROR: Size of the available mapping less than page size (%lu)\n", availsz);
- goto guest_bad_fault;
- }
-
- if (flags & (VMM_REGION_REAL | VMM_REGION_ALIAS)) {
- VM_LOG(LVL_DEBUG, "GP: 0x%"PRIx32" HP: 0x%"PRIx64" Size: %lu\n", gla, hphys_addr, availsz);
- return vmx_handle_guest_realmode_page_fault(context, (gla & PAGE_MASK), (hphys_addr & PAGE_MASK));
- } else
- VM_LOG(LVL_ERR, "Unhandled guest fault region flags: 0x%"PRIx32"\n", flags);
+ return vmx_handle_guest_realmode_page_fault(context);
} else {
VM_LOG(LVL_ERR, "(Realmode pagefault) VMX reported invalid linear address.\n");
- goto guest_bad_fault;
+ return VMM_EFAIL;
}
- } else {
- VM_LOG(LVL_ERR, "Handle protected mode guest.\n");
- goto guest_bad_fault;
+ } else { /* Protected mode */
+ return vmx_handle_guest_protected_mode_page_fault(context);
}
break;
case EXIT_REASON_IO_INSTRUCTION:
- ioe.val = VMX_GUEST_EQ(context);
-
- if (ioe.bits.direction == 0) {
- if (ioe.bits.port == 0x80) {
- VM_LOG(LVL_INFO, "(0x%"PRIx64") CBDW: 0x%"PRIx64"\n",
- VMX_GUEST_RIP(context), context->g_regs[GUEST_REGS_RAX]);
- }
- } else {
- VM_LOG(LVL_ERR, "Read on IO Port: %d\n", ioe.bits.port);
- while(1);
- }
+ return vmx_handle_io_instruction_exit(context);
- __vmwrite(GUEST_RIP, VMX_GUEST_NEXT_RIP(context));
+ case EXIT_REASON_CR_ACCESS:
+ return vmx_handle_crx_exit(context);
- return VMM_OK;
default:
goto guest_bad_fault;
}
@@ -172,14 +305,15 @@ void vmx_vcpu_exit(struct vcpu_hw_context *context)
break;
}
} else {
- VM_LOG(LVL_INFO, "VM Exit reason: %d\n", _exit_reason.bits.reason);
+ VM_LOG(LVL_DEBUG, "VM Exit reason: %d\n", _exit_reason.bits.reason);
VMX_GUEST_SAVE_EQ(context);
VMX_GUEST_SAVE_CR0(context);
VMX_GUEST_SAVE_RIP(context);
+ VM_LOG(LVL_DEBUG, "Guest RIP: 0x%"PRIx64"\n", VMX_GUEST_RIP(context));
if (vmx_handle_vmexit(context, _exit_reason.bits.reason) != VMM_OK) {
- VM_LOG(LVL_ERR, "Error handling VMExit\n");
+ VM_LOG(LVL_DEBUG, "Error handling VMExit (Reason: %d)\n", _exit_reason.bits.reason);
goto unhandled_vm_exit;
}
@@ -187,7 +321,6 @@ void vmx_vcpu_exit(struct vcpu_hw_context *context)
}
unhandled_vm_exit:
- VM_LOG(LVL_ERR, "Unhandled vmexit\n");
- while(1);
- //context->vcpu_emergency_shutdown(context);
+ VM_LOG(LVL_DEBUG, "Unhandled vmexit\n");
+ context->vcpu_emergency_shutdown(context);
}
diff --git a/arch/x86/cpu/common/vm/vtx/vmx.c b/arch/x86/cpu/common/vm/vtx/vmx.c
index 45513e45..b735c1d4 100644
--- a/arch/x86/cpu/common/vm/vtx/vmx.c
+++ b/arch/x86/cpu/common/vm/vtx/vmx.c
@@ -223,8 +223,6 @@ static int __vmcs_run(struct vcpu_hw_context *context, bool resume)
int rc = 0;
u64 ins_err = 0;
- VM_LOG(LVL_INFO, "Starting to %s guest...\n", (resume ? "resume" : "launch"));
-
__asm__ __volatile__("pushfq\n\t" /* Save flags */
"movq $vmx_return, %%rax\n\t"
"vmwrite %%rax, %%rbx\n\t"
--
2.27.0