o Add an empty reset-vector entry so that the guest faults when it
  starts running.
o Add support for EPT invalidation functions
arch/x86/cpu/common/include/cpu_vm.h | 1 +
arch/x86/cpu/common/include/vm/ept.h | 61 +++++++++++----
arch/x86/cpu/common/include/vm/vmcs.h | 2 +
arch/x86/cpu/common/include/vm/vmx.h | 12 +++
arch/x86/cpu/common/vm/vtx/ept.c | 105 ++++++++++++++++++--------
5 files changed, 136 insertions(+), 45 deletions(-)
diff --git a/arch/x86/cpu/common/include/cpu_vm.h b/arch/x86/cpu/common/include/cpu_vm.h
index 0067fb95..e0bd9076 100644
--- a/arch/x86/cpu/common/include/cpu_vm.h
+++ b/arch/x86/cpu/common/include/cpu_vm.h
@@ -3,6 +3,7 @@
#include <multiboot.h>
#include <vm/vmcb.h>
+#include <vm/vmx.h>
#include <processor_flags.h>
#include <cpu_features.h>
#include <vmm_types.h>
diff --git a/arch/x86/cpu/common/include/vm/ept.h b/arch/x86/cpu/common/include/vm/ept.h
index 7863da8a..99d87481 100644
--- a/arch/x86/cpu/common/include/vm/ept.h
+++ b/arch/x86/cpu/common/include/vm/ept.h
@@ -41,6 +41,18 @@
extern struct cpuinfo_x86 cpu_info;
#define PHYS_ADDR_BIT_MASK ((0x1ul << cpu_info.phys_bits) - 1)
+#define EPT_PAGE_MASK_2M (PHYS_ADDR_BIT_MASK >> 21)
+#define EPT_PAGE_MASK_4K (PHYS_ADDR_BIT_MASK >> 12)
+#define EPT_PAGE_MASK_1G (PHYS_ADDR_BIT_MASK >> 30)
+
+#define EPT_PHYS_FILTER(_p) (_p & PHYS_ADDR_BIT_MASK)
+#define EPT_PHYS_2MB_PFN(_p) (EPT_PHYS_FILTER(_p) >> 21)
+#define EPT_PHYS_1GB_PFN(_p) (EPT_PHYS_FILTER(_p) >> 30)
+#define EPT_PHYS_4KB_PFN(_p) (EPT_PHYS_FILTER(_p) >> 12)
+
+#define EPT_PHYS_2MB_PAGE(_p) ((_p & EPT_PAGE_MASK_2M) << 21)
+#define EPT_PHYS_1GB_PAGE(_p) ((_p & EPT_PAGE_MASK_1G) << 30)
+#define EPT_PHYS_4KB_PAGE(_p) ((_p & EPT_PAGE_MASK_4K) << 12)
typedef union {
u64 val;
@@ -49,7 +61,9 @@ typedef union {
u64 mt:3; /* Memory type: 0 Uncacheable 6 Writeback */
u64 pgwl:3; /* Pagewalk length */
u64 en_ad:1; /* Enable accessed/dirty flags for EPT structures */
- u64 res:5; /* reserved */
+ u64 en_ssr:1; /* Setting this control to 1 enables enforcement of
+ access rights for supervisor shadow-stack pages */
+ u64 res:4; /* reserved */
u64 pml4:52; /* pml4 physical base, only bits N-1:12 are valid
* where N is the physical address width of the
* logical processor */
@@ -65,10 +79,12 @@ typedef union {
u64 x:1; /* Execute access */
u64 res:5; /* Reserved */
u64 accessed:1; /* Depends on Bit 6 in EPTP. Currently not set */
- u64 ign:3; /* Ignored */
+ u64 ign:1; /* Ignored */
+ u64 mbe:1; /* Mode based execution */
+ u64 ign1:1;
u64 pdpt_base:40; /* Physical address of 4-KByte aligned EPT
* page-directory-pointer table referenced by this entry */
- u64 ign1:12; /* Ignored */
+ u64 ign2:12; /* Ignored */
} bits;
} ept_pml4e_t;
@@ -84,9 +100,14 @@ typedef union {
u64 is_page:1; /* Ignore */
u64 accessed:1; /* Accessed (If bit 6 set in EPTP) */
u64 dirty:1; /* Dirty (If bit 6 set in EPTP) */
- u64 ign1:2; /* Ignored */
+ u64 mbe:1;
+ u64 ign1:1; /* Ignored */
u64 res:18; /* Must be zero */
- u64 phys:22; /* Physical address of the 1 GiB page */
+ u64 phys:22; /* Physical address of the 1 GiB page */
+ u64 ign2:8;
+ u64 superv_ss:1; /* supervisor shadow stack */
+ u64 ign3:2; /* ignored */
+ u64 sup_ve:1; /* suppress #VE exception */
} pe;
struct {
@@ -95,10 +116,11 @@ typedef union {
u64 x:1; /* Execute */
u64 res:5; /* Reservd */
u64 accessed:1; /* Accessed by software (if Bit 6 in EPTP is set) */
- u64 ign:3; /* Ignored */
+ u64 ign:1; /* Ignored */
+ u64 mbe:1; /* mode based exec */
+ u64 ign1:1;
u64 pd_base:40; /* Page directory base */
- u64 ign1:11; /* Ignored */
- u64 sup_ve:1; /* Supress #VE */
+ u64 ign2:12; /* Ignored */
} te;
} ept_pdpte_t;
@@ -114,10 +136,13 @@ typedef union {
u64 is_page:1; /* Must be set to 1 */
u64 accessed:1; /* Region was accessed by software */
u64 dirty:1; /* Region was written to by software */
- u64 ign:2; /* Ignored */
- u64 res:18; /* Must be zero */
- u64 phys:22; /* Physical address of 2MiB page */
- u64 ign1:11; /* Ignored */
+ u64 mbe:1;
+ u64 ign:1; /* Ignored */
+ u64 res:9; /* Must be zero */
+ u64 phys:31; /* Physical address of 2MiB page */
+ u64 ign1:8; /* Ignored */
+ u64 superv_ss:1;
+ u64 ign2:2;
u64 sup_ve:1; /* Suppress #VE */
} pe;
@@ -128,7 +153,9 @@ typedef union {
u64 res:4; /* Reserved */
u64 is_page:1; /* Must be zero */
u64 accessed:1; /* Accessed by software (if bit 6 is set in EPTP) */
- u64 ign:3; /* Ignore */
+ u64 ign:1; /* Ignore */
+ u64 mbe:1;
+ u64 ign1:1;
u64 pt_base:40; /* Physical address of the page table */
u64 res1:12; /* Reserved */
} te;
@@ -146,9 +173,13 @@ typedef union {
u64 ign:1; /* Ignored */
u64 accessed:1; /* Accessed by software (if bit 6 in eptp set) */
u64 dirty:1; /* Written by software (if bit 6 in eptp set) */
- u64 ign1:2; /* Ignored */
+ u64 mbe:1;
+ u64 ign1:1; /* Ignored */
u64 phys:40; /* Physical address of 4 KiB page mapped */
- u64 ign2:11; /* Ignored */
+ u64 ign2:8; /* Ignored */
+ u64 superv_ss:1;
+ u64 subpage_w:1;
+ u64 ign3:1;
u64 sup_ve:1; /* Suppress #VE */
} pe;
} ept_pte_t;
diff --git a/arch/x86/cpu/common/include/vm/vmcs.h b/arch/x86/cpu/common/include/vm/vmcs.h
index c0026f47..801b1e9c 100644
--- a/arch/x86/cpu/common/include/vm/vmcs.h
+++ b/arch/x86/cpu/common/include/vm/vmcs.h
@@ -329,6 +329,8 @@ enum vmcs_field {
#define GUEST_ACTIVITY_ACTIVE 0
#define GUEST_ACTIVITY_HLT 1
+struct vcpu_hw_context;
+
extern void vmx_detect_capability(void);
extern struct vmcs* create_vmcs(void);
extern struct vmcs *current_vmcs(physical_addr_t *phys);
diff --git a/arch/x86/cpu/common/include/vm/vmx.h b/arch/x86/cpu/common/include/vm/vmx.h
index a47450df..2f71d1fb 100644
--- a/arch/x86/cpu/common/include/vm/vmx.h
+++ b/arch/x86/cpu/common/include/vm/vmx.h
@@ -156,12 +156,21 @@ extern u64 vmx_ept_vpid_cap;
(vmx_ept_vpid_cap & VMX_EPT_MEMORY_TYPE_WB)
#define cpu_has_vmx_ept_2MB \
(vmx_ept_vpid_cap & VMX_EPT_SUPERPAGE_2MB)
+#define cpu_has_vmx_invept \
+ (vmx_ept_vpid_cap & VMX_EPT_INVEPT_INSTRUCTION)
#define cpu_has_vmx_ept_invept_single_context \
(vmx_ept_vpid_cap & VMX_EPT_INVEPT_SINGLE_CONTEXT)
+#define cpu_has_vmx_ept_invept_all_context \
+ (vmx_ept_vpid_cap & VMX_EPT_INVEPT_ALL_CONTEXT)
#define INVEPT_SINGLE_CONTEXT 1
#define INVEPT_ALL_CONTEXT 2
+struct invept_desc {
+ u64 eptp;
+ u64 reserved;
+} __attribute__ ((packed));
+
#define cpu_has_vmx_vpid_invvpid_individual_addr \
(vmx_ept_vpid_cap & VMX_VPID_INVVPID_INDIVIDUAL_ADDR)
#define cpu_has_vmx_vpid_invvpid_single_context \
@@ -403,6 +412,9 @@ static inline int __vmxon(u64 addr)
#define EPT_PAGETABLE_ENTRIES 512
+struct vcpu_hw_context;
+struct cpuinfo_x86;
+
extern int __init intel_init(struct cpuinfo_x86 *cpuinfo);
extern int intel_setup_vm_control(struct vcpu_hw_context *context);
diff --git a/arch/x86/cpu/common/vm/vtx/ept.c b/arch/x86/cpu/common/vm/vtx/ept.c
index 66e64911..7531a0a0 100644
--- a/arch/x86/cpu/common/vm/vtx/ept.c
+++ b/arch/x86/cpu/common/vm/vtx/ept.c
@@ -38,33 +38,25 @@
static inline u32 ept_pml4_index(physical_addr_t gphys)
{
- if (gphys & (PHYS_ADDR_BIT_MASK))
- return ((u32)-1);
-
+ gphys &= PHYS_ADDR_BIT_MASK;
return ((gphys >> 39) & 0x1fful);
}
static inline u32 ept_pdpt_index(physical_addr_t gphys)
{
- if (gphys & (PHYS_ADDR_BIT_MASK))
- return ((u32)-1);
-
+ gphys &= PHYS_ADDR_BIT_MASK;
return ((gphys >> 30) & 0x1fful);
}
static inline u32 ept_pd_index(physical_addr_t gphys)
{
- if (gphys & (PHYS_ADDR_BIT_MASK))
- return ((u32)-1);
-
+ gphys &= PHYS_ADDR_BIT_MASK;
return ((gphys >> 21) & 0x1fful);
}
static inline u32 ept_pt_index(physical_addr_t gphys)
{
- if (gphys & (PHYS_ADDR_BIT_MASK))
- return ((u32)-1);
-
+ gphys &= PHYS_ADDR_BIT_MASK;
return ((gphys >> 12) & 0x1fful);
}
@@ -84,86 +76,139 @@ int ept_create_pte(struct vcpu_hw_context *context,
physical_addr_t phys;
virtual_addr_t virt;
- if (pml4_index == -1 || pdpt_index == -1
- || pd_index == -1 || pt_index == -1) {
- VM_LOG(LVL_ERR,
- "Page table index calculation failed. (gphys: 0x%lx)\n",
- gphys);
- return VMM_EFAIL;
- }
+ VM_LOG(LVL_DEBUG, "pml4: 0x%x pdpt: 0x%x pd: 0x%x pt: 0x%x\n",
+ pml4_index, pdpt_index, pd_index, pt_index);
pml4e = (ept_pml4e_t *)(&pml4[pml4_index]);
+ pml4e->val = 0;
pml4e->val &= EPT_PROT_MASK;
- pml4e->val |= pg_prot;
+ pml4e->val |= 0x3;
virt = get_free_page_for_pagemap(context, &phys);
if (!virt) {
VM_LOG(LVL_ERR, "System is out of guest page table memory\n");
return VMM_ENOMEM;
}
- pml4e->bits.pdpt_base = phys;
+ memset((void *)virt, 0, PAGE_SIZE);
+ pml4e->bits.pdpt_base = EPT_PHYS_4KB_PFN(phys);
+ VM_LOG(LVL_DEBUG, "%s: PML4E: 0x%016lx\n", __func__, pml4e->val);
+ phys = 0;
pdpte = (ept_pdpte_t *)(&((u64 *)virt)[pdpt_index]);
+ pdpte->val = 0;
pdpte->val &= EPT_PROT_MASK;
- pdpte->val |= pg_prot;
+ pdpte->val |= 0x3;
virt = get_free_page_for_pagemap(context, &phys);
if (!virt) {
VM_LOG(LVL_ERR, "System is out of guest page table memory\n");
return VMM_ENOMEM;
}
if (pg_size == EPT_PAGE_SIZE_1G) {
- pdpte->pe.phys = hphys;
+ pdpte->pe.phys = EPT_PHYS_1GB_PFN(hphys);
pdpte->
pe.mt = 6; /* write-back memory type */
pdpte->pe.ign_pat = 1; /* ignore PAT type */
pdpte->pe.is_page = 1;
goto _done;
} else {
- pdpte->te.pd_base = phys;
+ pdpte->te.pd_base = EPT_PHYS_4KB_PFN(phys);
}
+ VM_LOG(LVL_DEBUG, "%s: PDPTE: 0x%016lx\n", __func__, pdpte->val);
+ phys = 0;
pde = (ept_pde_t *)(&((u64 *)virt)[pd_index]);
+ pde->val = 0;
pde->val &= EPT_PROT_MASK;
- pde->val |= pg_prot;
+ pde->val |= 0x3;
virt = get_free_page_for_pagemap(context, &phys);
if (!virt) {
VM_LOG(LVL_ERR, "System is out of guest page table memory\n");
return VMM_ENOMEM;
}
if (pg_size == EPT_PAGE_SIZE_2M) {
- pde->pe.phys = hphys;
+ pde->pe.phys = EPT_PHYS_2MB_PFN(hphys);
pde->
pe.mt = 6;
pde->pe.ign_pat = 1;
pde->pe.is_page = 1;
goto _done;
} else {
- pde->te.pt_base = phys;
+ pde->te.pt_base = EPT_PHYS_4KB_PFN(phys);
}
+ VM_LOG(LVL_DEBUG, "%s: PDE: 0x%016lx\n", __func__, pde->val);
pte = (ept_pte_t *)(&((u64 *)virt)[pt_index]);
+ pte->val = 0;
pte->val &= EPT_PROT_MASK;
pte->val |= pg_prot;
- pte->pe.phys = hphys;
+ pte->
pe.mt = 6;
+ pte->pe.phys = EPT_PHYS_4KB_PFN(hphys);
+ VM_LOG(LVL_DEBUG, "%s: PTE: 0x%016lx\n", __func__, pte->val);
_done:
return VMM_OK;
}
+static inline void
+invalidate_ept (int type, struct invept_desc *desc)
+{
+ /* Deliberately not using the exception table here:
+ * if the feature is not present, it would needlessly
+ * cause a context switch, which is more expensive. */
+ if (likely(cpu_has_vmx_invept)) {
+ /* most modern CPUs will have this */
+ if (unlikely(type == INVEPT_ALL_CONTEXT
+ && !cpu_has_vmx_ept_invept_all_context)) {
+ VM_LOG(LVL_INFO, "EPT all context flush not supported\n");
+ return;
+ }
+ if (unlikely(type == INVEPT_SINGLE_CONTEXT
+ && !cpu_has_vmx_ept_invept_single_context)) {
+ VM_LOG(LVL_INFO, "EPT single context flush not supported\n");
+ return;
+ }
+ asm volatile("invept (%0), %1\n\t"
+ ::"D"(desc), "S"((u64)type)
+ :"memory", "cc");
+ } else {
+ VM_LOG(LVL_INFO, "INVEPT instruction is not supported by CPU\n");
+ }
+}
+
int setup_ept(struct vcpu_hw_context *context)
{
+ struct invept_desc id;
physical_addr_t pml4_phys;
eptp_t *eptp = (eptp_t *)&context->eptp;
virtual_addr_t pml4 = get_free_page_for_pagemap(context, &pml4_phys);
+ VM_LOG(LVL_INFO, "%s: PML4 vaddr: 0x%016lx paddr: 0x%016lx\n",
+ __func__, pml4, pml4_phys);
+
if (!pml4) {
VM_LOG(LVL_ERR, "%s: Failed to allocate EPT page\n", __func__);
return VMM_ENOMEM;
+
}
- eptp->
bits.mt = 6; /* Write back */
+ /* most of the reserved bits want zeros */
+ memset((void *)pml4, 0, PAGE_SIZE);
+
+ eptp->val = 0;
+ eptp->
bits.mt = (vmx_ept_vpid_cap & (0x01UL << 8) ? 0 /* UC */
+ : (vmx_ept_vpid_cap & (0x1UL << 14)) ? 6 /* WB */
+ : 6);
+
eptp->bits.pgwl = 3; /* 4 page levels */
eptp->bits.en_ad = 0;
- eptp->bits.pml4 = pml4_phys;
+ eptp->bits.pml4 = EPT_PHYS_4KB_PFN(pml4_phys);
+
+ VM_LOG(LVL_DEBUG, "%s: EPTP: 0x%16lx (0x%16lx)\n", __func__, eptp->val, context->eptp);
context->n_cr3 = pml4;
+ ept_create_pte(context, 0xFFF0ULL, 0, 4096, 0);
+
+ VM_LOG(LVL_DEBUG, "Invalidating EPT\n");
+
+ id.eptp = eptp->val;
+ invalidate_ept(INVEPT_SINGLE_CONTEXT, &id);
return VMM_OK;
}
--
2.27.0