Port the machine-check architecture device to the extension framework. From: Philip Soltero --- palacios/src/devices/Kconfig | 8 palacios/src/devices/Makefile | 2 palacios/src/devices/mcheck.c | 560 ---------------------------------- palacios/src/extensions/Kconfig | 10 - palacios/src/extensions/Makefile | 2 palacios/src/extensions/ext_mcheck.c | 558 ++++++++++++++++++++++++++++++++++ 6 files changed, 568 insertions(+), 572 deletions(-) delete mode 100644 palacios/src/devices/mcheck.c create mode 100644 palacios/src/extensions/ext_mcheck.c diff --git a/palacios/src/devices/Kconfig b/palacios/src/devices/Kconfig index d3a41f1..575223b 100644 --- a/palacios/src/devices/Kconfig +++ b/palacios/src/devices/Kconfig @@ -458,13 +458,5 @@ config CHAR_STREAM help Include Stream based character device frontend - -config MCHECK - bool "Machine Check architecture" - default n - depends on EXPERIMENTAL - help - Include Machine Check injection architecture - endmenu diff --git a/palacios/src/devices/Makefile b/palacios/src/devices/Makefile index f3a553e..0ab7f7f 100644 --- a/palacios/src/devices/Makefile +++ b/palacios/src/devices/Makefile @@ -42,8 +42,6 @@ obj-$(V3_CONFIG_PASSTHROUGH_PCI) += pci_passthrough.o obj-$(V3_CONFIG_SYMMOD) += lnx_virtio_symmod.o obj-$(V3_CONFIG_CHAR_STREAM) += char_stream.o -obj-$(V3_CONFIG_MCHECK) += mcheck.o - obj-$(V3_CONFIG_VGA) += vga.o obj-$(V3_CONFIG_PCI_FRONT) += pci_front.o diff --git a/palacios/src/devices/mcheck.c b/palacios/src/devices/mcheck.c deleted file mode 100644 index ec93d7f..0000000 --- a/palacios/src/devices/mcheck.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * This file is part of the Palacios Virtual Machine Monitor developed - * by the V3VEE Project with funding from the United States National - * Science Foundation and the Department of Energy. - * - * The V3VEE Project is a joint project between Northwestern University - * and the University of New Mexico. 
You can find out more at - * http://www.v3vee.org - * - * Copyright (c) 2008, The V3VEE Project - * All rights reserved. - * - * Copyright (c) 2008, Philip Soltero - * Copyright (c) 2008, The V3VEE Project - * All rights reserved. - * - * Author: Philip Soltero - * - * This is free software. You are permitted to use, - * redistribute, and modify it as specified in the file "V3VEE_LICENSE". - */ - -/** - * @file Virtualized machine-check architecture. - * - * @author Philip Soltero - */ - -#include -#include -#include -#include -#include -#include -#include - -#ifndef V3_CONFIG_DEBUG_MCHECK -#undef PrintDebug -#define PrintDebug(fmt, args...) -#endif - -#define CPUID_0000_0001 0x00000001 -#define CPUID_8000_0001 0x80000001 - -// 6 error reporting banks. This may be configurable in the future. -#define MC_REG_BANKS 6 -#define MCE_INTERRUPT 18 - -#define MSG_PRE "MCHECK: " - -#define MCG_CAP 0x0179 -#define MCG_STAT 0x017A -#define MCG_CTRL 0x017B - - - -/* I have no idea what Intel was thinking (or maybe they just weren't) - * but the MCi registers are completely non-standard across Intel's platforms and are a total mess. - * Any derivative of the pentium-M (i.e. all Core CPU lines) completely disregard the - * architectural standard that Intel itself created... - * For these CPUs: the MC4 MSRs switch locations with the MC3s, - * also every MCi below MC3 (including MC4) does not have a MCi_MISC MSR. - * - * So for now, screw it, we'll use AMD's standard - */ - -/* AMD MC Banks: - Bank 0 : Data Cache. - Bank 1 : Instruction Cache. - Bank 2 : Bus Unit. - Bank 3 : Load Store Unit. - Bank 4 : Northbridge and DRAM. 
-*/ - -static const uint32_t amd_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414}; -static const uint32_t pentium_6_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414}; -static const uint32_t pentium_m_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x0410, 0x040c, 0x0414}; -static const uint32_t ia32_mci_bases[] = { 0x0400, 0x0404, 0x0408, 0x040c, - 0x0410, 0x0414, 0x0418, 0x041c, - 0x0420, 0x0424, 0x0428, 0x042c, - 0x0430, 0x0434, 0x0438, 0x043c, - 0x0440, 0x0444, 0x0448, 0x044c, - 0x0450, 0x0454 }; -#define MCi_MASK 0xfffffffc -#define MCi_CTRL 0x00 -#define MCi_STAT 0x01 -#define MCi_ADDR 0x02 -#define MCi_MISC 0x03 - - - - -/** - * MCA status low and high registers, MC4_STAT, MSR0000_0411. - */ -struct mc4_stat_msr { - union { - uint64_t value; - struct { - uint_t syndrome : 8; - uint_t reserved : 3; - uint_t error_code_ext : 5; - uint_t error_code : 16; - uint_t err_cpu : 4; - uint_t ltd_link : 4; - uint_t scrub : 1; - uint_t sublink : 1; - uint_t mca_stat_sub_cache : 2; - uint_t reserved_01 : 1; - uint_t uecc : 1; - uint_t cecc : 1; - uint_t syndrome2 : 8; - uint_t reserved_02 : 1; - uint_t err_cpu_val : 1; - uint_t pcc : 1; - uint_t addr_v : 1; - uint_t misc_v : 1; - uint_t en : 1; - uint_t uc : 1; - uint_t over : 1; - uint_t val : 1; - }__attribute__((packed)); - }__attribute__((packed)); -} __attribute__((packed)); - -/** - * MCA address low and high registers, MC4_ADDR, MSR0000_0412. - */ -struct mc4_addr_msr { - union { - uint64_t value; - - struct { - uint64_t addr32 : 36; - uint32_t reserved : 28; - } __attribute__((packed)); - - uint64_t addr64; - } __attribute__((packed)); -} __attribute__((packed)); - -/** - * Global machine-check capabilities register, MCG_CAP. 
- */ -struct mcg_cap_msr { - union { - uint64_t value; - struct { - uint32_t count : 8; - uint32_t mcg_ctl_p : 1; // CTRL Present - uint64_t reserved : 55; - } __attribute__((packed)); - } __attribute__((packed)); -} __attribute__((packed)); - -/** - * Global machine-check status register, MCG_STAT. - */ -struct mcg_stat_msr { - union { - uint64_t value; - struct { - uint32_t ripv : 1; - uint32_t eipv : 1; - uint32_t mcip : 1; // Machine-check in progress. - uint64_t reserved : 61; - } __attribute__((packed)); - } __attribute__((packed)); -} __attribute__((packed)); - -/** - * Global machine-check control register, MCG_CTRL. - */ -struct mcg_ctl_msr { - union { - uint64_t value; - struct { - uint32_t dce : 1; // Data cache register bank enable - uint32_t ice : 1; // Instruction cache register bank enable - uint32_t bue : 1; // Bus unit register bank enable - uint32_t lse : 1; // Load-store register bank enable - uint32_t nbe : 1; // Northbridge register bank enable - uint32_t fre : 1; // Fixed issue reorder buffer register bank enable - uint64_t unused : 58; - } __attribute__((packed)); - } __attribute__((packed)); -} __attribute__((packed)); - -/** - * A temporary structure for unimplemented machine-check error reporting banks. - */ -struct mci_bank { - uint32_t base; - struct v3_msr ctl; - struct v3_msr stat; - struct v3_msr addr; - struct v3_msr misc; -}; - -struct mcheck_state { - struct mcg_cap_msr mcg_cap; - struct mcg_stat_msr mcg_stat; - struct mcg_ctl_msr mcg_ctl; - - /* Note that these are in logical order not MSR order */ - /* So MC4 is always at mci_regs[4] even if the MSR is before MC3's */ - struct mci_bank mci_regs[MC_REG_BANKS]; -}; - - -/** - * Handles a guest read of cpuid function 0000_0001 and 8000_0001. - * All bits are passthrough except for bit 14, the MCA available bit, and bit 7, the MCE available - * bit. - * - * @b The virtual MCA only uses two bits in the entire 256 bit "return value". 
If other VMM - * subsystems or devices require the virtualization of other return value bits, it is suggested that - * this hook handler be moved to a common source file where all subsystems and devices can - * virtualize the bits they need to. - */ -static int cpuid_hook_handler(struct guest_info * const info, const uint32_t cpuid, - uint32_t * const eax, uint32_t * const ebx, - uint32_t * const ecx, uint32_t * const edx, - void * const private_data) { - - // Most bits are passthrough. - v3_cpuid(cpuid, eax, ebx, ecx, edx); - - // Bit 7, MCE availability - // Bit 14, MCA availability - *edx |= 0x00004080; - - return 0; -} - -static inline -void init_state(struct mcheck_state * const state) { - int i = 0; - - memset(state, 0, sizeof(struct mcheck_state)); - - // Set the initial MCI reg base values to the current architecture - for (i = 0; i < MC_REG_BANKS; i++) { - state->mci_regs[i].base = amd_mci_bases[i]; - } -} - -/** - * Handles guest writes to MCG MSRs. - */ -static -int mcg_write_handler(struct guest_info * core, uint32_t msr, struct v3_msr src, void * priv_data) { - struct mcheck_state * state = (struct mcheck_state *)priv_data; - - switch (msr) { - case MCG_CAP: - PrintDebug(MSG_PRE "Ignoring write to MCG_CAP MSR.\n"); - break; - - case MCG_STAT: - state->mcg_stat.value = 0; - break; - - case MCG_CTRL: - if (!state->mcg_cap.mcg_ctl_p) { - PrintDebug(MSG_PRE "Ignoring write to control MSR '0x%x'. Control MSRs not supported.\n", msr); - break; - } - - // The upper 58 bits are unused and read-only. - state->mcg_ctl.value &= ~0x3f; - state->mcg_ctl.value |= src.value & 0x3f; - - break; - - default: - PrintError(MSG_PRE "Reading from invalid MSR: %x\n", msr); - return -1; - } - - return 0; -} - - -/** - * Handles guest reads to MCG MSRs. 
- */ -static -int mcg_read_handler(struct guest_info * core, uint32_t msr, struct v3_msr * dst, void * priv_data) { - struct mcheck_state * state = (struct mcheck_state *)priv_data; - - switch(msr) { - case MCG_CAP: - dst->value = state->mcg_cap.value; - break; - - case MCG_STAT: - dst->value = state->mcg_stat.value; - break; - - case MCG_CTRL: - if (!state->mcg_cap.mcg_ctl_p) { - PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr); - break; - } - - dst->value = state->mcg_ctl.value; - break; - - default: - PrintError(MSG_PRE "Reading from invalid MSR: %x\n", msr); - return -1; - } - - return 0; -} - -static struct mci_bank * get_mci_reg(struct mcheck_state * state, uint32_t msr) { - int i = 0; - - for (i = 0; i < MC_REG_BANKS; i++) { - if (state->mci_regs[i].base == (msr & MCi_MASK)) { - return &(state->mci_regs[i]); - } - } - - return NULL; -} - - -/** - * Handles guest reads to MCi MSRs. - */ -static -int mci_read_handler(struct guest_info * const core, - const uint32_t msr, - struct v3_msr * const dst, - void * const priv_data) { - struct mcheck_state * const state = (struct mcheck_state *)priv_data; - struct mci_bank * mci = get_mci_reg(state, msr); - - PrintDebug(MSG_PRE "Reading value '0x%llx' for MSR '0x%x'.\n", dst->value, msr); - - if (mci == NULL) { - PrintError(MSG_PRE " MSR read for invalid MCI register 0x%x\n", msr); - return -1; - } - - switch (msr & ~MCi_MASK) { - case MCi_CTRL: - if (!state->mcg_cap.mcg_ctl_p) { - PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr); - break; - } - - dst->value = mci->ctl.value; - break; - - case MCi_STAT: - dst->value = mci->stat.value; - break; - - case MCi_ADDR: - dst->value = mci->addr.value; - break; - - case MCi_MISC: - dst->value = mci->misc.value; - break; - - default: - PrintError(MSG_PRE "Ignoring read of unhooked MSR '0x%x'. This is a bug.\n", msr); - break; - } - - return 0; -} - -/** - * Handles guest writes to MCi MSRs. 
- */ -static -int mci_write_handler(struct guest_info * const core, - const uint_t msr, - const struct v3_msr src, - void * const priv_data) { - struct mcheck_state * const state = (struct mcheck_state *)priv_data; - struct mci_bank * mci = get_mci_reg(state, msr); - - PrintDebug(MSG_PRE "Writing value '0x%llx' for MSR '0x%x'.\n", src.value, msr); - - switch (msr & ~MCi_MASK) { - case MCi_CTRL: - if (!state->mcg_cap.mcg_ctl_p) { - PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr); - break; - } - - mci->ctl.value = src.value; - break; - - case MCi_STAT: - if (src.value != 0) { - // Should be a GPF. - PrintError(MSG_PRE "Ignoring write of illegal value '0x%llx'.\n", src.value); - return -1; - } - - mci->stat.value = 0; - break; - - case MCi_ADDR: - mci->addr.value = src.value; - break; - - case MCi_MISC: - V3_Print(MSG_PRE "Ignoring write to read only miscellaneous MSR '0x%x'.\n", msr); - break; - - default: - PrintError(MSG_PRE "Ignoring write of unhooked MSR '0x%x'. This is a bug.\n", msr); - break; - } - - return 0; -} - - -/** - * Hook CPUIDs - * CPUID functions 0000_0001 and 8000_0001 are hooked to signal MC availability - * - * @return 0 for success and -1 for failure. 
- */ -static inline -int hook_cpuids(struct v3_vm_info * const vm, - struct mcheck_state * const state) { - int ret = 0; - - ret = v3_hook_cpuid(vm, CPUID_0000_0001, cpuid_hook_handler, state); - - if (ret == -1) { - PrintError(MSG_PRE "Failed to hook CPUID function 0000_0001.\n"); - return -1; - } - - ret = v3_hook_cpuid(vm, CPUID_8000_0001, cpuid_hook_handler, state); - - if (ret == -1) { - PrintError(MSG_PRE "Failed to hook CPUID function 8000_0001.\n"); - v3_unhook_cpuid(vm, CPUID_0000_0001); - return -1; - } - - return 0; -} - - -static int mcheck_free(struct mcheck_state * state) { - V3_Free(state); - return 0; -} - -static struct v3_device_ops dev_ops = { - .free = (int (*)(void *))mcheck_free, -}; - - - - -static int mcheck_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { - struct mcheck_state * state = NULL; - // char * dev_id = v3_cfg_val(cfg, "ID"); - char * dev_id = "MCHECK"; // we hardcode the device ID for now so we can always find it for #MC insertion - int ret = 0; - int i = 0; - - state = (struct mcheck_state *)V3_Malloc(sizeof(struct mcheck_state)); - - if (state == NULL) { - PrintError(MSG_PRE "Failed to allocate machine-check architecture state.\n"); - return -1; - } - - struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state); - - if (dev == NULL) { - PrintError("Could not attach device %s\n", dev_id); - V3_Free(state); - return -1; - } - - init_state(state); - - state->mcg_cap.count = MC_REG_BANKS; - - ret |= hook_cpuids(vm, state); - - /* Hook the MSRs */ - ret |= v3_dev_hook_msr(dev, MCG_CAP, mcg_read_handler, mcg_write_handler); - ret |= v3_dev_hook_msr(dev, MCG_STAT, mcg_read_handler, mcg_write_handler); - ret |= v3_dev_hook_msr(dev, MCG_CTRL, mcg_read_handler, mcg_write_handler); - - for (i = 0; i < MC_REG_BANKS; i++) { - ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base, mci_read_handler, mci_write_handler); - ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base + 1, mci_read_handler, mci_write_handler); - ret |= 
v3_dev_hook_msr(dev, state->mci_regs[i].base + 2, mci_read_handler, mci_write_handler); - ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base + 3, mci_read_handler, mci_write_handler); - } - - if (ret == -1) { - PrintError(MSG_PRE "Error hooking Device resources\n"); - v3_remove_device(dev); - return -1; - } - - return 0; -} - -int v3_mcheck_inject_nb_mce(struct v3_vm_info * const vm, const uint32_t cpu, - const struct mc4_stat_msr stat, - const struct mc4_addr_msr addr) { - struct vm_device * dev = v3_find_dev(vm, "MCHECK"); - struct mcheck_state * state = dev->private_data; - int ret; - - // For now only MCE injection on cpu 0 is supported. - if (cpu != 0) { - PrintError(MSG_PRE "Injecting MCE on cpu %u not supported.\n", cpu); - return -1; - } - - - // Is the Northbridge bank enabled? - if (state->mcg_ctl.nbe != 1) { - PrintDebug(MSG_PRE "Northbridge register bank disabled. Ignoring Northbridge MCE.\n"); - return 0; - } - - state->mci_regs[4].stat.value = stat.value; - state->mci_regs[4].addr.value = addr.value; - - state->mcg_stat.value = 0; - state->mcg_stat.ripv = 1; - state->mcg_stat.mcip = 1; - - PrintDebug(MSG_PRE "Injecting NB MCE on core %u.\n", 0); - - // Raise on core 0. 
- ret = v3_raise_exception(&(vm->cores[0]), MCE_INTERRUPT); - - if (ret == -1) { - PrintError(MSG_PRE "Failed to raise MCE.\n"); - return -1; - } - - return 0; -} - -device_register("MCHECK", mcheck_init); diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig index c7b7d69..158ed42 100644 --- a/palacios/src/extensions/Kconfig +++ b/palacios/src/extensions/Kconfig @@ -26,11 +26,17 @@ config EXT_MTRRS Provides a virtualized set of MTTR registers config EXT_MACH_CHECK - bool "Support Machine Check functionality" + bool "Support machine-check functionality" default n help - Provides a virtualized machine check architecture + Provides a virtualized machine-check architecture +config DEBUG_EXT_MACH_CHECK + bool "Debug machine-check functionality" + default n + depends on EXT_MACH_CHECK + help + Enable virtual machine-check architecture debugging config EXT_INSPECTOR bool "VM Inspector" diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile index ab31a13..abf382a 100644 --- a/palacios/src/extensions/Makefile +++ b/palacios/src/extensions/Makefile @@ -3,3 +3,5 @@ obj-$(V3_CONFIG_EXT_MTRRS) += ext_mtrr.o obj-$(V3_CONFIG_EXT_VTSC) += ext_vtsc.o obj-$(V3_CONFIG_EXT_VTIME) += ext_vtime.o obj-$(V3_CONFIG_EXT_INSPECTOR) += ext_inspector.o +obj-$(V3_CONFIG_EXT_MACH_CHECK) += ext_mcheck.o + diff --git a/palacios/src/extensions/ext_mcheck.c b/palacios/src/extensions/ext_mcheck.c new file mode 100644 index 0000000..51605ae --- /dev/null +++ b/palacios/src/extensions/ext_mcheck.c @@ -0,0 +1,558 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, The V3VEE Project + * All rights reserved.
+ * + * Copyright (c) 2008, Philip Soltero + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Philip Soltero + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +/** + * @file Virtualized machine-check architecture. + * + * @author Philip Soltero + */ + +// TODO: CPUID field add failure. +// If adding CPUID fields fails on the second hook there is no way to back out of the first add. + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef V3_CONFIG_DEBUG_EXT_MACH_CHECK +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + +#define MCHECK "MCHECK" + +#define CPUID_0000_0001 0x00000001 +#define CPUID_8000_0001 0x80000001 + +// Bit 7 - MCE availability. Bit 14 - MCA availability. +#define CPUID_FIELDS 0x00004080 + +// 6 error reporting banks. This may be configurable in the future. +#define MC_REG_BANKS 6 +#define MCE_INTERRUPT 18 + +#define MSG_PRE MCHECK": " + +#define MCG_CAP 0x0179 +#define MCG_STAT 0x017A +#define MCG_CTRL 0x017B + + + +/* I have no idea what Intel was thinking (or maybe they just weren't) + * but the MCi registers are completely non-standard across Intel's platforms and are a total mess. + * Any derivative of the pentium-M (i.e. all Core CPU lines) completely disregard the + * architectural standard that Intel itself created... + * For these CPUs: the MC4 MSRs switch locations with the MC3s, + * also every MCi below MC3 (including MC4) does not have a MCi_MISC MSR. + * + * So for now, screw it, we'll use AMD's standard + */ + +/* AMD MC Banks: + Bank 0 : Data Cache. + Bank 1 : Instruction Cache. + Bank 2 : Bus Unit. + Bank 3 : Load Store Unit. + Bank 4 : Northbridge and DRAM.
+*/ + +static const uint32_t amd_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414}; +static const uint32_t pentium_6_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414}; +static const uint32_t pentium_m_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x0410, 0x040c, 0x0414}; +static const uint32_t ia32_mci_bases[] = { 0x0400, 0x0404, 0x0408, 0x040c, + 0x0410, 0x0414, 0x0418, 0x041c, + 0x0420, 0x0424, 0x0428, 0x042c, + 0x0430, 0x0434, 0x0438, 0x043c, + 0x0440, 0x0444, 0x0448, 0x044c, + 0x0450, 0x0454 }; +#define MCi_MASK 0xfffffffc +#define MCi_CTRL 0x00 +#define MCi_STAT 0x01 +#define MCi_ADDR 0x02 +#define MCi_MISC 0x03 + + + + +/** + * MCA status low and high registers, MC4_STAT, MSR0000_0411. + */ +struct mc4_stat_msr { + union { + uint64_t value; + struct { + uint_t syndrome : 8; + uint_t reserved : 3; + uint_t error_code_ext : 5; + uint_t error_code : 16; + uint_t err_cpu : 4; + uint_t ltd_link : 4; + uint_t scrub : 1; + uint_t sublink : 1; + uint_t mca_stat_sub_cache : 2; + uint_t reserved_01 : 1; + uint_t uecc : 1; + uint_t cecc : 1; + uint_t syndrome2 : 8; + uint_t reserved_02 : 1; + uint_t err_cpu_val : 1; + uint_t pcc : 1; + uint_t addr_v : 1; + uint_t misc_v : 1; + uint_t en : 1; + uint_t uc : 1; + uint_t over : 1; + uint_t val : 1; + }__attribute__((packed)); + }__attribute__((packed)); +} __attribute__((packed)); + +/** + * MCA address low and high registers, MC4_ADDR, MSR0000_0412. + */ +struct mc4_addr_msr { + union { + uint64_t value; + + struct { + uint64_t addr32 : 36; + uint32_t reserved : 28; + } __attribute__((packed)); + + uint64_t addr64; + } __attribute__((packed)); +} __attribute__((packed)); + +/** + * Global machine-check capabilities register, MCG_CAP. 
+ */ +struct mcg_cap_msr { + union { + uint64_t value; + struct { + uint32_t count : 8; + uint32_t mcg_ctl_p : 1; // CTRL Present + uint64_t reserved : 55; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +/** + * Global machine-check status register, MCG_STAT. + */ +struct mcg_stat_msr { + union { + uint64_t value; + struct { + uint32_t ripv : 1; + uint32_t eipv : 1; + uint32_t mcip : 1; // Machine-check in progress. + uint64_t reserved : 61; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +/** + * Global machine-check control register, MCG_CTRL. + */ +struct mcg_ctl_msr { + union { + uint64_t value; + struct { + uint32_t dce : 1; // Data cache register bank enable + uint32_t ice : 1; // Instruction cache register bank enable + uint32_t bue : 1; // Bus unit register bank enable + uint32_t lse : 1; // Load-store register bank enable + uint32_t nbe : 1; // Northbridge register bank enable + uint32_t fre : 1; // Fixed issue reorder buffer register bank enable + uint64_t unused : 58; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +/** + * A temporary structure for unimplemented machine-check error reporting banks. 
+ */ +struct mci_bank { + uint32_t base; + struct v3_msr ctl; + struct v3_msr stat; + struct v3_msr addr; + struct v3_msr misc; +}; + +struct mcheck_state { + struct mcg_cap_msr mcg_cap; + struct mcg_stat_msr mcg_stat; + struct mcg_ctl_msr mcg_ctl; + + /* Note that these are in logical order not MSR order */ + /* So MC4 is always at mci_regs[4] even if the MSR is before MC3's */ + struct mci_bank mci_regs[MC_REG_BANKS]; +}; + + +static inline +void init_state(struct mcheck_state * const state) { + int i = 0; + + memset(state, 0, sizeof(struct mcheck_state)); + + // Set the initial MCI reg base values to the current architecture + for (i = 0; i < MC_REG_BANKS; i++) { + state->mci_regs[i].base = amd_mci_bases[i]; + } +} + +/** + * Handles guest writes to MCG MSRs. + */ +static +int mcg_write_handler(struct guest_info * core, uint32_t msr, struct v3_msr src, void * priv_data) { + struct mcheck_state * state = (struct mcheck_state *)priv_data; + + switch (msr) { + case MCG_CAP: + PrintDebug(MSG_PRE "Ignoring write to MCG_CAP MSR.\n"); + break; + + case MCG_STAT: + state->mcg_stat.value = 0; + break; + + case MCG_CTRL: + if (!state->mcg_cap.mcg_ctl_p) { + PrintDebug(MSG_PRE "Ignoring write to control MSR '0x%x'. Control MSRs not supported.\n", msr); + break; + } + + // The upper 58 bits are unused and read-only. + state->mcg_ctl.value &= ~0x3f; + state->mcg_ctl.value |= src.value & 0x3f; + + break; + + default: + PrintError(MSG_PRE "Writing to invalid MSR: %x\n", msr); + return -1; + } + + return 0; +} + + +/** + * Handles guest reads to MCG MSRs.
+ */ +static +int mcg_read_handler(struct guest_info * core, uint32_t msr, struct v3_msr * dst, void * priv_data) { + struct mcheck_state * state = (struct mcheck_state *)priv_data; + + switch(msr) { + case MCG_CAP: + dst->value = state->mcg_cap.value; + break; + + case MCG_STAT: + dst->value = state->mcg_stat.value; + break; + + case MCG_CTRL: + if (!state->mcg_cap.mcg_ctl_p) { + PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr); + break; + } + + dst->value = state->mcg_ctl.value; + break; + + default: + PrintError(MSG_PRE "Reading from invalid MSR: %x\n", msr); + return -1; + } + + return 0; +} + +static struct mci_bank * get_mci_reg(struct mcheck_state * state, uint32_t msr) { + int i = 0; + + for (i = 0; i < MC_REG_BANKS; i++) { + if (state->mci_regs[i].base == (msr & MCi_MASK)) { + return &(state->mci_regs[i]); + } + } + + return NULL; +} + + +/** + * Handles guest reads to MCi MSRs. + */ +static +int mci_read_handler(struct guest_info * const core, + const uint32_t msr, + struct v3_msr * const dst, + void * const priv_data) { + struct mcheck_state * const state = (struct mcheck_state *)priv_data; + struct mci_bank * mci = get_mci_reg(state, msr); + + if (mci == NULL) { + PrintError(MSG_PRE " MSR read for invalid MCI register 0x%x\n", msr); + return -1; + } + + switch (msr & ~MCi_MASK) { + case MCi_CTRL: + if (!state->mcg_cap.mcg_ctl_p) { + PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr); + break; + } + + dst->value = mci->ctl.value; + break; + + case MCi_STAT: + dst->value = mci->stat.value; + break; + + case MCi_ADDR: + dst->value = mci->addr.value; + break; + + case MCi_MISC: + dst->value = mci->misc.value; + break; + + default: + PrintError(MSG_PRE "Ignoring read of unhooked MSR '0x%x'. This is a bug.\n", msr); + break; + } + + PrintDebug(MSG_PRE "Reading value '0x%llx' for MSR '0x%x'.\n", dst->value, msr); + + return 0; +} + +/** + * Handles guest writes to MCi MSRs.
+ */ +static +int mci_write_handler(struct guest_info * const core, + const uint_t msr, + const struct v3_msr src, + void * const priv_data) { + struct mcheck_state * const state = (struct mcheck_state *)priv_data; + struct mci_bank * mci = get_mci_reg(state, msr); + + PrintDebug(MSG_PRE "Writing value '0x%llx' for MSR '0x%x'.\n", src.value, msr); + + switch (msr & ~MCi_MASK) { + case MCi_CTRL: + if (!state->mcg_cap.mcg_ctl_p) { + PrintDebug(MSG_PRE "Ignoring write to control MSR '0x%x'. Control MSRs not supported.\n", msr); + break; + } + + mci->ctl.value = src.value; + break; + + case MCi_STAT: + if (src.value != 0) { + // Should be a GPF. + PrintError(MSG_PRE "Ignoring write of illegal value '0x%llx'.\n", src.value); + return -1; + } + + mci->stat.value = 0; + break; + + case MCi_ADDR: + mci->addr.value = src.value; + break; + + case MCi_MISC: + V3_Print(MSG_PRE "Ignoring write to read only miscellaneous MSR '0x%x'.\n", msr); + break; + + default: + PrintError(MSG_PRE "Ignoring write of unhooked MSR '0x%x'. This is a bug.\n", msr); + break; + } + + return 0; +} + +/** + * Add machine-check-related CPUID fields for CPUID functions 0000_0001 and 8000_0001. + * + * @return 0 for success and -1 for failure. + */ +static inline +int add_cpuid_fields(struct v3_vm_info * const vm, + struct mcheck_state * const state) { + int ret = 0; + + ret = v3_cpuid_add_fields(vm, CPUID_0000_0001, + 0, 0, + 0, 0, + 0, 0, + CPUID_FIELDS, CPUID_FIELDS); + + if (ret == -1) { + PrintError(MSG_PRE "Failed to add CPUID fields for function 0000_0001.\n"); + return -1; + } + + // Add bit 7, MCE availability, and bit 14, MCA availability.
+ ret = v3_cpuid_add_fields(vm, CPUID_8000_0001, + 0, 0, + 0, 0, + 0, 0, + CPUID_FIELDS, CPUID_FIELDS); + + if (ret == -1) { + PrintError(MSG_PRE "Failed to add CPUID fields for function 8000_0001.\n"); + return -1; + } + + return 0; +} + + +static int deinit_mcheck(struct v3_vm_info * vm, void * priv_data) { + struct mcheck_state * state = (struct mcheck_state *)v3_get_extension_state(vm, MCHECK); + if (state == NULL) { + PrintError(MSG_PRE "Failed to get machine-check architecture extension state.\n"); + return -1; + } + + V3_Free(state); + return 0; +} + +static int init_mcheck(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) { + struct mcheck_state * state = NULL; + int ret = 0; + int i = 0; + + state = (struct mcheck_state *)V3_Malloc(sizeof(struct mcheck_state)); + + if (state == NULL) { + PrintError(MSG_PRE "Failed to allocate machine-check architecture state.\n"); + return -1; + } + + + init_state(state); + + state->mcg_cap.count = MC_REG_BANKS; + state->mcg_cap.mcg_ctl_p = 1; + + ret |= add_cpuid_fields(vm, state); + + /* Hook the MSRs */ + ret |= v3_hook_msr(vm, MCG_CAP, mcg_read_handler, mcg_write_handler, state); + ret |= v3_hook_msr(vm, MCG_STAT, mcg_read_handler, mcg_write_handler, state); + ret |= v3_hook_msr(vm, MCG_CTRL, mcg_read_handler, mcg_write_handler, state); + + for (i = 0; i < MC_REG_BANKS; i++) { + ret |= v3_hook_msr(vm, state->mci_regs[i].base, mci_read_handler, mci_write_handler, state); + ret |= v3_hook_msr(vm, state->mci_regs[i].base + 1, mci_read_handler, mci_write_handler, state); + ret |= v3_hook_msr(vm, state->mci_regs[i].base + 2, mci_read_handler, mci_write_handler, state); + ret |= v3_hook_msr(vm, state->mci_regs[i].base + 3, mci_read_handler, mci_write_handler, state); + } + + if (ret == -1) { + PrintError(MSG_PRE "Error hooking machine-check architecture resources.\n"); + V3_Free(state); + return -1; + } + + *priv_data = state; + + PrintDebug(MSG_PRE "Initialized machine-check architecture.\n"); + + return 0;
+} + +int v3_mcheck_inject_nb_mce(struct v3_vm_info * const vm, const uint32_t cpu, + const struct mc4_stat_msr stat, + const struct mc4_addr_msr addr) { + struct mcheck_state * state = (struct mcheck_state *)v3_get_extension_state(vm, MCHECK); + int ret; + + if (state == NULL) { + PrintError(MSG_PRE "Machine-check architecture extension state not found.\n"); + return -1; + } + + // For now only MCE injection on cpu 0 is supported. + if (cpu != 0) { + PrintError(MSG_PRE "Injecting MCE on cpu %u not supported.\n", cpu); + return -1; + } + + // Is the Northbridge bank enabled? + if (state->mcg_ctl.nbe != 1) { + PrintDebug(MSG_PRE "Northbridge register bank disabled. Ignoring Northbridge MCE.\n"); + return 0; + } + + state->mci_regs[4].stat.value = stat.value; + state->mci_regs[4].addr.value = addr.value; + + state->mcg_stat.value = 0; + state->mcg_stat.ripv = 1; + state->mcg_stat.mcip = 1; + + PrintDebug(MSG_PRE "Injecting NB MCE on core %u.\n", 0); + + // Raise on core 0. + ret = v3_raise_exception(&(vm->cores[0]), MCE_INTERRUPT); + + if (ret == -1) { + PrintError(MSG_PRE "Failed to raise MCE.\n"); + return -1; + } + + return 0; +} + +static struct v3_extension_impl mcheck_impl = { + .name = MCHECK, + .init = init_mcheck, + .deinit = deinit_mcheck, + .core_init = NULL, + .core_deinit = NULL, + .on_entry = NULL, + .on_exit = NULL +}; + +register_extension(&mcheck_impl);