fix: any SSE instruction raises an "invalid opcode" exception.
Also add handling for the SSE (SIMD) exception.
On SakuraVPS (a public QEMU/KVM service),
CR4.OSFXSR and CR4.OSXMMEXCPT are cleared after realmode().
Therefore, MMX/SSE instructions (e.g. MOVD) raise an "invalid opcode" exception.
(I don't know whether this behavior is required by the CPU specification or is a QEMU/KVM problem.)
The cpuid reported on SakuraVPS is shown below:
cpu0: 2402MHz GenuineIntel Core i7/Xeon (cpuid: AX 0x206C1 DX 0x1F8BF3FF)
This problem can be reproduced with the following program:
---
// This code can be assembled with Go's assembler (go tool 8a).
TEXT main·main+0(SB), $0
MOVSD $1.0, X0
RET
TEXT main·init+0(SB), $0
RET
----
Reference: /n/sources/patch/sse-on-kvm
Date: Sat Oct 18 18:38:16 CES 2014
Signed-off-by:
lu...@lufia.org
--- /sys/src/9/pc/devarch.c Sat Oct 18 18:36:21 2014
+++ /sys/src/9/pc/devarch.c Sat Oct 18 18:36:13 2014
@@ -40,6 +40,7 @@
enum {
CR4Osfxsr = 1 << 9,
+ CR4Oxmmex = 1 << 10,
};
enum { /* cpuid standard function codes */
@@ -805,7 +806,7 @@
* If machine check was enabled clear out any lingering status.
*/
if(m->cpuiddx & (Pge|Mce|Pse)){
- cr4 = 0;
+ cr4 = getcr4();
if(m->cpuiddx & Pse)
cr4 |= 0x10; /* page size extensions */
if(p = getconf("*nomce"))
@@ -848,7 +849,7 @@
if(m->cpuiddx & Fxsr){ /* have sse fp? */
fpsave = fpssesave;
fprestore = fpsserestore;
- putcr4(getcr4() | CR4Osfxsr);
+ putcr4(getcr4() | CR4Osfxsr|CR4Oxmmex);
} else {
fpsave = fpx87save;
fprestore = fpx87restore;
--- /sys/src/9/pc/fns.h Sat Oct 18 18:36:28 2014
+++ /sys/src/9/pc/fns.h Sat Oct 18 18:36:24 2014
@@ -93,6 +93,7 @@
void kbdenable(void);
void kbdinit(void);
#define kmapinval()
+void ldmxcsr(ulong);
void lgdt(ushort[3]);
void lidt(ushort[3]);
void links(void);
--- /sys/src/9/pc/io.h Sat Oct 18 18:36:38 2014
+++ /sys/src/9/pc/io.h Sat Oct 18 18:36:32 2014
@@ -13,6 +13,7 @@
VectorPF = 14, /* page fault */
Vector15 = 15, /* reserved */
VectorCERR = 16, /* coprocessor error */
+ VectorSIMD = 19, /* simd error */
VectorPIC = 32, /* external i8259 interrupts */
IrqCLOCK = 0,
--- /sys/src/9/pc/l.s Sat Oct 18 18:36:54 2014
+++ /sys/src/9/pc/l.s Sat Oct 18 18:36:44 2014
@@ -19,6 +19,7 @@
#define WBINVD BYTE $0x0F; BYTE $0x09
#define FXSAVE BYTE $0x0f; BYTE $0xae; BYTE $0x00 /* SSE FP save */
#define FXRSTOR BYTE $0x0f; BYTE $0xae; BYTE $0x08 /* SSE FP restore */
+#define LDMXCSR BYTE $0x0f; BYTE $0xae; BYTE $0x10 /* SSE MXCSR */
/*
* Macros for calculating offsets within the page directory base
@@ -833,6 +834,11 @@
MOVL p+0(FP), AX
FXRSTOR
WAIT
+ RET
+
+TEXT ldmxcsr(SB), $0 /* Load MXCSR */
+ LEAL mxcsr+0(FP), AX
+ LDMXCSR
RET
/*
--- /sys/src/9/pc/main.c Sat Oct 18 18:37:06 2014
+++ /sys/src/9/pc/main.c Sat Oct 18 18:36:59 2014
@@ -648,6 +648,8 @@
switch(up->fpstate){
case FPinit:
fpinit();
+ if(fpsave == fpssesave)
+ ldmxcsr(0);
up->fpstate = FPactive;
break;
case FPinactive:
@@ -674,6 +676,17 @@
}
/*
+ * SIMD error
+ */
+static void
+simderror(Ureg *ureg, void*)
+{
+ fpsave(&up->fpsave);
+ up->fpstate = FPinactive;
+ mathnote();
+}
+
+/*
* math coprocessor segment overrun
*/
static void
@@ -690,6 +703,7 @@
intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
trapenable(VectorCNA, mathemu, 0, "mathemu");
trapenable(VectorCSO, mathover, 0, "mathover");
+ trapenable(VectorSIMD, simderror, 0, "simderror");
}
/*
--- /sys/src/9/pc/realmode.c Sat Oct 18 18:37:14 2014
+++ /sys/src/9/pc/realmode.c Sat Oct 18 18:37:10 2014
@@ -28,7 +28,7 @@
realmode(Ureg *ureg)
{
int s;
- ulong cr3;
+ ulong cr3, cr4;
extern void realmode0(void); /* in l.s */
if(getconf("*norealmode"))
@@ -43,6 +43,7 @@
s = splhi();
m->pdb[PDX(0)] = m->pdb[PDX(KZERO)]; /* identity map low */
cr3 = getcr3();
+ cr4 = getcr4();
putcr3(PADDR(m->pdb));
if (arch)
arch->introff();
@@ -61,6 +62,7 @@
}
m->pdb[PDX(0)] = 0; /* remove low mapping */
putcr3(cr3);
+ putcr4(cr4);
splx(s);
*ureg = realmoderegs;
unlock(&rmlock);
--- /sys/src/9/pc/trap.c Sat Oct 18 18:37:27 2014
+++ /sys/src/9/pc/trap.c Sat Oct 18 18:37:19 2014
@@ -249,7 +249,7 @@
"coprocessor error",
"alignment check",
"machine check",
- "19 (reserved)",
+ "simd error",
"20 (reserved)",
"21 (reserved)",
"22 (reserved)",