---
 *      MachFPInterrupt(status, cause, pc, frame)
:
 * fetch the instruction and emulate the instruction.
 */
	bgez		a1, 1f			# Check the branch delay bit.
	nop
/*
 * The instruction is in the branch delay slot.
 */
	b		2f
	lw		a0, 4(a2)		# a0 = coproc instruction
/*
 * This is not in the branch delay slot so calculate the resulting
 * PC (epc + 4) into v0 and continue to MachEmulateFP().
 */
1:
	lw		a0, 4(a2)		# a0 = coproc instruction
2:
	move		a2, a1
---
So the address of the unimplemented instruction is always (PC+4)
regardless of the branch delay bit.
The offset value for non-BDslot case was changed from 0 to 4
in rev 1.171:
---
diff -u -p -r1.170 -r1.171
--- src/sys/arch/mips/mips/locore.S	2009/12/10 05:10:02	1.170
+++ src/sys/arch/mips/mips/locore.S	2009/12/14 00:46:06	1.171
:
 /*
  * This is not in the branch delay slot so calculate the resulting
  * PC (epc + 4) into v0 and continue to MachEmulateFP().
  */
 1:
-	lw	a0, 0(a2)			# a0 = coproc instruction
+	lw		a0, 4(a2)		# a0 = coproc instruction
 2:
---
Was this change intentional?
Does the unimplemented insn emulation in mips_emul.c work as expected?
Note that the "... so calculate the resulting PC (epc + 4) into v0 and continue
 to MachEmulateFP()" comment is just a leftover that should have been
removed in rev 1.82:
http://cvsweb.NetBSD.org/bsdweb.cgi/src/sys/arch/mips/mips/locore.S#rev1.82
and there are more comments that should be updated.
(coproc insn -> unimplemented insn, etc.)
---
Izumi Tsutsui
--
Posted automagically by a mail2news gateway at muc.de e.V.
Please direct questions, flames, donations, etc. to news-...@muc.de
>  /*
>   * This is not in the branch delay slot so calculate the resulting
>   * PC (epc + 4) into v0 and continue to MachEmulateFP().
>   */
>  1:
> -	lw	a0, 0(a2)			# a0 = coproc instruction
> +	lw		a0, 4(a2)		# a0 = coproc instruction
>  2:
> 
> ---
> 
> Was this change intentional?
> Does the umimplemented insn emulation in mips_emul.c work as expected?
This MachFPTrap() seems to be called only if T_FPE occurs in the !SOFTFLOAT case
so I guess it will rarely happen, but reverting this seems to fix the
"Illegal instruction" error during make install in pkgsrc/lang/python2.6
on R5000 O2. (I have not investigated what insn triggers it though)
I'll revert the above line in a few days if there is no objection.
Caused by swc1, and also reproducible by
src/regress/lib/libc/ieeefp/except/except.c in netbsd-5.
The attached patch also fixes PR 35326 (FPU siginfo),
36251 and 42887 (FPEmul cvt_d_s).
---
Izumi Tsutsui
Index: mips/fp.S
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/fp.S,v
retrieving revision 1.37
diff -u -p -r1.37 fp.S
--- mips/fp.S	14 Dec 2009 00:46:06 -0000	1.37
+++ mips/fp.S	27 Jan 2011 13:11:13 -0000
@@ -847,6 +847,7 @@ bcemul_branch:
 	jal	_C_LABEL(fuiword)
 
 	move	a0, v0
+	move	a3, a2			# save fpstat for bcemul_sigfpe()
 	REG_L	a1, CALLFRAME_FRAME(sp)
 	REG_L	a2, CALLFRAME_CAUSE(sp)
 
@@ -1813,6 +1814,8 @@ cvt_d_s:
 	beq	t2, zero, result_fs_d		# is FS zero?
 	jal	_C_LABEL(renorm_fs_s)
 	move	t8, zero
+	sll	t3, t2, 32 - 3			# convert S fraction to D
+	srl	t2, t2, 3
 	b	norm_d
 2:
 	addu	t1, t1, DEXP_BIAS - SEXP_BIAS	# bias exponent correctly
@@ -2762,6 +2765,7 @@ fpe_trap:
 	#nop
 	INT_S	a2, PCB_FPREGS+FRAME_FSR(v0)
 #endif
+	move	a3, a2				# fpustat
 	REG_L	a1, CALLFRAME_SIZ + 1*SZREG(sp)	# frame
 	REG_L	a2, CALLFRAME_SIZ + 2*SZREG(sp)	# cause
 	REG_L	ra, CALLFRAME_RA(sp)
@@ -5155,10 +5159,9 @@ STATIC_XLEAF(bcemul_sigill)
 	REG_S	a2, FRAME_CAUSE(a1)
 	REG_EPILOGUE
 
-	move	a2, a0				# code = instruction
-	move	a0, MIPS_CURLWP			# get current process
-	li	a1, SIGILL
-	j	_C_LABEL(fpemul_trapsignal)
+	move	a1, a0				# 2nd arg: instruction
+	move	a0, MIPS_CURLWP			# 1st arg: curlwp
+	j	_C_LABEL(mips_fpuillinst)
 END(fpemul_sigill)
 
 STATIC_LEAF(fpemul_sigfpe)
@@ -5169,10 +5172,9 @@ STATIC_LEAF(fpemul_sigfpe)
 	REG_S	a2, FRAME_CAUSE(a1)
 	REG_EPILOGUE
 
-	move	a2, a0				# code = instruction
-	move	a0, MIPS_CURLWP			# get current process
-	li	a1, SIGFPE
-	j	_C_LABEL(fpemul_trapsignal)
+	move	a1, a3				# 2nd arg: fpstat
+	move	a0, MIPS_CURLWP			# 1st arg: curlwp
+	j	_C_LABEL(mips_fpuexcept)
 END(fpemul_sigfpe)
 
 #ifdef SOFTFLOAT
@@ -5184,9 +5186,8 @@ STATIC_LEAF(bcemul_sigfpe)
 	REG_S	a2, FRAME_CAUSE(a1)
 	REG_EPILOGUE
 
-	move	a2, a0				# code = instruction
-	move	a0, MIPS_CURLWP			# get current process
-	li	a1, SIGFPE
-	j	_C_LABEL(fpemul_trapsignal)
+	move	a1, a3				# 2nd arg: fpstat
+	move	a0, MIPS_CURLWP			# 1st arg: current process
+	j	_C_LABEL(mips_fpuexcept)
 END(bcemul_sigfpe)
 #endif
Index: mips/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/locore.S,v
retrieving revision 1.175
diff -u -p -r1.175 locore.S
--- mips/locore.S	26 Jan 2011 01:18:54 -0000	1.175
+++ mips/locore.S	27 Jan 2011 13:11:13 -0000
@@ -639,17 +639,11 @@ END(mips_cp0_tlb_page_mask_probe)
  *	Handle a floating point interrupt (r3k) or trap (r4k).
  *	the handlers are indentical, only the reporting mechanisms differ.
  *
- *	MachFPInterrupt(status, cause, pc, frame)
- *		unsigned status;
- *		unsigned cause;
- *		unsigned pc;
- *		int *frame;
+ *	MachFPInterrupt(uint32_t status, uint32_t cause, vaddr_t pc,
+ *	    struct frame *frame)
  *
- *	MachFPTrap(status, cause, pc, frame)
- *		unsigned status;
- *		unsigned cause;
- *		unsigned pc;
- *		int *frame;
+ *	MachFPTrap(uint32_t status, uint32_t cause, vaddr_t pc,
+ *	    struct frame *frame)
  *
  * Results:
  *	None.
@@ -676,13 +670,11 @@ XNESTED(MachFPTrap)
 	bgez		t2, 3f			# no, normal trap
 	nop
 /*
- * We got an unimplemented operation trap so
- * We received an unimplemented operation trap.
+ * We received an unimplemented operation trap so
+ * fetch the instruction and emulate the instruction.
  *
  * We check whether it's an unimplemented FP instruction here rather
  * than invoking MachEmulateInst(), since it is faster.
- *
- * fetch the instruction and emulate the instruction.
  */
 	bgez		a1, 1f			# Check the branch delay bit.
 	nop
@@ -692,13 +684,11 @@ XNESTED(MachFPTrap)
 	b		2f
 	lw		a0, 4(a2)		# a0 = coproc instruction
 /*
- * This is not in the branch delay slot so calculate the resulting
- * PC (epc + 4) into v0 and continue to MachEmulateFP().
+ * This is not in the branch delay slot.
  */
 1:
-	lw		a0, 4(a2)		# a0 = coproc instruction
+	lw		a0, 0(a2)		# a0 = coproc instruction
 2:
-	move		a2, a1
 
 /*
  * Check to see if the instruction to be emulated is a floating-point
@@ -718,9 +708,9 @@ XNESTED(MachFPTrap)
 	REG_S		a1, FRAME_CAUSE(a3)
 	REG_EPILOGUE
 
-	move		a1, a0				# code = instruction
+	move		a1, a0				# 2nd arg: instruction
 	jal		_C_LABEL(mips_fpuillinst)
-	move		a0, MIPS_CURLWP			# get current LWP
+	move		a0, MIPS_CURLWP			# 1st arg: curlwp
 
 	b		FPReturn
 	nop
@@ -737,9 +727,9 @@ XNESTED(MachFPTrap)
 	and		a0, t0, ~MIPS_FPU_EXCEPTION_BITS
 	ctc1		a0, MIPS_FPU_CSR
 
-	move		a1, t0			# FPU status
+	move		a1, t0			# 2nd arg: fpstat
 	jal		_C_LABEL(mips_fpuexcept)
-	move		a0, MIPS_CURLWP		# get current LWP
+	move		a0, MIPS_CURLWP		# 1st arg: curlwp
 
 	b		FPReturn
 	nop
@@ -748,8 +738,10 @@ XNESTED(MachFPTrap)
  * Finally, we can call MachEmulateFP() where a0 is the instruction to emulate.
  */
 4:
+						# 1st arg: a0 = instruction
+	move		a2, a1			# 3rd arg: cause
 	jal		_C_LABEL(MachEmulateFP)
-	move		a1, a3
+	move		a1, a3			# 2nd arg: frame
 
 /*
  * Turn off the floating point coprocessor and return.
Index: mips/mips_fputrap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/mips_fputrap.c,v
retrieving revision 1.6
diff -u -p -r1.6 mips_fputrap.c
--- mips/mips_fputrap.c	14 Dec 2009 00:46:06 -0000	1.6
+++ mips/mips_fputrap.c	27 Jan 2011 13:11:13 -0000
@@ -34,33 +34,45 @@
 #include <mips/cpuregs.h>
 #include <mips/regnum.h>
 
-#ifndef SOFTFLOAT
-void mips_fpuexcept(struct lwp *, unsigned int);
-void mips_fpuillinst(struct lwp *, unsigned int, unsigned long);
-static int fpustat2sicode(unsigned int);
+/* #define FPE_DEBUG */
+
+void mips_fpuexcept(struct lwp *, uint32_t);
+void mips_fpuillinst(struct lwp *, uint32_t);
+static int fpustat2sicode(uint32_t);
 
 void
-mips_fpuexcept(struct lwp *l, unsigned int fpustat)
+mips_fpuexcept(struct lwp *l, uint32_t fpustat)
 {
 	ksiginfo_t ksi;
 
+#ifdef FPE_DEBUG
+	printf("%s(%x,%#"PRIxREGISTER")\n",
+	    __func__, fpustat, l->l_md.md_regs->f_regs[_R_PC]);
+#endif
+
 	KSI_INIT_TRAP(&ksi);
 	ksi.ksi_signo = SIGFPE;
 	ksi.ksi_code = fpustat2sicode(fpustat);
 	ksi.ksi_trap = fpustat;
+	ksi.ksi_addr = (void *)l->l_md.md_regs->f_regs[_R_PC];
 	(*l->l_proc->p_emul->e_trapsignal)(l, &ksi);
 }
 
 void
-mips_fpuillinst(struct lwp *l, unsigned int opcode, unsigned long vaddr)
+mips_fpuillinst(struct lwp *l, uint32_t opcode)
 {
 	ksiginfo_t ksi;
 
+#ifdef FPE_DEBUG
+	printf("%s(%x,%#"PRIxREGISTER")\n",
+	    __func__, opcode, l->l_md.md_regs->f_regs[_R_PC]);
+#endif
+
 	KSI_INIT_TRAP(&ksi);
 	ksi.ksi_signo = SIGILL;
 	ksi.ksi_code = ILL_ILLOPC;
 	ksi.ksi_trap = opcode;
-	ksi.ksi_addr = (void *)vaddr;
+	ksi.ksi_addr = (void *)l->l_md.md_regs->f_regs[_R_PC];
 	(*l->l_proc->p_emul->e_trapsignal)(l, &ksi);
 }
 
@@ -77,7 +89,7 @@ static struct {
 };
 
 static int
-fpustat2sicode(unsigned int fpustat)
+fpustat2sicode(uint32_t fpustat)
 {
 	int i;
 
@@ -86,23 +98,3 @@ fpustat2sicode(unsigned int fpustat)
 			return (fpecodes[i].code);
 	return (FPE_FLTINV);
 }
-#endif /* !SOFTFLOAT */
-
-void fpemul_trapsignal(struct lwp *, unsigned int, unsigned int);
-
-void
-fpemul_trapsignal(struct lwp *l, unsigned int sig, unsigned int code)
-{
-	ksiginfo_t ksi;
-
-#if DEBUG
-	printf("fpemul_trapsignal(%x,%x,%#"PRIxREGISTER")\n",
-	   sig, code, l->l_md.md_regs->f_regs[_R_PC]);
-#endif
-
-	KSI_INIT_TRAP(&ksi);
-	ksi.ksi_signo = sig;
-	ksi.ksi_code = 1; /* XXX */
-	ksi.ksi_trap = code;
-	(*l->l_proc->p_emul->e_trapsignal)(l, &ksi);
-}
> > > -	lw	a0, 0(a2)			# a0 = coproc instruction
> > > +	lw		a0, 4(a2)		# a0 = coproc instruction
>  :
> > I'll revert the above line in a few days if there is no objection.
> 
> The attached patch also fixes PR 35326 (FPU siginfo),
One more regression in fp.S:
Index: mips/fp.S
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/fp.S,v
retrieving revision 1.37
diff -u -p -r1.37 fp.S
--- mips/fp.S	14 Dec 2009 00:46:06 -0000	1.37
+++ mips/fp.S	29 Jan 2011 01:24:36 -0000
@@ -2529,6 +2532,8 @@ inexact_nobias_d:
 	PTR_L	v0, L_PCB(MIPS_CURLWP)		# get pcb of current process
 	#nop
 	INT_S	a2, PCB_FPREGS+FRAME_FSR(v0)
+#else
+	ctc1	a2, MIPS_FPU_CSR		# save exceptions
 #endif
 	b	done
 
Now all tests in src/tests/lib/libc/ieeefp pass
on R4400 ews4800mips and R5000 sgimips.
As I noted in PR 35326, it looks like FP underflow in mul.s and mul.d
instructions triggers MIPS_FPU_EXCEPTION_UNIMPL instead of
MIPS_FPU_EXCEPTION_UNDERFLOW, and that's the reason why
ieeefp tests are affected by the MachEmulateFP() implementation
even on machines that have a real FP unit.
(at least on R4400/R5000/Rm5230)
---
Izumi Tsutsui