cmd/compile: (amd64) optimize float32(round64(float64(x)))
This does not fully fix the issue because other architectures
still need the same optimization.
Updates #75463.
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 5ddcb84..c594673 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1478,7 +1478,7 @@
}
case ssa.OpAMD64LoweredRound32F, ssa.OpAMD64LoweredRound64F:
// input is already rounded
- case ssa.OpAMD64ROUNDSD:
+ case ssa.OpAMD64ROUNDSD, ssa.OpAMD64ROUNDSS:
p := s.Prog(v.Op.Asm())
val := v.AuxInt
// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 353d272..4bee8fd 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -124,6 +124,8 @@
(Ceil x) => (ROUNDSD [2] x)
(Trunc x) => (ROUNDSD [3] x)
+(CVTSD2SS (ROUNDSD [c] (CVTSS2SD x))) => (ROUNDSS [c] x)
+
(FMA x y z) => (VFMADD231SD z x y)
// Lowering extension
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index 2fb4fdf..18ef7da 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -798,6 +798,7 @@
// ROUNDSD instruction is only guaraneteed to be available if GOAMD64>=v2.
// For GOAMD64<v2, any use must be preceded by a successful check of runtime.x86HasSSE41.
{name: "ROUNDSD", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSD"},
+ {name: "ROUNDSS", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSS"},
// See why we need those in issue #71204
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 00d581e..34e0344 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -929,6 +929,7 @@
OpAMD64SQRTSD
OpAMD64SQRTSS
OpAMD64ROUNDSD
+ OpAMD64ROUNDSS
OpAMD64LoweredRound32F
OpAMD64LoweredRound64F
OpAMD64VFMADD231SS
@@ -16239,6 +16240,20 @@
},
},
{
+ name: "ROUNDSS",
+ auxType: auxInt8,
+ argLen: 1,
+ asm: x86.AROUNDSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
name: "LoweredRound32F",
argLen: 1,
resultInArg0: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 19f16e1..0440cdb 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -222,6 +222,8 @@
return rewriteValueAMD64_OpAMD64CMPXCHGLlock(v)
case OpAMD64CMPXCHGQlock:
return rewriteValueAMD64_OpAMD64CMPXCHGQlock(v)
+ case OpAMD64CVTSD2SS:
+ return rewriteValueAMD64_OpAMD64CVTSD2SS(v)
case OpAMD64DIVSD:
return rewriteValueAMD64_OpAMD64DIVSD(v)
case OpAMD64DIVSDload:
@@ -13513,6 +13515,27 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64CVTSD2SS(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (CVTSD2SS (ROUNDSD [c] (CVTSS2SD x)))
+ // result: (ROUNDSS [c] x)
+ for {
+ if v_0.Op != OpAMD64ROUNDSD {
+ break
+ }
+ c := auxIntToInt8(v_0.AuxInt)
+ v_0_0 := v_0.Args[0]
+ if v_0_0.Op != OpAMD64CVTSS2SD {
+ break
+ }
+ x := v_0_0.Args[0]
+ v.reset(OpAMD64ROUNDSS)
+ v.AuxInt = int8ToAuxInt(c)
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64DIVSD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
It would be good to have a test for this.
For example, float32(math.Round(float64(x))) for some interesting x. In particular, make sure that the 32-bit rounding instruction (ROUNDSS) has the same behavior near math.MaxFloat32.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
It would be good to have a test for this.
For example, float32(math.Round(float64(x))) for some interesting x. In particular, make sure that the 32-bit rounding instruction (ROUNDSS) has the same behavior near math.MaxFloat32.
I want a correctness test, not a codegen test (although a codegen test is good to have). Just to make sure that our rewrite rules are faithfully transferring the rounding direction to the 32-bit versions.
Or do we already have such a test somewhere?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Keith Randall: It would be good to have a test for this.
For example, float32(math.Round(float64(x))) for some interesting x. In particular, make sure that the 32-bit rounding instruction (ROUNDSS) has the same behavior near math.MaxFloat32.
I want a correctness test, not a codegen test (although a codegen test is good to have). Just to make sure that our rewrite rules are faithfully transferring the rounding direction to the 32-bit versions.
Or do we already have such a test somewhere?
I don't think we have that test; I just haven't gotten around to the correctness and corner-case tests. I *think* that there are cases where this optimization could do one fewer rounding and maybe change answers, but it would be fewer roundings and hence "better", and since we don't give any other way to express the 32-bit roundings, I would normally assume that someone wants the better answer.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Keith Randall: It would be good to have a test for this.
For example, float32(math.Round(float64(x))) for some interesting x. In particular, make sure that the 32-bit rounding instruction (ROUNDSS) has the same behavior near math.MaxFloat32.
David Chase: I want a correctness test, not a codegen test (although a codegen test is good to have). Just to make sure that our rewrite rules are faithfully transferring the rounding direction to the 32-bit versions.
Or do we already have such a test somewhere?
I don't think we have that test; I just haven't gotten around to the correctness and corner-case tests. I *think* that there are cases where this optimization could do one fewer rounding and maybe change answers, but it would be fewer roundings and hence "better", and since we don't give any other way to express the 32-bit roundings, I would normally assume that someone wants the better answer.
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |