diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 5ddcb84..64e518a 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -43,6 +43,10 @@
}
}
+func isGPReg(r int16) bool {
+ return x86.REG_AL <= r && r <= x86.REG_R15
+}
+
func isFPReg(r int16) bool {
return x86.REG_X0 <= r && r <= x86.REG_Z31
}
@@ -1225,14 +1229,23 @@
if v.Type.IsMemory() {
return
}
- x := v.Args[0].Reg()
+ arg := v.Args[0]
+ x := arg.Reg()
y := v.Reg()
if v.Type.IsSIMD() {
- x = simdOrMaskReg(v.Args[0])
+ x = simdOrMaskReg(arg)
y = simdOrMaskReg(v)
}
if x != y {
- opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
+ width := v.Type.Size()
+ if isGPReg(y) && width == 8 && ssa.ZeroUpper32Bits(arg, 3) {
+ // The source is already zero-extended from 32 to 64 bits,
+ // so a 32-bit move produces the same 64-bit result.
+ // Prefer it: dropping the REX.W prefix saves an instruction byte
+ // (and I-cache space) whenever neither register needs a REX prefix.
+ width = 4
+ }
+ opregreg(s, moveByRegsWidth(y, x, width), y, x)
}
case ssa.OpLoadReg:
if v.Type.IsFlags() {
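
Note (commentary, not part of the patch): the copy case above only fires when the copied value is already known to be zero-extended. A minimal Go sketch of the pattern it targets; the function name is made up, and this assumes the widened value reaches the copy unchanged:

package main

// copyZeroExtended is illustrative only. v has its upper 32 bits cleared
// by construction, so the register-to-register copy the compiler emits for
// the second result can use a 32-bit MOVL instead of a 64-bit MOVQ and
// skip the REX.W prefix without changing the value.
func copyZeroExtended(x uint32) (uint64, uint64) {
	v := uint64(x)
	return v, v
}

func main() {
	_, _ = copyZeroExtended(7)
}
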
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules b/src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules
index ead4ec4..9bdb5f8 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules
@@ -8,6 +8,6 @@
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
// See comments in ARM64latelower.rules for why these are here.
-(MOVLQZX x) && zeroUpper32Bits(x,3) => x
-(MOVWQZX x) && zeroUpper48Bits(x,3) => x
-(MOVBQZX x) && zeroUpper56Bits(x,3) => x
+(MOVLQZX x) && ZeroUpper32Bits(x,3) => x
+(MOVWQZX x) && ZeroUpper48Bits(x,3) => x
+(MOVBQZX x) && ZeroUpper56Bits(x,3) => x
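
For reference (these rules are only renamed here, not changed): they drop a zero-extension whose input already has the relevant upper bits cleared. A codegen-style sketch of such a case; the function name is hypothetical and this is not one of the repository's tests:

package codegen

// The 32-bit ADDL already clears the upper half of the destination
// register, so the uint64 conversion should need no separate MOVLQZX
// on amd64.
func redundantZeroExtend(a, b uint32) uint64 {
	return uint64(a + b)
}
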
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
index 8c43b960b..7945a54 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
@@ -29,7 +29,7 @@
(MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x
// omit unsigned extension
-(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
+(MOVWUreg x) && ZeroUpper32Bits(x, 3) => x
// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
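
Same rename on the arm64 side. A sketch of a case the rule covers, relying on MULW being in ZeroUpper32Bits' op list in rewrite.go; the function name is hypothetical, illustration only:

package codegen

// MULW writes a 32-bit product and clears the upper 32 bits of the
// destination register, so the uint64 conversion should need no extra
// MOVWUreg on arm64.
func noExtraExtend(a, b uint32) uint64 {
	return uint64(a * b)
}
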
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index d68bbf5..3c3fb56 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1356,7 +1356,7 @@
-// check if value zeroes out upper 32-bit of 64-bit register.
-// depth limits recursion depth. In AMD64.rules 3 is used as limit,
-// because it catches same amount of cases as 4.
-func zeroUpper32Bits(x *Value, depth int) bool {
+// ZeroUpper32Bits reports whether x zeroes out the upper 32 bits of a
+// 64-bit register. depth limits the recursion depth; AMD64.rules uses 3
+// as the limit because it catches the same number of cases as 4.
+func ZeroUpper32Bits(x *Value, depth int) bool {
if x.Type.IsSigned() && x.Type.Size() < 8 {
// If the value is signed, it might get re-sign-extended
// during spill and restore. See issue 68227.
@@ -1376,6 +1376,8 @@
OpAMD64SETG, OpAMD64SETGE, OpAMD64SETB, OpAMD64SETBE,
OpAMD64SETA, OpAMD64SETAE, OpAMD64SETO:
return true
+ case OpAMD64MOVQconst:
+ return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
@@ -1391,7 +1393,7 @@
return false
}
for i := range x.Args {
- if !zeroUpper32Bits(x.Args[i], depth-1) {
+ if !ZeroUpper32Bits(x.Args[i], depth-1) {
return false
}
}
@@ -1401,8 +1403,8 @@
return false
}
-// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
-func zeroUpper48Bits(x *Value, depth int) bool {
+// ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
+func ZeroUpper48Bits(x *Value, depth int) bool {
if x.Type.IsSigned() && x.Type.Size() < 8 {
return false
}
@@ -1412,6 +1414,8 @@
OpAMD64SETG, OpAMD64SETGE, OpAMD64SETB, OpAMD64SETBE,
OpAMD64SETA, OpAMD64SETAE, OpAMD64SETO:
return true
+ case OpAMD64MOVQconst, OpAMD64MOVLconst:
+ return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
case OpArg: // note: but not ArgIntReg
return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
case OpPhi, OpSelect0, OpSelect1:
@@ -1421,7 +1425,7 @@
return false
}
for i := range x.Args {
- if !zeroUpper48Bits(x.Args[i], depth-1) {
+ if !ZeroUpper48Bits(x.Args[i], depth-1) {
return false
}
}
@@ -1431,8 +1435,8 @@
return false
}
-// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
-func zeroUpper56Bits(x *Value, depth int) bool {
+// ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
+func ZeroUpper56Bits(x *Value, depth int) bool {
if x.Type.IsSigned() && x.Type.Size() < 8 {
return false
}
@@ -1442,6 +1446,8 @@
OpAMD64SETG, OpAMD64SETGE, OpAMD64SETB, OpAMD64SETBE,
OpAMD64SETA, OpAMD64SETAE, OpAMD64SETO:
return true
+ case OpAMD64MOVQconst, OpAMD64MOVLconst:
+ return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
case OpArg: // note: but not ArgIntReg
return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
case OpPhi, OpSelect0, OpSelect1:
@@ -1451,7 +1457,7 @@
return false
}
for i := range x.Args {
- if !zeroUpper56Bits(x.Args[i], depth-1) {
+ if !ZeroUpper56Bits(x.Args[i], depth-1) {
return false
}
}
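
The three constant cases added above share one idiom: the constant is unchanged by truncation exactly when it has no set bits above the checked width. A standalone sketch with a hypothetical name, equivalent to the uint32/uint16/uint8 comparisons in the patch:

package main

import "fmt"

// fitsUnsigned reports whether c has no set bits at or above bit n, i.e.
// loading it with an n-bit move leaves the rest of a 64-bit register zero.
// For n = 32 this is equivalent to uint64(uint32(c)) == uint64(c).
func fitsUnsigned(c int64, n uint) bool {
	return uint64(c)>>n == 0
}

func main() {
	fmt.Println(fitsUnsigned(0x1234, 16)) // true
	fmt.Println(fitsUnsigned(-1, 32))     // false: sign extension fills the upper bits
	fmt.Println(fitsUnsigned(1<<33, 32))  // false: bit 33 is set
}
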
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64latelower.go b/src/cmd/compile/internal/ssa/rewriteAMD64latelower.go
index 11ecb0b..531fbe1 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64latelower.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64latelower.go
@@ -30,11 +30,11 @@
func rewriteValueAMD64latelower_OpAMD64MOVBQZX(v *Value) bool {
v_0 := v.Args[0]
// match: (MOVBQZX x)
- // cond: zeroUpper56Bits(x,3)
+ // cond: ZeroUpper56Bits(x,3)
// result: x
for {
x := v_0
- if !(zeroUpper56Bits(x, 3)) {
+ if !(ZeroUpper56Bits(x, 3)) {
break
}
v.copyOf(x)
@@ -45,11 +45,11 @@
func rewriteValueAMD64latelower_OpAMD64MOVLQZX(v *Value) bool {
v_0 := v.Args[0]
// match: (MOVLQZX x)
- // cond: zeroUpper32Bits(x,3)
+ // cond: ZeroUpper32Bits(x,3)
// result: x
for {
x := v_0
- if !(zeroUpper32Bits(x, 3)) {
+ if !(ZeroUpper32Bits(x, 3)) {
break
}
v.copyOf(x)
@@ -60,11 +60,11 @@
func rewriteValueAMD64latelower_OpAMD64MOVWQZX(v *Value) bool {
v_0 := v.Args[0]
// match: (MOVWQZX x)
- // cond: zeroUpper48Bits(x,3)
+ // cond: ZeroUpper48Bits(x,3)
// result: x
for {
x := v_0
- if !(zeroUpper48Bits(x, 3)) {
+ if !(ZeroUpper48Bits(x, 3)) {
break
}
v.copyOf(x)
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
index 0fa5e26..43ddb34 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
@@ -653,11 +653,11 @@
func rewriteValueARM64latelower_OpARM64MOVWUreg(v *Value) bool {
v_0 := v.Args[0]
// match: (MOVWUreg x)
- // cond: zeroUpper32Bits(x, 3)
+ // cond: ZeroUpper32Bits(x, 3)
// result: x
for {
x := v_0
- if !(zeroUpper32Bits(x, 3)) {
+ if !(ZeroUpper32Bits(x, 3)) {
break
}
v.copyOf(x)
diff --git a/test/codegen/constants.go b/test/codegen/constants.go
index 178a106..9b014b5 100644
--- a/test/codegen/constants.go
+++ b/test/codegen/constants.go
@@ -33,3 +33,15 @@
out[3] = 0xFFFFFFFE00000001
return
}
+
+// Issue 76449: constants that fit in 32 bits, and register copies of
+// values known to be zero-extended, should not need 64-bit MOVQ instructions.
+func issue76449_1() (_, _, _ uint64) {
+ // amd64:-"MOVQ"
+ return 0, 0, 0
+}
+
+func issue76449_2() (_, _, _ uint64) {
+ // amd64:-"MOVQ"
+ return 1, 2, 1
+}
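
A companion case the new tests deliberately do not cover: a constant with bits above bit 31 still needs a 64-bit MOVQ, because the MOVQconst case added to ZeroUpper32Bits returns false for it. If it were ever added as a codegen test, it would look roughly like this (sketch only, not part of the patch):

package codegen

// 0xFFFFFFFE00000001 does not fit in 32 bits, so materializing it and
// copying it between registers keeps the full 64-bit MOVQ.
func needsMOVQ() (_, _ uint64) {
	// amd64:"MOVQ"
	return 0xFFFFFFFE00000001, 0xFFFFFFFE00000001
}
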