cmd/compile: support Zba extensions in riscv64 compiler
Add compiler support for Zba extensions, which are mandatory in the
rva22u64 profile. These can be used to accelerate address computation.
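
For example (an illustrative sketch, not part of the patch below), a scaled address computation such as p + i*8 is the kind of pattern these rules target:

```go
// Hypothetical example: i<<3 + p matches the new (ADD (SLLI [3] x) y) rule,
// so with GORISCV64=rva22u64 the address computation can become a single
// SH3ADD instead of a separate shift and add.
func addr(p, i int64) int64 {
	return p + i<<3
}
```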
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index c9e75b2..f39dfaf 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -288,7 +288,9 @@
ssa.OpRISCV64FEQS, ssa.OpRISCV64FNES, ssa.OpRISCV64FLTS, ssa.OpRISCV64FLES,
ssa.OpRISCV64FADDD, ssa.OpRISCV64FSUBD, ssa.OpRISCV64FMULD, ssa.OpRISCV64FDIVD,
ssa.OpRISCV64FEQD, ssa.OpRISCV64FNED, ssa.OpRISCV64FLTD, ssa.OpRISCV64FLED,
- ssa.OpRISCV64FSGNJD:
+ ssa.OpRISCV64FSGNJD,
+ ssa.OpRISCV64ADDUW, ssa.OpRISCV64SH1ADD, ssa.OpRISCV64SH1ADDUW, ssa.OpRISCV64SH2ADD,
+ ssa.OpRISCV64SH2ADDUW, ssa.OpRISCV64SH3ADD, ssa.OpRISCV64SH3ADDUW:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
@@ -423,7 +425,7 @@
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpRISCV64ADDI, ssa.OpRISCV64ADDIW, ssa.OpRISCV64XORI, ssa.OpRISCV64ORI, ssa.OpRISCV64ANDI,
- ssa.OpRISCV64SLLI, ssa.OpRISCV64SLLIW, ssa.OpRISCV64SRAI, ssa.OpRISCV64SRAIW,
+ ssa.OpRISCV64SLLI, ssa.OpRISCV64SLLIW, ssa.OpRISCV64SLLIUW, ssa.OpRISCV64SRAI, ssa.OpRISCV64SRAIW,
ssa.OpRISCV64SRLI, ssa.OpRISCV64SRLIW, ssa.OpRISCV64SLTI, ssa.OpRISCV64SLTIU,
ssa.OpRISCV64RORI, ssa.OpRISCV64RORIW:
p := s.Prog(v.Op.Asm())
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index c2df433..0bce665 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -834,3 +834,18 @@
(F(MADD|NMADD|MSUB|NMSUB)S x y neg:(FNEGS z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)S x y z)
(F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)D x y z)
(F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z)
+
+// --------------------- rva22u64 --------------------- //
+// Fold left shift least significant word
+(SLLI [i] (MOVWUreg x)) && i < 64 && buildcfg.GORISCV64 >= 22 => (SLLIUW [i] x)
+
+// Fold the add with (left shift )least significant word
+(ADD (MOVWUreg x) y) && buildcfg.GORISCV64 >= 22 => (ADDUW x y)
+(ADD (SLLIUW [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADDUW x y)
+(ADD (SLLIUW [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADDUW x y)
+(ADD (SLLIUW [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADDUW x y)
+
+// Fold the add with left shift amount
+(ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)
+(ADD (SLLI [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADD x y)
+(ADD (SLLI [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADD x y)
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index 13fa918..7b18fc6 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -468,6 +468,16 @@
{name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1
{name: "LoweredFMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1)
{name: "LoweredFMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1)
+
+ // B extension.
+ {name: "ADDUW", argLength: 2, reg: gp21, asm: "ADDUW"}, // ZeroExt32to64(Trunc64to32(arg0)) + arg1
+ {name: "SH1ADD", argLength: 2, reg: gp21, asm: "SH1ADD"}, // arg0<<1 + arg1
+ {name: "SH1ADDUW", argLength: 2, reg: gp21, asm: "SH1ADDUW"}, // ZeroExt32to64(Trunc64to32(arg0))<<1 + arg1
+ {name: "SH2ADD", argLength: 2, reg: gp21, asm: "SH2ADD"}, // arg0<<2 + arg1
+ {name: "SH2ADDUW", argLength: 2, reg: gp21, asm: "SH2ADDUW"}, // ZeroExt32to64(Trunc64to32(arg0))<<2 + arg1
+ {name: "SH3ADD", argLength: 2, reg: gp21, asm: "SH3ADD"}, // arg0<<3 + arg1
+ {name: "SH3ADDUW", argLength: 2, reg: gp21, asm: "SH3ADDUW"}, // ZeroExt32to64(Trunc64to32(arg0))<<3 + arg1
+ {name: "SLLIUW", argLength: 1, reg: gp11, asm: "SLLIUW", aux: "Int64"}, // ZeroExt32to64(Trunc64to32(arg0))<<auxint
}
RISCV64blocks := []blockData{
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 429c214..bc4f639 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2504,6 +2504,14 @@
OpRISCV64FLED
OpRISCV64LoweredFMIND
OpRISCV64LoweredFMAXD
+ OpRISCV64ADDUW
+ OpRISCV64SH1ADD
+ OpRISCV64SH1ADDUW
+ OpRISCV64SH2ADD
+ OpRISCV64SH2ADDUW
+ OpRISCV64SH3ADD
+ OpRISCV64SH3ADDUW
+ OpRISCV64SLLIUW
OpS390XFADDS
OpS390XFADD
@@ -33661,6 +33669,118 @@
},
},
},
+ {
+ name: "ADDUW",
+ argLen: 2,
+ asm: riscv.AADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH1ADD",
+ argLen: 2,
+ asm: riscv.ASH1ADD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH1ADDUW",
+ argLen: 2,
+ asm: riscv.ASH1ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH2ADD",
+ argLen: 2,
+ asm: riscv.ASH2ADD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH2ADDUW",
+ argLen: 2,
+ asm: riscv.ASH2ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH3ADD",
+ argLen: 2,
+ asm: riscv.ASH3ADD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH3ADDUW",
+ argLen: 2,
+ asm: riscv.ASH3ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SLLIUW",
+ auxType: auxInt64,
+ argLen: 1,
+ asm: riscv.ASLLIUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
{
name: "FADDS",
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index f033b25..bfbcf22 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -2,6 +2,7 @@
package ssa
+import "internal/buildcfg"
import "math"
import "cmd/compile/internal/types"
@@ -3234,6 +3235,139 @@
}
break
}
+ // match: (ADD (MOVWUreg x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (ADDUW x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64MOVWUreg {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64ADDUW)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLIUW [1] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH1ADDUW x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 1 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH1ADDUW)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLIUW [2] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH2ADDUW x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 2 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH2ADDUW)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLIUW [3] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH3ADDUW x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH3ADDUW)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLI [1] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH1ADD x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 1 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH1ADD)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLI [2] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH2ADD x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 2 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH2ADD)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLI [3] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH3ADD x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH3ADD)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
return false
}
func rewriteValueRISCV64_OpRISCV64ADDI(v *Value) bool {
@@ -6209,6 +6343,23 @@
v.AuxInt = int64ToAuxInt(y << uint32(x))
return true
}
+ // match: (SLLI [i] (MOVWUreg x))
+ // cond: i < 64 && buildcfg.GORISCV64 >= 22
+ // result: (SLLIUW [i] x)
+ for {
+ i := auxIntToInt64(v.AuxInt)
+ if v_0.Op != OpRISCV64MOVWUreg {
+ break
+ }
+ x := v_0.Args[0]
+ if !(i < 64 && buildcfg.GORISCV64 >= 22) {
+ break
+ }
+ v.reset(OpRISCV64SLLIUW)
+ v.AuxInt = int64ToAuxInt(i)
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index dc3bab7..68896a9 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -51,6 +51,11 @@
out[9] = a - 32769
}
+func AddWithLower32Bits(a uint64, b int64) uint64 {
+ // riscv64/rva22u64: "ADDUW\t"
+ return a + uint64(uint32(b))
+}
+
// ----------------- //
// Subtraction //
// ----------------- //
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 50d6042..aed3496 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -474,3 +474,36 @@
// amd64:-"SHR",-"SHL","ANDQ"
u[1] = u[1] << 5 >> 5
}
+
+// ------------------------------- //
+// least significant word shifts //
+// ------------------------------- //
+
+func checkAddWithLeftShiftLowerUnsigned32Bits(a uint64, b int64) uint64 {
+ // riscv64/rva22u64: "SH1ADDUW\t"
+ x := a + uint64(uint32(b))<<1
+ // riscv64/rva22u64: "SH2ADDUW\t"
+ y := a + uint64(uint32(b))<<2
+ // riscv64/rva22u64: "SH3ADDUW\t"
+ z := a + uint64(uint32(b))<<3
+ return x + y + z
+}
+
+func checkLeftShiftLowerUnsigned32Bits(a int64) uint64 {
+ // riscv64/rva22u64: "SLLIUW\t"
+ return uint64(uint32(a)) << 6
+}
+
+// ---------------------- //
+// add with left shifts //
+// ---------------------- //
+
+func checkAddWithLeftShift(a int64, b int64) int64 {
+ // riscv64/rva22u64: "SH1ADD\t"
+ x := a + b<<1
+ // riscv64/rva22u64: "SH2ADD\t"
+ y := a + b<<2
+ // riscv64/rva22u64: "SH3ADD\t"
+ z := a + b<<3
+ return x + y + z
+}
Hello, this is broken out from the previous patch (https://go-review.googlesource.com/c/go/+/560135), as are Zbb (https://go-review.googlesource.com/c/go/+/579798) and Zbs (https://go-review.googlesource.com/c/go/+/579797), and I have added a codegen test. Note that I only add some trivial rules here; I will add more complex rules in further patches.
| Code-Review | +2 |
| Commit-Queue | +1 |
| Run-TryBot | +1 |
LGTM
| Code-Review | +1 |
// --------------------- rva22u64 --------------------- //

I'd be inclined to make this more obvious/explicit, for example:
```
//
// Optimisations rules for rva22u64 and above.
//
```
(SLLI [i] (MOVWUreg x)) && i < 64 && buildcfg.GORISCV64 >= 22 => (SLLIUW [i] x)

Nit: It would be more consistent with existing rules to use `x` and `y` rather than `i` (e.g. `(SLLI [x] (MOVWUreg y))`).
// Fold the add with (left shift )least significant word

") " (i.e. the space should follow the closing parenthesis: `(left shift) least`)
// B extension.

Seems like it would be preferable to mention Zba here (if we're going to add/group by extension) (e.g. `B extension (Zba)`).
{name: "ADDUW", argLength: 2, reg: gp21, asm: "ADDUW"}, // ZeroExt32to64(Trunc64to32(arg0)) + arg1These tend to use English descriptions rather than code (see RORW for example) - `add least significant word of arg0 to arg1`
https://ci.chromium.org/ui/p/golang/builders/try/gotip-linux-riscv64/b8748768081761525393/overview
The failure looks real and related to #66874. Wang, can you rebase to master? I will set the trybot for you.
func checkAddWithLeftShiftLowerUnsigned32Bits(a uint64, b int64) uint64 {
// riscv64/rva22u64: "SH1ADDUW\t"
x := a + uint64(uint32(b))<<1
// riscv64/rva22u64: "SH2ADDUW\t"
y := a + uint64(uint32(b))<<2
// riscv64/rva22u64: "SH3ADDUW\t"
z := a + uint64(uint32(b))<<3
return x + y + z
}
func checkLeftShiftLowerUnsigned32Bits(a int64) uint64 {
// riscv64/rva22u64: "SLLIUW\t"
return uint64(uint32(a)) << 6
}

@wang...@linux.alibaba.com @jo...@sing.id.au @mark...@rivosinc.com @mengzh...@gmail.com
I encountered an issue while testing this CL locally.
While the historical WIP pipeline passed successfully and Joel Sing previously merged three instructions from this set (https://go-review.googlesource.com/c/go/+/606636), I discovered test failures when attempting to merge the remaining Zba instructions. The failures appear to stem from differences in the actual generated IR/instruction patterns that prevented the intended optimization rules from triggering.
**Test Failure Output:**
```
testdir_test.go:147:
codegen/shift.go:595: linux/riscv64/rva22u64: opcode not found: "^SH1ADDUW"
codegen/shift.go:597: linux/riscv64/rva22u64: opcode not found: "^SH2ADDUW"
codegen/shift.go:599: linux/riscv64/rva22u64: opcode not found: "^SH3ADDUW"
```
**Problem Analysis:**
For the addition folding test case:
```go
func checkAddWithLeftShiftLowerUnsigned32Bits(a uint64, b int64) uint64 {
// riscv64/rva22u64: "SH1ADDUW"
x := a + uint64(uint32(b))<<1
// riscv64/rva22u64: "SH2ADDUW"
y := a + uint64(uint32(b))<<2
// riscv64/rva22u64: "SH3ADDUW"
z := a + uint64(uint32(b))<<3
return x + y + z
}
```
The issue occurs because in the compiler's switch statement, `OpRISCV64ADD` patterns match before `OpRISCV64SLL` patterns. Rules like `(ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)` are triggered first, producing incorrect "SH1ADD" instructions instead of the expected "SH1ADDUW".
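Roughly, the two orderings being described (a sketch with hypothetical value names, using the rule notation):
```
// Source: x := a + uint64(uint32(b))<<1
//
// Intended: rewrite the shift first, then fold the add:
//   (SLLI [1] (MOVWUreg b))         => (SLLIUW [1] b)
//   (ADD (SLLIUW [1] b) a)          => (SH1ADDUW b a)
//
// Observed: the ADD rule fires while the inner SLLI is still present:
//   (ADD (SLLI [1] (MOVWUreg b)) a) => (SH1ADD (MOVWUreg b) a)
```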
**Proposed Solution:**
I think we need to reorganize the rule placement by moving part of the "Combine left shift and addition" logic to `RISCV64latelower.rules`:
**Modified `RISCV64latelower.rules`:**
```diff
+ // Combine left shift and addition.
+ (ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)
+ (ADD (SLLI [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADD x y)
+ (ADD (SLLI [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADD x y)
```
**Updated `RISCV64.rules`:**
```diff
+ // Fold left shift least significant word
+ (SLLI [x] (MOVWUreg y)) && x < 64 && buildcfg.GORISCV64 >= 22 => (SLLIUW [x] y)
- // Combine left shift and addition.
- (ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)
- (ADD (SLLI [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADD x y)
- (ADD (SLLI [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADD x y)
+ // Fold the add with (left shift) least significant word
+ (ADD (MOVWUreg x) y) && buildcfg.GORISCV64 >= 22 => (ADDUW x y)
+ (ADD (SLLIUW [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADDUW x y)
+ (ADD (SLLIUW [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADDUW x y)
+ (ADD (SLLIUW [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADDUW x y)
```
I'm puzzled why the earlier WIP pipeline passed successfully but fails now — have there been any recent changes that might have affected the code generation rules or their application order?
I think the reason is that, during the SSA rewrite process, `Lsh64x64` is now matched by the `(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y)` rule instead of the `(Lsh64x64 <t> x y) && !shiftIsBounded(v) => (AND (SLL <t> x y) (Neg64 <t> (SLTIU <t> [64] y)))` rule that applied when this CL was written, which keeps the SHxADDUW rules of this CL from firing.
Hello @9539...@qq.com and @18243...@qq.com, I think your suggestions are correct, and I will modify the code as @jo...@sing.id.au suggested and submit it on GitHub (because Google blocks access from Chinese IPs, submitting on GitHub is more convenient for us) this week. I will add you both as co-authors; what do you think? I will also resubmit the Zbb and Zbs extensions to GitHub.
Thank you! That's great news. Please let us know if there's anything we can do to help.
The PR is here: https://github.com/golang/go/pull/76211. I will figure out how to resolve what is blocking the merge.
I spotted some possible problems with your PR:
1. You have a long 138 character line in the commit message body. Please add line breaks to long lines that should be wrapped. Lines in the commit message body should be wrapped at ~76 characters unless needed for things like URLs or tables. (Note: GitHub might render long lines as soft-wrapped, so double-check in the Gerrit commit message shown above.)
2. You usually need to reference a bug number for all but trivial or cosmetic fixes. For this repo, the format is usually 'Fixes #12345' or 'Updates #12345' at the end of the commit message. Should you have a bug reference?
Please address any problems by updating the GitHub PR.
When complete, mark this comment as 'Done' and click the [blue 'Reply' button](https://go.dev/wiki/GerritBot#i-left-a-reply-to-a-comment-in-gerrit-but-no-one-but-me-can-see-it) above. These findings are based on heuristics; if a finding does not apply, briefly reply here saying so.
To update the commit title or commit message body shown here in Gerrit, you must edit the GitHub PR title and PR description (the first comment) in the GitHub web interface using the 'Edit' button or 'Edit' menu entry there. Note: pushing a new commit to the PR will not automatically update the commit message used by Gerrit.
For more details, see:
(In general for Gerrit code reviews, the change author is expected to [log in to Gerrit](https://go-review.googlesource.com/login/) with a Gmail or other Google account and then close out each piece of feedback by marking it as 'Done' if implemented as suggested or otherwise reply to each review comment. See the [Review](https://go.dev/doc/contribute#review) section of the Contributing Guide for details.)
cmd/compile: support Zba extensions in riscv64 compiler
Add compiler support for Zba extensions, which are mandatory in the rva22u64 profile. These can be used to accelerate address computation.
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index 9aa77c3..81a71cf 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -294,7 +294,8 @@
ssa.OpRISCV64FADDD, ssa.OpRISCV64FSUBD, ssa.OpRISCV64FMULD, ssa.OpRISCV64FDIVD,
ssa.OpRISCV64FEQD, ssa.OpRISCV64FNED, ssa.OpRISCV64FLTD, ssa.OpRISCV64FLED, ssa.OpRISCV64FSGNJD,
ssa.OpRISCV64MIN, ssa.OpRISCV64MAX, ssa.OpRISCV64MINU, ssa.OpRISCV64MAXU,
- ssa.OpRISCV64SH1ADD, ssa.OpRISCV64SH2ADD, ssa.OpRISCV64SH3ADD:
+ ssa.OpRISCV64SH1ADD, ssa.OpRISCV64SH2ADD, ssa.OpRISCV64SH3ADD,
+ ssa.OpRISCV64ADDUW, ssa.OpRISCV64SH1ADDUW, ssa.OpRISCV64SH2ADDUW, ssa.OpRISCV64SH3ADDUW:
r := v.Reg()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
@@ -433,7 +434,7 @@
case ssa.OpRISCV64ADDI, ssa.OpRISCV64ADDIW, ssa.OpRISCV64XORI, ssa.OpRISCV64ORI, ssa.OpRISCV64ANDI,
ssa.OpRISCV64SLLI, ssa.OpRISCV64SLLIW, ssa.OpRISCV64SRAI, ssa.OpRISCV64SRAIW,
ssa.OpRISCV64SRLI, ssa.OpRISCV64SRLIW, ssa.OpRISCV64SLTI, ssa.OpRISCV64SLTIU,
- ssa.OpRISCV64RORI, ssa.OpRISCV64RORIW:
+ ssa.OpRISCV64RORI, ssa.OpRISCV64RORIW, ssa.OpRISCV64SLLIUW:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 31829a5..0a9a21c7 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -838,10 +838,14 @@
// Optimisations for rva22u64 and above.
//
+// Combine truncate and logic shift left.
+(SLLI [i] (MOVWUreg x)) && i < 64 && buildcfg.GORISCV64 >= 22 => (SLLIUW [i] x)
+
// Combine left shift and addition.
-(ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)
-(ADD (SLLI [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADD x y)
-(ADD (SLLI [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADD x y)
+(ADD (MOVWUreg x) y) && buildcfg.GORISCV64 >= 22 => (ADDUW x y)
+(ADD (SLLIUW [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADDUW x y)
+(ADD (SLLIUW [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADDUW x y)
+(ADD (SLLIUW [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADDUW x y)
// Integer minimum and maximum.
(Min64 x y) && buildcfg.GORISCV64 >= 22 => (MIN x y)
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index a0e1ab9..edaf7ad 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -151,6 +151,7 @@
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
{name: "ADDI", argLength: 1, reg: gp11sb, asm: "ADDI", aux: "Int64"}, // arg0 + auxint
{name: "ADDIW", argLength: 1, reg: gp11, asm: "ADDIW", aux: "Int64"}, // 32 low bits of arg0 + auxint, sign extended to 64 bits
+ {name: "ADDUW", argLength: 2, reg: gp21, asm: "ADDUW"}, // add least significant word of arg0 to arg1
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
{name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW"}, // -arg0 of 32 bits, sign extended to 64 bits
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1
@@ -222,6 +223,7 @@
{name: "SRLW", argLength: 2, reg: gp21, asm: "SRLW"}, // arg0 >> (aux1 & 31), logical right shift of 32 bit value, sign extended to 64 bits
{name: "SLLI", argLength: 1, reg: gp11, asm: "SLLI", aux: "Int64"}, // arg0 << auxint, shift amount 0-63, logical left shift
{name: "SLLIW", argLength: 1, reg: gp11, asm: "SLLIW", aux: "Int64"}, // arg0 << auxint, shift amount 0-31, logical left shift of 32 bit value, sign extended to 64 bits
+ {name: "SLLIUW", argLength: 1, reg: gp11, asm: "SLLIUW", aux: "Int64"}, // arg0 << auxint, shift amount 0-31, logical left shift of 32 bit value, zero extended to 64 bits
{name: "SRAI", argLength: 1, reg: gp11, asm: "SRAI", aux: "Int64"}, // arg0 >> auxint, shift amount 0-63, arithmetic right shift
{name: "SRAIW", argLength: 1, reg: gp11, asm: "SRAIW", aux: "Int64"}, // arg0 >> auxint, shift amount 0-31, arithmetic right shift of 32 bit value, sign extended to 64 bits
{name: "SRLI", argLength: 1, reg: gp11, asm: "SRLI", aux: "Int64"}, // arg0 >> auxint, shift amount 0-63, logical right shift
@@ -231,6 +233,9 @@
{name: "SH1ADD", argLength: 2, reg: gp21, asm: "SH1ADD"}, // arg0 << 1 + arg1
{name: "SH2ADD", argLength: 2, reg: gp21, asm: "SH2ADD"}, // arg0 << 2 + arg1
{name: "SH3ADD", argLength: 2, reg: gp21, asm: "SH3ADD"}, // arg0 << 3 + arg1
+ {name: "SH1ADDUW", argLength: 2, reg: gp21, asm: "SH1ADDUW"}, // shift the least significant word of arg0 left by 1 and add it to arg1
+ {name: "SH2ADDUW", argLength: 2, reg: gp21, asm: "SH2ADDUW"}, // shift the least significant word of arg0 left by 2 and add it to arg1
+ {name: "SH3ADDUW", argLength: 2, reg: gp21, asm: "SH3ADDUW"}, // shift the least significant word of arg0 left by 3 and add it to arg1
// Bitwise ops
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64latelower.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64latelower.rules
index 7acaa2f..55b69fa 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64latelower.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64latelower.rules
@@ -23,3 +23,8 @@
(SRAI [0] x) => x
(SRLI [0] x) => x
(SLLI [0] x) => x
+
+// Combine left shift and addition.
+(ADD (SLLI [1] x) y) && buildcfg.GORISCV64 >= 22 => (SH1ADD x y)
+(ADD (SLLI [2] x) y) && buildcfg.GORISCV64 >= 22 => (SH2ADD x y)
+(ADD (SLLI [3] x) y) && buildcfg.GORISCV64 >= 22 => (SH3ADD x y)
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 264f4b3..7af1f62 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2473,6 +2473,7 @@
OpRISCV64ADD
OpRISCV64ADDI
OpRISCV64ADDIW
+ OpRISCV64ADDUW
OpRISCV64NEG
OpRISCV64NEGW
OpRISCV64SUB
@@ -2526,6 +2527,7 @@
OpRISCV64SRLW
OpRISCV64SLLI
OpRISCV64SLLIW
+ OpRISCV64SLLIUW
OpRISCV64SRAI
OpRISCV64SRAIW
OpRISCV64SRLI
@@ -2533,6 +2535,9 @@
OpRISCV64SH1ADD
OpRISCV64SH2ADD
OpRISCV64SH3ADD
+ OpRISCV64SH1ADDUW
+ OpRISCV64SH2ADDUW
+ OpRISCV64SH3ADDUW
OpRISCV64AND
OpRISCV64ANDN
OpRISCV64ANDI
@@ -33219,6 +33224,20 @@
},
},
{
+ name: "ADDUW",
+ argLen: 2,
+ asm: riscv.AADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
name: "NEG",
argLen: 1,
asm: riscv.ANEG,
@@ -33963,6 +33982,20 @@
},
},
{
+ name: "SLLIUW",
+ auxType: auxInt64,
+ argLen: 1,
+ asm: riscv.ASLLIUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
name: "SRAI",
auxType: auxInt64,
argLen: 1,
@@ -34061,6 +34094,48 @@
},
},
{
+ name: "SH1ADDUW",
+ argLen: 2,
+ asm: riscv.ASH1ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH2ADDUW",
+ argLen: 2,
+ asm: riscv.ASH2ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "SH3ADDUW",
+ argLen: 2,
+ asm: riscv.ASH3ADDUW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
name: "AND",
argLen: 2,
commutative: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index 52870fe..b9414d5 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -3315,12 +3315,12 @@
}
break
}
- // match: (ADD (SLLI [1] x) y)
+ // match: (ADD (MOVWUreg x) y)
// cond: buildcfg.GORISCV64 >= 22
- // result: (SH1ADD x y)
+ // result: (ADDUW x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 1 {
+ if v_0.Op != OpRISCV64MOVWUreg {
continue
}
x := v_0.Args[0]
@@ -3328,18 +3328,18 @@
if !(buildcfg.GORISCV64 >= 22) {
continue
}
- v.reset(OpRISCV64SH1ADD)
+ v.reset(OpRISCV64ADDUW)
v.AddArg2(x, y)
return true
}
break
}
- // match: (ADD (SLLI [2] x) y)
+ // match: (ADD (SLLIUW [1] x) y)
// cond: buildcfg.GORISCV64 >= 22
- // result: (SH2ADD x y)
+ // result: (SH1ADDUW x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 2 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 1 {
continue
}
x := v_0.Args[0]
@@ -3347,18 +3347,18 @@
if !(buildcfg.GORISCV64 >= 22) {
continue
}
- v.reset(OpRISCV64SH2ADD)
+ v.reset(OpRISCV64SH1ADDUW)
v.AddArg2(x, y)
return true
}
break
}
- // match: (ADD (SLLI [3] x) y)
+ // match: (ADD (SLLIUW [2] x) y)
// cond: buildcfg.GORISCV64 >= 22
- // result: (SH3ADD x y)
+ // result: (SH2ADDUW x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpRISCV64SLLI || auxIntToInt64(v_0.AuxInt) != 3 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 2 {
continue
}
x := v_0.Args[0]
@@ -3366,7 +3366,26 @@
if !(buildcfg.GORISCV64 >= 22) {
continue
}
- v.reset(OpRISCV64SH3ADD)
+ v.reset(OpRISCV64SH2ADDUW)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (ADD (SLLIUW [3] x) y)
+ // cond: buildcfg.GORISCV64 >= 22
+ // result: (SH3ADDUW x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpRISCV64SLLIUW || auxIntToInt64(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[0]
+ y := v_1
+ if !(buildcfg.GORISCV64 >= 22) {
+ continue
+ }
+ v.reset(OpRISCV64SH3ADDUW)
v.AddArg2(x, y)
return true
}
@@ -7163,6 +7182,23 @@
v.AuxInt = int64ToAuxInt(0)
return true
}
+ // match: (SLLI [i] (MOVWUreg x))
+ // cond: i < 64 && buildcfg.GORISCV64 >= 22
+ // result: (SLLIUW [i] x)
+ for {
+ i := auxIntToInt64(v.AuxInt)
+ if v_0.Op != OpRISCV64MOVWUreg {
+ break
+ }
+ x := v_0.Args[0]
+ if !(i < 64 && buildcfg.GORISCV64 >= 22) {
+ break
+ }
+ v.reset(OpRISCV64SLLIUW)
+ v.AuxInt = int64ToAuxInt(i)
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64latelower.go b/src/cmd/compile/internal/ssa/rewriteRISCV64latelower.go
index d2c3a8f..aa45b4d 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64latelower.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64latelower.go
@@ -2,8 +2,12 @@
package ssa
+import "internal/buildcfg"
+
func rewriteValueRISCV64latelower(v *Value) bool {
switch v.Op {
+ case OpRISCV64ADD:
+ return rewriteValueRISCV64latelower_OpRISCV64ADD(v)
case OpRISCV64AND:
return rewriteValueRISCV64latelower_OpRISCV64AND(v)
case OpRISCV64NOT:
@@ -21,6 +25,68 @@
}
return false
}
+func rewriteValueRISCV64latelower_OpRISCV64ADD(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ return false
+}
func rewriteValueRISCV64latelower_OpRISCV64AND(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 42d5d2e..ee51e63 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -220,6 +220,12 @@
return r
}
+func AddWithLeastSignificantWord(a uint64, b int64) uint64 {
+ // riscv64/rva20u64:"MOVWU" "ADD"
+ // riscv64/rva22u64,riscv64/rva23u64:"ADDUW"
+ return a + uint64(uint32(b))
+}
+
// -------------------- //
// Multiplication //
// -------------------- //
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 1877247..db47e3f 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -623,6 +623,19 @@
return a
}
+func checkLeftShiftLeastSignificantWordWithAddition(a uint64, b []int64) uint64 {
+ // riscv64/rva20u64: "SLLI" "SRLI" "ADD"
+ // riscv64/rva22u64,riscv64/rva23u64: "SH1ADDUW"
+ x := a + uint64(uint32(b[0]))<<1
+ // riscv64/rva20u64: "SLLI" "SRLI" "ADD"
+ // riscv64/rva22u64,riscv64/rva23u64: "SH2ADDUW"
+ y := a + uint64(uint32(b[1]))<<2
+ // riscv64/rva20u64: "SLLI" "SRLI" "ADD"
+ // riscv64/rva22u64,riscv64/rva23u64: "SH3ADDUW"
+ z := a + uint64(uint32(b[2]))<<3
+ return x + y + z
+}
+
//
// Convert and shift.
//
@@ -687,6 +700,12 @@
return x
}
+func lsh32Uto64U(a int64) uint64 {
+ // riscv64/rva20u64:"SLLI" "SRLI"
+ // riscv64/rva22u64,riscv64/rva23u64:"SLLIUW"
+ return uint64(uint32(a)) << 6
+}
+
// We don't need to worry about shifting
// more than the type size.
// (There is still a negative shift test, but
Hello all, this is a duplicate of https://go-review.googlesource.com/c/go/+/580276/2; as mentioned there, I have to upstream on GitHub from now on. Thanks for your understanding.