diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
index 76151a3..a70398b 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64enc.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -94,6 +94,12 @@
CLS R15, ZR // ff15c0da
CLZW R1, R14 // 2e10c05a
CLZ R21, R9 // a912c0da
+ ABS R1, R2 // 2220c0da
+ ABSW R1, R2 // 2220c05a
+ CNT R3, R4 // 641cc0da
+ CNTW R3, R4 // 641cc05a
+ CTZ R5, R6 // a618c0da
+ CTZW R5, R6 // a618c05a
CMNW R21.UXTB<<4, R15 // ff11352b
CMN R0.UXTW<<4, R16 // 1f5220ab
CMNW R13>>8, R9 // 3f214d2b
@@ -317,6 +323,11 @@
SBFX $2, R27, $54, R7 // 67df4293
SDIVW R22, R14, R9 // c90dd61a
SDIV R13, R21, R9 // a90ecd9a
+ SMAX R1, R2, R3 // 4360c19a
+ SMAXW R1, R2, R3 // 4360c11a
+ SMAX $-128, R2, R3 // 4300c291
+ SMIN R1, R2, R3 // 4368c19a
+ SMINW $127, R2, R3 // 43fcc911
SEV // 9f2003d5
SEVL // bf2003d5
SMADDL R3, R7, R11, R9 // 691d239b
@@ -416,6 +427,10 @@
UMSUBL R22, R4, R3, R7 // 6790b69b
UMNEGL R3, R19, R1 // 61fea39b
UMULH R24, R20, R24 // 987ed89b
+ UMAX R1, R2, R3 // 4364c19a
+ UMAX $255, R2, R3 // 43fcc791
+ UMINW R1, R2, R3 // 436cc11a
+ UMINW $0, R2, R3 // 4300cc11
UMULL R19, R22, R19 // d37eb39b
UXTBW R2, R6 // 461c0053
UXTHW R7, R20 // f43c0053
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 25dd523..d50e054 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -9,6 +9,8 @@
ADD R1.UXTB<<5, R2, R3 // ERROR "shift amount out of range 0 to 4"
ADDS R1.UXTX<<7, R2, R3 // ERROR "shift amount out of range 0 to 4"
ADDS R5, R6, RSP // ERROR "illegal destination register"
+ SMAX $128, R1, R2 // ERROR "signed comparison immediate not in the range -128 to 127"
+ UMAX $-1, R1, R2 // ERROR "unsigned comparison immediate not in the range 0 to 255"
SUBS R5, R6, RSP // ERROR "illegal destination register"
ADDSW R5, R6, RSP // ERROR "illegal destination register"
SUBSW R5, R6, RSP // ERROR "illegal destination register"
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 44bf01d..544e918 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -687,6 +687,14 @@
ssa.OpARM64UMOD,
ssa.OpARM64MODW,
ssa.OpARM64UMODW,
+ ssa.OpARM64SMAX,
+ ssa.OpARM64SMAXW,
+ ssa.OpARM64SMIN,
+ ssa.OpARM64SMINW,
+ ssa.OpARM64UMAX,
+ ssa.OpARM64UMAXW,
+ ssa.OpARM64UMIN,
+ ssa.OpARM64UMINW,
ssa.OpARM64SLL,
ssa.OpARM64SRL,
ssa.OpARM64SRA,
@@ -1421,6 +1429,10 @@
ssa.OpARM64RBITW,
ssa.OpARM64CLZ,
ssa.OpARM64CLZW,
+ ssa.OpARM64CNT,
+ ssa.OpARM64CNTW,
+ ssa.OpARM64CTZ,
+ ssa.OpARM64CTZW,
ssa.OpARM64FRINTAD,
ssa.OpARM64FRINTMD,
ssa.OpARM64FRINTND,
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 4c6c437..f5393b1 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -75,15 +75,28 @@
(Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...)
+(Ctz64 <t> x) && buildcfg.GOARM64.CSSC => (CTZ <t> x)
+(Ctz32 <t> x) && buildcfg.GOARM64.CSSC => (CTZW <t> x)
+(Ctz16 <t> x) && buildcfg.GOARM64.CSSC => (CTZW <t> (ORconst <typ.UInt32> [0x10000] x))
+(Ctz8 <t> x) && buildcfg.GOARM64.CSSC => (CTZW <t> (ORconst <typ.UInt32> [0x100] x))
(Ctz64 <t> x) => (CLZ (RBIT <t> x))
(Ctz32 <t> x) => (CLZW (RBITW <t> x))
(Ctz16 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
(Ctz8 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
+(PopCount64 <t> x) && buildcfg.GOARM64.CSSC => (CNT <t> x)
+(PopCount32 <t> x) && buildcfg.GOARM64.CSSC => (CNTW <t> x)
+(PopCount16 <t> x) && buildcfg.GOARM64.CSSC => (CNTW <t> (ZeroExt16to32 x))
+(PopCount8 <t> x) && buildcfg.GOARM64.CSSC => (CNTW <t> (ZeroExt8to32 x))
(PopCount64 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
(PopCount32 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
(PopCount16 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))
+(Min64 x y) && buildcfg.GOARM64.CSSC => (SMIN x y)
+(Max64 x y) && buildcfg.GOARM64.CSSC => (SMAX x y)
+(Min64u x y) && buildcfg.GOARM64.CSSC => (UMIN x y)
+(Max64u x y) && buildcfg.GOARM64.CSSC => (UMAX x y)
+
// Load args directly into the register class where it will be used.
(FMOVDgpfp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
(FMOVDfpgp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
index 04d960f..73af428 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
@@ -221,6 +221,14 @@
{name: "UMOD", argLength: 2, reg: gp21, asm: "UREM", earlyOk: true}, // arg0 % arg1, unsigned
{name: "MODW", argLength: 2, reg: gp21, asm: "REMW", earlyOk: true}, // arg0 % arg1, signed, 32 bit
{name: "UMODW", argLength: 2, reg: gp21, asm: "UREMW", earlyOk: true}, // arg0 % arg1, unsigned, 32 bit
+ {name: "SMAX", argLength: 2, reg: gp21, asm: "SMAX", commutative: true, earlyOk: true}, // max(arg0, arg1), signed
+ {name: "SMAXW", argLength: 2, reg: gp21, asm: "SMAXW", commutative: true, earlyOk: true}, // max(arg0, arg1), signed, 32 bit
+ {name: "SMIN", argLength: 2, reg: gp21, asm: "SMIN", commutative: true, earlyOk: true}, // min(arg0, arg1), signed
+ {name: "SMINW", argLength: 2, reg: gp21, asm: "SMINW", commutative: true, earlyOk: true}, // min(arg0, arg1), signed, 32 bit
+ {name: "UMAX", argLength: 2, reg: gp21, asm: "UMAX", commutative: true, earlyOk: true}, // max(arg0, arg1), unsigned
+ {name: "UMAXW", argLength: 2, reg: gp21, asm: "UMAXW", commutative: true, earlyOk: true}, // max(arg0, arg1), unsigned, 32 bit
+ {name: "UMIN", argLength: 2, reg: gp21, asm: "UMIN", commutative: true, earlyOk: true}, // min(arg0, arg1), unsigned
+ {name: "UMINW", argLength: 2, reg: gp21, asm: "UMINW", commutative: true, earlyOk: true}, // min(arg0, arg1), unsigned, 32 bit
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true, earlyOk: true}, // arg0 + arg1
{name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true, earlyOk: true}, // arg0 + arg1
@@ -266,6 +274,10 @@
{name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW", earlyOk: true}, // bit reverse, 32-bit
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ", earlyOk: true}, // count leading zero, 64-bit
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW", earlyOk: true}, // count leading zero, 32-bit
+ {name: "CNT", argLength: 1, reg: gp11, asm: "CNT", earlyOk: true}, // count set bits, 64-bit
+ {name: "CNTW", argLength: 1, reg: gp11, asm: "CNTW", earlyOk: true}, // count set bits, 32-bit
+ {name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ", earlyOk: true}, // count trailing zero, 64-bit
+ {name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW", earlyOk: true}, // count trailing zero, 32-bit
{name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT", earlyOk: true}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
{name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV", earlyOk: true}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true, earlyOk: true},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index f79a83c..45f3254 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -4671,6 +4671,14 @@
OpARM64UMOD
OpARM64MODW
OpARM64UMODW
+ OpARM64SMAX
+ OpARM64SMAXW
+ OpARM64SMIN
+ OpARM64SMINW
+ OpARM64UMAX
+ OpARM64UMAXW
+ OpARM64UMIN
+ OpARM64UMINW
OpARM64FADDS
OpARM64FADDD
OpARM64FSUBS
@@ -4712,6 +4720,10 @@
OpARM64RBITW
OpARM64CLZ
OpARM64CLZW
+ OpARM64CNT
+ OpARM64CNTW
+ OpARM64CTZ
+ OpARM64CTZW
OpARM64VCNT
OpARM64VUADDLV
OpARM64LoweredRound32F
@@ -75178,6 +75190,134 @@
},
},
{
+ name: "SMAX",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.ASMAX,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "SMAXW",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.ASMAXW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "SMIN",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.ASMIN,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "SMINW",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.ASMINW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "UMAX",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.AUMAX,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "UMAXW",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.AUMAXW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "UMIN",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.AUMIN,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "UMINW",
+ argLen: 2,
+ commutative: true,
+ earlyOk: true,
+ asm: arm64.AUMINW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
name: "FADDS",
argLen: 2,
commutative: true,
@@ -75781,6 +75921,62 @@
},
},
{
+ name: "CNT",
+ argLen: 1,
+ earlyOk: true,
+ asm: arm64.ACNT,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "CNTW",
+ argLen: 1,
+ earlyOk: true,
+ asm: arm64.ACNTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "CTZ",
+ argLen: 1,
+ earlyOk: true,
+ asm: arm64.ACTZ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "CTZW",
+ argLen: 1,
+ earlyOk: true,
+ asm: arm64.ACTZW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, regMask{v1: 402653183, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, regMask{v1: 335544319, v2: 0}}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
name: "VCNT",
argLen: 1,
earlyOk: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 26d94b9..1c57fcf 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -2,6 +2,7 @@
package ssa
+import "internal/buildcfg"
import "cmd/compile/internal/types"
func rewriteValueARM64(v *Value) bool {
@@ -1494,9 +1495,13 @@
case OpMax32F:
v.Op = OpARM64FMAXS
return true
+ case OpMax64:
+ return rewriteValueARM64_OpMax64(v)
case OpMax64F:
v.Op = OpARM64FMAXD
return true
+ case OpMax64u:
+ return rewriteValueARM64_OpMax64u(v)
case OpMaxFloat32x4:
v.Op = OpARM64VFMAX4S
return true
@@ -1527,9 +1532,13 @@
case OpMin32F:
v.Op = OpARM64FMINS
return true
+ case OpMin64:
+ return rewriteValueARM64_OpMin64(v)
case OpMin64F:
v.Op = OpARM64FMIND
return true
+ case OpMin64u:
+ return rewriteValueARM64_OpMin64u(v)
case OpMinFloat32x4:
v.Op = OpARM64VFMIN4S
return true
@@ -1812,6 +1821,8 @@
return rewriteValueARM64_OpPopCount32(v)
case OpPopCount64:
return rewriteValueARM64_OpPopCount64(v)
+ case OpPopCount8:
+ return rewriteValueARM64_OpPopCount8(v)
case OpPrefetchCache:
return rewriteValueARM64_OpPrefetchCache(v)
case OpPrefetchCacheStreamed:
@@ -19898,6 +19909,23 @@
b := v.Block
typ := &b.Func.Config.Types
// match: (Ctz16 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CTZW <t> (ORconst <typ.UInt32> [0x10000] x))
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CTZW)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32)
+ v0.AuxInt = int64ToAuxInt(0x10000)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Ctz16 <t> x)
// result: (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
for {
t := v.Type
@@ -19917,6 +19945,20 @@
v_0 := v.Args[0]
b := v.Block
// match: (Ctz32 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CTZW <t> x)
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CTZW)
+ v.Type = t
+ v.AddArg(x)
+ return true
+ }
+ // match: (Ctz32 <t> x)
// result: (CLZW (RBITW <t> x))
for {
t := v.Type
@@ -19932,6 +19974,20 @@
v_0 := v.Args[0]
b := v.Block
// match: (Ctz64 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CTZ <t> x)
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CTZ)
+ v.Type = t
+ v.AddArg(x)
+ return true
+ }
+ // match: (Ctz64 <t> x)
// result: (CLZ (RBIT <t> x))
for {
t := v.Type
@@ -19948,6 +20004,23 @@
b := v.Block
typ := &b.Func.Config.Types
// match: (Ctz8 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CTZW <t> (ORconst <typ.UInt32> [0x100] x))
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CTZW)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32)
+ v0.AuxInt = int64ToAuxInt(0x100)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Ctz8 <t> x)
// result: (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
for {
t := v.Type
@@ -21247,6 +21320,78 @@
}
return false
}
+func rewriteValueARM64_OpMax64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Max64 x y)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (SMAX x y)
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64SMAX)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpMax64u(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Max64u x y)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (UMAX x y)
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64UMAX)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpMin64(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Min64 x y)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (SMIN x y)
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64SMIN)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpMin64u(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Min64u x y)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (UMIN x y)
+ for {
+ x := v_0
+ y := v_1
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64UMIN)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpMod16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -22144,6 +22289,22 @@
b := v.Block
typ := &b.Func.Config.Types
// match: (PopCount16 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CNTW <t> (ZeroExt16to32 x))
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CNTW)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (PopCount16 <t> x)
// result: (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))
for {
t := v.Type
@@ -22167,6 +22328,20 @@
b := v.Block
typ := &b.Func.Config.Types
// match: (PopCount32 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CNTW <t> x)
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CNTW)
+ v.Type = t
+ v.AddArg(x)
+ return true
+ }
+ // match: (PopCount32 <t> x)
// result: (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
for {
t := v.Type
@@ -22190,6 +22365,20 @@
b := v.Block
typ := &b.Func.Config.Types
// match: (PopCount64 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CNT <t> x)
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CNT)
+ v.Type = t
+ v.AddArg(x)
+ return true
+ }
+ // match: (PopCount64 <t> x)
// result: (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
for {
t := v.Type
@@ -22206,6 +22395,28 @@
return true
}
}
+func rewriteValueARM64_OpPopCount8(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (PopCount8 <t> x)
+ // cond: buildcfg.GOARM64.CSSC
+ // result: (CNTW <t> (ZeroExt8to32 x))
+ for {
+ t := v.Type
+ x := v_0
+ if !(buildcfg.GOARM64.CSSC) {
+ break
+ }
+ v.reset(OpARM64CNTW)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpPrefetchCache(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index ff56c99..2e2feb8 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -4331,7 +4331,8 @@
}
if typ.IsInteger() {
- if Arch.LinkArch.Family == sys.RISCV64 && buildcfg.GORISCV64 >= 22 && typ.Size() == 8 {
+ if typ.Size() == 8 && ((Arch.LinkArch.Family == sys.RISCV64 && buildcfg.GORISCV64 >= 22) ||
+ (Arch.LinkArch.Family == sys.ARM64 && buildcfg.GOARM64.CSSC)) {
var op ssa.Op
switch {
case typ.IsSigned() && n.Op() == ir.OMIN:
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 1426c72..aff8093 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -658,7 +658,9 @@
//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm64
const (
- AADC = obj.ABaseARM64 + obj.A_ARCHSPECIFIC + iota
+ AABS = obj.ABaseARM64 + obj.A_ARCHSPECIFIC + iota
+ AABSW
+ AADC
AADCS
AADCSW
AADCW
@@ -763,6 +765,8 @@
ACSINVW
ACSNEG
ACSNEGW
+ ACNT
+ ACNTW
ADC
ADCPS1
ADCPS2
@@ -1040,6 +1044,10 @@
ASHA512H2
ASHA512SU0
ASHA512SU1
+ ASMAX
+ ASMAXW
+ ASMIN
+ ASMINW
ASMADDL
ASMC
ASMNEGL
@@ -1097,6 +1105,8 @@
ATLBI
ATST
ATSTW
+ ACTZ
+ ACTZW
AUBFIZ
AUBFIZW
AUBFM
@@ -1110,6 +1120,10 @@
AUDIV
AUDIVW
AUMADDL
+ AUMAX
+ AUMAXW
+ AUMIN
+ AUMINW
AUMNEGL
AUMSUBL
AUMULH
diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go
index 62620e8..69b79c3 100644
--- a/src/cmd/internal/obj/arm64/anames.go
+++ b/src/cmd/internal/obj/arm64/anames.go
@@ -1,11 +1,13 @@
// Code generated by stringer -i a.out.go -o anames.go -p arm64; DO NOT EDIT.
package arm64
import "cmd/internal/obj"
var Anames = []string{
- obj.A_ARCHSPECIFIC: "ADC",
+ obj.A_ARCHSPECIFIC: "ABS",
+ "ABSW",
+ "ADC",
"ADCS",
"ADCSW",
"ADCW",
@@ -110,6 +112,8 @@
"CSINVW",
"CSNEG",
"CSNEGW",
+ "CNT",
+ "CNTW",
"DC",
"DCPS1",
"DCPS2",
@@ -387,6 +391,10 @@
"SHA512H2",
"SHA512SU0",
"SHA512SU1",
+ "SMAX",
+ "SMAXW",
+ "SMIN",
+ "SMINW",
"SMADDL",
"SMC",
"SMNEGL",
@@ -444,6 +452,8 @@
"TLBI",
"TST",
"TSTW",
+ "CTZ",
+ "CTZW",
"UBFIZ",
"UBFIZW",
"UBFM",
@@ -457,6 +467,10 @@
"UDIV",
"UDIVW",
"UMADDL",
+ "UMAX",
+ "UMAXW",
+ "UMIN",
+ "UMINW",
"UMNEGL",
"UMSUBL",
"UMULH",
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 5bd3c4a..c85fdd8 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -369,6 +369,8 @@
{AREM, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 16, 8, 0, 0, 0},
{ASDIV, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 1, 4, 0, 0, 0},
{ASDIV, C_ZREG, C_ZREG, C_NONE, C_ZREG, C_NONE, 1, 4, 0, 0, 0},
+ {ASMAX, C_ZREG, C_ZREG, C_NONE, C_ZREG, C_NONE, 9, 4, 0, 0, 0},
+ {ASMAX, C_VCON, C_ZREG, C_NONE, C_ZREG, C_NONE, 111, 4, 0, 0, 0},
{AFADDS, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 54, 4, 0, 0, 0},
{AFADDS, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 54, 4, 0, 0, 0},
@@ -484,6 +486,7 @@
{ABFI, C_VCON, C_ZREG, C_VCON, C_ZREG, C_NONE, 43, 4, 0, 0, 0},
{AEXTR, C_VCON, C_ZREG, C_ZREG, C_ZREG, C_NONE, 44, 4, 0, 0, 0},
{ASXTB, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 45, 4, 0, 0, 0},
+ {AABS, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 46, 4, 0, 0, 0},
{ACLS, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 46, 4, 0, 0, 0},
{ALSL, C_VCON, C_ZREG, C_NONE, C_ZREG, C_NONE, 8, 4, 0, 0, 0},
{ALSL, C_VCON, C_NONE, C_NONE, C_ZREG, C_NONE, 8, 4, 0, 0, 0},
@@ -2912,6 +2915,13 @@
oprangeset(AREV16W, t)
oprangeset(AREV32, t)
+ case AABS:
+ oprangeset(AABSW, t)
+ oprangeset(ACNT, t)
+ oprangeset(ACNTW, t)
+ oprangeset(ACTZ, t)
+ oprangeset(ACTZW, t)
+
case ASDIV:
oprangeset(ASDIVW, t)
oprangeset(AUDIV, t)
@@ -2925,6 +2935,15 @@
oprangeset(ACRC32W, t)
oprangeset(ACRC32X, t)
+ case ASMAX:
+ oprangeset(ASMAXW, t)
+ oprangeset(ASMIN, t)
+ oprangeset(ASMINW, t)
+ oprangeset(AUMAX, t)
+ oprangeset(AUMAXW, t)
+ oprangeset(AUMIN, t)
+ oprangeset(AUMINW, t)
+
case AMADD:
oprangeset(AMADDW, t)
oprangeset(AMSUB, t)
@@ -6190,6 +6209,26 @@
o1 = c.opirr(p, p.As)
o1 |= (uint32(rm&31) << 16) | (uint32(rn&31) << 5) | uint32(encodedOperation)
+ case 111: /* smax/smin/umax/umin $imm8, Rn, Rd: CSSC min/max against an 8-bit immediate */
+ imm := p.From.Offset // keep all 64 bits: c.regoff narrows to int32, so $0x100000001 would wrap to 1 and pass the range checks below
+ switch p.As {
+ case ASMAX, ASMAXW, ASMIN, ASMINW: // signed forms accept -128..127
+ if imm < -128 || imm > 127 {
+ c.ctxt.Diag("signed comparison immediate not in the range -128 to 127: %v", p)
+ }
+ case AUMAX, AUMAXW, AUMIN, AUMINW: // unsigned forms accept 0..255
+ if imm < 0 || imm > 255 {
+ c.ctxt.Diag("unsigned comparison immediate not in the range 0 to 255: %v", p)
+ }
+ default:
+ c.ctxt.Diag("bad CSSC comparison immediate op: %v", p)
+ }
+ if p.Reg == obj.REG_NONE {
+ c.ctxt.Diag("missing source register: %v", p)
+ }
+ o1 = c.opcsscimm(p, p.As)
+ o1 |= uint32(imm&0xff)<<10 | uint32(p.Reg&31)<<5 | uint32(p.To.Reg&31) // low byte of imm at bit 10, source register at bit 5, destination at bit 0
+
case 127:
// Generic SVE instruction encoding
matched := false
@@ -6363,6 +6402,30 @@
case ARORW:
op = S32 | OPDP2(11)
+ case ASMAX:
+ op = S64 | OPDP2(24)
+
+ case ASMAXW:
+ op = S32 | OPDP2(24)
+
+ case AUMAX:
+ op = S64 | OPDP2(25)
+
+ case AUMAXW:
+ op = S32 | OPDP2(25)
+
+ case ASMIN:
+ op = S64 | OPDP2(26)
+
+ case ASMINW:
+ op = S32 | OPDP2(26)
+
+ case AUMIN:
+ op = S64 | OPDP2(27)
+
+ case AUMINW:
+ op = S32 | OPDP2(27)
+
case ACCMN:
op = S64 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4 /* cond<<12 | nzcv<<0 */
@@ -7423,6 +7486,24 @@
func (c *ctxt7) opbit(p *obj.Prog, a obj.As) uint32 {
switch a {
+ case AABS:
+ return S64 | OPBIT(8)
+
+ case AABSW:
+ return S32 | OPBIT(8)
+
+ case ACNT:
+ return S64 | OPBIT(7)
+
+ case ACNTW:
+ return S32 | OPBIT(7)
+
+ case ACTZ:
+ return S64 | OPBIT(6)
+
+ case ACTZW:
+ return S32 | OPBIT(6)
+
case ACLS:
return S64 | OPBIT(5)
@@ -7462,6 +7543,38 @@
}
}
+// opcsscimm returns the fixed opcode bits for the immediate form of the
+// CSSC min/max instruction a; the caller ORs in the immediate and registers.
+// Any other instruction is diagnosed against p and yields 0.
+func (c *ctxt7) opcsscimm(p *obj.Prog, a obj.As) uint32 {
+ const (
+ group = 0x47 << 22 // bits shared by all eight instructions
+ umaxSel = 1 << 18
+ sminSel = 1 << 19
+ uminSel = 1<<19 | 1<<18
+ )
+ switch a {
+ case ASMAX:
+ return S64 | group
+ case ASMAXW:
+ return S32 | group
+ case AUMAX:
+ return S64 | group | umaxSel
+ case AUMAXW:
+ return S32 | group | umaxSel
+ case ASMIN:
+ return S64 | group | sminSel
+ case ASMINW:
+ return S32 | group | sminSel
+ case AUMIN:
+ return S64 | group | uminSel
+ case AUMINW:
+ return S32 | group | uminSel
+ }
+ c.ctxt.Diag("bad CSSC comparison immediate op\n%v", p)
+ return 0
+}
+
/*
* add/subtract sign or zero-extended register
*/
diff --git a/src/internal/buildcfg/cfg.go b/src/internal/buildcfg/cfg.go
index 89fd74e..ab1ea49 100644
--- a/src/internal/buildcfg/cfg.go
+++ b/src/internal/buildcfg/cfg.go
@@ -184,6 +184,8 @@
Version string
// Large Systems Extension
LSE bool
+ // FEAT_CSSC enables Common Short Sequence Compression instructions.
+ CSSC bool
// ARM v8.0 Cryptographic Extension. It includes the following features:
// * FEAT_AES, which includes the AESD and AESE instructions.
// * FEAT_PMULL, which includes the PMULL, PMULL2 instructions.
@@ -197,6 +199,9 @@
if g.LSE {
arm64Str += ",lse"
}
+ if g.CSSC {
+ arm64Str += ",feat_cssc"
+ }
if g.Crypto {
arm64Str += ",crypto"
}
@@ -206,10 +211,12 @@
func ParseGoarm64(v string) (g Goarm64Features, e error) {
const (
lseOpt = ",lse"
+ csscOpt = ",feat_cssc"
cryptoOpt = ",crypto"
)
g.LSE = false
+ g.CSSC = false
g.Crypto = false
// We allow any combination of suffixes, in any order
for {
@@ -219,6 +226,12 @@
continue
}
+ if strings.HasSuffix(v, csscOpt) {
+ g.CSSC = true
+ v = v[:len(v)-len(csscOpt)]
+ continue
+ }
+
if strings.HasSuffix(v, cryptoOpt) {
g.Crypto = true
v = v[:len(v)-len(cryptoOpt)]
@@ -231,14 +244,31 @@
switch v {
case "v8.0":
g.Version = v
- case "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6", "v8.7", "v8.8", "v8.9",
- "v9.0", "v9.1", "v9.2", "v9.3", "v9.4", "v9.5":
+ if g.CSSC { // the feat_cssc suffix was given, but v8.0 is below the minimum named in the error
+ e = fmt.Errorf("invalid GOARM64: %q requires v8.7 or later, or v9.2 or later", csscOpt)
+ g.CSSC = false // do not report the feature as enabled alongside an error
+ }
+ case "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6",
+ "v9.0", "v9.1":
g.Version = v
// LSE extension is mandatory starting from 8.1
g.LSE = true
+ if g.CSSC { // same rejection as for v8.0: these versions are also below the minimum
+ e = fmt.Errorf("invalid GOARM64: %q requires v8.7 or later, or v9.2 or later", csscOpt)
+ g.CSSC = false // do not report the feature as enabled alongside an error
+ }
+ case "v8.7", "v8.8", "v9.2", "v9.3": // CSSC keeps whatever the suffix loop set: on only if feat_cssc was given
+ g.Version = v
+ // LSE extension is mandatory starting from 8.1
+ g.LSE = true
+ case "v8.9", "v9.4", "v9.5":
+ g.Version = v
+ // LSE extension is mandatory starting from 8.1
+ g.LSE = true
+ g.CSSC = true // forced on for these versions, with or without the suffix
default:
- e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
- lseOpt, cryptoOpt)
+ e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q, %q and/or %q",
+ lseOpt, csscOpt, cryptoOpt)
g.Version = DefaultGOARM64
}
diff --git a/src/internal/buildcfg/cfg_test.go b/src/internal/buildcfg/cfg_test.go
index 2bbd478..f49492f 100644
--- a/src/internal/buildcfg/cfg_test.go
+++ b/src/internal/buildcfg/cfg_test.go
@@ -76,6 +76,28 @@
if goarm64().Version != "v9.0" || goarm64().LSE != true || goarm64().Crypto != false {
t.Errorf("Wrong parsing of GOARM64=v9.0")
}
+ os.Setenv("GOARM64", "v8.7,feat_cssc")
+ if goarm64().Version != "v8.7" || goarm64().CSSC != true {
+ t.Errorf("Wrong parsing of GOARM64=v8.7,feat_cssc")
+ }
+ os.Setenv("GOARM64", "v9.2,feat_cssc")
+ if goarm64().Version != "v9.2" || goarm64().CSSC != true {
+ t.Errorf("Wrong parsing of GOARM64=v9.2,feat_cssc")
+ }
+ os.Setenv("GOARM64", "v8.9")
+ if goarm64().Version != "v8.9" || goarm64().CSSC != true {
+ t.Errorf("Wrong parsing of GOARM64=v8.9")
+ }
+ Error = nil
+ os.Setenv("GOARM64", "v8.6,feat_cssc")
+ if _ = goarm64(); Error == nil {
+ t.Errorf("Wrong parsing of GOARM64=v8.6,feat_cssc")
+ }
+ Error = nil
+ os.Setenv("GOARM64", "v9.1,feat_cssc")
+ if _ = goarm64(); Error == nil {
+ t.Errorf("Wrong parsing of GOARM64=v9.1,feat_cssc")
+ }
}
func TestGoarm64FeaturesSupports(t *testing.T) {
@@ -102,6 +124,67 @@
}
}
+// TestGoarm64CSSCFeaturePolicy checks, per GOARM64 setting, whether
+// ParseGoarm64 reports CSSC as enabled or rejects the setting, and then
+// checks the String form of two parsed settings.
+func TestGoarm64CSSCFeaturePolicy(t *testing.T) {
+	// Bool fields left out of an entry default to false.
+	cases := []struct {
+		spec  string // GOARM64 value handed to ParseGoarm64
+		cssc  bool   // expected CSSC field when parsing succeeds
+		fails bool   // whether ParseGoarm64 must return an error
+	}{
+		{spec: "v8.6,feat_cssc", fails: true},
+		{spec: "v9.1,feat_cssc", fails: true},
+		{spec: "v8.7"},
+		{spec: "v8.7,feat_cssc", cssc: true},
+		{spec: "v8.8"},
+		{spec: "v8.8,feat_cssc", cssc: true},
+		{spec: "v8.9", cssc: true},
+		{spec: "v8.9,feat_cssc", cssc: true},
+		{spec: "v9.2"},
+		{spec: "v9.2,feat_cssc", cssc: true},
+		{spec: "v9.3"},
+		{spec: "v9.3,feat_cssc", cssc: true},
+		{spec: "v9.4", cssc: true},
+		{spec: "v9.4,feat_cssc", cssc: true},
+		{spec: "v9.5", cssc: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.spec, func(t *testing.T) {
+			got, err := ParseGoarm64(tc.spec)
+			switch {
+			case tc.fails && err == nil:
+				t.Fatalf("ParseGoarm64(%q) succeeded, want error", tc.spec)
+			case tc.fails && got.CSSC:
+				t.Fatalf("ParseGoarm64(%q).CSSC = true with error, want false", tc.spec)
+			case tc.fails:
+				// Rejected with CSSC left off: nothing more to check.
+			case err != nil:
+				t.Fatalf("ParseGoarm64(%q) failed: %v", tc.spec, err)
+			case got.CSSC != tc.cssc:
+				t.Fatalf("ParseGoarm64(%q).CSSC = %v, want %v", tc.spec, got.CSSC, tc.cssc)
+			}
+		})
+	}
+
+	// String form: one version-only input, one with options in another order.
+	roundTrips := []struct{ in, want string }{
+		{"v8.9", "v8.9,lse,feat_cssc"},
+		{"v8.7,crypto,feat_cssc,lse", "v8.7,lse,feat_cssc,crypto"},
+	}
+	for _, rt := range roundTrips {
+		parsed, err := ParseGoarm64(rt.in)
+		if err != nil {
+			t.Fatalf("ParseGoarm64(%q) failed: %v", rt.in, err)
+		}
+		if s := parsed.String(); s != rt.want {
+			t.Fatalf("ParseGoarm64(%q).String() = %q, want %q", rt.in, s, rt.want)
+		}
+	}
+}
+
func TestGogoarchTags(t *testing.T) {
old_goarch := GOARCH
old_goarm64 := GOARM64
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 5488009..0bb611a 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -761,6 +761,7 @@
func Int64Min(a, b int64) int64 {
// amd64: "CMPQ" "CMOVQLT"
// arm64: "CMP" "CSEL"
+ // arm64/v8.9: "SMIN" -"CMP" -"CSEL"
// riscv64/rva20u64:"BLT "
// riscv64/rva22u64,riscv64/rva23u64:"MIN "
return min(a, b)
@@ -769,6 +770,7 @@
func Int64Max(a, b int64) int64 {
// amd64: "CMPQ" "CMOVQGT"
// arm64: "CMP" "CSEL"
+ // arm64/v8.9: "SMAX" -"CMP" -"CSEL"
// riscv64/rva20u64:"BLT "
// riscv64/rva22u64,riscv64/rva23u64:"MAX "
return max(a, b)
@@ -777,6 +779,7 @@
func Uint64Min(a, b uint64) uint64 {
// amd64: "CMPQ" "CMOVQCS"
// arm64: "CMP" "CSEL"
+ // arm64/v8.9: "UMIN" -"CMP" -"CSEL"
// riscv64/rva20u64:"BLTU"
// riscv64/rva22u64,riscv64/rva23u64:"MINU"
return min(a, b)
@@ -785,6 +788,7 @@
func Uint64Max(a, b uint64) uint64 {
// amd64: "CMPQ" "CMOVQHI"
// arm64: "CMP" "CSEL"
+ // arm64/v8.9: "UMAX" -"CMP" -"CSEL"
// riscv64/rva20u64:"BLTU"
// riscv64/rva22u64,riscv64/rva23u64:"MAXU"
return max(a, b)
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 4d02bfb..4673238 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -180,6 +180,7 @@
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTQ"
// arm64:"VCNT" "VUADDLV"
+ // arm64/v8.9:"CNT" -"VCNT" -"VUADDLV"
// loong64:"VPCNTV"
// ppc64x:"POPCNTD"
// riscv64:"CPOP "
@@ -192,6 +193,7 @@
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTQ"
// arm64:"VCNT" "VUADDLV"
+ // arm64/v8.9:"CNT" -"VCNT" -"VUADDLV"
// loong64:"VPCNTV"
// ppc64x:"POPCNTD"
// riscv64:"CPOP "
@@ -204,6 +206,7 @@
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTL"
// arm64:"VCNT" "VUADDLV"
+ // arm64/v8.9:"CNTW" -"VCNT" -"VUADDLV"
// loong64:"VPCNTW"
// ppc64x:"POPCNTW"
// riscv64:"CPOPW"
@@ -216,6 +219,7 @@
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTL"
// arm64:"VCNT" "VUADDLV"
+ // arm64/v8.9:"CNTW" -"VCNT" -"VUADDLV"
// loong64:"VPCNTH"
// ppc64x:"POPCNTW"
// riscv64:"CPOP "
@@ -394,6 +398,7 @@
// amd64/v3:"TZCNTQ"
// arm:"CLZ"
// arm64:"RBIT" "CLZ"
+ // arm64/v8.9:"CTZ" -"RBIT" -"CLZ"
// loong64:"CTZV"
// ppc64x/power8:"ANDN" "POPCNTD"
// ppc64x/power9: "CNTTZD"
@@ -408,6 +413,7 @@
// amd64/v1,amd64/v2:"BSFQ" "MOVL [$]64" "CMOVQEQ"
// amd64/v3:"TZCNTQ"
// arm64:"RBIT" "CLZ"
+ // arm64/v8.9:"CTZ" -"RBIT" -"CLZ"
// loong64:"CTZV"
// ppc64x/power8:"ANDN" "POPCNTD"
// ppc64x/power9: "CNTTZD"
@@ -429,6 +435,7 @@
// amd64/v3:"TZCNTL"
// arm:"CLZ"
// arm64:"RBITW" "CLZW"
+ // arm64/v8.9:"CTZW" -"RBIT" -"CLZ"
// loong64:"CTZW"
// ppc64x/power8:"ANDN" "POPCNTW"
// ppc64x/power9: "CNTTZW"
@@ -443,6 +450,7 @@
// amd64:"BSFL" "ORL [$]65536"
// arm:"ORR [$]65536" "CLZ" -"MOVHU R"
// arm64:"ORR [$]65536" "RBITW" "CLZW" -"MOVHU R" -"RBIT " -"CLZ "
+ // arm64/v8.9:"ORR [$]65536" "CTZW" -"RBIT" -"CLZ"
// loong64:"CTZV"
// ppc64x/power8:"POPCNTW" "ADD [$]-1"
// ppc64x/power9:"CNTTZD" "ORIS [$]1"
@@ -457,6 +465,7 @@
// amd64:"BSFL" "ORL [$]256"
// arm:"ORR [$]256" "CLZ" -"MOVBU R"
// arm64:"ORR [$]256" "RBITW" "CLZW" -"MOVBU R" -"RBIT " -"CLZ "
+ // arm64/v8.9:"ORR [$]256" "CTZW" -"RBIT" -"CLZ"
// loong64:"CTZV"
// ppc64x/power8:"POPCNTB" "ADD [$]-1"
// ppc64x/power9:"CNTTZD" "OR [$]256"