diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index a58f5ac..36c1e33 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1745,61 +1745,61 @@
(VPMOVMToVec64x8 (VCMPPD512 [3] x y))
// Include these rules because you never know about rewrite order
-(KANDQ (VCMPPD512 [3] x x) (VCMPPD512 [3] y y)) => (VCMPPD512 [3] x x)
-(KANDD (VCMPPS512 [3] x x) (VCMPPS512 [3] y y)) => (VCMPPS512 [3] x y)
+(KANDB (VCMPPD512 [3] x x) (VCMPPD512 [3] y y)) => (VCMPPD512 [3] x x) // 64x8: 8 lanes -> 8-bit mask -> KANDB. NOTE(review): result is "x x" here but "x y" in the KANDW rule below; confirm which is intended.
+(KANDW (VCMPPS512 [3] x x) (VCMPPS512 [3] y y)) => (VCMPPS512 [3] x y) // 32x16: 16 lanes -> 16-bit mask -> KANDW
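-// These larger simplifying rules must come before the smaller simplifying rules that might break them).
+// These larger simplifying rules must come before the smaller simplifying rules that might break them.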
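-// Rewrite rules for binary logical mask operations that apply to 8-bit elements (B, for bytes) of 128, 256, and 512-bit vectors
+// Rewrite rules for binary logical mask operations that apply to 8-bit elements of 128, 256, and 512-bit vectors. The K-op suffix follows the mask width (lane count), not the element size: 16 lanes -> W, 32 -> D, 64 -> Q.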
-(VPAND128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KANDB x y))
-(VPAND256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KANDB x y))
-(VPANDD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KANDB x y))
+(VPAND128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KANDW x y))
+(VPAND256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KANDD x y))
+(VPANDD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KANDQ x y))
-(VPOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KORB x y))
-(VPOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KORB x y))
-(VPORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KORB x y))
+(VPOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KORW x y))
+(VPOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KORD x y))
+(VPORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KORQ x y))
-(VPXOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KXORB x y))
-(VPXOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KXORB x y))
-(VPXORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KXORB x y))
+(VPXOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x16 (KXORW x y))
+(VPXOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x32 (KXORD x y))
+(VPXORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec8x64 (KXORQ x y))
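-// Rewrite rules for binary logical mask operations that apply to 16-bit elements (W, for words) of 128, 256, and 512-bit vectors
+// Rewrite rules for binary logical mask operations that apply to 16-bit elements of 128, 256, and 512-bit vectors. The K-op suffix follows the mask width (lane count), not the element size: 8 lanes -> B, 16 -> W, 32 -> D.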
-(VPAND128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KANDW x y))
+(VPAND128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KANDB x y))
(VPAND256 (VPMOVMToVec16x16 x) (VPMOVMToVec16x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x16 (KANDW x y))
-(VPANDD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KANDW x y))
+(VPANDD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KANDD x y))
-(VPOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KORW x y))
+(VPOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KORB x y))
(VPOR256 (VPMOVMToVec16x16 x) (VPMOVMToVec16x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x16 (KORW x y))
-(VPORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KORW x y))
+(VPORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KORD x y))
-(VPXOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KXORW x y))
+(VPXOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x8 (KXORB x y))
(VPXOR256 (VPMOVMToVec16x16 x) (VPMOVMToVec16x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x16 (KXORW x y))
-(VPXORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KXORW x y))
+(VPXORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec16x32 (KXORD x y))
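-// Rewrite rules for binary logical mask operations that apply to 32-bit elements (D, for doublewords) of 128, 256, and 512-bit vectors
+// Rewrite rules for binary logical mask operations that apply to 32-bit elements of 128, 256, and 512-bit vectors. The K-op suffix follows the mask width (lane count), not the element size: 4 and 8 lanes -> B, 16 -> W.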
-(VPAND128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KANDD x y))
-(VPAND256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KANDD x y))
-(VPANDD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KANDD x y))
+(VPAND128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KANDB x y))
+(VPAND256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KANDB x y))
+(VPANDD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KANDW x y))
-(VPOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KORD x y))
-(VPOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KORD x y))
-(VPORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KORD x y))
+(VPOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KORB x y))
+(VPOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KORB x y))
+(VPORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KORW x y))
-(VPXOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KXORD x y))
-(VPXOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KXORD x y))
-(VPXORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KXORD x y))
+(VPXOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x4 (KXORB x y))
+(VPXOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x8 (KXORB x y))
+(VPXORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec32x16 (KXORW x y))
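-// Rewrite rules for binary logical mask operations that apply to 64-bit elements (Q, for quadwords) of 128, 256, and 512-bit vectors
+// Rewrite rules for binary logical mask operations that apply to 64-bit elements of 128, 256, and 512-bit vectors. The K-op suffix follows the mask width (lane count), not the element size: 2, 4 and 8 lanes all use B.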
-(VPAND128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KANDQ x y))
-(VPAND256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KANDQ x y))
-(VPANDD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KANDQ x y))
+(VPAND128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KANDB x y))
+(VPAND256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KANDB x y))
+(VPANDD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KANDB x y))
-(VPOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KORQ x y))
-(VPOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KORQ x y))
-(VPORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KORQ x y))
+(VPOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KORB x y))
+(VPOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KORB x y))
+(VPORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KORB x y))
-(VPXOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KXORQ x y))
-(VPXOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KXORQ x y))
-(VPXORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KXORQ x y))
+(VPXOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x2 (KXORB x y))
+(VPXOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x4 (KXORB x y))
+(VPXORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y)) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVMToVec64x8 (KXORB x y))
(VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) => x
(VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) => x
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index ce7b6b0..e374590 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -240,10 +240,10 @@
return rewriteValueAMD64_OpAMD64HMULQ(v)
case OpAMD64HMULQU:
return rewriteValueAMD64_OpAMD64HMULQU(v)
- case OpAMD64KANDD:
- return rewriteValueAMD64_OpAMD64KANDD(v)
- case OpAMD64KANDQ:
- return rewriteValueAMD64_OpAMD64KANDQ(v)
+ case OpAMD64KANDB:
+ return rewriteValueAMD64_OpAMD64KANDB(v)
+ case OpAMD64KANDW:
+ return rewriteValueAMD64_OpAMD64KANDW(v)
case OpAMD64KMOVBk:
return rewriteValueAMD64_OpAMD64KMOVBk(v)
case OpAMD64KMOVDk:
@@ -23758,34 +23758,10 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64KANDD(v *Value) bool {
+func rewriteValueAMD64_OpAMD64KANDB(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (KANDD (VCMPPS512 [3] x x) (VCMPPS512 [3] y y))
- // result: (VCMPPS512 [3] x y)
- for {
- if v_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0.AuxInt) != 3 {
- break
- }
- x := v_0.Args[1]
- if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1.AuxInt) != 3 {
- break
- }
- y := v_1.Args[1]
- if y != v_1.Args[0] {
- break
- }
- v.reset(OpAMD64VCMPPS512)
- v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64KANDQ(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (KANDQ (VCMPPD512 [3] x x) (VCMPPD512 [3] y y))
+ // match: (KANDB (VCMPPD512 [3] x x) (VCMPPD512 [3] y y))
// result: (VCMPPD512 [3] x x)
for {
if v_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_0.AuxInt) != 3 {
@@ -23806,6 +23782,30 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64KANDW(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (KANDW (VCMPPS512 [3] x x) (VCMPPS512 [3] y y))
+ // result: (VCMPPS512 [3] x y)
+ for {
+ if v_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0.AuxInt) != 3 {
+ break
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1.AuxInt) != 3 {
+ break
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
+ break
+ }
+ v.reset(OpAMD64VCMPPS512)
+ v.AuxInt = uint8ToAuxInt(3)
+ v.AddArg2(x, y)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64KMOVBk(v *Value) bool {
v_0 := v.Args[0]
// match: (KMOVBk l:(MOVBload [off] {sym} ptr mem))
@@ -58610,7 +58610,7 @@
typ := &b.Func.Config.Types
// match: (VPAND128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x16 (KANDB x y))
+ // result: (VPMOVMToVec8x16 (KANDW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x16 {
@@ -58625,7 +58625,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58634,7 +58634,7 @@
}
// match: (VPAND128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x8 (KANDW x y))
+ // result: (VPMOVMToVec16x8 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x8 {
@@ -58649,7 +58649,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58658,7 +58658,7 @@
}
// match: (VPAND128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x4 (KANDD x y))
+ // result: (VPMOVMToVec32x4 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x4 {
@@ -58673,7 +58673,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58682,7 +58682,7 @@
}
// match: (VPAND128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x2 (KANDQ x y))
+ // result: (VPMOVMToVec64x2 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x2 {
@@ -58697,7 +58697,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x2)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58814,7 +58814,7 @@
typ := &b.Func.Config.Types
// match: (VPAND256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x32 (KANDB x y))
+ // result: (VPMOVMToVec8x32 (KANDD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x32 {
@@ -58829,7 +58829,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58862,7 +58862,7 @@
}
// match: (VPAND256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x8 (KANDD x y))
+ // result: (VPMOVMToVec32x8 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x8 {
@@ -58877,7 +58877,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -58886,7 +58886,7 @@
}
// match: (VPAND256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x4 (KANDQ x y))
+ // result: (VPMOVMToVec64x4 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x4 {
@@ -58901,7 +58901,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -59018,7 +59018,7 @@
typ := &b.Func.Config.Types
// match: (VPANDD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x64 (KANDB x y))
+ // result: (VPMOVMToVec8x64 (KANDQ x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x64 {
@@ -59033,7 +59033,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDQ, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -59042,7 +59042,7 @@
}
// match: (VPANDD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x32 (KANDW x y))
+ // result: (VPMOVMToVec16x32 (KANDD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x32 {
@@ -59057,7 +59057,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -59066,7 +59066,7 @@
}
// match: (VPANDD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x16 (KANDD x y))
+ // result: (VPMOVMToVec32x16 (KANDW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x16 {
@@ -59081,7 +59081,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -59090,7 +59090,7 @@
}
// match: (VPANDD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x8 (KANDQ x y))
+ // result: (VPMOVMToVec64x8 (KANDB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x8 {
@@ -59105,7 +59105,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KANDQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KANDB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79414,7 +79414,7 @@
}
// match: (VPOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x16 (KORB x y))
+ // result: (VPMOVMToVec8x16 (KORW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x16 {
@@ -79429,7 +79429,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79438,7 +79438,7 @@
}
// match: (VPOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x8 (KORW x y))
+ // result: (VPMOVMToVec16x8 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x8 {
@@ -79453,7 +79453,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KORW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79462,7 +79462,7 @@
}
// match: (VPOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x4 (KORD x y))
+ // result: (VPMOVMToVec32x4 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x4 {
@@ -79477,7 +79477,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79486,7 +79486,7 @@
}
// match: (VPOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x2 (KORQ x y))
+ // result: (VPMOVMToVec64x2 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x2 {
@@ -79501,7 +79501,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x2)
- v0 := b.NewValue0(v.Pos, OpAMD64KORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79586,7 +79586,7 @@
}
// match: (VPOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x32 (KORB x y))
+ // result: (VPMOVMToVec8x32 (KORD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x32 {
@@ -79601,7 +79601,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79634,7 +79634,7 @@
}
// match: (VPOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x8 (KORD x y))
+ // result: (VPMOVMToVec32x8 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x8 {
@@ -79649,7 +79649,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79658,7 +79658,7 @@
}
// match: (VPOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x4 (KORQ x y))
+ // result: (VPMOVMToVec64x4 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x4 {
@@ -79673,7 +79673,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79778,7 +79778,7 @@
}
// match: (VPORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x64 (KORB x y))
+ // result: (VPMOVMToVec8x64 (KORQ x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x64 {
@@ -79793,7 +79793,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORQ, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79802,7 +79802,7 @@
}
// match: (VPORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x32 (KORW x y))
+ // result: (VPMOVMToVec16x32 (KORD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x32 {
@@ -79817,7 +79817,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KORW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79826,7 +79826,7 @@
}
// match: (VPORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x16 (KORD x y))
+ // result: (VPMOVMToVec32x16 (KORW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x16 {
@@ -79841,7 +79841,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -79850,7 +79850,7 @@
}
// match: (VPORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x8 (KORQ x y))
+ // result: (VPMOVMToVec64x8 (KORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x8 {
@@ -79865,7 +79865,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89648,7 +89648,7 @@
typ := &b.Func.Config.Types
// match: (VPXOR128 (VPMOVMToVec8x16 x) (VPMOVMToVec8x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x16 (KXORB x y))
+ // result: (VPMOVMToVec8x16 (KXORW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x16 {
@@ -89663,7 +89663,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89672,7 +89672,7 @@
}
// match: (VPXOR128 (VPMOVMToVec16x8 x) (VPMOVMToVec16x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x8 (KXORW x y))
+ // result: (VPMOVMToVec16x8 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x8 {
@@ -89687,7 +89687,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89696,7 +89696,7 @@
}
// match: (VPXOR128 (VPMOVMToVec32x4 x) (VPMOVMToVec32x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x4 (KXORD x y))
+ // result: (VPMOVMToVec32x4 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x4 {
@@ -89711,7 +89711,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89720,7 +89720,7 @@
}
// match: (VPXOR128 (VPMOVMToVec64x2 x) (VPMOVMToVec64x2 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x2 (KXORQ x y))
+ // result: (VPMOVMToVec64x2 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x2 {
@@ -89735,7 +89735,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x2)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89776,7 +89776,7 @@
typ := &b.Func.Config.Types
// match: (VPXOR256 (VPMOVMToVec8x32 x) (VPMOVMToVec8x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x32 (KXORB x y))
+ // result: (VPMOVMToVec8x32 (KXORD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x32 {
@@ -89791,7 +89791,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89824,7 +89824,7 @@
}
// match: (VPXOR256 (VPMOVMToVec32x8 x) (VPMOVMToVec32x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x8 (KXORD x y))
+ // result: (VPMOVMToVec32x8 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x8 {
@@ -89839,7 +89839,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89848,7 +89848,7 @@
}
// match: (VPXOR256 (VPMOVMToVec64x4 x) (VPMOVMToVec64x4 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x4 (KXORQ x y))
+ // result: (VPMOVMToVec64x4 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x4 {
@@ -89863,7 +89863,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x4)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89904,7 +89904,7 @@
typ := &b.Func.Config.Types
// match: (VPXORD512 (VPMOVMToVec8x64 x) (VPMOVMToVec8x64 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec8x64 (KXORB x y))
+ // result: (VPMOVMToVec8x64 (KXORQ x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec8x64 {
@@ -89919,7 +89919,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORQ, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89928,7 +89928,7 @@
}
// match: (VPXORD512 (VPMOVMToVec16x32 x) (VPMOVMToVec16x32 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec16x32 (KXORW x y))
+ // result: (VPMOVMToVec16x32 (KXORD x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec16x32 {
@@ -89943,7 +89943,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORW, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORD, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89952,7 +89952,7 @@
}
// match: (VPXORD512 (VPMOVMToVec32x16 x) (VPMOVMToVec32x16 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec32x16 (KXORD x y))
+ // result: (VPMOVMToVec32x16 (KXORW x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec32x16 {
@@ -89967,7 +89967,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORD, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORW, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -89976,7 +89976,7 @@
}
// match: (VPXORD512 (VPMOVMToVec64x8 x) (VPMOVMToVec64x8 y))
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPMOVMToVec64x8 (KXORQ x y))
+ // result: (VPMOVMToVec64x8 (KXORB x y))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64VPMOVMToVec64x8 {
@@ -89991,7 +89991,7 @@
continue
}
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64KXORQ, typ.Mask)
+ v0 := b.NewValue0(v.Pos, OpAMD64KXORB, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
diff --git a/src/simd/archsimd/internal/simd_test/simd_amd64_test.go b/src/simd/archsimd/internal/simd_test/simd_amd64_test.go
index 2a69b2a..0b1139f 100644
--- a/src/simd/archsimd/internal/simd_test/simd_amd64_test.go
+++ b/src/simd/archsimd/internal/simd_test/simd_amd64_test.go
@@ -1350,3 +1350,208 @@
})
}
}
+
+// TestMask8x64 ORs two 64-lane byte masks (bytes of next greater than 127, looked-up values equal to 0xff) and checks the resulting lanes.
+func TestMask8x64(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+
+	lookup0 := archsimd.LoadUint8x64Array(&[64]uint8{
+		255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255})
+	lookup1 := archsimd.LoadUint8x64Array(&[64]uint8{
+		255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
+	})
+
+	s0 := archsimd.BroadcastUint8x64(127)  // threshold compared against the input bytes
+	s1 := archsimd.BroadcastUint8x64(0xff) // 0xff also fills most slots of the lookup tables above
+
+	next, _ := archsimd.LoadUint8x64Part([]byte("bGVhc3VyZS4=")) // 12 input bytes; second result is unused here
+
+	vals := lookup0.ConcatPermute(lookup1, next) // presumably indexes the two tables with the bytes of next
+
+	hasNonAscii := next.Greater(s0)
+	hasNonAlphabet := vals.Equal(s1)
+	or := hasNonAscii.Or(hasNonAlphabet) // the mask OR is the operation under test
+	var sor [64]int8
+	or.ToInt8x64().Store(sor[:])
+
+	want := []int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}
+	checkSlices(t, sor[:], want)
+}
+
+// TestMask16x32 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask16x32(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int16, 32)
+	want := []int16{-1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0}
+	var a archsimd.Int16x32
+	b := archsimd.LoadInt16x32(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt16x32().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask32x16 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask32x16(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int32, 16)
+	want := []int32{-1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0}
+	var a archsimd.Int32x16
+	b := archsimd.LoadInt32x16(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt32x16().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask64x8 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask64x8(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int64, 8)
+	want := []int64{-1, 0, 0, 0, 0, 0, -1, -1}
+	var a archsimd.Int64x8
+	b := archsimd.LoadInt64x8(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt64x8().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask8x32 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask8x32(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int8, 32)
+	want := []int8{-1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0}
+	var a archsimd.Int8x32
+	b := archsimd.LoadInt8x32(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt8x32().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask16x16 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask16x16(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int16, 16)
+	want := []int16{-1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0}
+	var a archsimd.Int16x16
+	b := archsimd.LoadInt16x16(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt16x16().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask32x8 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask32x8(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int32, 8)
+	want := []int32{-1, 0, 0, 0, 0, 0, -1, -1}
+	var a archsimd.Int32x8
+	b := archsimd.LoadInt32x8(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt32x8().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask64x4 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask64x4(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int64, 4)
+	want := []int64{-1, 0, 0, -1}
+	var a archsimd.Int64x4
+	b := archsimd.LoadInt64x4(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt64x4().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask8x16 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask8x16(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int8, 16)
+	want := []int8{-1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0}
+	var a archsimd.Int8x16
+	b := archsimd.LoadInt8x16(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt8x16().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask16x8 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask16x8(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int16, 8)
+	want := []int16{-1, 0, 0, 0, 0, 0, -1, -1}
+	var a archsimd.Int16x8
+	b := archsimd.LoadInt16x8(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt16x8().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask32x4 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask32x4(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int32, 4)
+	want := []int32{-1, 0, 0, -1}
+	var a archsimd.Int32x4
+	b := archsimd.LoadInt32x4(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt32x4().Store(s)
+	checkSlices(t, s, want)
+}
+
+// TestMask64x2 ORs the masks a.Less(a) and b.Less(a), with a zero-valued and b loaded from want, and checks the result converts back to want.
+func TestMask64x2(t *testing.T) {
+	if !archsimd.X86.AVX512() {
+		t.Skip("Test requires X86.AVX512, not available on this hardware")
+	}
+	s := make([]int64, 2)
+	want := []int64{-1, 0}
+	var a archsimd.Int64x2
+	b := archsimd.LoadInt64x2(want)
+	m1 := a.Less(a)  // a compared with itself: expected to select no lanes
+	m2 := b.Less(a)  // expected to select the -1 lanes of want
+	m3 := m1.Or(m2)  // the mask OR is the operation under test
+	m3.ToInt64x2().Store(s)
+	checkSlices(t, s, want)
+}