diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 63b37f9..7ac6ed4 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -995,32 +995,32 @@
(SetLoUint64x4 x y) => (VINSERTI128256 [0] x y)
(SetLoUint64x8 x y) => (VINSERTI64X4512 [0] x y)
(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
-(VPSLLW128 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...)
-(VPSLLW256 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...)
-(VPSLLW512 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...)
-(VPSLLD128 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...)
-(VPSLLD256 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...)
-(VPSLLD512 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...)
-(VPSLLQ128 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...)
-(VPSLLQ256 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...)
-(VPSLLQ512 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...)
+(VPSLLW128 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...)
+(VPSLLW256 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...)
+(VPSLLW512 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...)
+(VPSLLD128 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...)
+(VPSLLD256 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...)
+(VPSLLD512 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...)
+(VPSLLQ128 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...)
+(VPSLLQ256 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...)
+(VPSLLQ512 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
(ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...)
(ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...)
(ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...)
@@ -1049,32 +1049,32 @@
(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask)
(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask)
(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
-(VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x)
(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
-(VPSRAW256 x (MOVQconst [c])) => (VPSRAW256const [uint8(c)] x)
(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...)
-(VPSRAW512 x (MOVQconst [c])) => (VPSRAW512const [uint8(c)] x)
(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...)
-(VPSRAD128 x (MOVQconst [c])) => (VPSRAD128const [uint8(c)] x)
(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...)
-(VPSRAD256 x (MOVQconst [c])) => (VPSRAD256const [uint8(c)] x)
(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...)
-(VPSRAD512 x (MOVQconst [c])) => (VPSRAD512const [uint8(c)] x)
(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...)
-(VPSRAQ128 x (MOVQconst [c])) => (VPSRAQ128const [uint8(c)] x)
(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...)
-(VPSRAQ256 x (MOVQconst [c])) => (VPSRAQ256const [uint8(c)] x)
(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
-(VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x)
(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
+(VPSRLW128 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x)
(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
+(VPSRLW256 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x)
(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
+(VPSRLW512 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x)
(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
+(VPSRLD128 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x)
(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
+(VPSRLD256 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x)
(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
+(VPSRLD512 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x)
(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
+(VPSRLQ128 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x)
(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
+(VPSRLQ256 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x)
(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
+(VPSRLQ512 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x)
(ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...)
(ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...)
(ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...)
@@ -1093,15 +1093,15 @@
(ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...)
(ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...)
(ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...)
-(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask)
-(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask)
-(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask)
-(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask)
-(VPSRADMasked256 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask)
-(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask)
-(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
-(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
-(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
+(VPSRLWMasked128 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x mask)
+(VPSRLWMasked256 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x mask)
+(VPSRLWMasked512 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x mask)
+(VPSRLDMasked128 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x mask)
+(VPSRLDMasked256 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x mask)
+(VPSRLDMasked512 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x mask)
+(VPSRLQMasked128 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x mask)
+(VPSRLQMasked256 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x mask)
+(VPSRLQMasked512 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x mask)
(ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
(ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
(ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
@@ -1960,15 +1960,15 @@
(VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) => (VPSLLQMasked128const [a] x mask)
(VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) => (VPSLLQMasked256const [a] x mask)
(VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512const [a] x mask)
-(VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) => (VPSRAWMasked128const [a] x mask)
-(VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) => (VPSRAWMasked256const [a] x mask)
-(VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask)
-(VMOVDQU32Masked128 (VPSRAD128const [a] x) mask) => (VPSRADMasked128const [a] x mask)
-(VMOVDQU32Masked256 (VPSRAD256const [a] x) mask) => (VPSRADMasked256const [a] x mask)
-(VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) => (VPSRADMasked512const [a] x mask)
-(VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask)
-(VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask)
-(VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
+(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) => (VPSRLWMasked128const [a] x mask)
+(VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) => (VPSRLWMasked256const [a] x mask)
+(VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) => (VPSRLWMasked512const [a] x mask)
+(VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) => (VPSRLDMasked128const [a] x mask)
+(VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) => (VPSRLDMasked256const [a] x mask)
+(VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) => (VPSRLDMasked512const [a] x mask)
+(VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) => (VPSRLQMasked128const [a] x mask)
+(VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) => (VPSRLQMasked256const [a] x mask)
+(VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512const [a] x mask)
(VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask)
(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask)
(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask)
@@ -2022,8 +2022,8 @@
(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) => (VPSRLDMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
@@ -2076,8 +2076,8 @@
(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask)
(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512constMerging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
@@ -2113,8 +2113,8 @@
(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) => (VPSRLWMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
@@ -2282,15 +2282,15 @@
(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
@@ -2434,15 +2434,15 @@
(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 4b8d6e1..611b9112 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1646,48 +1646,24 @@
return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v)
case OpAMD64VPSLLWMasked512:
return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v)
- case OpAMD64VPSRAD128:
- return rewriteValueAMD64_OpAMD64VPSRAD128(v)
- case OpAMD64VPSRAD256:
- return rewriteValueAMD64_OpAMD64VPSRAD256(v)
- case OpAMD64VPSRAD512:
- return rewriteValueAMD64_OpAMD64VPSRAD512(v)
case OpAMD64VPSRAD512const:
return rewriteValueAMD64_OpAMD64VPSRAD512const(v)
- case OpAMD64VPSRADMasked128:
- return rewriteValueAMD64_OpAMD64VPSRADMasked128(v)
case OpAMD64VPSRADMasked128const:
return rewriteValueAMD64_OpAMD64VPSRADMasked128const(v)
- case OpAMD64VPSRADMasked256:
- return rewriteValueAMD64_OpAMD64VPSRADMasked256(v)
case OpAMD64VPSRADMasked256const:
return rewriteValueAMD64_OpAMD64VPSRADMasked256const(v)
- case OpAMD64VPSRADMasked512:
- return rewriteValueAMD64_OpAMD64VPSRADMasked512(v)
case OpAMD64VPSRADMasked512const:
return rewriteValueAMD64_OpAMD64VPSRADMasked512const(v)
- case OpAMD64VPSRAQ128:
- return rewriteValueAMD64_OpAMD64VPSRAQ128(v)
case OpAMD64VPSRAQ128const:
return rewriteValueAMD64_OpAMD64VPSRAQ128const(v)
- case OpAMD64VPSRAQ256:
- return rewriteValueAMD64_OpAMD64VPSRAQ256(v)
case OpAMD64VPSRAQ256const:
return rewriteValueAMD64_OpAMD64VPSRAQ256const(v)
- case OpAMD64VPSRAQ512:
- return rewriteValueAMD64_OpAMD64VPSRAQ512(v)
case OpAMD64VPSRAQ512const:
return rewriteValueAMD64_OpAMD64VPSRAQ512const(v)
- case OpAMD64VPSRAQMasked128:
- return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v)
case OpAMD64VPSRAQMasked128const:
return rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v)
- case OpAMD64VPSRAQMasked256:
- return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v)
case OpAMD64VPSRAQMasked256const:
return rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v)
- case OpAMD64VPSRAQMasked512:
- return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v)
case OpAMD64VPSRAQMasked512const:
return rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v)
case OpAMD64VPSRAVD512:
@@ -1710,32 +1686,44 @@
return rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v)
case OpAMD64VPSRAVQMasked512:
return rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v)
- case OpAMD64VPSRAW128:
- return rewriteValueAMD64_OpAMD64VPSRAW128(v)
- case OpAMD64VPSRAW256:
- return rewriteValueAMD64_OpAMD64VPSRAW256(v)
- case OpAMD64VPSRAW512:
- return rewriteValueAMD64_OpAMD64VPSRAW512(v)
- case OpAMD64VPSRAWMasked128:
- return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v)
- case OpAMD64VPSRAWMasked256:
- return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
- case OpAMD64VPSRAWMasked512:
- return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
+ case OpAMD64VPSRLD128:
+ return rewriteValueAMD64_OpAMD64VPSRLD128(v)
+ case OpAMD64VPSRLD256:
+ return rewriteValueAMD64_OpAMD64VPSRLD256(v)
+ case OpAMD64VPSRLD512:
+ return rewriteValueAMD64_OpAMD64VPSRLD512(v)
case OpAMD64VPSRLD512const:
return rewriteValueAMD64_OpAMD64VPSRLD512const(v)
+ case OpAMD64VPSRLDMasked128:
+ return rewriteValueAMD64_OpAMD64VPSRLDMasked128(v)
case OpAMD64VPSRLDMasked128const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v)
+ case OpAMD64VPSRLDMasked256:
+ return rewriteValueAMD64_OpAMD64VPSRLDMasked256(v)
case OpAMD64VPSRLDMasked256const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v)
+ case OpAMD64VPSRLDMasked512:
+ return rewriteValueAMD64_OpAMD64VPSRLDMasked512(v)
case OpAMD64VPSRLDMasked512const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v)
+ case OpAMD64VPSRLQ128:
+ return rewriteValueAMD64_OpAMD64VPSRLQ128(v)
+ case OpAMD64VPSRLQ256:
+ return rewriteValueAMD64_OpAMD64VPSRLQ256(v)
+ case OpAMD64VPSRLQ512:
+ return rewriteValueAMD64_OpAMD64VPSRLQ512(v)
case OpAMD64VPSRLQ512const:
return rewriteValueAMD64_OpAMD64VPSRLQ512const(v)
+ case OpAMD64VPSRLQMasked128:
+ return rewriteValueAMD64_OpAMD64VPSRLQMasked128(v)
case OpAMD64VPSRLQMasked128const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v)
+ case OpAMD64VPSRLQMasked256:
+ return rewriteValueAMD64_OpAMD64VPSRLQMasked256(v)
case OpAMD64VPSRLQMasked256const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v)
+ case OpAMD64VPSRLQMasked512:
+ return rewriteValueAMD64_OpAMD64VPSRLQMasked512(v)
case OpAMD64VPSRLQMasked512const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v)
case OpAMD64VPSRLVD512:
@@ -1754,6 +1742,18 @@
return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v)
case OpAMD64VPSRLVQMasked512:
return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v)
+ case OpAMD64VPSRLW128:
+ return rewriteValueAMD64_OpAMD64VPSRLW128(v)
+ case OpAMD64VPSRLW256:
+ return rewriteValueAMD64_OpAMD64VPSRLW256(v)
+ case OpAMD64VPSRLW512:
+ return rewriteValueAMD64_OpAMD64VPSRLW512(v)
+ case OpAMD64VPSRLWMasked128:
+ return rewriteValueAMD64_OpAMD64VPSRLWMasked128(v)
+ case OpAMD64VPSRLWMasked256:
+ return rewriteValueAMD64_OpAMD64VPSRLWMasked256(v)
+ case OpAMD64VPSRLWMasked512:
+ return rewriteValueAMD64_OpAMD64VPSRLWMasked512(v)
case OpAMD64VPSUBD512:
return rewriteValueAMD64_OpAMD64VPSUBD512(v)
case OpAMD64VPSUBDMasked128:
@@ -33631,16 +33631,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask)
- // result: (VPSRAWMasked128const [a] x mask)
+ // match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) mask)
+ // result: (VPSRLWMasked128const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAW128const {
+ if v_0.Op != OpAMD64VPSRLW128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAWMasked128const)
+ v.reset(OpAMD64VPSRLWMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -34219,16 +34219,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask)
- // result: (VPSRAWMasked256const [a] x mask)
+ // match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask)
+ // result: (VPSRLWMasked256const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAW256const {
+ if v_0.Op != OpAMD64VPSRLW256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAWMasked256const)
+ v.reset(OpAMD64VPSRLWMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -34735,16 +34735,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask)
- // result: (VPSRAWMasked512const [a] x mask)
+ // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask)
+ // result: (VPSRLWMasked512const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAW512const {
+ if v_0.Op != OpAMD64VPSRLW512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAWMasked512const)
+ v.reset(OpAMD64VPSRLWMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -35494,16 +35494,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask)
- // result: (VPSRADMasked128const [a] x mask)
+ // match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask)
+ // result: (VPSRLDMasked128const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAD128const {
+ if v_0.Op != OpAMD64VPSRLD128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRADMasked128const)
+ v.reset(OpAMD64VPSRLDMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -36375,16 +36375,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask)
- // result: (VPSRADMasked256const [a] x mask)
+ // match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask)
+ // result: (VPSRLDMasked256const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAD256const {
+ if v_0.Op != OpAMD64VPSRLD256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRADMasked256const)
+ v.reset(OpAMD64VPSRLDMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -37260,16 +37260,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask)
- // result: (VPSRADMasked512const [a] x mask)
+ // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask)
+ // result: (VPSRLDMasked512const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAD512const {
+ if v_0.Op != OpAMD64VPSRLD512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRADMasked512const)
+ v.reset(OpAMD64VPSRLDMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -38123,16 +38123,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask)
- // result: (VPSRAQMasked128const [a] x mask)
+ // match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask)
+ // result: (VPSRLQMasked128const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAQ128const {
+ if v_0.Op != OpAMD64VPSRLQ128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAQMasked128const)
+ v.reset(OpAMD64VPSRLQMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -39000,16 +39000,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask)
- // result: (VPSRAQMasked256const [a] x mask)
+ // match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask)
+ // result: (VPSRLQMasked256const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAQ256const {
+ if v_0.Op != OpAMD64VPSRLQ256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAQMasked256const)
+ v.reset(OpAMD64VPSRLQMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -39797,16 +39797,16 @@
v.AddArg2(x, mask)
return true
}
- // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask)
- // result: (VPSRAQMasked512const [a] x mask)
+ // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask)
+ // result: (VPSRLQMasked512const [a] x mask)
for {
- if v_0.Op != OpAMD64VPSRAQ512const {
+ if v_0.Op != OpAMD64VPSRLQ512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
- v.reset(OpAMD64VPSRAQMasked512const)
+ v.reset(OpAMD64VPSRLQMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
@@ -43795,21 +43795,6 @@
v.AddArg4(dst, x, y, mask)
return true
}
- // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask)
- // result: (VPSRADMasked512constMerging dst [a] x mask)
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAD512const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- v.reset(OpAMD64VPSRADMasked512constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v.AddArg3(dst, x, mask)
- return true
- }
// match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask)
// result: (VPSRAVDMasked512Merging dst x y mask)
for {
@@ -43824,6 +43809,21 @@
v.AddArg4(dst, x, y, mask)
return true
}
+ // match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask)
+ // result: (VPSRLDMasked512constMerging dst [a] x mask)
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLD512const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ v.reset(OpAMD64VPSRLDMasked512constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v.AddArg3(dst, x, mask)
+ return true
+ }
// match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask)
// result: (VPSRLVDMasked512Merging dst x y mask)
for {
@@ -44566,21 +44566,6 @@
v.AddArg4(dst, x, y, mask)
return true
}
- // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask)
- // result: (VPSRAQMasked512constMerging dst [a] x mask)
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAQ512const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- v.reset(OpAMD64VPSRAQMasked512constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v.AddArg3(dst, x, mask)
- return true
- }
// match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask)
// result: (VPSRAVQMasked512Merging dst x y mask)
for {
@@ -44595,6 +44580,21 @@
v.AddArg4(dst, x, y, mask)
return true
}
+ // match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask)
+ // result: (VPSRLQMasked512constMerging dst [a] x mask)
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLQ512const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ v.reset(OpAMD64VPSRLQMasked512constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v.AddArg3(dst, x, mask)
+ return true
+ }
// match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask)
// result: (VPSRLVQMasked512Merging dst x y mask)
for {
@@ -45115,21 +45115,6 @@
v.AddArg4(dst, x, y, mask)
return true
}
- // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask)
- // result: (VPSRAWMasked512constMerging dst [a] x mask)
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAW512const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- v.reset(OpAMD64VPSRAWMasked512constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v.AddArg3(dst, x, mask)
- return true
- }
// match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask)
// result: (VPSRLVWMasked512Merging dst x y mask)
for {
@@ -45144,6 +45129,21 @@
v.AddArg4(dst, x, y, mask)
return true
}
+ // match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask)
+ // result: (VPSRLWMasked512constMerging dst [a] x mask)
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLW512const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ v.reset(OpAMD64VPSRLWMasked512constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v.AddArg3(dst, x, mask)
+ return true
+ }
// match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask)
// result: (VPSUBSWMasked512Merging dst x y mask)
for {
@@ -48404,48 +48404,6 @@
v.AddArg3(dst, x, v0)
return true
}
- // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask)
- // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAD128const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
- break
- }
- v.reset(OpAMD64VPSRADMasked128constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(dst, x, v0)
- return true
- }
- // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask)
- // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAQ128const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
- break
- }
- v.reset(OpAMD64VPSRAQMasked128constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(dst, x, v0)
- return true
- }
// match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
@@ -48506,12 +48464,12 @@
v.AddArg4(dst, x, y, v0)
return true
}
- // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask)
+ // match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ // result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
dst := v_0
- if v_1.Op != OpAMD64VPSRAW128const {
+ if v_1.Op != OpAMD64VPSRLD128const {
break
}
a := auxIntToUint8(v_1.AuxInt)
@@ -48520,9 +48478,30 @@
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
- v.reset(OpAMD64VPSRAWMasked128constMerging)
+ v.reset(OpAMD64VPSRLDMasked128constMerging)
v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
+ // match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLQ128const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPSRLQMasked128constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
@@ -48587,6 +48566,27 @@
v.AddArg4(dst, x, y, v0)
return true
}
+ // match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLW128const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPSRLWMasked128constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
@@ -51426,48 +51426,6 @@
v.AddArg3(dst, x, v0)
return true
}
- // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask)
- // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAD256const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
- break
- }
- v.reset(OpAMD64VPSRADMasked256constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(dst, x, v0)
- return true
- }
- // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask)
- // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
- for {
- dst := v_0
- if v_1.Op != OpAMD64VPSRAQ256const {
- break
- }
- a := auxIntToUint8(v_1.AuxInt)
- x := v_1.Args[0]
- mask := v_2
- if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
- break
- }
- v.reset(OpAMD64VPSRAQMasked256constMerging)
- v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(mask)
- v.AddArg3(dst, x, v0)
- return true
- }
// match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
@@ -51528,12 +51486,12 @@
v.AddArg4(dst, x, y, v0)
return true
}
- // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask)
+ // match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
- // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ // result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
dst := v_0
- if v_1.Op != OpAMD64VPSRAW256const {
+ if v_1.Op != OpAMD64VPSRLD256const {
break
}
a := auxIntToUint8(v_1.AuxInt)
@@ -51542,9 +51500,30 @@
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
- v.reset(OpAMD64VPSRAWMasked256constMerging)
+ v.reset(OpAMD64VPSRLDMasked256constMerging)
v.AuxInt = uint8ToAuxInt(a)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
+ // match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLQ256const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPSRLQMasked256constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
@@ -51609,6 +51588,27 @@
v.AddArg4(dst, x, y, v0)
return true
}
+ // match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPSRLW256const {
+ break
+ }
+ a := auxIntToUint8(v_1.AuxInt)
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPSRLWMasked256constMerging)
+ v.AuxInt = uint8ToAuxInt(a)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
@@ -60473,60 +60473,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAD128 x (MOVQconst [c]))
- // result: (VPSRAD128const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAD128const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAD256 x (MOVQconst [c]))
- // result: (VPSRAD256const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAD256const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAD512 x (MOVQconst [c]))
- // result: (VPSRAD512const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAD512const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -60553,26 +60499,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRADMasked128 x (MOVQconst [c]) mask)
- // result: (VPSRADMasked128const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRADMasked128const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -60601,26 +60527,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRADMasked256 x (MOVQconst [c]) mask)
- // result: (VPSRADMasked256const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRADMasked256const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -60649,26 +60555,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRADMasked512 x (MOVQconst [c]) mask)
- // result: (VPSRADMasked512const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRADMasked512const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -60697,24 +60583,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQ128 x (MOVQconst [c]))
- // result: (VPSRAQ128const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAQ128const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
@@ -60741,24 +60609,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQ256 x (MOVQconst [c]))
- // result: (VPSRAQ256const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAQ256const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
@@ -60785,24 +60635,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQ512 x (MOVQconst [c]))
- // result: (VPSRAQ512const [uint8(c)] x)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAQ512const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg(x)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -60829,26 +60661,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQMasked128 x (MOVQconst [c]) mask)
- // result: (VPSRAQMasked128const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAQMasked128const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -60877,26 +60689,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQMasked256 x (MOVQconst [c]) mask)
- // result: (VPSRAQMasked256const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAQMasked256const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -60925,26 +60717,6 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAQMasked512 x (MOVQconst [c]) mask)
- // result: (VPSRAQMasked512const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAQMasked512const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61255,120 +61027,60 @@
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (VPSRAW128 x (MOVQconst [c]))
- // result: (VPSRAW128const [uint8(c)] x)
+ // match: (VPSRLD128 x (MOVQconst [c]))
+ // result: (VPSRLD128const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAW128const)
+ v.reset(OpAMD64VPSRLD128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (VPSRAW256 x (MOVQconst [c]))
- // result: (VPSRAW256const [uint8(c)] x)
+ // match: (VPSRLD256 x (MOVQconst [c]))
+ // result: (VPSRLD256const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAW256const)
+ v.reset(OpAMD64VPSRLD256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (VPSRAW512 x (MOVQconst [c]))
- // result: (VPSRAW512const [uint8(c)] x)
+ // match: (VPSRLD512 x (MOVQconst [c]))
+ // result: (VPSRLD512const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
- v.reset(OpAMD64VPSRAW512const)
+ v.reset(OpAMD64VPSRLD512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAWMasked128 x (MOVQconst [c]) mask)
- // result: (VPSRAWMasked128const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAWMasked128const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAWMasked256 x (MOVQconst [c]) mask)
- // result: (VPSRAWMasked256const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAWMasked256const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPSRAWMasked512 x (MOVQconst [c]) mask)
- // result: (VPSRAWMasked512const [uint8(c)] x mask)
- for {
- x := v_0
- if v_1.Op != OpAMD64MOVQconst {
- break
- }
- c := auxIntToInt64(v_1.AuxInt)
- mask := v_2
- v.reset(OpAMD64VPSRAWMasked512const)
- v.AuxInt = uint8ToAuxInt(uint8(c))
- v.AddArg2(x, mask)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -61395,6 +61107,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLDMasked128 x (MOVQconst [c]) mask)
+ // result: (VPSRLDMasked128const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLDMasked128const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61423,6 +61155,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLDMasked256 x (MOVQconst [c]) mask)
+ // result: (VPSRLDMasked256const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLDMasked256const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61451,6 +61203,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLDMasked512 x (MOVQconst [c]) mask)
+ // result: (VPSRLDMasked512const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLDMasked512const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61479,6 +61251,60 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQ128 x (MOVQconst [c]))
+ // result: (VPSRLQ128const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLQ128const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQ256 x (MOVQconst [c]))
+ // result: (VPSRLQ256const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLQ256const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQ512 x (MOVQconst [c]))
+ // result: (VPSRLQ512const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLQ512const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -61505,6 +61331,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQMasked128 x (MOVQconst [c]) mask)
+ // result: (VPSRLQMasked128const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLQMasked128const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61533,6 +61379,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQMasked256 x (MOVQconst [c]) mask)
+ // result: (VPSRLQMasked256const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLQMasked256const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61561,6 +61427,26 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLQMasked512 x (MOVQconst [c]) mask)
+ // result: (VPSRLQMasked512const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLQMasked512const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61817,6 +61703,120 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLW128 x (MOVQconst [c]))
+ // result: (VPSRLW128const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLW128const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLW256 x (MOVQconst [c]))
+ // result: (VPSRLW256const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLW256const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLW512 x (MOVQconst [c]))
+ // result: (VPSRLW512const [uint8(c)] x)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpAMD64VPSRLW512const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg(x)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLWMasked128 x (MOVQconst [c]) mask)
+ // result: (VPSRLWMasked128const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLWMasked128const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLWMasked256 x (MOVQconst [c]) mask)
+ // result: (VPSRLWMasked256const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLWMasked256const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPSRLWMasked512 x (MOVQconst [c]) mask)
+ // result: (VPSRLWMasked512const [uint8(c)] x mask)
+ for {
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mask := v_2
+ v.reset(OpAMD64VPSRLWMasked512const)
+ v.AuxInt = uint8ToAuxInt(uint8(c))
+ v.AddArg2(x, mask)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
index 7a88234..cf32806 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
@@ -229,8 +229,8 @@
if gOp.SpecialLower != nil {
if *gOp.SpecialLower == "sftimm" {
- if data.GoType[0] == 'I' {
- // only do these for signed types, it is a duplicate rewrite for unsigned
+ if data.GoType[0] == 'U' {
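+ // only do these for unsigned types; for left shifts the signed rewrite is a duplicate. NOTE(review): signed arithmetic right shifts (VPSRA*) have no unsigned twin and lose their const rewrite here - confirm this is intended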
sftImmData := data
if tplName == "maskIn" {
sftImmData.tplName = "masksftimm"
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index 3489f34..8eb0699 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -5359,7 +5359,7 @@
/* SHA256Message1 */
-// SHA256Message1 does the sigma and addtion of 1 in SHA256 algorithm defined in FIPS 180-4.
+// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
// x = {W0, W1, W2, W3}
// y = {W4, 0, 0, 0}
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}