Unreviewed changes
Patch set 1 is the latest approved patch set.
The change was submitted with unreviewed changes in the following files:
```
The name of the file: src/simd/archsimd/_gen/tmplgen/main.go
Insertions: 6, Deletions: 0.
@@ -1284,6 +1284,8 @@
var reduceSumTemplateArm64 = shapedTemplateOf(arm64ReduceIntegerShapes, "arm64_ReduceSum methods", `
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x {{.VType}}) ReduceSum() {{.Etype}} {
return x.reduceSum().GetElem(0)
}
@@ -1291,11 +1293,15 @@
var reduceMinMaxTemplateArm64 = shapedTemplateOf(arm64ReduceAllShapes, "arm64_ReduceMax/Min methods", `
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x {{.VType}}) ReduceMax() {{.Etype}} {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x {{.VType}}) ReduceMin() {{.Etype}} {
return x.reduceMin().GetElem(0)
}
```
```
The name of the file: src/simd/archsimd/other_gen_arm64.go
Insertions: 40, Deletions: 0.
@@ -379,101 +379,141 @@
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int8x16) ReduceSum() int8 {
return x.reduceSum().GetElem(0)
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int16x8) ReduceSum() int16 {
return x.reduceSum().GetElem(0)
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int32x4) ReduceSum() int32 {
return x.reduceSum().GetElem(0)
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint8x16) ReduceSum() uint8 {
return x.reduceSum().GetElem(0)
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint16x8) ReduceSum() uint16 {
return x.reduceSum().GetElem(0)
}
// ReduceSum reduces x by summing all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint32x4) ReduceSum() uint32 {
return x.reduceSum().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int8x16) ReduceMax() int8 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int8x16) ReduceMin() int8 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int16x8) ReduceMax() int16 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int16x8) ReduceMin() int16 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int32x4) ReduceMax() int32 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Int32x4) ReduceMin() int32 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint8x16) ReduceMax() uint8 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint8x16) ReduceMin() uint8 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint16x8) ReduceMax() uint16 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint16x8) ReduceMin() uint16 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint32x4) ReduceMax() uint32 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Uint32x4) ReduceMin() uint32 {
return x.reduceMin().GetElem(0)
}
// ReduceMax reduces x by taking the maximum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Float32x4) ReduceMax() float32 {
return x.reduceMax().GetElem(0)
}
// ReduceMin reduces x by taking the minimum of all elements.
+//
+// Emulated, CPU Feature: NEON
func (x Float32x4) ReduceMin() float32 {
return x.reduceMin().GetElem(0)
}
```
Change information
Commit message:
simd: for 1.27, make ARM64 Reduce* methods return scalar
This is a cherry-pick of Alexander Musman's CL 784800,
done this way to help meet the 1.27 release deadlines and
to reduce a minor amount of merging friction.
ReduceSum/ReduceMax/ReduceMin on all types now return a scalar (e.g. int32)
instead of a vector. Internally, the intrinsics are unexported (lowercase)
and return a vector with the result in lane 0. The exported wrappers,
generated by tmplgen, call GetElem(0) to extract the scalar.
A follow-up CL will add peephole rules to elide the GetElem(0)
when the result feeds directly into another SIMD operation.
Former-Change-Id: I630cec6a26d54ed9ed1412a37076e20df9e51038
Change-Id: Id8ff6e07083bc62de4ca595dea3c9884b75ec517
Files:
- M src/cmd/compile/internal/ssa/_gen/simdARM64.rules
- M src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
- M src/cmd/compile/internal/ssa/opGen.go
- M src/cmd/compile/internal/ssa/rewriteARM64.go
- M src/cmd/compile/internal/ssagen/simdARM64intrinsics.go
- M src/simd/archsimd/_gen/simdgen/ops/Reduce/categories.yaml
- M src/simd/archsimd/_gen/simdgen/ops/Reduce/go_arm64.yaml
- M src/simd/archsimd/_gen/tmplgen/main.go
- A src/simd/archsimd/internal/simd_test/reduce_arm64_helpers_test.go
- M src/simd/archsimd/internal/simd_test/reduce_arm64_test.go
- M src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
- M src/simd/archsimd/ops_arm64.go
- M src/simd/archsimd/ops_internal_arm64.go
- M src/simd/archsimd/other_gen_arm64.go
Change size: XL
Delta: 14 files changed, 757 insertions(+), 406 deletions(-)
Branch: refs/heads/master