cmd/compile: pair NEON loads/stores
diff --git a/src/cmd/compile/internal/ssa/pair.go b/src/cmd/compile/internal/ssa/pair.go
index be524dc..0d76237 100644
--- a/src/cmd/compile/internal/ssa/pair.go
+++ b/src/cmd/compile/internal/ssa/pair.go
@@ -24,14 +24,14 @@
pairStores(f)
}
-type pairableLoadInfo struct {
+type pairInfo struct {
width int64 // width of one element in the pair, in bytes
pair Op
}
// All pairableLoad ops must take 2 arguments, a pointer and a memory.
// They must also take an offset in Aux/AuxInt.
-var pairableLoads = map[Op]pairableLoadInfo{
+var pairableLoads = map[Op]pairInfo{
OpARM64MOVDload: {8, OpARM64LDP},
OpARM64MOVWUload: {4, OpARM64LDPW},
OpARM64MOVWload: {4, OpARM64LDPSW},
@@ -39,21 +39,20 @@
// if we knew the upper bits of one of them weren't being used.
OpARM64FMOVDload: {8, OpARM64FLDPD},
OpARM64FMOVSload: {4, OpARM64FLDPS},
-}
-
-type pairableStoreInfo struct {
- width int64 // width of one element in the pair, in bytes
- pair Op
+ // NEON loads
+ OpARM64FMOVQload: {16, OpARM64FLDPQ},
}
// All pairableStore keys must take 3 arguments, a pointer, a value, and a memory.
// All pairableStore values must take 4 arguments, a pointer, 2 values, and a memory.
// They must also take an offset in Aux/AuxInt.
-var pairableStores = map[Op]pairableStoreInfo{
+var pairableStores = map[Op]pairInfo{
OpARM64MOVDstore: {8, OpARM64STP},
OpARM64MOVWstore: {4, OpARM64STPW},
OpARM64FMOVDstore: {8, OpARM64FSTPD},
OpARM64FMOVSstore: {4, OpARM64FSTPS},
+ // NEON stores
+ OpARM64FMOVQstore: {16, OpARM64FSTPQ},
}
// offsetOk returns true if a pair instruction should be used
@@ -115,6 +114,8 @@
if off >= -512 && off <= 504 && off%8 == 0 {
return true
}
+ // If offsetOk is re-enabled in the future,
+ // width==16 (FLDPQ/FSTPQ) will need support here.
}
return false
}
@@ -400,12 +401,14 @@
// storeWidth returns the width of store,
// or 0 if it is not a store this pass understands.
storeWidth := func(op Op) int64 {
+ if info, ok := pairableStores[op]; ok {
+ return info.width
+ }
+
+ // We don't pair these stores, but returning zero here
+ // would flush the memory chain.
var width int64
switch op {
- case OpARM64MOVDstore, OpARM64FMOVDstore:
- width = 8
- case OpARM64MOVWstore, OpARM64FMOVSstore:
- width = 4
case OpARM64MOVHstore:
width = 2
case OpARM64MOVBstore:
@@ -413,6 +416,7 @@
default:
width = 0
}
+
return width
}
@@ -457,12 +461,15 @@
if v.Uses != 1 && len(memChain) > 0 ||
len(memChain) > 0 && (v.Args[0] != memChain[0].Args[0] || v.Aux != memChain[0].Aux) ||
len(memChain) == limit {
- // If v has multiple uses and it is not the latest store in the chain,
+ // 1. If v has multiple uses and it is not the latest store in the chain,
// we cannot merge it with other store instructions.
- // If v has a different base pointer or Aux value from the current chain,
+ //
+ // 2. If v has a different base pointer or Aux value from the current chain,
// we need to flush memChain and start a new one with v.
- // If memChain length limit is exceeded, we also need to flush the chain
+ //
+ // 3. If memChain length limit is exceeded, we also need to flush the chain
// and start a new one with v.
+ //
// Only look back so far.
// This keeps us in O(n) territory, and it
// also prevents us from keeping values
diff --git a/test/codegen/memcombine_simd.go b/test/codegen/memcombine_simd.go
new file mode 100644
index 0000000..46dfc7d
--- /dev/null
+++ b/test/codegen/memcombine_simd.go
@@ -0,0 +1,22 @@
+// asmcheck
+//go:build goexperiment.simd
+
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "simd/archsimd"
+
+// TODO: Move these tests to memcombine.go when GOEXPERIMENT=simd becomes the default
+func dwloadInt64x2(p *struct{ a, b archsimd.Int64x2 }) (archsimd.Int64x2, archsimd.Int64x2) {
+ // arm64:"FLDPQ "
+ return p.a, p.b
+}
+
+func dwstoreInt64x2(p *struct{ a, b archsimd.Int64x2 }, a, b archsimd.Int64x2) {
+ // arm64:`FSTPQ\s\(F[0-9]+, F[0-9]+\), \(R[0-9]+\)`
+ p.a = a
+ p.b = b
+}
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Hi! CL 760100 was the most recent change to the pair pass, so I've added its reviewers here.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Note: this CL exposed that current master fails `test/codegen` on `arm64` with `GOEXPERIMENT=simd` enabled. I’ve reported it here: https://github.com/golang/go/issues/79899
| Code-Review | +2 |
| Commit-Queue | +1 |
> // If offsetOk is re-enabled in the future,
I believe FMOVQ instructions encode the offset as (7-bit signed) * 16, so we should probably have a width==16 case with off >= -1024 && off <= 1008 && off%16 == 0.
(Although there is a reasonable chance we will only be aligned to 8, so maybe that would prevent this optimization a fair amount of the time? Not sure.)
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |