Alexander Musman (Gerrit)

unread,

Sep 9, 2025, 12:53:48 AM (9/9/25)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Alexander Musman has uploaded the change for review

Commit message

[dev.simd] simd: arm64 neon toy example

Change-Id: I3ecb2e1cc669ec227cf2c6bb5980048d8fc3e1b4

Change diff

diff --git a/src/cmd/compile/internal/arm64/simdssa.go b/src/cmd/compile/internal/arm64/simdssa.go
new file mode 100644
index 0000000..a948719
--- /dev/null
+++ b/src/cmd/compile/internal/arm64/simdssa.go
@@ -0,0 +1,33 @@
+// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+
+package arm64
+
+import (
+	"cmd/compile/internal/ssa"
+	"cmd/compile/internal/ssagen"
+	"cmd/internal/obj"
+	"cmd/internal/obj/arm64"
+)
+
+func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
+	var p *obj.Prog
+	switch v.Op {
+	case ssa.OpARM64VFADD32x4,
+		ssa.OpARM64VADD32x4:
+		p = simdV21(s, v, arng_32x4)
+
+	case ssa.OpARM64VFADD64x2,
+		ssa.OpARM64VADD64x2:
+		p = simdV21(s, v, arng_64x2)
+
+	default:
+		// Unknown reg shape
+		return false
+	}
+
+	// Ensure p and architecture package are marked as used
+	// (they may not be used in all generated code paths)
+	_ = p
+	_ = arm64.REG_V0
+	return true
+}
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 534954f..4d753b6 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -21,7 +21,11 @@
 
 // loadByType returns the load instruction of the given type.
 func loadByType(t *types.Type) obj.As {
-	if t.IsFloat() {
+	if t.IsSIMD() {
+		if t.Size() == 16 {
+			return arm64.AFMOVQ // Use FMOVQ (LDR Q) for 128-bit SIMD loads
+		}
+	} else if t.IsFloat() {
 		switch t.Size() {
 		case 4:
 			return arm64.AFMOVS
@@ -57,7 +61,11 @@
 
 // storeByType returns the store instruction of the given type.
 func storeByType(t *types.Type) obj.As {
-	if t.IsFloat() {
+	if t.IsSIMD() {
+		if t.Size() == 16 {
+			return arm64.AFMOVQ // Use FMOVQ (STR Q) for 128-bit SIMD stores
+		}
+	} else if t.IsFloat() {
 		switch t.Size() {
 		case 4:
 			return arm64.AFMOVS
@@ -1472,7 +1480,9 @@
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
 	default:
-		v.Fatalf("genValue not implemented: %s", v.LongString())
+		if !ssaGenSIMDValue(s, v) {
+			v.Fatalf("genValue not implemented: %s", v.LongString())
+		}
 	}
 }
 
@@ -1729,3 +1739,56 @@
 	st.To.Reg = dst
 	st.To.Offset = off
 }
+
+// Arrangement constants for ARM64 SIMD operations, named <elemBits>x<lanes>.
+// Order must match the ARNG_* list in cmd/internal/obj/arm64/a.out.go; the generator currently emits only full-width ones.
+const (
+	arng_8x8  = iota // ARNG_8B
+	arng_8x16        // ARNG_16B: 16 lanes of 8-bit elements
+	arng_64x1        // ARNG_1D
+	arng_16x4        // ARNG_4H
+	arng_16x8        // ARNG_8H: 8 lanes of 16-bit elements
+	arng_32x2        // ARNG_2S: 2 lanes of 32-bit elements
+	arng_32x4        // ARNG_4S: 4 lanes of 32-bit elements
+	arng_64x2        // ARNG_2D: 2 lanes of 64-bit elements
+)
+
+// simdV21 emits the two-input, one-output SIMD instruction v.Op.Asm() for v:
+// From is v.Args[1], Reg is v.Args[0] and To is the register assigned to v.
+// arrangement (an arng_* value, e.g. 4S or 2D) is applied to all three operands.
+func simdV21(s *ssagen.State, v *ssa.Value, arrangement int16) *obj.Prog {
+	p := s.Prog(v.Op.Asm())
+	p.From.Type = obj.TYPE_REG
+	p.From.Reg, p.From.Class = simdReg(v.Args[1], arrangement)
+	p.Reg, _ = simdReg(v.Args[0], arrangement)
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg, p.To.Class = simdReg(v, arrangement)
+	return p
+}
+
+// simdReg converts the register SSA assigned to v into the ARM64 vector
+// register operand with the given arrangement (e.g., V0.4S for 32x4, V0.2D
+// for 64x2). It returns the encoded register and its addressing class.
+// Need to be consistent with ARM64RegisterExtension.
+func simdReg(v *ssa.Value, arrangement int16) (int16, int8) {
+	// Get the register number (0-31)
+	reg := v.Reg()
+	var regNum int16
+	switch {
+	case arm64.REG_F0 <= reg && reg <= arm64.REG_F31:
+		// SSA uses F registers for SIMD - convert to V register number
+		regNum = reg - arm64.REG_F0
+	case arm64.REG_V0 <= reg && reg <= arm64.REG_V31:
+		// SSA is expected to hand out only F registers for SIMD values.
+		// Report via base.Fatalf, like the default case, rather than a raw panic.
+		base.Fatalf("simdReg: got V register %v from SSA - this path needs investigation", reg)
+	default:
+		// Unexpected register type
+		base.Fatalf("simdReg: unexpected register %v for SIMD value", reg)
+	}
+
+	// Return V register with arrangement for SIMD arithmetic operations
+	// REG_ARNG is the base for registers with arrangement
+	// The arrangement goes in bits [8:5], register number in bits [4:0]
+	return arm64.REG_ARNG | (arrangement << 5) | regNum, arm64.C_ARNG
+}
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index f54a692..c184422 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -341,6 +341,7 @@
 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVDload ptr mem)
 (Load <t> ptr mem) && is32BitFloat(t) => (FMOVSload ptr mem)
 (Load <t> ptr mem) && is64BitFloat(t) => (FMOVDload ptr mem)
+(Load <t> ptr mem) && t.Size() == 16 => (FMOVQload ptr mem)
 
 // stores
 (Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
@@ -349,6 +350,7 @@
 (Store {t} ptr val mem) && t.Size() == 8 && !t.IsFloat() => (MOVDstore ptr val mem)
 (Store {t} ptr val mem) && t.Size() == 4 &&  t.IsFloat() => (FMOVSstore ptr val mem)
 (Store {t} ptr val mem) && t.Size() == 8 &&  t.IsFloat() => (FMOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 16 => (FMOVQstore ptr val mem)
 
 // zeroing
 (Zero [0] _   mem) => mem
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
index 6b1ae48..10e2040 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
@@ -802,7 +802,8 @@
 		name:               "ARM64",
 		pkg:                "cmd/internal/obj/arm64",
 		genfile:            "../../arm64/ssa.go",
-		ops:                ops,
+		genSIMDfile:        "../../arm64/simdssa.go",
+		ops:                append(ops, simdARM64Ops(fp11, fp21)...),
 		blocks:             blocks,
 		regnames:           regNamesARM64,
 		ParamIntRegNames:   "R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15",
diff --git a/src/cmd/compile/internal/ssa/_gen/simdARM64.rules b/src/cmd/compile/internal/ssa/_gen/simdARM64.rules
new file mode 100644
index 0000000..1f0ecff
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/_gen/simdARM64.rules
@@ -0,0 +1,6 @@
+// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+
+(AddFloat32x4 ...) => (VFADD32x4 ...)
+(AddFloat64x2 ...) => (VFADD64x2 ...)
+(AddInt32x4 ...) => (VADD32x4 ...)
+(AddInt64x2 ...) => (VADD64x2 ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdARM64ops.go b/src/cmd/compile/internal/ssa/_gen/simdARM64ops.go
new file mode 100644
index 0000000..1d560b1
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/_gen/simdARM64ops.go
@@ -0,0 +1,12 @@
+// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+
+package main
+
+func simdARM64Ops(v11, v21 regInfo) []opData {
+	return []opData{
+		{name: "VADD32x4", argLength: 2, reg: v21, asm: "VADD", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VADD64x2", argLength: 2, reg: v21, asm: "VADD", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VFADD32x4", argLength: 2, reg: v21, asm: "VFADD", commutative: true, typ: "Vec128", resultInArg0: false},
+		{name: "VFADD64x2", argLength: 2, reg: v21, asm: "VFADD", commutative: true, typ: "Vec128", resultInArg0: false},
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 0593470..ae5534d 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -3109,6 +3109,10 @@
 	OpARM64PRFM
 	OpARM64DMB
 	OpARM64ZERO
+	OpARM64VADD32x4
+	OpARM64VADD64x2
+	OpARM64VFADD32x4
+	OpARM64VFADD64x2
 
 	OpLOONG64NEGV
 	OpLOONG64NEGF
@@ -46171,6 +46175,66 @@
 		fixedReg:  true,
 		reg:       regInfo{},
 	},
+	{
+		name:        "VADD32x4",
+		argLen:      2,
+		commutative: true,
+		asm:         arm64.AVADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+				{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
+	{
+		name:        "VADD64x2",
+		argLen:      2,
+		commutative: true,
+		asm:         arm64.AVADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+				{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
+	{
+		name:        "VFADD32x4",
+		argLen:      2,
+		commutative: true,
+		asm:         arm64.AVFADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+				{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
+	{
+		name:        "VFADD64x2",
+		argLen:      2,
+		commutative: true,
+		asm:         arm64.AVFADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+				{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+			outputs: []outputInfo{
+				{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+			},
+		},
+	},
 
 	{
 		name:   "NEGV",
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6af1558..12da362 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -421,6 +421,18 @@
 	case OpAdd8:
 		v.Op = OpARM64ADD
 		return true
+	case OpAddFloat32x4:
+		v.Op = OpARM64VFADD32x4
+		return true
+	case OpAddFloat64x2:
+		v.Op = OpARM64VFADD64x2
+		return true
+	case OpAddInt32x4:
+		v.Op = OpARM64VADD32x4
+		return true
+	case OpAddInt64x2:
+		v.Op = OpARM64VADD64x2
+		return true
 	case OpAddPtr:
 		v.Op = OpARM64ADD
 		return true
@@ -18259,6 +18271,20 @@
 		v.AddArg2(ptr, mem)
 		return true
 	}
+	// match: (Load <t> ptr mem)
+	// cond: t.Size() == 16
+	// result: (FMOVQload ptr mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mem := v_1
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpARM64FMOVQload)
+		v.AddArg2(ptr, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpLocalAddr(v *Value) bool {
@@ -21994,6 +22020,21 @@
 		v.AddArg3(ptr, val, mem)
 		return true
 	}
+	// match: (Store {t} ptr val mem)
+	// cond: t.Size() == 16
+	// result: (FMOVQstore ptr val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpARM64FMOVQstore)
+		v.AddArg3(ptr, val, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpZero(v *Value) bool {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 4ce329e..8a0647d 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -1695,4 +1695,29 @@
 	addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
 	addF(simdPackage, "Mask64x8FromBits", simdCvtVToMask(64, 8), sys.AMD64)
 	addF(simdPackage, "Mask64x8.ToBits", simdCvtMaskToV(64, 8), sys.AMD64)
+	// NEON
+	addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.ARM64)
+	addF(simdPackage, "Float64x2.Add", opLen2(ssa.OpAddFloat64x2, types.TypeVec128), sys.ARM64)
+	addF(simdPackage, "Int32x4.Add", opLen2(ssa.OpAddInt32x4, types.TypeVec128), sys.ARM64)
+	addF(simdPackage, "Int64x2.Add", opLen2(ssa.OpAddInt64x2, types.TypeVec128), sys.ARM64)
+	addF(simdPackage, "Float32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Float32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Float32x4.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Float64x2.AsFloat32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Float64x2.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Float64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int32x4.AsFloat32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int32x4.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int64x2.AsFloat32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int64x2.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "Int64x2.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.ARM64)
+	addF(simdPackage, "LoadFloat32x4", simdLoad(), sys.ARM64)
+	addF(simdPackage, "Float32x4.Store", simdStore(), sys.ARM64)
+	addF(simdPackage, "LoadFloat64x2", simdLoad(), sys.ARM64)
+	addF(simdPackage, "Float64x2.Store", simdStore(), sys.ARM64)
+	addF(simdPackage, "LoadInt32x4", simdLoad(), sys.ARM64)
+	addF(simdPackage, "Int32x4.Store", simdStore(), sys.ARM64)
+	addF(simdPackage, "LoadInt64x2", simdLoad(), sys.ARM64)
+	addF(simdPackage, "Int64x2.Store", simdStore(), sys.ARM64)
 }
diff --git a/src/simd/_gen/simdgen/arm64.neon.yaml.toy b/src/simd/_gen/simdgen/arm64.neon.yaml.toy
new file mode 100644
index 0000000..901fcd5
--- /dev/null
+++ b/src/simd/_gen/simdgen/arm64.neon.yaml.toy
@@ -0,0 +1,146 @@
+!sum
+
+# How to generate simd support for an ARM64 NEON subset specified in this file:
+# 1) Run the generator tool with "-arch arm64" option:
+# src/simd/_gen/simdgen% go run . -arch arm64 -o godefs -goroot output arm64.neon.yaml.toy types.yaml
+# 2) Manually merge the files shared between amd64 and arm64:
+# src/simd/_gen/simdgen% export C=../../../ ; export O=output/src/
+# export F=cmd/compile/internal/ssagen/simdintrinsics.go ; vimdiff $O/$F $C/$F
+# export F=cmd/compile/internal/ssa/_gen/simdgenericOps.go ; vimdiff $O/$F $C/$F
+# export F=simd/cpu.go ; vimdiff $O/$F $C/$F
+# 3) The remaining generated files are arm64-specific and may be copied over:
+# export F=cmd/compile/internal/ssa/_gen/simdARM64ops.go ; cp $O/$F $C/$F
+# export F=cmd/compile/internal/ssa/_gen/simdARM64.rules ; cp $O/$F $C/$F
+# export F=simd/types_arm64.go ; cp $O/$F $C/$F
+# export F=simd/ops_arm64.go ; cp $O/$F $C/$F
+# 4) Ready to build and test:
+# cd WSROOT/src/cmd/compile/internal/ssa && go generate
+# cd WSROOT/src && ./make.bash
+
+# ARM64 FADD instruction (NEON 4-lane 32-bit floating-point add)
+- go: Add
+  goarch: arm64
+  asm: VFADD
+  arrangement: "32x4"
+  cpuFeature: NEON
+  commutative: true
+  inVariant: []
+  in:
+  - class: vreg
+    go: Float32x4
+    base: float
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 1
+  - class: vreg
+    go: Float32x4
+    base: float
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 2
+  out:
+  - class: vreg
+    go: Float32x4
+    base: float
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 0
+
+# ARM64 FADD instruction (NEON 2-lane 64-bit floating-point add)
+- go: Add
+  goarch: arm64
+  asm: VFADD
+  arrangement: "64x2"
+  cpuFeature: NEON
+  commutative: true
+  inVariant: []
+  in:
+  - class: vreg
+    go: Float64x2
+    base: float
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 1
+  - class: vreg
+    go: Float64x2
+    base: float
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 2
+  out:
+  - class: vreg
+    go: Float64x2
+    base: float
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 0
+
+# ARM64 ADD instruction (NEON 4-lane 32-bit integer add)
+- go: Add
+  goarch: arm64
+  asm: VADD
+  arrangement: "32x4"
+  cpuFeature: NEON
+  commutative: true
+  inVariant: []
+  in:
+  - class: vreg
+    go: Int32x4
+    base: int
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 1
+  - class: vreg
+    go: Int32x4
+    base: int
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 2
+  out:
+  - class: vreg
+    go: Int32x4
+    base: int
+    elemBits: 32
+    bits: 128
+    lanes: 4
+    asmPos: 0
+
+# ARM64 ADD instruction (NEON 2-lane 64-bit integer add)
+- go: Add
+  goarch: arm64
+  asm: VADD
+  arrangement: "64x2"
+  cpuFeature: NEON
+  commutative: true
+  inVariant: []
+  in:
+  - class: vreg
+    go: Int64x2
+    base: int
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 1
+  - class: vreg
+    go: Int64x2
+    base: int
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 2
+  out:
+  - class: vreg
+    go: Int64x2
+    base: int
+    elemBits: 64
+    bits: 128
+    lanes: 2
+    asmPos: 0
diff --git a/src/simd/_gen/simdgen/gen_simdIntrinsics.go b/src/simd/_gen/simdgen/gen_simdIntrinsics.go
index 0ef5124..6b5d6aa 100644
--- a/src/simd/_gen/simdgen/gen_simdIntrinsics.go
+++ b/src/simd/_gen/simdgen/gen_simdIntrinsics.go
@@ -141,10 +141,13 @@
 		}
 	}
 
-	for _, typ := range typesFromTypeMap(typeMap) {
-		if typ.MaskedLoadStoreFilter() {
-			if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil {
-				panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err))
+	// TODO: Does NEON need masked ops here (it doesn't have native masked load/store)
+	if archInfo.Arch != "arm64" {
+		for _, typ := range typesFromTypeMap(typeMap) {
+			if typ.MaskedLoadStoreFilter() {
+				if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil {
+					panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err))
+				}
 			}
 		}
 	}
diff --git a/src/simd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/_gen/simdgen/gen_simdMachineOps.go
index cf952fc..ecdd19f 100644
--- a/src/simd/_gen/simdgen/gen_simdMachineOps.go
+++ b/src/simd/_gen/simdgen/gen_simdMachineOps.go
@@ -14,7 +14,7 @@
 const simdMachineOpsTmpl = `
 package main
 
-func simd{{.ArchUpper}}Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData {
+func simd{{.ArchUpper}}Ops({{if eq .ArchUpper "ARM64"}}v11, v21 regInfo{{else}}v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo{{end}}) []opData {
 	return []opData{
 {{- range .OpsData }}
 		{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/_gen/simdgen/gen_simdTypes.go
index 8ce348d..d43620d 100644
--- a/src/simd/_gen/simdgen/gen_simdTypes.go
+++ b/src/simd/_gen/simdgen/gen_simdTypes.go
@@ -145,6 +145,9 @@
 const simdFeaturesTemplate = `
 import "internal/cpu"
 
+// Ensure internal/cpu is marked as used (package may not be used on some targets)
+var _ = cpu.DebugOptions
+
 {{range .}}
 {{- if eq .Feature "AVX512"}}
 // Has{{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
@@ -496,7 +499,7 @@
 }
 
 // writeSIMDTypes generates the simd vector types into a bytes.Buffer
-func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
+func writeSIMDTypes(typeMap simdTypeMap, archInfo ArchInfo) *bytes.Buffer {
 	t := templateOf(simdTypesTemplates, "types_amd64")
 	loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
 	maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
@@ -533,7 +536,8 @@
 					panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err))
 				}
 				// restrict to AVX2 masked loads/stores first.
-				if typeDef.MaskedLoadStoreFilter() {
+				// TODO: Does NEON need masked ops here (it doesn't have native masked load/store).
+				if archInfo.Arch != "arm64" && typeDef.MaskedLoadStoreFilter() {
 					if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil {
 						panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err))
 					}
diff --git a/src/simd/_gen/simdgen/gen_simdssa.go b/src/simd/_gen/simdgen/gen_simdssa.go
index 56aee04..6b0a66e 100644
--- a/src/simd/_gen/simdgen/gen_simdssa.go
+++ b/src/simd/_gen/simdgen/gen_simdssa.go
@@ -46,6 +46,10 @@
 	}
 {{end}}
 {{define "ending"}}
+	// Ensure p and architecture package are marked as used
+	// (they may not be used in all generated code paths)
+	_ = p
+	_ = {{.ObjArch}}.REG_V0
 	return true
 }
 {{end}}`))
@@ -224,8 +228,8 @@
 		}
 	}
 
-	if err := ssaTemplates.ExecuteTemplate(buffer, "ending", nil); err != nil {
-		panic(fmt.Errorf("failed to execute footer template: %w", err))
+	if err := ssaTemplates.ExecuteTemplate(buffer, "ending", archInfo); err != nil {
+		panic(fmt.Errorf("failed to execute ending template: %w", err))
 	}
 
 	return buffer
diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go
index c13e4cd..f3cae95 100644
--- a/src/simd/_gen/simdgen/godefs.go
+++ b/src/simd/_gen/simdgen/godefs.go
@@ -385,7 +385,7 @@
 
 	archInfo := CurrentArch()
 
-	formatWriteAndClose(writeSIMDTypes(typeMap), path, fmt.Sprintf("src/%s/types_%s.go", simdPackage, archInfo.Arch))
+	formatWriteAndClose(writeSIMDTypes(typeMap, archInfo), path, fmt.Sprintf("src/%s/types_%s.go", simdPackage, archInfo.Arch))
 	formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go")
 	formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, fmt.Sprintf("src/%s/ops_%s.go", simdPackage, archInfo.Arch))
 	formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go")
diff --git a/src/simd/cpu.go b/src/simd/cpu.go
index cbde9a8..ef56b9a 100644
--- a/src/simd/cpu.go
+++ b/src/simd/cpu.go
@@ -90,3 +90,14 @@
 func HasAVXVNNI() bool {
 	return cpu.X86.HasAVXVNNI
 }
+
+// Ensure internal/cpu is marked as used (package may not be used on some targets)
+var _ = cpu.DebugOptions
+
+// HasNEON returns whether the CPU supports the NEON feature.
+//
+// NOTE(review): this returns true unconditionally; if this file is built for
+// every GOARCH, it should report false off arm64 - TODO confirm and add a check.
+func HasNEON() bool {
+	return true // NEON is mandatory on ARM64
+}
diff --git a/src/simd/dummy_arm64.s b/src/simd/dummy_arm64.s
new file mode 100644
index 0000000..6f73a91
--- /dev/null
+++ b/src/simd/dummy_arm64.s
@@ -0,0 +1,7 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64
+
+// Empty file to allow bodyless functions.
\ No newline at end of file
diff --git a/src/simd/ops_arm64.go b/src/simd/ops_arm64.go
new file mode 100644
index 0000000..4132ec6
--- /dev/null
+++ b/src/simd/ops_arm64.go
@@ -0,0 +1,63 @@
+// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+/* Add */
+
+// UNDOCUMENTED
+//
+// Asm: VFADD, CPU Feature: NEON
+func (x Float32x4) Add(y Float32x4) Float32x4
+
+// UNDOCUMENTED
+//
+// Asm: VFADD, CPU Feature: NEON
+func (x Float64x2) Add(y Float64x2) Float64x2
+
+// UNDOCUMENTED
+//
+// Asm: VADD, CPU Feature: NEON
+func (x Int32x4) Add(y Int32x4) Int32x4
+
+// UNDOCUMENTED
+//
+// Asm: VADD, CPU Feature: NEON
+func (x Int64x2) Add(y Int64x2) Int64x2
+
+// Float64x2 converts from Float32x4 to Float64x2
+func (from Float32x4) AsFloat64x2() (to Float64x2)
+
+// Int32x4 converts from Float32x4 to Int32x4
+func (from Float32x4) AsInt32x4() (to Int32x4)
+
+// Int64x2 converts from Float32x4 to Int64x2
+func (from Float32x4) AsInt64x2() (to Int64x2)
+
+// Float32x4 converts from Float64x2 to Float32x4
+func (from Float64x2) AsFloat32x4() (to Float32x4)
+
+// Int32x4 converts from Float64x2 to Int32x4
+func (from Float64x2) AsInt32x4() (to Int32x4)
+
+// Int64x2 converts from Float64x2 to Int64x2
+func (from Float64x2) AsInt64x2() (to Int64x2)
+
+// Float32x4 converts from Int32x4 to Float32x4
+func (from Int32x4) AsFloat32x4() (to Float32x4)
+
+// Float64x2 converts from Int32x4 to Float64x2
+func (from Int32x4) AsFloat64x2() (to Float64x2)
+
+// Int64x2 converts from Int32x4 to Int64x2
+func (from Int32x4) AsInt64x2() (to Int64x2)
+
+// Float32x4 converts from Int64x2 to Float32x4
+func (from Int64x2) AsFloat32x4() (to Float32x4)
+
+// Float64x2 converts from Int64x2 to Float64x2
+func (from Int64x2) AsFloat64x2() (to Float64x2)
+
+// Int32x4 converts from Int64x2 to Int32x4
+func (from Int64x2) AsInt32x4() (to Int32x4)
diff --git a/src/simd/testneon/sample.go b/src/simd/testneon/sample.go
new file mode 100644
index 0000000..c55c5a8
--- /dev/null
+++ b/src/simd/testneon/sample.go
@@ -0,0 +1,189 @@
+//go:build goexperiment.simd
+
+// NEON SIMD example
+// Run with: GOEXPERIMENT=simd go run sample.go
+package main
+
+import (
+	"fmt"
+	"os"
+	"simd"
+)
+
+//go:noinline
+func testFloat32x4() {
+	fmt.Println("=== Float32x4 Vector Addition ===")
+
+	a := [4]float32{1.0, 2.0, 3.0, 4.0}
+	b := [4]float32{5.0, 6.0, 7.0, 8.0}
+
+	// Load arrays into SIMD vectors
+	va := simd.LoadFloat32x4(&a)
+	vb := simd.LoadFloat32x4(&b)
+
+	// Perform vector addition (all 4 elements at once)
+	result := va.Add(vb)
+
+	// Store result back to array
+	var output [4]float32
+	result.Store(&output)
+
+	fmt.Printf("a:      %v\n", a)
+	fmt.Printf("b:      %v\n", b)
+	fmt.Printf("a + b:  %v\n", output)
+}
+
+//go:noinline
+func testFloat64x2() {
+	fmt.Println("\n=== Float64x2 Vector Addition ===")
+
+	a := [2]float64{10.5, 20.5}
+	b := [2]float64{2.0, 4.0}
+
+	// Load arrays into SIMD vectors
+	va := simd.LoadFloat64x2(&a)
+	vb := simd.LoadFloat64x2(&b)
+
+	// Perform vector addition (all 2 elements at once)
+	result := va.Add(vb)
+
+	// Store result back to array
+	var output [2]float64
+	result.Store(&output)
+
+	fmt.Printf("a:      %v\n", a)
+	fmt.Printf("b:      %v\n", b)
+	fmt.Printf("a + b:  %v\n", output)
+}
+
+//go:noinline
+func testInt32x4() {
+	fmt.Println("\n=== Int32x4 Vector Addition ===")
+
+	a := [4]int32{10, 20, 30, 40}
+	b := [4]int32{5, 6, 7, 8}
+
+	// Load arrays into SIMD vectors
+	va := simd.LoadInt32x4(&a)
+	vb := simd.LoadInt32x4(&b)
+
+	// Perform vector addition (all 4 elements at once)
+	result := va.Add(vb)
+
+	// Store result back to array
+	var output [4]int32
+	result.Store(&output)
+
+	fmt.Printf("a:      %v\n", a)
+	fmt.Printf("b:      %v\n", b)
+	fmt.Printf("a + b:  %v\n", output)
+}
+
+//go:noinline
+func testInt64x2() {
+	fmt.Println("\n=== Int64x2 Vector Addition ===")
+
+	a := [2]int64{100, 200}
+	b := [2]int64{50, 75}
+
+	// Load arrays into SIMD vectors
+	va := simd.LoadInt64x2(&a)
+	vb := simd.LoadInt64x2(&b)
+
+	// Perform vector addition (all 2 elements at once)
+	result := va.Add(vb)
+
+	// Store result back to array
+	var output [2]int64
+	result.Store(&output)
+
+	fmt.Printf("a:      %v\n", a)
+	fmt.Printf("b:      %v\n", b)
+	fmt.Printf("a + b:  %v\n", output)
+}
+
+func main() {
+	testFloat32x4()
+	testFloat64x2()
+	testInt32x4()
+	testInt64x2()
+
+	// Test validation - return non-zero on unexpected results
+	fail := false
+
+	// Test Float32x4
+	a32 := [4]float32{1.0, 2.0, 3.0, 4.0}
+	b32 := [4]float32{5.0, 6.0, 7.0, 8.0}
+	va32 := simd.LoadFloat32x4(&a32)
+	vb32 := simd.LoadFloat32x4(&b32)
+	result32 := va32.Add(vb32)
+	var output32 [4]float32
+	result32.Store(&output32)
+
+	expected32 := [4]float32{6.0, 8.0, 10.0, 12.0}
+	for i := range output32 {
+		if output32[i] != expected32[i] {
+			fmt.Printf("Float32x4 test failed: expected %v, got %v\n", expected32, output32)
+			fail = true
+			break
+		}
+	}
+
+	// Test Float64x2
+	a64 := [2]float64{10.5, 20.5}
+	b64 := [2]float64{2.0, 4.0}
+	va64 := simd.LoadFloat64x2(&a64)
+	vb64 := simd.LoadFloat64x2(&b64)
+	result64 := va64.Add(vb64)
+	var output64 [2]float64
+	result64.Store(&output64)
+
+	expected64 := [2]float64{12.5, 24.5}
+	for i := range output64 {
+		if output64[i] != expected64[i] {
+			fmt.Printf("Float64x2 test failed: expected %v, got %v\n", expected64, output64)
+			fail = true
+			break
+		}
+	}
+
+	// Test Int32x4
+	a_i32 := [4]int32{10, 20, 30, 40}
+	b_i32 := [4]int32{5, 6, 7, 8}
+	va_i32 := simd.LoadInt32x4(&a_i32)
+	vb_i32 := simd.LoadInt32x4(&b_i32)
+	result_i32 := va_i32.Add(vb_i32)
+	var output_i32 [4]int32
+	result_i32.Store(&output_i32)
+
+	expected_i32 := [4]int32{15, 26, 37, 48}
+	for i := range output_i32 {
+		if output_i32[i] != expected_i32[i] {
+			fmt.Printf("Int32x4 test failed: expected %v, got %v\n", expected_i32, output_i32)
+			fail = true
+			break
+		}
+	}
+
+	// Test Int64x2
+	a_i64 := [2]int64{100, 200}
+	b_i64 := [2]int64{50, 75}
+	va_i64 := simd.LoadInt64x2(&a_i64)
+	vb_i64 := simd.LoadInt64x2(&b_i64)
+	result_i64 := va_i64.Add(vb_i64)
+	var output_i64 [2]int64
+	result_i64.Store(&output_i64)
+
+	expected_i64 := [2]int64{150, 275}
+	for i := range output_i64 {
+		if output_i64[i] != expected_i64[i] {
+			fmt.Printf("Int64x2 test failed: expected %v, got %v\n", expected_i64, output_i64)
+			fail = true
+			break
+		}
+	}
+
+	if fail {
+		os.Exit(1)
+	}
+}
diff --git a/src/simd/types_arm64.go b/src/simd/types_arm64.go
new file mode 100644
index 0000000..5f6a398
--- /dev/null
+++ b/src/simd/types_arm64.go
@@ -0,0 +1,86 @@
+// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+// v128 is a tag type that tells the compiler that the enclosing struct is really a 128-bit SIMD value.
+type v128 struct {
+	_128 struct{}
+}
+
+// Float32x4 is a 128-bit SIMD vector of 4 float32 elements; its v128 field tags it as SIMD for the compiler.
+type Float32x4 struct {
+	float32x4 v128
+	vals      [4]float32
+}
+
+// Len returns the number of elements in a Float32x4, which is always 4.
+func (x Float32x4) Len() int { return 4 }
+
+// LoadFloat32x4 loads a Float32x4 from the [4]float32 array that y points to.
+//
+//go:noescape
+func LoadFloat32x4(y *[4]float32) Float32x4
+
+// Store stores the elements of x into the [4]float32 array that y points to.
+//
+//go:noescape
+func (x Float32x4) Store(y *[4]float32)
+
+// Float64x2 is a 128-bit SIMD vector of 2 float64 elements; its v128 field tags it as SIMD for the compiler.
+type Float64x2 struct {
+	float64x2 v128
+	vals      [2]float64
+}
+
+// Len returns the number of elements in a Float64x2, which is always 2.
+func (x Float64x2) Len() int { return 2 }
+
+// LoadFloat64x2 loads a Float64x2 from the [2]float64 array that y points to.
+//
+//go:noescape
+func LoadFloat64x2(y *[2]float64) Float64x2
+
+// Store stores the elements of x into the [2]float64 array that y points to.
+//
+//go:noescape
+func (x Float64x2) Store(y *[2]float64)
+
+// Int32x4 is a 128-bit SIMD vector of 4 int32 elements; its v128 field tags it as SIMD for the compiler.
+type Int32x4 struct {
+	int32x4 v128
+	vals    [4]int32
+}
+
+// Len returns the number of elements in an Int32x4, which is always 4.
+func (x Int32x4) Len() int { return 4 }
+
+// LoadInt32x4 loads an Int32x4 from the [4]int32 array that y points to.
+//
+//go:noescape
+func LoadInt32x4(y *[4]int32) Int32x4
+
+// Store stores the elements of x into the [4]int32 array that y points to.
+//
+//go:noescape
+func (x Int32x4) Store(y *[4]int32)
+
+// Int64x2 is a 128-bit SIMD vector of 2 int64 elements; its v128 field tags it as SIMD for the compiler.
+type Int64x2 struct {
+	int64x2 v128
+	vals    [2]int64
+}
+
+// Len returns the number of elements in an Int64x2, which is always 2.
+func (x Int64x2) Len() int { return 2 }
+
+// LoadInt64x2 loads an Int64x2 from the [2]int64 array that y points to.
+//
+//go:noescape
+func LoadInt64x2(y *[2]int64) Int64x2
+
+// Store stores the elements of x into the [2]int64 array that y points to.
+//
+//go:noescape
+func (x Int64x2) Store(y *[2]int64)

Change information

Files:

A src/cmd/compile/internal/arm64/simdssa.go
M src/cmd/compile/internal/arm64/ssa.go
M src/cmd/compile/internal/ssa/_gen/ARM64.rules
M src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
A src/cmd/compile/internal/ssa/_gen/simdARM64.rules
A src/cmd/compile/internal/ssa/_gen/simdARM64ops.go
M src/cmd/compile/internal/ssa/opGen.go
M src/cmd/compile/internal/ssa/rewriteARM64.go
M src/cmd/compile/internal/ssagen/simdintrinsics.go
A src/simd/_gen/simdgen/arm64.neon.yaml.toy
M src/simd/_gen/simdgen/gen_simdIntrinsics.go
M src/simd/_gen/simdgen/gen_simdMachineOps.go
M src/simd/_gen/simdgen/gen_simdTypes.go
M src/simd/_gen/simdgen/gen_simdssa.go
M src/simd/_gen/simdgen/godefs.go
M src/simd/cpu.go
A src/simd/dummy_arm64.s
A src/simd/ops_arm64.go
A src/simd/testneon/sample.go
A src/simd/types_arm64.go

Change size: L

Delta: 20 files changed, 774 insertions(+), 14 deletions(-)

Open in Gerrit

Related details

Attention set is empty

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Austin Clements (Gerrit)

unread,

Sep 9, 2025, 3:50:31 PM

to Alexander Musman, goph...@pubsubhelper.golang.org, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman

Austin Clements added 1 comment

Patchset-level comments

File-level comment, Patchset 1 (Latest):

Austin Clements . resolved

Thanks for working on this!

I just wanted to warn you that there's definitely some cleanup needed in simdgen before it's really ready for more than one architecture. I just want to make sure you don't dig yourself into a hole in simdgen.

A significant known issue is that some x86-isms have crept into the categories.yaml files. Those should really be shared across architectures. A lot of it's already clean, but at least the comparison operations definitely aren't and there are probably some others.

We'd also like to drive the instruction input to simdgen from the AARCHMRS (ARM Architecture Machine Readable Specification), just like we use XED for Intel.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Oct 11, 2025, 4:22:53 AM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #2 to this change.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Oct 11, 2025, 4:52:32 AM

to goph...@pubsubhelper.golang.org, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements

Alexander Musman added 1 comment

Patchset-level comments

File-level comment, Patchset 1:

Austin Clements . resolved

Thanks for working on this!
I just wanted to warn you that there's definitely some cleanup needed in simdgen before it's really ready for more than one architecture. I just want to make sure you don't dig yourself into a hole in simdgen.
A significant known issue is that some x86-isms have crept into the categories.yaml files. Those should really be shared across architectures. A lot of it's already clean, but at least the comparison operations definitely aren't and there are probably some others.
We'd also like to drive the instruction input to simdgen from the AARCHMRS (ARM Architecture Machine Readable Specification), just like we use XED for Intel.

Alexander Musman

Thanks for the heads up! For now, I'm focusing on exploring different ARM64 register arrangements and helpers for the ssa->Prog translation in arm64/ssa.go (just added in subsequent CLs), so I've deliberately skipped using the categories for the moment to concentrate on that. These changes mostly involve adding an Arrangement operation field and some other helpers (e.g. to allow a different encoding for some instructions), and representing them in simdgen.

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Feb 23, 2026, 7:03:44 AM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Austin Clements

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #3 to this change.

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Jonathan Swinney (Gerrit)

unread,

Mar 6, 2026, 3:37:11 PM

to Alexander Musman, goph...@pubsubhelper.golang.org, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman and Austin Clements

Jonathan Swinney added 1 comment

Patchset-level comments

File-level comment, Patchset 3 (Latest):

Jonathan Swinney . unresolved

Hi Alexander — thanks for all the work on this chain. I've been working on ARM64 SIMD support independently (on a local branch off dev.simd) and wanted to reach out about coordinating.

Re Austin's comment about x86-isms in the categories — I hit the same issues when mapping the API surface to ARM64 NEON. The main areas that will need attention are comparisons (constImm encodes x86-specific immediate predicates, and the signed/unsigned split works differently on ARM64), and mask representation (NEON masks are full vectors rather than compact bitmasks, which makes operations like ToBits/FromBits multi-instruction sequences — worth considering alongside SVE predicates for the mask abstraction design).

I've been testing your XML parser and it handles the NEON instruction set well. I'd like to help expand the ARM64 category coverage — I could contribute category files for BitwiseLogic, MinMax, Compares, and other operation groups in your arm64/ops/ format.

I also noticed that gen_simdMachineOps.go doesn't have a register constraint template for unary operations (v11 — 1 input, 1 output) on arm64, which blocks categories like IntOnlyArith (Abs, Neg, CLZ, CNT) and FPonlyArith (Sqrt, Abs, Neg). I'd be happy to work on adding that support as well.

Would it be useful to coordinate?

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements

Submit Requirements:

Code-Review

No-Unresolved-Comments

Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Mar 7, 2026, 1:21:05 PM

to goph...@pubsubhelper.golang.org, David Chase, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman added 1 comment

Patchset-level comments

File-level comment, Patchset 3 (Latest):

Jonathan Swinney . unresolved

Hi Alexander — thanks for all the work on this chain. I've been working on ARM64 SIMD support independently (on a local branch off dev.simd) and wanted to reach out about coordinating.
Re Austin's comment about x86-isms in the categories — I hit the same issues when mapping the API surface to ARM64 NEON. The main areas that will need attention are comparisons (constImm encodes x86-specific immediate predicates, and the signed/unsigned split works differently on ARM64), and mask representation (NEON masks are full vectors rather than compact bitmasks, which makes operations like ToBits/FromBits multi-instruction sequences — worth considering alongside SVE predicates for the mask abstraction design).
I've been testing your XML parser and it handles the NEON instruction set well. I'd like to help expand the ARM64 category coverage — I could contribute category files for BitwiseLogic, MinMax, Compares, and other operation groups in your arm64/ops/ format.
I also noticed that gen_simdMachineOps.go doesn't have a register constraint template for unary operations (v11 — 1 input, 1 output) on arm64, which blocks categories like IntOnlyArith (Abs, Neg, CLZ, CNT) and FPonlyArith (Sqrt, Abs, Neg). I'd be happy to work on adding that support as well.
Would it be useful to coordinate?

Alexander Musman

Thanks — yes, let's coordinate! A few things to be aware of:

Most of this chain is not yet reviewed, so the base may change. This CL itself seems mostly ready for review, but nothing below it is settled yet.

On my end, the short-term priority is a change to support bounded immediates (replacing the current up-to-255 encoding) — arm64 instructions won't assemble with out-of-range immediates, and this blocks a lot of categories. After that I'm working on GetElem/SetElem — I plan to reuse some slice-parts code from @drc...@google.com 's WIP wasm SIMD support (CL 745080) to test arm64 GetElem/SetElem for integer and FP NEON vector types. That will be a follow-up CL.

Adding @drc...@google.com, @shaoj...@google.com — they'll need to review this work.

Re the v11 register constraint template — in the current version of this CL it should be enough to add entries into `arch.go`; `arm64/ssa.go` already has a `simdV11` function that gets used underneath. Category file contributions for BitwiseLogic, MinMax, Compares etc. in arm64/ops/ format also sound great — just expect some churn underneath until the earlier CLs are reviewed.

On the comparison/mask design — for NEON we should try to stay close to wasm SIMD128 where possible, since wasm comparisons also use full-vector masks (all-ones per lane for true, all-zeros for false) in regular v128 registers, same as NEON. That said, the broader mask abstraction (especially looking ahead to SVE predicates) will still need discussion.

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Jonathan Swinney (Gerrit)

unread,

Mar 9, 2026, 6:04:18 PM

to Alexander Musman, goph...@pubsubhelper.golang.org, David Chase, Junyang Shao, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, David Chase and Junyang Shao

Jonathan Swinney added 1 comment

Patchset-level comments

File-level comment, Patchset 3 (Latest):

Jonathan Swinney . unresolved

Hi Alexander — thanks for all the work on this chain. I've been working on ARM64 SIMD support independently (on a local branch off dev.simd) and wanted to reach out about coordinating.
Re Austin's comment about x86-isms in the categories — I hit the same issues when mapping the API surface to ARM64 NEON. The main areas that will need attention are comparisons (constImm encodes x86-specific immediate predicates, and the signed/unsigned split works differently on ARM64), and mask representation (NEON masks are full vectors rather than compact bitmasks, which makes operations like ToBits/FromBits multi-instruction sequences — worth considering alongside SVE predicates for the mask abstraction design).
I've been testing your XML parser and it handles the NEON instruction set well. I'd like to help expand the ARM64 category coverage — I could contribute category files for BitwiseLogic, MinMax, Compares, and other operation groups in your arm64/ops/ format.
I also noticed that gen_simdMachineOps.go doesn't have a register constraint template for unary operations (v11 — 1 input, 1 output) on arm64, which blocks categories like IntOnlyArith (Abs, Neg, CLZ, CNT) and FPonlyArith (Sqrt, Abs, Neg). I'd be happy to work on adding that support as well.
Would it be useful to coordinate?

Alexander Musman

Thanks — yes, let's coordinate! A few things to be aware of:
Most of this chain is not yet reviewed, so the base may change. This CL itself seems mostly ready for review, but nothing below it is settled yet.
On my end, the short-term priority is a change to support bounded immediates (replacing the current up-to-255 encoding) — arm64 instructions won't assemble with out-of-range immediates, and this blocks a lot of categories. After that I'm working on GetElem/SetElem — I plan to reuse some slice-parts code from @drc...@google.com 's WIP wasm SIMD support (CL 745080) to test arm64 GetElem/SetElem for integer and FP NEON vector types. That will be a follow-up CL.
Adding @drc...@google.com, @shaoj...@google.com — they'll need to review this work.
Re the v11 register constraint template — in the current version of this CL it should be enough to add entries into `arch.go`; `arm64/ssa.go` already has a `simdV11` function that gets used underneath. Category file contributions for BitwiseLogic, MinMax, Compares etc. in arm64/ops/ format also sound great — just expect some churn underneath until the earlier CLs are reviewed.
On the comparison/mask design — for NEON we should try to stay close to wasm SIMD128 where possible, since wasm comparisons also use full-vector masks (all-ones per lane for true, all-zeros for false) in regular v128 registers, same as NEON. That said, the broader mask abstraction (especially looking ahead to SVE predicates) will still need discussion.

Jonathan Swinney

Thanks for the quick response! I will finish preparing a fix for the v11 issue and submit a CL in the next day or two.

I also have some work to address the concerns that Austin raised. That work is independent so I'll try to get that posted soon too, so we can begin getting things reviewed in parallel.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
David Chase
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Mar 27, 2026, 4:50:39 PM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, David Chase and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #5 to this change.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
David Chase
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Apr 15, 2026, 9:52:16 AM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, David Chase and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #6 to this change.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
David Chase
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

Apr 26, 2026, 8:13:47 AM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, David Chase and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #7 to this change.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
David Chase
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Cherry Mui (Gerrit)

unread,

Apr 28, 2026, 2:40:48 PM

to Alexander Musman, goph...@pubsubhelper.golang.org, David Chase, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, David Chase and Junyang Shao

Cherry Mui added 2 comments

Patchset-level comments

File-level comment, Patchset 7 (Latest):

Cherry Mui . resolved

Thanks for the CL.

File src/simd/archsimd/_gen/simdgen/arm64/ops/AddSub/categories.yaml

File-level comment, Patchset 7 (Latest):

Cherry Mui . unresolved

I don't think we want to create an "arm64" directory. This would create asymmetry between amd64 and arm64.

Also, some of the existing YAML files are meant to be portable, used across architectures. The "categories.yaml" are one of them. (They may list operations that do not unify on some architectures, and that is fine.) So we don't want to create arm64/.../categories.yaml files.

go.yaml's are currently arch-specific. We could consider renaming them go_amd64.yaml, and add go_arm64.yaml. Or we could consider adding "goarch: amd64" entries to the current go.yaml, and then we can add arm64 entries to the same file.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
David Chase
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 1, 2026, 4:21:30 PM

to goph...@pubsubhelper.golang.org, Cherry Mui, David Chase, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman added 2 comments

Patchset-level comments

File-level comment, Patchset 3:

Jonathan Swinney . resolved

Hi Alexander — thanks for all the work on this chain. I've been working on ARM64 SIMD support independently (on a local branch off dev.simd) and wanted to reach out about coordinating.
Re Austin's comment about x86-isms in the categories — I hit the same issues when mapping the API surface to ARM64 NEON. The main areas that will need attention are comparisons (constImm encodes x86-specific immediate predicates, and the signed/unsigned split works differently on ARM64), and mask representation (NEON masks are full vectors rather than compact bitmasks, which makes operations like ToBits/FromBits multi-instruction sequences — worth considering alongside SVE predicates for the mask abstraction design).
I've been testing your XML parser and it handles the NEON instruction set well. I'd like to help expand the ARM64 category coverage — I could contribute category files for BitwiseLogic, MinMax, Compares, and other operation groups in your arm64/ops/ format.
I also noticed that gen_simdMachineOps.go doesn't have a register constraint template for unary operations (v11 — 1 input, 1 output) on arm64, which blocks categories like IntOnlyArith (Abs, Neg, CLZ, CNT) and FPonlyArith (Sqrt, Abs, Neg). I'd be happy to work on adding that support as well.
Would it be useful to coordinate?

Alexander Musman

Thanks — yes, let's coordinate! A few things to be aware of:
Most of this chain is not yet reviewed, so the base may change. This CL itself seems mostly ready for review, but nothing below it is settled yet.
On my end, the short-term priority is a change to support bounded immediates (replacing the current up-to-255 encoding) — arm64 instructions won't assemble with out-of-range immediates, and this blocks a lot of categories. After that I'm working on GetElem/SetElem — I plan to reuse some slice-parts code from @drc...@google.com 's WIP wasm SIMD support (CL 745080) to test arm64 GetElem/SetElem for integer and FP NEON vector types. That will be a follow-up CL.
Adding @drc...@google.com, @shaoj...@google.com — they'll need to review this work.
Re the v11 register constraint template — in the current version of this CL it should be enough to add entries into `arch.go`; `arm64/ssa.go` already has a `simdV11` function that gets used underneath. Category file contributions for BitwiseLogic, MinMax, Compares etc. in arm64/ops/ format also sound great — just expect some churn underneath until the earlier CLs are reviewed.
On the comparison/mask design — for NEON we should try to stay close to wasm SIMD128 where possible, since wasm comparisons also use full-vector masks (all-ones per lane for true, all-zeros for false) in regular v128 registers, same as NEON. That said, the broader mask abstraction (especially looking ahead to SVE predicates) will still need discussion.

Jonathan Swinney

Thanks for the quick response! I will finish preparing a fix for the v11 issue and submit a CL in the next day or two.
I also have some work to address the concerns that Austin raised. That work is independent so I'll try to get that posted soon too, so we can begin getting things reviewed in parallel.

Alexander Musman

Acknowledged

File src/simd/archsimd/_gen/simdgen/arm64/ops/AddSub/categories.yaml

File-level comment, Patchset 7:

Cherry Mui . resolved

I don't think we want to create an "arm64" directory. This would create asymmetry between amd64 and arm64.
Also, some of the existing YAML files are meant to be portable, used across architectures. The "categories.yaml" are one of them. (They may list operations that do not unify on some architectures, and that is fine.) So we don't want to create arm64/.../categories.yaml files.
go.yaml's are currently arch-specific. We could consider renaming them go_amd64.yaml, and add go_arm64.yaml. Or we could consider adding "goarch: amd64" entries to the current go.yaml, and then we can add arm64 entries to the same file.

Alexander Musman

Done in the next CL 773240. Thanks!

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements
Cherry Mui
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review

No-Unresolved-Comments

Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 4, 2026, 11:46:54 AM

to Alexander Musman, goph...@pubsubhelper.golang.org, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

David Chase voted Commit-Queue+1

Commit-Queue

+1

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 5, 2026, 1:40:26 PM (14 days ago)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #10 to this change.

Following approvals got outdated and were removed:

TryBots-Pass: LUCI-TryBot-Result+1 by golang...@luci-project-accounts.iam.gserviceaccount.com

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 6, 2026, 10:25:40 AM (13 days ago)

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

David Chase voted Commit-Queue+1

Commit-Queue

+1

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 6, 2026, 1:36:34 PM (13 days ago)

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

David Chase voted and added 1 comment

Votes added by David Chase

Code-Review	+2
Commit-Queue	+1

1 comment

Patchset-level comments

File-level comment, Patchset 10 (Latest):

David Chase . resolved

I'll kick the TryBots again, just to see if it works.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review

No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

satisfied_requirement

unsatisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 8, 2026, 11:16:26 AM (11 days ago)

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

David Chase voted and added 1 comment

Votes added by David Chase

TryBot-Bypass

+1

1 comment

Patchset-level comments

File-level comment, Patchset 10 (Latest):

David Chase . resolved

Bypassing the trybots because that failure is unrelated to this CL.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement

TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

satisfied_requirement

unsatisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 8, 2026, 1:14:11 PM (11 days ago)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #11 to this change.

Following approvals got outdated and were removed:

Code-Review: +2 by David Chase
TryBots-Pass: LUCI-TryBot-Result-1 by golang...@luci-project-accounts.iam.gserviceaccount.com, TryBot-Bypass+1 by David Chase

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui

David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review

No-Unresolved-Comments
Review-Enforcement

TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 8, 2026, 1:17:25 PM (11 days ago)

to goph...@pubsubhelper.golang.org, David Chase, golang...@luci-project-accounts.iam.gserviceaccount.com, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman voted Commit-Queue+1

Commit-Queue

+1

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements
Cherry Mui
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 9, 2026, 2:19:10 AM (10 days ago)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #12 to this change.

Following approvals got outdated and were removed:

TryBots-Pass: LUCI-TryBot-Result-1 by golang...@luci-project-accounts.iam.gserviceaccount.com

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman

Austin Clements
Cherry Mui
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

May 9, 2026, 2:56:27 AM (10 days ago) May 9

to goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, David Chase, Cherry Mui, Junyang Shao, Jonathan Swinney, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman voted and added 1 comment

Votes added by Alexander Musman

Commit-Queue

+1

1 comment

Patchset-level comments

File-level comment, Patchset 12 (Latest):

Alexander Musman . resolved

I've rebased and resolved conflicts on this CL and the next one, CL 773240 (simd: reorganize simdgen YAML configs by architecture) — both should be ready to merge now (would appreciate a quick look at the conflict resolution). The rest of the chain is in progress; I'll push updates as I work through them.

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements
Cherry Mui
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Jonathan Swinney (Gerrit)

unread,

May 12, 2026, 12:04:40 PM (7 days ago) May 12

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, David Chase, Cherry Mui, Junyang Shao, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, David Chase and Junyang Shao

Jonathan Swinney voted Code-Review+1

Code-Review

+1

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman

Austin Clements
Cherry Mui
David Chase

Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 18, 2026, 1:35:03 PM (19 hours ago) May 18

to Alexander Musman, goph...@pubsubhelper.golang.org, Jonathan Swinney, golang...@luci-project-accounts.iam.gserviceaccount.com, Cherry Mui, Junyang Shao, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui and Junyang Shao

David Chase voted and added 1 comment

Votes added by David Chase

Commit-Queue

+1

1 comment

Patchset-level comments

File-level comment, Patchset 13 (Latest):

David Chase . resolved

Thank you for the rebase, I am going to have a shot at getting this all in. I'm a little worried about recent churn in simd for amd64, I guess we'll see.

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui

Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 18, 2026, 2:17:44 PM (18 hours ago) May 18

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Jonathan Swinney, Cherry Mui, Junyang Shao, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui and Junyang Shao

David Chase voted and added 1 comment

Votes added by David Chase

Auto-Submit	+1
Code-Review	+2

1 comment

Patchset-level comments

File-level comment, Patchset 13 (Latest):

David Chase . resolved

LGTM

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui
Junyang Shao

Submit Requirements:

Code-Review

No-Unresolved-Comments
Review-Enforcement

TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

satisfied_requirement

unsatisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

May 18, 2026, 5:13:13 PM (15 hours ago) May 18

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Jonathan Swinney, Cherry Mui, Junyang Shao, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui and Junyang Shao

David Chase added 1 comment

Patchset-level comments

File-level comment, Patchset 13 (Latest):

David Chase . resolved

Sorry about the conflict, it's in gen_simdIntrinsics.go, roughly
`1,$s/Foo/TypeDotMethod/g`

satisfied_requirement

unsatisfied_requirement

open

diffy

Junyang Shao (Gerrit)

unread,

May 18, 2026, 5:16:32 PM (15 hours ago) May 18

to Alexander Musman, goph...@pubsubhelper.golang.org, David Chase, golang...@luci-project-accounts.iam.gserviceaccount.com, Jonathan Swinney, Cherry Mui, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements and Cherry Mui

Junyang Shao voted Code-Review+1

Code-Review

+1

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui

Submit Requirements:

Code-Review
No-Unresolved-Comments

Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

2:25 AM (6 hours ago) 2:25 AM

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman uploaded new patchset

Alexander Musman uploaded patch set #14 to this change.

Following approvals got outdated and were removed:

Code-Review: +1 by Junyang Shao, +2 by David Chase, +1 by Jonathan Swinney
TryBots-Pass: LUCI-TryBot-Result+1 by golang...@luci-project-accounts.iam.gserviceaccount.com

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui

David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments

Review-Enforcement

TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

Alexander Musman (Gerrit)

unread,

2:27 AM (6 hours ago) 2:27 AM

to goph...@pubsubhelper.golang.org, Junyang Shao, David Chase, golang...@luci-project-accounts.iam.gserviceaccount.com, Jonathan Swinney, Cherry Mui, Austin Clements, golang-co...@googlegroups.com

Attention needed from Austin Clements, Cherry Mui, David Chase, Jonathan Swinney and Junyang Shao

Alexander Musman voted Commit-Queue+1

Commit-Queue

+1

Open in Gerrit

Related details

Attention is currently required from:

Austin Clements
Cherry Mui
David Chase
Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review
No-Unresolved-Comments
Review-Enforcement
TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

unsatisfied_requirement

satisfied_requirement

open

diffy

David Chase (Gerrit)

unread,

7:18 AM (1 hour ago) 7:18 AM

to Alexander Musman, goph...@pubsubhelper.golang.org, golang...@luci-project-accounts.iam.gserviceaccount.com, Junyang Shao, Jonathan Swinney, Cherry Mui, Austin Clements, golang-co...@googlegroups.com

Attention needed from Alexander Musman, Austin Clements, Cherry Mui, Jonathan Swinney and Junyang Shao

David Chase voted Code-Review+2

Code-Review

+2

Open in Gerrit

Related details

Attention is currently required from:

Alexander Musman
Austin Clements
Cherry Mui

Jonathan Swinney
Junyang Shao

Submit Requirements:

Code-Review

No-Unresolved-Comments
Review-Enforcement

TryBots-Pass

Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings.

Gerrit

satisfied_requirement

unsatisfied_requirement

open

diffy

Reply all

Reply to author

Forward

[go/dev.simd] [dev.simd] simd: arm64 neon toy example

Alexander Musman (Gerrit)

Alexander Musman has uploaded the change for review

Commit message

Change diff

Change information

Related details

Austin Clements (Gerrit)

Austin Clements added 1 comment

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Alexander Musman (Gerrit)

Alexander Musman added 1 comment

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Jonathan Swinney (Gerrit)

Jonathan Swinney added 1 comment

Related details

Alexander Musman (Gerrit)

Alexander Musman added 1 comment

Related details

Jonathan Swinney (Gerrit)

Jonathan Swinney added 1 comment

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Cherry Mui (Gerrit)

Cherry Mui added 2 comments

Related details

Alexander Musman (Gerrit)

Alexander Musman added 2 comments

Related details

David Chase (Gerrit)

David Chase voted Commit-Queue+1

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

David Chase (Gerrit)

David Chase voted Commit-Queue+1

Related details

David Chase (Gerrit)

David Chase voted and added 1 comment

Votes added by David Chase

1 comment

Related details

David Chase (Gerrit)

David Chase voted and added 1 comment

Votes added by David Chase

1 comment

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Alexander Musman (Gerrit)

Alexander Musman voted Commit-Queue+1

Related details

Alexander Musman (Gerrit)

Alexander Musman uploaded new patchset

Related details

Alexander Musman (Gerrit)

Alexander Musman voted and added 1 comment

Votes added by Alexander Musman

1 comment

Related details

Jonathan Swinney (Gerrit)

Jonathan Swinney voted Code-Review+1

Related details

David Chase (Gerrit)