[go] simd, cmd/compile: turns out mul-add is not supported on some WASM

1 view
Skip to first unread message

David Chase (Gerrit)

unread,
Jun 9, 2026, 1:13:22 PM (16 hours ago) Jun 9
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

David Chase has uploaded the change for review

Commit message

simd, cmd/compile: turns out mul-add is not supported on some WASM runtimes

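It is not clear where the relaxed mul-add instructions are supported,
but they are clearly not supported on some popular runtimes.
Remove the wasm intrinsics and emulate MulAdd as x.Mul(y).Add(z) instead.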
Change-Id: I36f3dee206f974957526219ee6b264e69d3a9b69

Change diff

diff --git a/src/cmd/compile/internal/ssa/_gen/simdWasm.rules b/src/cmd/compile/internal/ssa/_gen/simdWasm.rules
index a315fde..0afcc84 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdWasm.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdWasm.rules
@@ -153,8 +153,6 @@
(MulInt64x2 ...) => (I64x2Mul ...)
(MulUint64x2 ...) => (I64x2Mul ...)
(MulFloat64x2 ...) => (F64x2Mul ...)
-(MulAddFloat32x4 ...) => (F32x4RelaxedMadd ...)
-(MulAddFloat64x2 ...) => (F64x2RelaxedMadd ...)
(MulWidenHiInt8x16 ...) => (I16x8ExtmulHighI8x16S ...)
(MulWidenHiUint8x16 ...) => (I16x8ExtmulHighI8x16U ...)
(MulWidenHiInt16x8 ...) => (I32x4ExtmulHighI16x8S ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdWasmops.go b/src/cmd/compile/internal/ssa/_gen/simdWasmops.go
index 28ac2a1..9a1356e 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdWasmops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdWasmops.go
@@ -14,13 +14,11 @@
{name: "F32x4Pmin", argLength: 2, reg: v21, asm: "F32x4Pmin", commutative: true, typ: "Vec128"},
{name: "F32x4RelaxedMax", argLength: 2, reg: v21, asm: "F32x4RelaxedMax", commutative: true, typ: "Vec128"},
{name: "F32x4RelaxedMin", argLength: 2, reg: v21, asm: "F32x4RelaxedMin", commutative: true, typ: "Vec128"},
- {name: "F32x4RelaxedNmadd", argLength: 3, reg: v31, asm: "F32x4RelaxedNmadd", typ: "Vec128"},
{name: "I64x2AllTrue", argLength: 1, reg: v11, asm: "I64x2AllTrue", typ: "Bool"},
{name: "F64x2Pmax", argLength: 2, reg: v21, asm: "F64x2Pmax", commutative: true, typ: "Vec128"},
{name: "F64x2Pmin", argLength: 2, reg: v21, asm: "F64x2Pmin", commutative: true, typ: "Vec128"},
{name: "F64x2RelaxedMax", argLength: 2, reg: v21, asm: "F64x2RelaxedMax", commutative: true, typ: "Vec128"},
{name: "F64x2RelaxedMin", argLength: 2, reg: v21, asm: "F64x2RelaxedMin", commutative: true, typ: "Vec128"},
- {name: "F64x2RelaxedNmadd", argLength: 3, reg: v31, asm: "F64x2RelaxedNmadd", typ: "Vec128"},
{name: "I8x16Abs", argLength: 1, reg: v11, asm: "I8x16Abs", typ: "Vec128"},
{name: "I16x8Abs", argLength: 1, reg: v11, asm: "I16x8Abs", typ: "Vec128"},
{name: "I32x4Abs", argLength: 1, reg: v11, asm: "I32x4Abs", typ: "Vec128"},
@@ -144,8 +142,6 @@
{name: "F32x4Mul", argLength: 2, reg: v21, asm: "F32x4Mul", commutative: true, typ: "Vec128"},
{name: "I64x2Mul", argLength: 2, reg: v21, asm: "I64x2Mul", commutative: true, typ: "Vec128"},
{name: "F64x2Mul", argLength: 2, reg: v21, asm: "F64x2Mul", commutative: true, typ: "Vec128"},
- {name: "F32x4RelaxedMadd", argLength: 3, reg: v31, asm: "F32x4RelaxedMadd", typ: "Vec128"},
- {name: "F64x2RelaxedMadd", argLength: 3, reg: v31, asm: "F64x2RelaxedMadd", typ: "Vec128"},
{name: "I16x8ExtmulHighI8x16S", argLength: 2, reg: v21, asm: "I16x8ExtmulHighI8x16S", commutative: true, typ: "Vec128"},
{name: "I16x8ExtmulHighI8x16U", argLength: 2, reg: v21, asm: "I16x8ExtmulHighI8x16U", commutative: true, typ: "Vec128"},
{name: "I32x4ExtmulHighI16x8S", argLength: 2, reg: v21, asm: "I32x4ExtmulHighI16x8S", commutative: true, typ: "Vec128"},
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index b0791e5..829d6b4 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -712,10 +712,10 @@
{name: "MulAddEvenSubOddFloat64x2", argLength: 3}, // ARCH:amd64
{name: "MulAddEvenSubOddFloat64x4", argLength: 3}, // ARCH:amd64
{name: "MulAddEvenSubOddFloat64x8", argLength: 3}, // ARCH:amd64
- {name: "MulAddFloat32x4", argLength: 3}, // ARCH:amd64,arm64,wasm
+ {name: "MulAddFloat32x4", argLength: 3}, // ARCH:amd64,arm64
{name: "MulAddFloat32x8", argLength: 3}, // ARCH:amd64
{name: "MulAddFloat32x16", argLength: 3}, // ARCH:amd64
- {name: "MulAddFloat64x2", argLength: 3}, // ARCH:amd64,arm64,wasm
+ {name: "MulAddFloat64x2", argLength: 3}, // ARCH:amd64,arm64
{name: "MulAddFloat64x4", argLength: 3}, // ARCH:amd64
{name: "MulAddFloat64x8", argLength: 3}, // ARCH:amd64
{name: "MulAddInt8x16", argLength: 3}, // ARCH:arm64
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 6a0fb4f..f79a83c 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -6567,13 +6567,11 @@
OpWasmF32x4Pmin
OpWasmF32x4RelaxedMax
OpWasmF32x4RelaxedMin
- OpWasmF32x4RelaxedNmadd
OpWasmI64x2AllTrue
OpWasmF64x2Pmax
OpWasmF64x2Pmin
OpWasmF64x2RelaxedMax
OpWasmF64x2RelaxedMin
- OpWasmF64x2RelaxedNmadd
OpWasmI8x16Abs
OpWasmI16x8Abs
OpWasmI32x4Abs
@@ -6697,8 +6695,6 @@
OpWasmF32x4Mul
OpWasmI64x2Mul
OpWasmF64x2Mul
- OpWasmF32x4RelaxedMadd
- OpWasmF64x2RelaxedMadd
OpWasmI16x8ExtmulHighI8x16S
OpWasmI16x8ExtmulHighI8x16U
OpWasmI32x4ExtmulHighI16x8S
@@ -101989,21 +101985,6 @@
},
},
{
- name: "F32x4RelaxedNmadd",
- argLen: 3,
- asm: wasm.AF32x4RelaxedNmadd,
- reg: regInfo{
- inputs: []inputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {1, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {2, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- outputs: []outputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- },
- },
- {
name: "I64x2AllTrue",
argLen: 1,
asm: wasm.AI64x2AllTrue,
@@ -102077,21 +102058,6 @@
},
},
{
- name: "F64x2RelaxedNmadd",
- argLen: 3,
- asm: wasm.AF64x2RelaxedNmadd,
- reg: regInfo{
- inputs: []inputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {1, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {2, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- outputs: []outputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- },
- },
- {
name: "I8x16Abs",
argLen: 1,
asm: wasm.AI8x16Abs,
@@ -103821,36 +103787,6 @@
},
},
{
- name: "F32x4RelaxedMadd",
- argLen: 3,
- asm: wasm.AF32x4RelaxedMadd,
- reg: regInfo{
- inputs: []inputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {1, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {2, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- outputs: []outputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- },
- },
- {
- name: "F64x2RelaxedMadd",
- argLen: 3,
- asm: wasm.AF64x2RelaxedMadd,
- reg: regInfo{
- inputs: []inputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {1, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- {2, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- outputs: []outputInfo{
- {0, regMask{v1: 18446462598732840960, v2: 0}}, // V0 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15
- },
- },
- },
- {
name: "I16x8ExtmulHighI8x16S",
argLen: 2,
commutative: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteWasm.go b/src/cmd/compile/internal/ssa/rewriteWasm.go
index 3011859..eb46c55 100644
--- a/src/cmd/compile/internal/ssa/rewriteWasm.go
+++ b/src/cmd/compile/internal/ssa/rewriteWasm.go
@@ -806,12 +806,6 @@
case OpMul8:
v.Op = OpWasmI64Mul
return true
- case OpMulAddFloat32x4:
- v.Op = OpWasmF32x4RelaxedMadd
- return true
- case OpMulAddFloat64x2:
- v.Op = OpWasmF64x2RelaxedMadd
- return true
case OpMulFloat32x4:
v.Op = OpWasmF32x4Mul
return true
diff --git a/src/cmd/compile/internal/ssagen/simdWasmintrinsics.go b/src/cmd/compile/internal/ssagen/simdWasmintrinsics.go
index 622f922..f50d947 100644
--- a/src/cmd/compile/internal/ssagen/simdWasmintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdWasmintrinsics.go
@@ -229,8 +229,6 @@
addWasmSIMD("simd/archsimd", "Int64x2.Mul", makeSimdOp2(ssa.OpMulInt64x2))
addWasmSIMD("simd/archsimd", "Uint64x2.Mul", makeSimdOp2(ssa.OpMulUint64x2))
addWasmSIMD("simd/archsimd", "Float64x2.Mul", makeSimdOp2(ssa.OpMulFloat64x2))
- addWasmSIMD("simd/archsimd", "Float32x4.MulAdd", makeSimdOp3(ssa.OpMulAddFloat32x4))
- addWasmSIMD("simd/archsimd", "Float64x2.MulAdd", makeSimdOp3(ssa.OpMulAddFloat64x2))
addWasmSIMD("simd/archsimd", "Int8x16.MulWidenHi", makeSimdOp2(ssa.OpMulWidenHiInt8x16))
addWasmSIMD("simd/archsimd", "Uint8x16.MulWidenHi", makeSimdOp2(ssa.OpMulWidenHiUint8x16))
addWasmSIMD("simd/archsimd", "Int16x8.MulWidenHi", makeSimdOp2(ssa.OpMulWidenHiInt16x8))
diff --git a/src/cmd/compile/internal/wasm/simdssa.go b/src/cmd/compile/internal/wasm/simdssa.go
index cbbcf32..2d28657 100644
--- a/src/cmd/compile/internal/wasm/simdssa.go
+++ b/src/cmd/compile/internal/wasm/simdssa.go
@@ -119,8 +119,7 @@
getValue128(s, v.Args[0])
getValue32(s, v.Args[1])
s.Prog(v.Op.Asm())
- case ssa.OpWasmF32x4RelaxedNmadd, ssa.OpWasmF64x2RelaxedNmadd, ssa.OpWasmV128Bitselect,
- ssa.OpWasmF32x4RelaxedMadd, ssa.OpWasmF64x2RelaxedMadd:
+ case ssa.OpWasmV128Bitselect:
getValue128(s, v.Args[0])
getValue128(s, v.Args[1])
getValue128(s, v.Args[2])
diff --git a/src/simd/archsimd/_gen/wasmgen/main.go b/src/simd/archsimd/_gen/wasmgen/main.go
index 4b3bdea..90c783c 100644
--- a/src/simd/archsimd/_gen/wasmgen/main.go
+++ b/src/simd/archsimd/_gen/wasmgen/main.go
@@ -741,7 +741,7 @@
"ge_u": "GreaterEqual",
"lt_u": "Less",
"gt_u": "Greater",
- "relaxed_madd": "MulAdd",
+ "relaxed_madd": "-",
"shl": "ShiftAllLeft",

"extract_lane": "GetElem",
@@ -887,7 +887,7 @@

addWasmOps(floats, f_2, 2, binShape)

- addWasmOps(floats, f_3, 3, nil)
+ // addWasmOps(floats, f_3, 3, nil) // relaxed_madd does not work
// addWasmOps(ints, i_3, 3, nil)
addWasmOps(ints, i_t, 1, isTest)

diff --git a/src/simd/archsimd/ops_emulated_wasm.go b/src/simd/archsimd/ops_emulated_wasm.go
index 46180f1..b8f0552 100644
--- a/src/simd/archsimd/ops_emulated_wasm.go
+++ b/src/simd/archsimd/ops_emulated_wasm.go
@@ -163,6 +163,16 @@
return x.BitsToInt64().OnesCount().ToBits()
}

+// MulAdd returns elementwise x * y + z.
+func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4 {
+ return x.Mul(y).Add(z)
+}
+
+// MulAdd returns elementwise x * y + z.
+func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2 {
+ return x.Mul(y).Add(z)
+}
+
// CarrylessMultiplyEven computes the carryless
// multiplications of selected even halves of the elements of x and y.
//
diff --git a/src/simd/archsimd/ops_wasm.go b/src/simd/archsimd/ops_wasm.go
index 2839764..962c648 100644
--- a/src/simd/archsimd/ops_wasm.go
+++ b/src/simd/archsimd/ops_wasm.go
@@ -805,16 +805,6 @@
// Asm: F64x2Mul
func (x Float64x2) Mul(y Float64x2) Float64x2

-// MulAdd returns the elementwise multiply-add of x, y, and z.
-//
-// Asm: F32x4RelaxedMadd
-func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
-
-// MulAdd returns the elementwise multiply-add of x, y, and z.
-//
-// Asm: F64x2RelaxedMadd
-func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
-
// MulWidenHi returns the doubled-width product of respective elements of the upper halves of x and y.
//
// Result[i] = x[i+8] * y[i+8], for 0 <= i < 8 == |x|/2.

Change information

Files:
  • M src/cmd/compile/internal/ssa/_gen/simdWasm.rules
  • M src/cmd/compile/internal/ssa/_gen/simdWasmops.go
  • M src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
  • M src/cmd/compile/internal/ssa/opGen.go
  • M src/cmd/compile/internal/ssa/rewriteWasm.go
  • M src/cmd/compile/internal/ssagen/simdWasmintrinsics.go
  • M src/cmd/compile/internal/wasm/simdssa.go
  • M src/simd/archsimd/_gen/wasmgen/main.go
  • M src/simd/archsimd/ops_emulated_wasm.go
  • M src/simd/archsimd/ops_wasm.go
Change size: M
Delta: 10 files changed, 15 insertions(+), 94 deletions(-)
Open in Gerrit

Related details

Attention set is empty
Submit Requirements:
  • requirement is not satisfied: Code-Review
  • requirement satisfied: No-Unresolved-Comments
  • requirement is not satisfied: Review-Enforcement
  • requirement is not satisfied: TryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newchange
Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I36f3dee206f974957526219ee6b264e69d3a9b69
Gerrit-Change-Number: 788841
Gerrit-PatchSet: 1
Gerrit-Owner: David Chase <drc...@google.com>
Gerrit-Reviewer: David Chase <drc...@google.com>
unsatisfied_requirement
satisfied_requirement
open
diffy

David Chase (Gerrit)

unread,
Jun 9, 2026, 5:23:11 PM (12 hours ago) Jun 9
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from David Chase, Keith Randall, Martin Möhrmann and Matthew Dempsky

David Chase uploaded new patchset

David Chase uploaded patch set #2 to this change.
Following approvals got outdated and were removed:
Open in Gerrit

Related details

Attention is currently required from:
  • David Chase
  • Keith Randall
  • Martin Möhrmann
  • Matthew Dempsky
Submit Requirements:
  • requirement is not satisfied: Code-Review
  • requirement satisfied: No-Unresolved-Comments
  • requirement is not satisfied: Review-Enforcement
  • requirement is not satisfied: TryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I36f3dee206f974957526219ee6b264e69d3a9b69
Gerrit-Change-Number: 788841
Gerrit-PatchSet: 2
Gerrit-Owner: David Chase <drc...@google.com>
Gerrit-Reviewer: David Chase <drc...@google.com>
Gerrit-Reviewer: Keith Randall <k...@golang.org>
Gerrit-Reviewer: Martin Möhrmann <moeh...@google.com>
Gerrit-Reviewer: Matthew Dempsky <mat...@go.dev>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Keith Randall <k...@golang.org>
Gerrit-Attention: David Chase <drc...@google.com>
Gerrit-Attention: Martin Möhrmann <moeh...@google.com>
Gerrit-Attention: Matthew Dempsky <mat...@go.dev>
unsatisfied_requirement
satisfied_requirement
open
diffy
Reply all
Reply to author
Forward
0 new messages