diff --git a/src/cmd/asm/internal/asm/testdata/arm64enc.s b/src/cmd/asm/internal/asm/testdata/arm64enc.s
index b74ddfe..32edaa2 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64enc.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64enc.s
@@ -576,11 +576,11 @@
VFMINP V10.S2, F20 // 54f9b07e
VFMINP V1.D2, V10.D2, V3.D2 // 43f5e16e
VFMINV V11.S4, F9 // 69f9b06e
- //TODO VFMLA V6.S[0], F2, F14 // 4e10865f
- //TODO VFMLA V28.S[2], V2.S2, V30.S2 // 5e189c0f
+ VFMLA V6.S[0], F2, F14 // 4e10865f
+ VFMLA V28.S[2], V2.S2, V30.S2 // 5e189c0f
VFMLA V29.S2, V20.S2, V14.S2 // 8ece3d0e
- //TODO VFMLS V24.D[1], F3, F17 // 7158d85f
- //TODO VFMLS V10.S[0], V11.S2, V10.S2 // 6a518a0f
+ VFMLS V24.D[1], F3, F17 // 7158d85f
+ VFMLS V10.S[0], V11.S2, V10.S2 // 6a518a0f
VFMLS V29.S2, V27.S2, V17.S2 // 71cfbd0e
//TODO FMOVS $(-1.625), F13 // 0d503f1e
//TODO FMOVD $12.5, F30 // 1e30651e
@@ -594,8 +594,8 @@
//TODO VFMOV $3.125, V8.D2 // 28f5006f
FMSUBS F13, F21, F13, F19 // b3d50d1f
FMSUBD F11, F7, F15, F31 // ff9d4b1f
- //TODO VFMUL V9.S[2], F21, F19 // b39a895f
- //TODO VFMUL V26.S[2], V26.S2, V2.S2 // 429b9a0f
+ VFMUL V9.S[2], F21, F19 // b39a895f
+ VFMUL V26.S[2], V26.S2, V2.S2 // 429b9a0f
VFMUL V21.D2, V17.D2, V25.D2 // 39de756e
FMULS F0, F6, F24 // d808201e
FMULD F5, F29, F9 // a90b651e
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index e1bd8e1..1825361 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -396,6 +396,12 @@
{AVFADDP, C_ARNG, C_ARNG, C_NONE, C_ARNG, C_NONE, 72, 4, 0, 0, 0},
{AVFADDP, C_ARNG, C_NONE, C_NONE, C_FREG, C_NONE, 110, 4, 0, 0, 0},
+ /* indexed-element forms (scalar and vector): vfmla/vfmls/vfmul Vm.<T>[index], Rn, Rd
+ Only AVFMLA is listed as the root opcode: buildop places these entries in the AVFMLA
+ oprange, which is shared with AVFMLS and AVFMUL via oprangeset. */
+ {AVFMLA, C_ELEM, C_FREG, C_NONE, C_FREG, C_NONE, 111, 4, 0, 0, 0},
+ {AVFMLA, C_ELEM, C_ARNG, C_NONE, C_ARNG, C_NONE, 111, 4, 0, 0, 0},
+
/* logical operations */
{AAND, C_ZREG, C_ZREG, C_NONE, C_ZREG, C_NONE, 1, 4, 0, 0, 0},
{AAND, C_ZREG, C_NONE, C_NONE, C_ZREG, C_NONE, 1, 4, 0, 0, 0},
@@ -6077,17 +6083,16 @@
switch p.As {
case AVFADDP:
opcode = 0x0d
- case AVFMAXP, AVFMAXNMP:
+ case AVFMAXP:
opcode = 0x0f
- if p.As == AVFMAXNMP {
- opcode = 0x0c
- }
- case AVFMINP, AVFMINNMP:
+ case AVFMAXNMP:
+ opcode = 0x0c
+ case AVFMINP:
sz |= 2 // set sz[1] for min variants
opcode = 0x0f
- if p.As == AVFMINNMP {
- opcode = 0x0c
- }
+ case AVFMINNMP:
+ sz |= 2 // set sz[1] for min variants
+ opcode = 0x0c
default:
c.ctxt.Diag("unsupported op %v\n", p.As)
}
@@ -6095,6 +6100,65 @@
rd := uint32(p.To.Reg & 31)
o1 = 0x7e300800 | sz<<22 | opcode<<12 | rn<<5 | rd
+ case 111: /* indexed element: vfmla/vfmls/vfmul Vm.<T>[index], Rn, Rd */
+ // AdvSIMD scalar x indexed element (C_FREG dest), fields listed from bit 31 down:
+ // 0 1 0 1 1 1 1 1 1 sz L M Rm opcode H 0 Rn Rd (base: 0x5f800000)
+ // AdvSIMD vector x indexed element (C_ARNG dest), fields listed from bit 31 down:
+ // 0 Q 0 0 1 1 1 1 1 sz L M Rm opcode H 0 Rn Rd (base: 0x0f800000)
+ // For .S type: sz=0, index=H:L (H=index[1], L=index[0]), M=Vm[4], Rm[3:0]=Vm[3:0]
+ // For .D type: sz=1, index=H (L=0), M=Vm[4], Rm[3:0]=Vm[3:0]
+ af := int((p.From.Reg >> 5) & 15) // arrangement of C_ELEM (ARNG_S or ARNG_D)
+ index := int(p.From.Index)
+ rm := int(p.From.Reg & 31)
+ rn := uint32(p.Reg & 31)
+ rd := uint32(p.To.Reg & 31)
+ var sz, H, L, M uint32
+ switch af {
+ case ARNG_S:
+ sz = 0
+ H = uint32(index>>1) & 1
+ L = uint32(index) & 1
+ M = uint32(rm>>4) & 1
+ case ARNG_D:
+ sz = 1
+ H = uint32(index) & 1
+ L = 0
+ M = uint32(rm>>4) & 1
+ default:
+ c.ctxt.Diag("invalid arrangement: %v\n", p)
+ }
+ var opcode uint32
+ switch p.As {
+ case AVFMLA:
+ opcode = 0x1
+ case AVFMLS:
+ opcode = 0x5
+ case AVFMUL:
+ opcode = 0x9
+ default:
+ c.ctxt.Diag("unsupported op %v\n", p.As)
+ }
+ rmLow := uint32(rm) & 0xf
+ if p.To.Reg >= REG_F0 && p.To.Reg <= REG_F31 {
+ // scalar indexed element
+ o1 = 0x5f800000 | sz<<22 | L<<21 | M<<20 | rmLow<<16 | opcode<<12 | H<<11 | rn<<5 | rd
+ } else {
+ // vector indexed element: Q from destination arrangement
+ at := int((p.To.Reg >> 5) & 15)
+ var Q uint32
+ switch at {
+ case ARNG_2S:
+ Q = 0
+ case ARNG_4S:
+ Q = 1
+ case ARNG_2D:
+ Q = 1
+ default:
+ c.ctxt.Diag("invalid arrangement: %v\n", p)
+ }
+ o1 = 0x0f800000 | Q<<30 | sz<<22 | L<<21 | M<<20 | rmLow<<16 | opcode<<12 | H<<11 | rn<<5 | rd
+ }
+
case 127:
// Generic SVE instruction encoding
matched := false