cmd/compile: add new arm64 assembler data structure
These data structure will be used for SVE/SVE2.
Original author Eric Fang (@eric...@arm.com)
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 814dba2..ef6c3d0 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -39,7 +39,8 @@
NFREG = 32 /* number of floating point registers */
)
-// General purpose registers, kept in the low bits of Prog.Reg.
+// Arm64 registers, the order matters, make sure that each
+// kind of register starts numbering from the lowest bit.
const (
// integer
REG_R0 = obj.RBaseARM64 + iota
@@ -143,6 +144,78 @@
REG_V30
REG_V31
+ // SVE(Scalable Vector Extension) scalable vector registers
+ REG_Z0
+ REG_Z1
+ REG_Z2
+ REG_Z3
+ REG_Z4
+ REG_Z5
+ REG_Z6
+ REG_Z7
+ REG_Z8
+ REG_Z9
+ REG_Z10
+ REG_Z11
+ REG_Z12
+ REG_Z13
+ REG_Z14
+ REG_Z15
+ REG_Z16
+ REG_Z17
+ REG_Z18
+ REG_Z19
+ REG_Z20
+ REG_Z21
+ REG_Z22
+ REG_Z23
+ REG_Z24
+ REG_Z25
+ REG_Z26
+ REG_Z27
+ REG_Z28
+ REG_Z29
+ REG_Z30
+ REG_Z31
+
+ // SVE scalable predicate registers
+ REG_P0
+ REG_P1
+ REG_P2
+ REG_P3
+ REG_P4
+ REG_P5
+ REG_P6
+ REG_P7
+ REG_P8
+ REG_P9
+ REG_P10
+ REG_P11
+ REG_P12
+ REG_P13
+ REG_P14
+ REG_P15
+
+ // SVE scalable predicate registers, with predicate-as-counter encoding.
+ // These are actually P registers, but encoded differently.
+ // In order to distinguish with P registers, define them as PN registers.
+ REG_PN0
+ REG_PN1
+ REG_PN2
+ REG_PN3
+ REG_PN4
+ REG_PN5
+ REG_PN6
+ REG_PN7
+ REG_PN8
+ REG_PN9
+ REG_PN10
+ REG_PN11
+ REG_PN12
+ REG_PN13
+ REG_PN14
+ REG_PN15
+
REG_RSP = REG_V31 + 32 // to differentiate ZR/SP, REG_RSP&0x1f = 31
)
@@ -250,6 +323,24 @@
REG_R29: 29,
REG_R30: 30,
+ // SVE predicate registers
+ REG_P0: 48,
+ REG_P1: 49,
+ REG_P2: 50,
+ REG_P3: 51,
+ REG_P4: 52,
+ REG_P5: 53,
+ REG_P6: 54,
+ REG_P7: 55,
+ REG_P8: 56,
+ REG_P9: 57,
+ REG_P10: 58,
+ REG_P11: 59,
+ REG_P12: 60,
+ REG_P13: 61,
+ REG_P14: 62,
+ REG_P15: 63,
+
// floating point
REG_F0: 64,
REG_F1: 65,
@@ -317,6 +408,40 @@
REG_V29: 93,
REG_V30: 94,
REG_V31: 95,
+
+ // SVE vector registers
+ REG_Z0: 96,
+ REG_Z1: 97,
+ REG_Z2: 98,
+ REG_Z3: 99,
+ REG_Z4: 100,
+ REG_Z5: 101,
+ REG_Z6: 102,
+ REG_Z7: 103,
+ REG_Z8: 104,
+ REG_Z9: 105,
+ REG_Z10: 106,
+ REG_Z11: 107,
+ REG_Z12: 108,
+ REG_Z13: 109,
+ REG_Z14: 110,
+ REG_Z15: 111,
+ REG_Z16: 112,
+ REG_Z17: 113,
+ REG_Z18: 114,
+ REG_Z19: 115,
+ REG_Z20: 116,
+ REG_Z21: 117,
+ REG_Z22: 118,
+ REG_Z23: 119,
+ REG_Z24: 120,
+ REG_Z25: 121,
+ REG_Z26: 122,
+ REG_Z27: 123,
+ REG_Z28: 124,
+ REG_Z29: 125,
+ REG_Z30: 126,
+ REG_Z31: 127,
}
const (
@@ -485,6 +610,46 @@
C_XPOST = 1 << 5 // match arm.C_PBIT, so Prog.String know how to print it
)
+type AClass uint16 // operand type
+
+// The classification table below will eventually replace the classification table above.
+// instTab is sorted based on the order of these constants and the first match is chosen.
+//
+//go:generate stringer -type AClass -trimprefix AC_
+const (
+ AC_NONE AClass = iota
+ AC_REG // general purpose registers R0..R30 and ZR
+ AC_RSP // general purpose registers R0..R30 and RSP
+ AC_FREG // floating point registers, such as F1
+ AC_VREG // vector registers, such as V1
+ AC_ZREG // the scalable vector registers, such as Z1
+ AC_PREG // the scalable predicate registers, such as P1
+ AC_PNREG // the scalable predicate registers, with predicate-as-counter encoding, such as PN1
+ AC_PREGM // Pg/M
+ AC_PREGZ // Pg/Z
+ AC_REGIDX // P8[1]
+ AC_PAIR // register pair, such as (R1, R3)
+ AC_REGSHIFT // general purpose register with shift, such as R1<<2
+ AC_REGEXT // general purpose register with extend, such as R7.SXTW<<1
+ AC_ARNG // vector register with arrangement, such as V11.D2
+ AC_ARNGIDX // vector register with arrangement and index, such as V12.D[1]
+
+ AC_IMM // constants
+
+ AC_REGLIST1 // list of 1 vector register, such as [V1]
+ AC_REGLIST2 // list of 2 vector registers, such as [V1, V2], [Z0, Z8]
+ AC_REGLIST3 // list of 3 vector registers, such as [V1, V2, V3]
+ AC_REGLIST4 // list of 4 vector registers, such as [V1, V2, V3, V4], [Z0, Z4, Z8, Z12]
+ AC_LISTIDX // list with index, such as [V1.B, V2.B][2]
+
+ AC_MEMIMM // address with optional offset, the offset is an immediate, such as 4(R1)
+ AC_MEMIMMEXT // address with optional offset, the offset is an immediate with extension, such as (2*VL)(R1)
+ AC_MEMEXT // address with extend offset, such as (R2)(R5.SXTX<<1)
+ AC_MEMPOSTIMM // address of the post-index class, offset is an immediate
+ AC_MEMPOSTREG // address of the post-index class, offset is a register
+ AC_MEMPREIMM // address of the pre-index class, offset is an immediate
+)
+
//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm64
const (
diff --git a/src/cmd/internal/obj/arm64/inst.go b/src/cmd/internal/obj/arm64/inst.go
new file mode 100644
index 0000000..4402595
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/inst.go
@@ -0,0 +1,118 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package arm64
+
+import "cmd/internal/obj"
+
+// inst represents an instruction.
+type Inst struct {
+ GoOp obj.As // Go opcode mnemonic
+ ArmOp A64Type // Arm64 opcode mnemonic
+ Feature uint16 // such as "FEAT_LSE", "FEAT_CSSC"
+ FixedBits uint32 // known bits
+ // unknown bits, key is its name
+ VarBits map[string]VarBits
+ Mask uint32 // mask for disassembly, 1 for known bits, 0 for unknown bits
+ Alias bool // whether it is an alias
+ Args []Operand // operands, in Go order
+}
+
+type VarBits struct {
+ // The low and high bit index in the binary encoding, exclusive on hi
+ lo, hi int
+ encoded bool // if true then its value is already encoded
+ bits uint32
+}
+
+// Operand is the operand type of an instruction.
+type Operand struct {
+ Class AClass // operand class, register, constant, memory operation etc.
+ Elms []ElmType // the elements that this operand includes
+}
+
+// A64Type is the Arm64 opcode type, an Arm64 opcode is prefixed with "A64",
+// a Go opcode is defined with a constant and is prefixed with "A".
+type A64Type uint16
+
+// ElmType is the element type, an element represents a symbol of a specific encoding form,
+// such as <Xn>, #<uimm4>, <T>.
+type ElmType struct {
+ // The natural language encoding text.
+ // e.g. '''
+ // <T> Is the size specifier, encoded in size:
+ //
+ // size <T>
+ // 00 B
+ // 01 H
+ // 10 S
+ // 11 D
+ // '''
+ encodingText string
+ encodingName string // key in VarBits
+ // The AI-implemented encoding func.
+ encodingFunc func(uint32) uint32
+}
+
+func (i *Inst) bin() uint32 {
+ bits := i.FixedBits
+ for _, v := range i.VarBits {
+ if !v.encoded {
+ panic("encoding incomplete")
+ }
+ bits |= v.bits
+ }
+ return bits
+}
+
+type Icmp []Inst
+
+func (x Icmp) Len() int {
+ return len(x)
+}
+
+func (x Icmp) Swap(i, j int) {
+ x[i], x[j] = x[j], x[i]
+}
+
+func (x Icmp) Less(i, j int) bool {
+ p1 := &x[i]
+ p2 := &x[j]
+ if p1.GoOp != p2.GoOp {
+ return p1.GoOp < p2.GoOp
+ }
+ if len(p1.Args) != len(p2.Args) {
+ return len(p1.Args) < len(p2.Args)
+ }
+ for k := 0; k < len(p1.Args); k++ {
+ if p1.Args[k].Class != p2.Args[k].Class {
+ return p1.Args[k].Class < p2.Args[k].Class
+ }
+ }
+ if p1.FixedBits != p2.FixedBits {
+ return p1.FixedBits < p2.FixedBits
+ }
+ if p1.Mask != p2.Mask {
+ return p1.Mask < p2.Mask
+ }
+ return false
+}
+
+// These constants represent Arm a-profile architecture extensions. For details,
+// please refer to the Arm a-profile architecture reference manual.
+// Update this table if new extensions are found when parsing the XML files.
+const (
+ FEAT_NONE uint16 = iota
+ FEAT_SVE2p1
+ // Scalable Vector AES instructions.
+ FEAT_SVE_AES
+ FEAT_SVE_B16B16
+ // Scalable Vector Bit Permutes instructions.
+ FEAT_SVE_BitPerm
+ // Scalable Vector PMULL instructions.
+ FEAT_SVE_PMULL128
+ // Scalable Vector SHA3 instructions.
+ FEAT_SVE_SHA3
+ FEAT_SVE_SM4
+)
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
cmd/compile: add new arm64 assembler data structure
These data structure will be used for SVE/SVE2.
Original author Eric Fang (@eric...@arm.com)
index 0000000..4ccef4a
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/inst.go
@@ -0,0 +1,109 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package arm64
+
+import "cmd/internal/obj"
+
+// inst represents an instruction.
+type Inst struct {
+ GoOp obj.As // Go opcode mnemonic
+ ArmOp A64Type // Arm64 opcode mnemonic
+ Feature uint16 // Such as "FEAT_SVE"
+ FixedBits uint32 // Known bits
+ // Unknown bits, key is its name
+ VarBits map[string]VarBits
+ Mask uint32 // Mask for disassembly, 1 for known bits, 0 for unknown bits
+ Alias bool // Whether it is an alias
+ Args []Operand // Operands, in Go order
+}
+
+type VarBits struct {
+ // The low and high bit index in the binary encoding, exclusive on hi
+ lo, hi int
+ encoded bool // If true then its value is already encoded
+ bits uint32
+}
+
+// Operand is the operand type of an instruction.
+type Operand struct {
+ Class AClass // Operand class, register, constant, memory operation etc.
+ Text string // The text representation of this operand, e.g. <Pg>/Z.
+ // The elements that this operand includes, this only includes the encoding-related parts
+ Elms []ElmType
+}
+
+// A64Type is the Arm64 opcode type, an Arm64 opcode is prefixed with "A64",
+// a Go opcode is defined with a constant and is prefixed with "A".
+type A64Type uint16
+
+// ElmType is the element type, an element represents a symbol of a specific encoding form,
+// such as <Xn>, #<uimm4>, <T>.
+type ElmType struct {
+ encodingName string // The key in VarBits| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Ah, I am sorry I put these CLs in the main branch.
I will cherry-pick them to dev.simd later.
I am not expecting code changes so please proceed with reviews 😊
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {
switch acl {The assembly parser will need to follow the assumption of this function when filling in progs.
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {
switch acl {The assembly parser will need to follow the assumption of this function when filling in progs.
I am thinking about just make the SVE a special path in the parser as well.
That will make the implementation much easier 😕.
There might be slightly more codes and increased binary size, but I assume it wouldn't be more than 1k LOC, so should be negligible.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
// They are represented as a list of pointer to the encoding funcI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
// Offset[5:8] = arrangement or predicationThis is like Go slice notation, so 5:8 == {5,6,7}?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// They are represented as a list of pointer to the encoding funcI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
Thanks for the review!
What is your worry about this interpretation? I think OR-ing bits together is what the assembler do all the time.
// Offset[5:8] = arrangement or predicationThis is like Go slice notation, so 5:8 == {5,6,7}?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
REG_Z0 = obj.RBaseARM64 + 768 + iota
REG_Z1
REG_Z2Maybe RSP+1 as the range start point
// aclass returns the AClass of an Addr.
// Right now AClass is just stored at the lower 5 bits of the Offset field...
func aclass(a *obj.Addr) AClass {
return AClass(a.Offset & 0x1f)
}Put into any
Original author Eric Fang (@eric...@arm.com)Maybe reference CL number. Also remove the "@" before the email.
// SVE(Scalable Vector Extension) scalable vector registersAdd a space before (.
// We use an offset of 768 to avoid colliding with other registers.It is better for the registers to be consecutive. We could change others, LSL or ARNG. I still don't think they should be registers (but that can be addressed later).
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.The question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
}Why not P registers? DWARF doesn't understand them?
// The classification table below will eventually replace the classification table above.It is unclear whether this is the goal. Integer instructions can still use the old path, and there are special logic there, especially handling large constants and offsets. I think that should be evaluated separately.
AC_PREGM // Pg/MUse Go syntax. Have we decided to use "/" syntax? That seems a bit weird.
AC_MEMIMM // address with optional offset, the offset is an immediate, such as 4(Z1.D)"address with optional constant offset"?
Maybe name it AC_MEMOFF?
AC_MEMIMMEXT // address with optional offset, the offset is an immediate with extension, such as (2*VL)(Z1.D)The example doesn't look like "immediate with extension".
AC_MEMEXT // address with extend offset, such as (Z2.D.UXTW<<3)(RSP)"address with register offset with extension"?
AC_VL // VLxi pattern, one of: VLx2, VLx4.Is this multiply? Use `*` to match the assembly syntax.
AC_PREFETCH // Prefetch pattern, such as PLDL1KEEPLimiting it to prefetch seems to special. There are other special symbolic operands (and probably be more in the future). Maybe AC_SPECIAL.
type Inst struct {Most types probably could be unexported.
Alias bool // Whether it is an aliasWhy the assembler needs to know this?
type A64BinarySymbolName uint16The "A64" prefix doesn't seem helpful. This is the arm64 assembler, so everything is A64. Drop them, everywhere.
ElemEncodings []func(uint32) (uint32, A64BinarySymbolName, bool)elemEncoders
Perhaps use "encoder" for encoding functions, everywhere.
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
This seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
// a64ElmBinFrom returns the binary of the stored elm in a at index, for operand of type aclass.What is "the stored elm"? "Element", or at least "elem", is probably more clear, at least in comment. (For variables, feel free to choose the name.)
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {Maybe "encodeAddr"?
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
Please move the reviews to CL 748600
Maybe reference CL number. Also remove the "@" before the email.
Done
// SVE(Scalable Vector Extension) scalable vector registersAdd a space before (.
Done
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.The question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Initially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back, in that case I think we want to user to write them as `P` or `PN` specifically.
A imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
Why not P registers? DWARF doesn't understand them?
I think P registers are listed above?
// The classification table below will eventually replace the classification table above.It is unclear whether this is the goal. Integer instructions can still use the old path, and there are special logic there, especially handling large constants and offsets. I think that should be evaluated separately.
That's right, I have removed the comment, thanks.
Use Go syntax. Have we decided to use "/" syntax? That seems a bit weird.
Sorry, yes they should be Pg.M and Pg.Z
AC_MEMIMM // address with optional offset, the offset is an immediate, such as 4(Z1.D)"address with optional constant offset"?
Maybe name it AC_MEMOFF?
Done
AC_MEMIMMEXT // address with optional offset, the offset is an immediate with extension, such as (2*VL)(Z1.D)The example doesn't look like "immediate with extension".
`2` is the immediate here.
This is an concrete example in GNU mnemonic:
`[<Xn|SP>{, #<imm>, MUL VL}]`.
`MUL VL` is the hardware vector size extension denotation.
Actually this is the only form of this class, I changed its name to `AC_MEMOFFMULVL`
AC_MEMEXT // address with extend offset, such as (Z2.D.UXTW<<3)(RSP)"address with register offset with extension"?
Done
Is this multiply? Use `*` to match the assembly syntax.
Done
Limiting it to prefetch seems to special. There are other special symbolic operands (and probably be more in the future). Maybe AC_SPECIAL.
Hmm you are right, I also merge it with AC_VL, they are both now AC_SPECIAL
Most types probably could be unexported.
Done
Why the assembler needs to know this?
That probably is for the disassembler, I have removed it.
The "A64" prefix doesn't seem helpful. This is the arm64 assembler, so everything is A64. Drop them, everywhere.
Done
// They are represented as a list of pointer to the encoding funcJunyang ShaoI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
Thanks for the review!
What is your worry about this interpretation? I think OR-ing bits together is what the assembler do all the time.
Done
ElemEncodings []func(uint32) (uint32, A64BinarySymbolName, bool)elemEncoders
Perhaps use "encoder" for encoding functions, everywhere.
Done
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
This seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Yeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
// a64ElmBinFrom returns the binary of the stored elm in a at index, for operand of type aclass.What is "the stored elm"? "Element", or at least "elem", is probably more clear, at least in comment. (For variables, feel free to choose the name.)
Chose "element"
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoMaybe "encodeAddr"?
Done
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Apparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.Junyang ShaoThe question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Initially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back, in that case I think we want to user to write them as `P` or `PN` specifically.
A imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
The question is whether they need to be distinguishable. Are the two instructions do exactly the same thing? Just different encodings? If they do exactly the same thing we can just choose one encoding. It is okay we never encode the other form.
I think it's safe to remove them then.
Let's try that.
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
Junyang ShaoThis seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Yeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
If you need to iterate over operands, you probably could write it as an iterator. Something like
```
func operands(p *Prog) (...) {
return func(yield func(*Addr) bool) {
if p.From.Type != obj.TYPE_NONE {
if !yield(&p.From) { return }
}
for _, a := range p.RestArgs {
if !yield(&a.Addr) { return }
}
...
}
}
```
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Reading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Apparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
Yeah they were addressed in the dev.simd cherrypicks, I haven't sync that back... Will do :D
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Ah, I am sorry I put these CLs in the main branch.
I will cherry-pick them to dev.simd later.
I am not expecting code changes so please proceed with reviews 😊
Done
Please move the reviews to CL 748600
Done
Junyang ShaoApparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
Yeah they were addressed in the dev.simd cherrypicks, I haven't sync that back... Will do :D
Done
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.Junyang ShaoThe question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Cherry MuiInitially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back, in that case I think we want to user to write them as `P` or `PN` specifically.
A imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
The question is whether they need to be distinguishable. Are the two instructions do exactly the same thing? Just different encodings? If they do exactly the same thing we can just choose one encoding. It is okay we never encode the other form.
I think it's safe to remove them then.Let's try that.
Done
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
Junyang ShaoThis seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Cherry MuiYeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
If you need to iterate over operands, you probably could write it as an iterator. Something like
```
func operands(p *Prog) (...) {
return func(yield func(*Addr) bool) {
if p.From.Type != obj.TYPE_NONE {
if !yield(&p.From) { return }
}
for _, a := range p.RestArgs {
if !yield(&a.Addr) { return }
}
...
}
}
```
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// We use an offset of 768 to avoid colliding with other registers.Junyang ShaoIt is better for the registers to be consecutive. We could change others, LSL or ARNG. I still don't think they should be registers (but that can be addressed later).
Done
Maybe RSP+1 as the range start point
Done
// aclass returns the AClass of an Addr.
// Right now AClass is just stored at the lower 5 bits of the Offset field...
func aclass(a *obj.Addr) AClass {
return AClass(a.Offset & 0x1f)
}Junyang ShaoPut into any
I don't think I should put `aclass` in prog anymore, so closing this comment.
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
Cherry MuiThe Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Reading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Thanks! Working on it.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
Cherry MuiThe Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Junyang ShaoReading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Thanks! Working on it.
I have added a e2e test, now the assembler can assemble
`ZADD` (we add the prefix `Z` to all SVE instructions that takes a Z, for deduplication purposes.)
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
for _, op := range i.args {Many Inst entries could share identical `args: []Operand` patterns (e.g. the three-operand `<Zm>.<T>, <Zn>.<T>, <Zd>.<T>` shape will appear many times with the same encoders). Would it be worth having the generator emit shared operand pattern variables? It would reduce table size and also give readable names to the distinct operand signatures, which might make the generated instruction table easier to audit.
addr, ok := next()And taking that one step further — each such shared generated `args:[]Operand` pattern looks essentially like an NFA over (AClass, arrangement) pairs, with the element encoders as output actions on transitions. Constraints like `T=4×Tb` would just mean only certain paths exist (`S→B→B`, `S→H→H`, `H→B→B`, `D→H→H` for SDOT example above in comment) with no accepting path for invalid combinations, rather than needing runtime checks. Could that replace the `encoded` mapping for same BinarySymbolName checks? But that would need more states (not sure how much more) and could be less in line with the XML specs?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
for _, op := range i.args {Many Inst entries could share identical `args: []Operand` patterns (e.g. the three-operand `<Zm>.<T>, <Zn>.<T>, <Zd>.<T>` shape will appear many times with the same encoders). Would it be worth having the generator emit shared operand pattern variables? It would reduce table size and also give readable names to the distinct operand signatures, which might make the generated instruction table easier to audit.
Thanks for the comment, yes we can do this to make the code smaller, I will do it in a later CL.
addr, ok := next()And taking that one step further — each such shared generated `args:[]Operand` pattern looks essentially like an NFA over (AClass, arrangement) pairs, with the element encoders as output actions on transitions. Constraints like `T=4×Tb` would just mean only certain paths exist (`S→B→B`, `S→H→H`, `H→B→B`, `D→H→H` for SDOT example above in comment) with no accepting path for invalid combinations, rather than needing runtime checks. Could that replace the `encoded` mapping for same BinarySymbolName checks? But that would need more states (not sure how much more) and could be less in line with the XML specs?
I am not sure if I understand the logic here... Can you give me some example codes of the design in your mind? Thanks
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
for _, op := range i.args {Junyang ShaoMany Inst entries could share identical `args: []Operand` patterns (e.g. the three-operand `<Zm>.<T>, <Zn>.<T>, <Zd>.<T>` shape will appear many times with the same encoders). Would it be worth having the generator emit shared operand pattern variables? It would reduce table size and also give readable names to the distinct operand signatures, which might make the generated instruction table easier to audit.
Thanks for the comment, yes we can do this to make the code smaller, I will do it in a later CL.
Acknowledged
addr, ok := next()Junyang ShaoAnd taking that one step further — each such shared generated `args:[]Operand` pattern looks essentially like an NFA over (AClass, arrangement) pairs, with the element encoders as output actions on transitions. Constraints like `T=4×Tb` would just mean only certain paths exist (`S→B→B`, `S→H→H`, `H→B→B`, `D→H→H` for SDOT example above in comment) with no accepting path for invalid combinations, rather than needing runtime checks. Could that replace the `encoded` mapping for same BinarySymbolName checks? But that would need more states (not sure how much more) and could be less in line with the XML specs?
I am not sure if I understand the logic here... Can you give me some example codes of the design in your mind? Thanks
Well, for SDOT in comment methions the following variants from three different xml files:
1) `SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>` such that `T=4×Tb` (i.e. 2 possible variants, either T="S" with Tb="B" (1a) or T="D" with Tb="H"(1b) ).
2) `SDOT <Zda>.H, <Zn>.B, <Zm>.B`
3) `SDOT <Zda>.S, <Zn>.H, <Zm>.H`
That gives 4 possible variants which (if we build NFA from right to left on these cases) would give something like following:
```
{ARNG,B} {ARNG,B} {ARNG,S}
S0 (SDOT) ──────→ S1 ──────→ S3 ──────→ [accept: .S .B .B → enc1a]
│
│{ARNG,H}
└──────→ [accept: .H .B .B → enc2]
{ARNG,H} {ARNG,H} {ARNG,D}
└──────→ S2 ──────→ S4 ──────→ [accept: .D .H .H → enc1b]
│
│{ARNG,S}
└──────→ [accept: .S .H .H → enc3]
```
Here each arrow denotes transition the code would do on argument with corresponding {aclass, arrangement} pair such as `{ARNG,H}`. Each `enc` would be some generated function responsible for encoding whole instruction. That would separate two concerns: "is this a valid operand combination?" (NFA) and "what bits do we emit?" (resulting encoder function). Current state could also help to emit diagnostic in future, e.g. in state `S4` if the next input is not `{ARNG,D}` or `{ARNG,S}`, the assembler could print out something like "expected vector register with `.D` or `.S` arrangement".| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
addr, ok := next()Junyang ShaoAnd taking that one step further — each such shared generated `args:[]Operand` pattern looks essentially like an NFA over (AClass, arrangement) pairs, with the element encoders as output actions on transitions. Constraints like `T=4×Tb` would just mean only certain paths exist (`S→B→B`, `S→H→H`, `H→B→B`, `D→H→H` for SDOT example above in comment) with no accepting path for invalid combinations, rather than needing runtime checks. Could that replace the `encoded` mapping for same BinarySymbolName checks? But that would need more states (not sure how much more) and could be less in line with the XML specs?
Alexander MusmanI am not sure if I understand the logic here... Can you give me some example codes of the design in your mind? Thanks
Well, for SDOT in comment methions the following variants from three different xml files:
1) `SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>` such that `T=4×Tb` (i.e. 2 possible variants, either T="S" with Tb="B" (1a) or T="D" with Tb="H"(1b) ).
2) `SDOT <Zda>.H, <Zn>.B, <Zm>.B`
3) `SDOT <Zda>.S, <Zn>.H, <Zm>.H`
That gives 4 possible variants which (if we build NFA from right to left on these cases) would give something like following:
```
{ARNG,B} {ARNG,B} {ARNG,S}
S0 (SDOT) ──────→ S1 ──────→ S3 ──────→ [accept: .S .B .B → enc1a]
│
│{ARNG,H}
└──────→ [accept: .H .B .B → enc2]{ARNG,H} {ARNG,H} {ARNG,D}
└──────→ S2 ──────→ S4 ──────→ [accept: .D .H .H → enc1b]
│
│{ARNG,S}
└──────→ [accept: .S .H .H → enc3]
```
Here each arrow denotes transition the code would do on argument with corresponding {aclass, arrangement} pair such as `{ARNG,H}`. Each `enc` would be some generated function responsible for encoding whole instruction. That would separate two concerns: "is this a valid operand combination?" (NFA) and "what bits do we emit?" (resulting encoder function). Current state could also help to emit diagnostic in future, e.g. in state `S4` if the next input is not `{ARNG,D}` or `{ARNG,S}`, the assembler could print out something like "expected vector register with `.D` or `.S` arrangement".
Oh I see what you mean. Thanks for the comprehensive example. 😊
However this `T=4*Tb` logic is not something we can parse mechanically, it is actually specified in the natural language description of assembler symbols. This means we need to integrate the understanding of these natural language specs into the instruction matching phase, which will be scattered around everywhere.
The current design puts all those natural language understanding in `encoding_gen.go` and by checking `encoded` they can do the same thing as this NFA design.
I am leaning towards just keeping the current design.
We can still emit comprehensive enough diagnostics from the current design - for example we can emit all encodings tried ordered by how much partial match they got.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
var AC_ARNG_164_226 = Operand{Probably we shouldn't export them, will fix
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
Probably we shouldn't export them, will fix
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Pg
ENC_18I think this would be better if it were
// 18
ENC_Pg
and maybe the "// 18" mentions why "18" has any special significance.
Etc for all the others, and into the next CL.
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {I don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Pg
ENC_18I think this would be better if it were
// 18
ENC_Pgand maybe the "// 18" mentions why "18" has any special significance.
Etc for all the others, and into the next CL.
If we are adopting this pattern, I am not sure why we will still have 18, 18 is just an ID generated by the generator.
Ack, will do in next CL.
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {I don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {Junyang ShaoI don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
Acked, they will all go to the new CL, thanks
I just realize - this means those magic strings for operands and elements is going to have a "isMergePred" or "M" or "MP", etc.
I am not sure this is a good idea, a uniformed ID number looks easier for the user to locate the encoding functions.
var A64Anames = []string{Again, remove the A64 prefix. Everywhere.
And we should put the names into one slice. You can generate a sveNames slice, append it to Anames.
AZADD obj.As = ALAST + 1 + iotaALAST is the very LAST one, by definition. Don't put anything after ALAST.
If necessary, define new constants, and adjust ALAST.
type BinarySymbolName uint16What is this for? BinarySymbolName is a very confusing term in assembler/linker context. It would usually mean a symbol name in the binary, like an ELF symbol, which is probably not what you mean here.
Add a comment explaining what it is. And probably rename it.
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {What is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
// tryEncode tries to encode p to i, it returns the encoded binary and ok signal.This doesn't sound right. i (as an Inst) doesn't seem to carry data, so "encode p to i" is not right. Again, we should rename the type.
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
Cherry MuiThe Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Junyang ShaoReading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Junyang ShaoThanks! Working on it.
I have added a e2e test, now the assembler can assemble
`ZADD` (we add the prefix `Z` to all SVE instructions that takes a Z, for deduplication purposes.)
I don't think this comment is addressed. At least, the confusing naming Inst and Operand are still there.
RBaseARM64 = 8 * 1024 // range [8k, 13k)Is this still right?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
type BinarySymbolName uint16What is this for? BinarySymbolName is a very confusing term in assembler/linker context. It would usually mean a symbol name in the binary, like an ELF symbol, which is probably not what you mean here.
Add a comment explaining what it is. And probably rename it.
It's those symbol names in the xml binary specification.
i.e. Zdn, Zm, Pg
I can rename it `machineCodeSymbolName`.
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {What is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
`index` is an index into the element list, you can find it in `inst_gen.go`.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {Junyang ShaoI don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
Junyang ShaoAcked, they will all go to the new CL, thanks
I just realize - this means those magic strings for operands and elements is going to have a "isMergePred" or "M" or "MP", etc.
I am not sure this is a good idea, a uniformed ID number looks easier for the user to locate the encoding functions.
The number is not better. "isMergePred" is no worse than "encodeGen8" in those magic strings, and might be better -- it means it isn't doing any actual encoding, for example.
func encodeGen164(v uint32) (uint32, BinarySymbolName, bool) {Similarly in the renaming, perhaps this would could be "Enc_Zm_5_10".
And above, could be "Enc_Pg_10_13"
func encodeGen226(v uint32) (uint32, BinarySymbolName, bool) {this one's harder, maybe it doesn't get a special name?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
type BinarySymbolName uint16Junyang ShaoWhat is this for? BinarySymbolName is a very confusing term in assembler/linker context. It would usually mean a symbol name in the binary, like an ELF symbol, which is probably not what you mean here.
Add a comment explaining what it is. And probably rename it.
It's those symbol names in the xml binary specification.
i.e. Zdn, Zm, PgI can rename it `machineCodeSymbolName`.
Let's avoid the word "Symbol". If is a confusing name in this context. Maybe "component" or "operandComponent"?
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoWhat is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
`index` is an index into the element list, you can find it in `inst_gen.go`.
It looks like it is to identify a component of an Addr, like the "Zm" or "T" from a "Zm.T"? Add a comment explain this.
This function doesn't actually encode the Addr, but extract the component. So the function probably should be renamed to addrComponent or so.
This also makes me wonder: should we just "flatten" it? I.e. instead of iterating first on Addr, then on the components of it, just directly iterate on the components. Same for the encoder side: one flattened list of component encoders, instead of two levels ([]Operand, then []elemEncoder).
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
Again, remove the A64 prefix. Everywhere.
And we should put the names into one slice. You can generate a sveNames slice, append it to Anames.
Done
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {Junyang ShaoI don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
Junyang ShaoAcked, they will all go to the new CL, thanks
David ChaseI just realize - this means those magic strings for operands and elements is going to have a "isMergePred" or "M" or "MP", etc.
I am not sure this is a good idea, a uniformed ID number looks easier for the user to locate the encoding functions.
The number is not better. "isMergePred" is no worse than "encodeGen8" in those magic strings, and might be better -- it means it isn't doing any actual encoding, for example.
I am not sure why do you think this is better.
Let's see an example here:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
{
goOp: AZABS,
fixedBits: 0x416a000,
args: oc_ARNG_173_226_173_226_PREGZM_141_8_141_8_ARNG_123_226_123_226, // <Zn>.<T>, <Pg>/M, <Zd>.<T>
},
```
In your new naming scheme, it will appear to be:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
{
goOp: AZABS,
fixedBits: 0x416a000,
args: oc_ARNG_Zn_5_10_size_22_24_Zn_5_10_size_22_24_PREGZM_Pg_10_13_MP._Pg_10_13_MP_ARNG_Zd_0_5_size_22_24_Zd_0_5_size_22_24, // <Zn>.<T>, <Pg>/M, <Zd>.<T>
},
```
Here, `size_22_24` means it's a `size` component encoded in the 22 to 24 bits.
However, this has a problem, as size encoding has different semantics.
For example we have these 2 guys:
```
// Is the size specifier,
// size <T>
// 01 H
// 1x D
// bit range mappings:
// size: [22:24)
func encodeGen234(v uint32) (uint32, BinarySymbolName, bool) {
switch v {
case ARNG_H:
return 1 << 22, ENC_57, true
case ARNG_D:
return 3 << 22, ENC_57, true
}
return 0, ENC_57, false
}
// Is the size specifier,
// size <Tb>
// 00 B
// 01 H
// 10 S
// 11 D
// bit range mappings:
// size: [22:24)
func encodeGen235(v uint32) (uint32, BinarySymbolName, bool) {
switch v {
case ARNG_B:
return 0 << 22, ENC_57, true
case ARNG_H:
return 1 << 22, ENC_57, true
case ARNG_S:
return 2 << 22, ENC_57, true
case ARNG_D:
return 3 << 22, ENC_57, true
}
return 0, ENC_57, false
}
```
How are we going to do with it? Split `size_22_24` to `size_22_24_type1` and `size_22_24_type2`? But `type1` and `type2` does not say about the semantics of these encoding functions, how can I find them now? Their IDs are gone, I will need to manually grep `size`, or maybe also grep `[22, 24)`, to find the actual descriptions of this encoding function. Now reading these data becomes a nightmare.
I just don't think this whole logic make sense, right now the user has a truth table (namely `encoding_gen.go`), and they can just grab the IDs from that magic string and go look at that encoding function and they will understand it.
These encoding functions are just irregular like that, let's just name them by IDs and don't create more reading indirections, it might make you reading a specific example easier, but the nature of this spec is not really as nice as what you think of.
func encodeGen164(v uint32) (uint32, BinarySymbolName, bool) {Similarly in the renaming, perhaps this would could be "Enc_Zm_5_10".
And above, could be "Enc_Pg_10_13"
I also don't think this is a good idea, see my other comments aggregating all your naming ideas.
func encodeGen226(v uint32) (uint32, BinarySymbolName, bool) {this one's harder, maybe it doesn't get a special name?
I don't think we should give them special names at all, just give them IDs. See my other long comment for details.
ALAST is the very LAST one, by definition. Don't put anything after ALAST.
If necessary, define new constants, and adjust ALAST.
Done
Junyang ShaoWhat is this for? BinarySymbolName is a very confusing term in assembler/linker context. It would usually mean a symbol name in the binary, like an ELF symbol, which is probably not what you mean here.
Add a comment explaining what it is. And probably rename it.
Cherry MuiIt's those symbol names in the xml binary specification.
i.e. Zdn, Zm, PgI can rename it `machineCodeSymbolName`.
Let's avoid the word "Symbol". If is a confusing name in this context. Maybe "component" or "operandComponent"?
Thanks, that sounds like a better name, and indeed we don't need to export them.
I think we probably don't need to export most of these data types here, so I unexported them altogether.
I still export `AClass` because they might be needed by the assembler parser at some point when I support more addressing mode.
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoWhat is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
Cherry Mui`index` is an index into the element list, you can find it in `inst_gen.go`.
It looks like it is to identify a component of an Addr, like the "Zm" or "T" from a "Zm.T"? Add a comment explain this.
This function doesn't actually encode the Addr, but extract the component. So the function probably should be renamed to addrComponent or so.
This also makes me wonder: should we just "flatten" it? I.e. instead of iterating first on Addr, then on the components of it, just directly iterate on the components. Same for the encoder side: one flattened list of component encoders, instead of two levels ([]Operand, then []elemEncoder).
I think the switch case comment documented this pretty well, but to make it clearer I also copy-pasted it to the function comment.
I am not sure I understand the flattening idea. What do you mean iterate on components? Before we iterate the `obj.Addr`s, we simply won't know its `AClass`, and we won't know its component layouts. And the assembling pipeline upstream gave us a list of `obj.Addr`s, not a list of components. So we have to iterate `obj.Addr`s first.
And we won't be able to remove the switch case too, because every index has its unique semantics for every `AClass`, so we need a place to check those `{AClass, index}` pairs and it will eventually be a switch case.
// tryEncode tries to encode p to i, it returns the encoded binary and ok signal.This doesn't sound right. i (as an Inst) doesn't seem to carry data, so "encode p to i" is not right. Again, we should rename the type.
May you elaborate more why this isn't right?
I changed the comment, does this look right to you now?
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
Cherry MuiThe Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Junyang ShaoReading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Junyang ShaoThanks! Working on it.
Cherry MuiI have added a e2e test, now the assembler can assemble
`ZADD` (we add the prefix `Z` to all SVE instructions that takes a Z, for deduplication purposes.)
I don't think this comment is addressed. At least, the confusing naming Inst and Operand are still there.
I have renamed `Inst` to `instEncoder`, I keep the `inst` to distinguish with `elemEncoders`.
RBaseARM64 = 8 * 1024 // range [8k, 13k)Junyang ShaoIs this still right?
Thanks for noticing this, updated.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {I would replace oc_ARNG_173_226_173_226_PREGZM_141_8_141_8_ARNG_123_226_123_226
with Zn_T__PgM__Zd_T__2.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {I still don't understand why that is better, it's now saying nothing more than "I have Z, P, Z registers", which is already in the comment of this magic string.
We can simply replace `oc_ARNG_173_226_173_226_PREGZM_141_8_141_8_ARNG_123_226_123_226` with `Magic1`, and it will be the same thing.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
I have applied your naming schemes.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// TODO: ADDQP // not sure if we care about stray white space in asm testdata, but this is stray whitespace.
// you should subtract obj.RBaseARM64 first. From this difference, bit 11bit 11, or bit 12?
// Special registers, after subtracting obj.RBaseARM64, bit 12 indicates12, or 13?
ALASTI am curious where this went, wondering if it landed in an SVE-related file.
Yes, it did.
// license that can be found in the LICENSE file.Not sure what happened here, caused a bunch of failures.
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>If we could reverse this, I think that would be good.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {I don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
Junyang ShaoAcked, they will all go to the new CL, thanks
David ChaseI just realize - this means those magic strings for operands and elements is going to have a "isMergePred" or "M" or "MP", etc.
I am not sure this is a good idea, a uniformed ID number looks easier for the user to locate the encoding functions.
Junyang ShaoThe number is not better. "isMergePred" is no worse than "encodeGen8" in those magic strings, and might be better -- it means it isn't doing any actual encoding, for example.
I am not sure why do you think this is better.
Let's see an example here:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
{
goOp: AZABS,
fixedBits: 0x416a000,
args: oc_ARNG_173_226_173_226_PREGZM_141_8_141_8_ARNG_123_226_123_226, // <Zn>.<T>, <Pg>/M, <Zd>.<T>
},
```
In your new naming scheme, it will appear to be:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
not sure if we care about stray white space in asm testdata, but this is stray whitespace.
Done
// you should subtract obj.RBaseARM64 first. From this difference, bit 11bit 11, or bit 12?
Thanks for noticing this, yes it should be bit 12
// Special registers, after subtracting obj.RBaseARM64, bit 12 indicatesJunyang Shao12, or 13?
Done
Not sure what happened here, caused a bunch of failures.
Hmm I didn't recall that I wrote it like that, maybe my antigravity is naughty.
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>If we could reverse this, I think that would be good.
This is handy for the users to locate the ARM spec (they are from ARM spec directly), are we sure that's what the user want?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>Junyang ShaoIf we could reverse this, I think that would be good.
This is handy for the users to locate the ARM spec (they are from ARM spec directly), are we sure that's what the user want?
We should use one set of conventions in a package. Otherwise it is just confusing. As this is the Go assembler, we should use Go syntax everywhere.
The Go-GNU syntax translation should be included in doc.go.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>Junyang ShaoIf we could reverse this, I think that would be good.
Cherry MuiThis is handy for the users to locate the ARM spec (they are from ARM spec directly), are we sure that's what the user want?
We should use one set of conventions in a package. Otherwise it is just confusing. As this is the Go assembler, we should use Go syntax everywhere.
The Go-GNU syntax translation should be included in doc.go.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// tryEncode tries to encode p into the instruction encoding i represents,encode p with i
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// tryEncode tries to encode p to i, it returns the encoded binary and ok signal.Junyang ShaoThis doesn't sound right. i (as an Inst) doesn't seem to carry data, so "encode p to i" is not right. Again, we should rename the type.
May you elaborate more why this isn't right?
I changed the comment, does this look right to you now?
Done
// tryEncode tries to encode p into the instruction encoding i represents,Junyang Shaoencode p with i
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
REG_ZARNG // Zn.<T>
REG_PARNG // Pn.<T> or Pn/M, Pn/ZAre these masks? Merge mask and zero mask? Maybe they should not be called arrangement (ARNG).
AB = obj.AJMP
ABL = obj.ACALLIt is fine to keep these two here. It has nothing to do with SVE.
ARNG_M
ARNG_ZSame here. If these are not arrangements, give them a different name.
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {Junyang ShaoI don't have a great idea for all of these encodeGen functions, but the ones that just check, could be named for what they check, so
```
func isMergePred(...)
```
It's not a complete solution, but everywhere that they appear, someone reading will know that much just from looking at them. If "8" has significance, that can be mentioned in the comment.
Junyang ShaoAcked, they will all go to the new CL, thanks
David ChaseI just realize - this means those magic strings for operands and elements is going to have a "isMergePred" or "M" or "MP", etc.
I am not sure this is a good idea, a uniformed ID number looks easier for the user to locate the encoding functions.
Junyang ShaoThe number is not better. "isMergePred" is no worse than "encodeGen8" in those magic strings, and might be better -- it means it isn't doing any actual encoding, for example.
I am not sure why do you think this is better.
Let's see an example here:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
{
goOp: AZABS,
fixedBits: 0x416a000,
args: oc_ARNG_173_226_173_226_PREGZM_141_8_141_8_ARNG_123_226_123_226, // <Zn>.<T>, <Pg>/M, <Zd>.<T>
},
```
In your new naming scheme, it will appear to be:
```
// ABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
Is this resolved? As of PS39, the encoding functions are still named with numbers.
APAND obj.As = AAUTIB1716 + 1 + iotaInstead of using a specific instruction, it is probably better to define something like ASVEStart in a.out.go, to mark the start of SVE instructions. Then it doesn't need to change if we add more non-SVE instructions.
elemEncoders []func(uint32) (uint32, component, bool)Does the "component" result depend on the input of the function? It seems every encode function always returns the same value? If this is the case, it seems a bit weird to write it as a return value. Maybe use a separate field?
// Right now AClass is just stored at the lower 5 bits of the Offset field...Where is it stored?
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoWhat is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
Cherry Mui`index` is an index into the element list, you can find it in `inst_gen.go`.
Junyang ShaoIt looks like it is to identify a component of an Addr, like the "Zm" or "T" from a "Zm.T"? Add a comment explain this.
This function doesn't actually encode the Addr, but extract the component. So the function probably should be renamed to addrComponent or so.
This also makes me wonder: should we just "flatten" it? I.e. instead of iterating first on Addr, then on the components of it, just directly iterate on the components. Same for the encoder side: one flattened list of component encoders, instead of two levels ([]Operand, then []elemEncoder).
I think the switch case comment documented this pretty well, but to make it clearer I also copy-pasted it to the function comment.
I am not sure I understand the flattening idea. What do you mean iterate on components? Before we iterate the `obj.Addr`s, we simply won't know its `AClass`, and we won't know its component layouts. And the assembling pipeline upstream gave us a list of `obj.Addr`s, not a list of components. So we have to iterate `obj.Addr`s first.
And we won't be able to remove the switch case too, because every index has its unique semantics for every `AClass`, so we need a place to check those `{AClass, index}` pairs and it will eventually be a switch case.
Thanks. The function should probably still be renamed, to addrComponent or so. It doesn't actually encode an Addr.
for _, op := range i.args {Instead of range over i.args, we can flip and range over opsInProg(p). Then we don't need a pull iterator. We can directly index into i.args.
While this code is not performance critical, a coroutine for every instruction is probably still too much.
case ARNG_M:
return "M"
case ARNG_Z:
return "Z"Are these masks? Merge mask and zero mask?
case ARNG_M:
return "M"
case ARNG_Z:
return "Z"Are these masks? Merge mask and zero mask?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
REG_PARNG // Pn.<T> or Pn/M, Pn/ZAre these masks? Merge mask and zero mask? Maybe they should not be called arrangement (ARNG).
`REG_ZARNG` is Z register with arrangement, it's a vector register.
`REG_PARNG` is a combination of P register with arrangement and P register with Masking and Zeroing predications.
I can call it `REG_PARNGZM` to note that they are such a combination.
P with arrangement and P with predication have exactly the same `Prog` layout pattern, that's why I merged them.
AB = obj.AJMP
ABL = obj.ACALLIt is fine to keep these two here. It has nothing to do with SVE.
Done
Same here. If these are not arrangements, give them a different name.
Done
// Check this is a merging predication
func encodeGen8(v uint32) (uint32, BinarySymbolName, bool) {Hope you like the new names.
Instead of using a specific instruction, it is probably better to define something like ASVEStart in a.out.go, to mark the start of SVE instructions. Then it doesn't need to change if we add more non-SVE instructions.
Done
Does the "component" result depend on the input of the function? It seems every encode function always returns the same value? If this is the case, it seems a bit weird to write it as a return value. Maybe use a separate field?
Done
// Right now AClass is just stored at the lower 5 bits of the Offset field...Junyang ShaoWhere is it stored?
Stale comment.
func encodeAddr(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoWhat is the meaning if the index? Why an Addr has multiple elements? How are they indexed?
Cherry Mui`index` is an index into the element list, you can find it in `inst_gen.go`.
Junyang ShaoIt looks like it is to identify a component of an Addr, like the "Zm" or "T" from a "Zm.T"? Add a comment explain this.
This function doesn't actually encode the Addr, but extract the component. So the function probably should be renamed to addrComponent or so.
This also makes me wonder: should we just "flatten" it? I.e. instead of iterating first on Addr, then on the components of it, just directly iterate on the components. Same for the encoder side: one flattened list of component encoders, instead of two levels ([]Operand, then []elemEncoder).
Cherry MuiI think the switch case comment documented this pretty well, but to make it clearer I also copy-pasted it to the function comment.
I am not sure I understand the flattening idea. What do you mean iterate on components? Before we iterate the `obj.Addr`s, we simply won't know its `AClass`, and we won't know its component layouts. And the assembling pipeline upstream gave us a list of `obj.Addr`s, not a list of components. So we have to iterate `obj.Addr`s first.
And we won't be able to remove the switch case too, because every index has its unique semantics for every `AClass`, so we need a place to check those `{AClass, index}` pairs and it will eventually be a switch case.
Thanks. The function should probably still be renamed, to addrComponent or so. It doesn't actually encode an Addr.
Done
Instead of range over i.args, we can flip and range over opsInProg(p). Then we don't need a pull iterator. We can directly index into i.args.
While this code is not performance critical, a coroutine for every instruction is probably still too much.
return "M"
case ARNG_Z:
return "Z"Are these masks? Merge mask and zero mask?
Done
case ARNG_M:
return "M"
case ARNG_Z:
return "Z"Are these masks? Merge mask and zero mask?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
if s == "SVESTART" {
sveStartIndex = i
}
if sveStartIndex != -1 {
instructions[s] = arm64.ASVESTART + obj.As(i) - obj.As(sveStartIndex)Why this code needs to know SVESTART? It should still be sequential, right?
case op > arm64.ASVESTART:Probably define an IsARM64SVE helper function.
ZABS Z11.B, P5.M, Z6.B // 66b51604
ZABS Z11.B, P5.Z, Z6.B // 66b50604
ZADCLB Z1.S, Z26.S, Z25.S // 59d30145Minor: would be good to align the comments.
ASVESTART = AAUTIB1716 + 1Define this right after AAUTIB1716, so it doesn't need explicit +1.
var sveOptab = Optab{0, C_GOK, C_GOK, C_GOK, C_GOK, C_GOK, 120, 4, 0, 0, 0}Probably define a case number that is far from others, so we have plenty of room to add instructions if necessary.
// ABS <Zn>.<T>, <Pg>/M, <Zd>.<T>Could we use the Go assembly name, ZABS?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
sveStartIndex = i
}
if sveStartIndex != -1 {
instructions[s] = arm64.ASVESTART + obj.As(i) - obj.As(sveStartIndex)Why this code needs to know SVESTART? It should still be sequential, right?
Yes, that was some stale code I was stumbled on, removed.
Probably define an IsARM64SVE helper function.
Done
ZABS Z11.B, P5.M, Z6.B // 66b51604
ZABS Z11.B, P5.Z, Z6.B // 66b50604
ZADCLB Z1.S, Z26.S, Z25.S // 59d30145Minor: would be good to align the comments.
Done
Define this right after AAUTIB1716, so it doesn't need explicit +1.
Done
var sveOptab = Optab{0, C_GOK, C_GOK, C_GOK, C_GOK, C_GOK, 120, 4, 0, 0, 0}Probably define a case number that is far from others, so we have plenty of room to add instructions if necessary.
I think the largest we can do for `int8` is 127, I set it to 127.
// (tszh :: tszl)Junyang ShaoWhat is this?
It's the component symbol string, handy for the user and myself to map back to the assembly. See one example in this file:
https://developer.arm.com/documentation/ddi0602/2025-12/SVE-Instructions/SRI--Shift-right-and-insert--immediate--?lang=en
If you think they are not necessary, I have removed them.
Could we use the Go assembly name, ZABS?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// (tszh :: tszl)Junyang ShaoWhat is this?
It's the component symbol string, handy for the user and myself to map back to the assembly. See one example in this file:
https://developer.arm.com/documentation/ddi0602/2025-12/SVE-Instructions/SRI--Shift-right-and-insert--immediate--?lang=enIf you think they are not necessary, I have removed them.
I didn't mean to remove all the comments (but it is fine to remove as it is just repeating the constant name). I was wondering what "tszh :: tszl" specifically means. Others are pretty understandable.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// (tszh :: tszl)Junyang ShaoWhat is this?
Cherry MuiIt's the component symbol string, handy for the user and myself to map back to the assembly. See one example in this file:
https://developer.arm.com/documentation/ddi0602/2025-12/SVE-Instructions/SRI--Shift-right-and-insert--immediate--?lang=enIf you think they are not necessary, I have removed them.
I didn't mean to remove all the comments (but it is fine to remove as it is just repeating the constant name). I was wondering what "tszh :: tszl" specifically means. Others are pretty understandable.
Ack. Since you mentioned it's ok to remove, so I think this comment is resolved.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I think these should all be easy, some of them are fixed in the generating CL. +1 for now, will do another pass later.
} else if reg <= REG_Z31 && reg >= REG_Z0 {It is a minor thing, but I try to write these in the style `LOW <= x && x <= HIGH` to make it even more obvious that it is one variable (x) in the range LOW <= x <= HIGH (which is what we would write as informal mathematics, if not legal Go).
But, see below, I notice that you are mimicking the existing style. Sigh.
} else if reg <= REG_PN15 && reg >= REG_P0 {Same as above, `REG_P0 <= reg && reg <= REG_PN15`.
} else if reg <= REG_V31 && reg >= REG_V0 {and here I see that you were just matching the unfortunate style of whoever it was who was here earlier. Oh well.
// Check this is a B arrangement
func encodeArngBCheck(v uint32) (uint32, bool) {Change the generated comment to start with the function name so that these are all doc comments, even though not exported:
```
// encodeArngBCheck checks that v is a B arrangement
```
// For the "Byte and halfword" variant: is the size specifier,I think this (and others begining "For the..." below) should read
```
// encodeSzByteHalfword encodes the size specifier for the "Byte and halfword" variant
// Is an arrangement specifier,I think this should at least lead with the name of the function, I am less sure of the best systematic rewrite (since this is auto-generated). But perhaps:
```
// encodeSize16B8H4S2D encodes an arrangement specifier,
```
// Is the name of the destination SIMD&FP register, encoded in the "Vd" field.For comments in this style, "Is the name of ... , encoded in ...", perhaps
```
// encodeVd encodes the name of the destination SIMD&FP register, in the "Vd" field.
```
I am less thrilled about the comma, but looking ahead, if applied mechanically, it probably needs to be there. Possibly the comma insertion could be conditional on a comma in the preceding text.
// Is the name of the destination scalable predicate register PN8-PN15, with predicate-as-counter encoding, encoded in the "PNd" field.Similarly (this one is more complex), trying:
```
// encodePNd encodes the name of the destination scalable predicate register PN8-PN15, with predicate-as-counter encoding, in the "PNd" field.
```
func encodePn592(v uint32) (uint32, bool) {Change the name of encode functions with multiple versions to include a "v<number>" suffix, in this case "encodePn59v2". Use "v" because (1) underscore would make the concatenated names (that include underscores) even more confusing to read and (2) there is no conflict with "v" followed by a number in the other encodings.
ALSO, for the first version of this function (similarly for all others with variants) use a "v1" suffix so that anyone who sees that one will know that there are others, and that, if there is no "v<number>" suffix, that there are no others.
// the first encoding domain entails the rest 2. And at instruction matching phase we simply"other 2" ?
panic(fmt.Sprintf("unknown elm index at %d in AClass %d", index, acl))fmt.Errorf would be better I think (here and elsewhere).
// Some elements are encoded in the same symbol, they need to be equal.I think the words here need an update, one of them is now "component" I think, since the map key is "component". Are components encoded into symbols, or elements encoded into components?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
It is a minor thing, but I try to write these in the style `LOW <= x && x <= HIGH` to make it even more obvious that it is one variable (x) in the range LOW <= x <= HIGH (which is what we would write as informal mathematics, if not legal Go).
But, see below, I notice that you are mimicking the existing style. Sigh.
Done
Same as above, `REG_P0 <= reg && reg <= REG_PN15`.
Done
// Check this is a B arrangement
func encodeArngBCheck(v uint32) (uint32, bool) {Change the generated comment to start with the function name so that these are all doc comments, even though not exported:
```
// encodeArngBCheck checks that v is a B arrangement
```
Done
// For the "Byte and halfword" variant: is the size specifier,I think this (and others begining "For the..." below) should read
```
// encodeSzByteHalfword encodes the size specifier for the "Byte and halfword" variant
Done
I think this should at least lead with the name of the function, I am less sure of the best systematic rewrite (since this is auto-generated). But perhaps:
```
// encodeSize16B8H4S2D encodes an arrangement specifier,
```
For the sake of not parsing natural language, I just make them uniformly:
```
// {{.Name}} is the implementation of the following encoding logic:
// {{.OriginalComment}}
```
// Is the name of the destination SIMD&FP register, encoded in the "Vd" field.For comments in this style, "Is the name of ... , encoded in ...", perhaps
```
// encodeVd encodes the name of the destination SIMD&FP register, in the "Vd" field.
```I am less thrilled about the comma, but looking ahead, if applied mechanically, it probably needs to be there. Possibly the comma insertion could be conditional on a comma in the preceding text.
Done
// Is the name of the destination scalable predicate register PN8-PN15, with predicate-as-counter encoding, encoded in the "PNd" field.Similarly (this one is more complex), trying:
```
// encodePNd encodes the name of the destination scalable predicate register PN8-PN15, with predicate-as-counter encoding, in the "PNd" field.
```
Done
Change the name of encode functions with multiple versions to include a "v<number>" suffix, in this case "encodePn59v2". Use "v" because (1) underscore would make the concatenated names (that include underscores) even more confusing to read and (2) there is no conflict with "v" followed by a number in the other encodings.
ALSO, for the first version of this function (similarly for all others with variants) use a "v1" suffix so that anyone who sees that one will know that there are others, and that, if there is no "v<number>" suffix, that there are no others.
Done
// the first encoding domain entails the rest 2. And at instruction matching phase we simplyJunyang Shao"other 2" ?
Done
panic(fmt.Sprintf("unknown elm index at %d in AClass %d", index, acl))fmt.Errorf would be better I think (here and elsewhere).
Done
// Some elements are encoded in the same symbol, they need to be equal.I think the words here need an update, one of them is now "component" I think, since the map key is "component". Are components encoded into symbols, or elements encoded into components?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Would be good to have a test for the error case. It should error out, and should not panic.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
Would be good to have a test for the error case. It should error out, and should not panic.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
SETFFR // 00902c25This instruction does not have an error case, because it has no operand, as long as the mnemonic is correct it is correct.