cmd/compile: add new arm64 assembler data structure
These data structures will be used for SVE/SVE2.
Original author Eric Fang (@eric...@arm.com)
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index 814dba2..ef6c3d0 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -39,7 +39,8 @@
NFREG = 32 /* number of floating point registers */
)
-// General purpose registers, kept in the low bits of Prog.Reg.
+// Arm64 registers, the order matters, make sure that each
+// kind of register starts numbering from the lowest bit.
const (
// integer
REG_R0 = obj.RBaseARM64 + iota
@@ -143,6 +144,78 @@
REG_V30
REG_V31
+ // SVE (Scalable Vector Extension) scalable vector registers
+ REG_Z0
+ REG_Z1
+ REG_Z2
+ REG_Z3
+ REG_Z4
+ REG_Z5
+ REG_Z6
+ REG_Z7
+ REG_Z8
+ REG_Z9
+ REG_Z10
+ REG_Z11
+ REG_Z12
+ REG_Z13
+ REG_Z14
+ REG_Z15
+ REG_Z16
+ REG_Z17
+ REG_Z18
+ REG_Z19
+ REG_Z20
+ REG_Z21
+ REG_Z22
+ REG_Z23
+ REG_Z24
+ REG_Z25
+ REG_Z26
+ REG_Z27
+ REG_Z28
+ REG_Z29
+ REG_Z30
+ REG_Z31
+
+ // SVE scalable predicate registers
+ REG_P0
+ REG_P1
+ REG_P2
+ REG_P3
+ REG_P4
+ REG_P5
+ REG_P6
+ REG_P7
+ REG_P8
+ REG_P9
+ REG_P10
+ REG_P11
+ REG_P12
+ REG_P13
+ REG_P14
+ REG_P15
+
+ // SVE scalable predicate registers, with predicate-as-counter encoding.
+ // These are actually P registers, but encoded differently.
+ // To distinguish them from P registers, define them as PN registers.
+ REG_PN0
+ REG_PN1
+ REG_PN2
+ REG_PN3
+ REG_PN4
+ REG_PN5
+ REG_PN6
+ REG_PN7
+ REG_PN8
+ REG_PN9
+ REG_PN10
+ REG_PN11
+ REG_PN12
+ REG_PN13
+ REG_PN14
+ REG_PN15
+
REG_RSP = REG_V31 + 32 // to differentiate ZR/SP, REG_RSP&0x1f = 31
)
@@ -250,6 +323,24 @@
REG_R29: 29,
REG_R30: 30,
+ // SVE predicate registers
+ REG_P0: 48,
+ REG_P1: 49,
+ REG_P2: 50,
+ REG_P3: 51,
+ REG_P4: 52,
+ REG_P5: 53,
+ REG_P6: 54,
+ REG_P7: 55,
+ REG_P8: 56,
+ REG_P9: 57,
+ REG_P10: 58,
+ REG_P11: 59,
+ REG_P12: 60,
+ REG_P13: 61,
+ REG_P14: 62,
+ REG_P15: 63,
+
// floating point
REG_F0: 64,
REG_F1: 65,
@@ -317,6 +408,40 @@
REG_V29: 93,
REG_V30: 94,
REG_V31: 95,
+
+ // SVE vector registers
+ REG_Z0: 96,
+ REG_Z1: 97,
+ REG_Z2: 98,
+ REG_Z3: 99,
+ REG_Z4: 100,
+ REG_Z5: 101,
+ REG_Z6: 102,
+ REG_Z7: 103,
+ REG_Z8: 104,
+ REG_Z9: 105,
+ REG_Z10: 106,
+ REG_Z11: 107,
+ REG_Z12: 108,
+ REG_Z13: 109,
+ REG_Z14: 110,
+ REG_Z15: 111,
+ REG_Z16: 112,
+ REG_Z17: 113,
+ REG_Z18: 114,
+ REG_Z19: 115,
+ REG_Z20: 116,
+ REG_Z21: 117,
+ REG_Z22: 118,
+ REG_Z23: 119,
+ REG_Z24: 120,
+ REG_Z25: 121,
+ REG_Z26: 122,
+ REG_Z27: 123,
+ REG_Z28: 124,
+ REG_Z29: 125,
+ REG_Z30: 126,
+ REG_Z31: 127,
}
const (
@@ -485,6 +610,46 @@
C_XPOST = 1 << 5 // match arm.C_PBIT, so Prog.String know how to print it
)
+type AClass uint16 // AClass is the class (kind) of an instruction operand
+
+// The classification table below will eventually replace the classification table above.
+// instTab is sorted by the order of these constants, and the first matching entry is chosen.
+//
+//go:generate stringer -type AClass -trimprefix AC_
+const (
+ AC_NONE AClass = iota
+ AC_REG // general purpose registers R0..R30 and ZR
+ AC_RSP // general purpose registers R0..R30 and RSP
+ AC_FREG // floating point registers, such as F1
+ AC_VREG // vector registers, such as V1
+ AC_ZREG // the scalable vector registers, such as Z1
+ AC_PREG // the scalable predicate registers, such as P1
+ AC_PNREG // the scalable predicate registers, with predicate-as-counter encoding, such as PN1
+ AC_PREGM // predicate register with the /M (merging) qualifier, written Pg/M
+ AC_PREGZ // predicate register with the /Z (zeroing) qualifier, written Pg/Z
+ AC_REGIDX // register with index, such as P8[1]
+ AC_PAIR // register pair, such as (R1, R3)
+ AC_REGSHIFT // general purpose register with shift, such as R1<<2
+ AC_REGEXT // general purpose register with extend, such as R7.SXTW<<1
+ AC_ARNG // vector register with arrangement, such as V11.D2
+ AC_ARNGIDX // vector register with arrangement and index, such as V12.D[1]
+
+ AC_IMM // immediate constants
+
+ AC_REGLIST1 // list of 1 vector register, such as [V1]
+ AC_REGLIST2 // list of 2 vector registers, such as [V1, V2], [Z0, Z8]
+ AC_REGLIST3 // list of 3 vector registers, such as [V1, V2, V3]
+ AC_REGLIST4 // list of 4 vector registers, such as [V1, V2, V3, V4], [Z0, Z4, Z8, Z12]
+ AC_LISTIDX // register list with index, such as [V1.B, V2.B][2]
+
+ AC_MEMIMM // address with optional offset, where the offset is an immediate, such as 4(R1)
+ AC_MEMIMMEXT // address with optional offset, where the offset is an immediate with extension, such as (2*VL)(R1)
+ AC_MEMEXT // address with extended register offset, such as (R2)(R5.SXTX<<1)
+ AC_MEMPOSTIMM // address of the post-index class, where the offset is an immediate
+ AC_MEMPOSTREG // address of the post-index class, where the offset is a register
+ AC_MEMPREIMM // address of the pre-index class, where the offset is an immediate
+)
+
//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p arm64
const (
diff --git a/src/cmd/internal/obj/arm64/inst.go b/src/cmd/internal/obj/arm64/inst.go
new file mode 100644
index 0000000..4402595
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/inst.go
@@ -0,0 +1,118 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package arm64
+
+import "cmd/internal/obj"
+
+// Inst describes one instruction: its opcodes, required feature, fixed and variable encoding bits, and operands.
+type Inst struct {
+ GoOp obj.As // Go opcode mnemonic
+ ArmOp A64Type // Arm64 opcode mnemonic
+ Feature uint16 // architecture extension; NOTE(review): presumably one of the FEAT_* constants — confirm
+ FixedBits uint32 // known (fixed) bits of the encoding
+ // unknown (variable) bit fields, keyed by field name
+ VarBits map[string]VarBits
+ Mask uint32 // mask for disassembly, 1 for known bits, 0 for unknown bits
+ Alias bool // whether it is an alias
+ Args []Operand // operands, in Go order
+}
+
+type VarBits struct {
+ // lo and hi are the low and high bit indexes of this field in the binary encoding; hi is exclusive.
+ lo, hi int
+ encoded bool // if true, bits already holds the encoded value
+ bits uint32 // encoded value; NOTE(review): presumably already shifted into position, since bin ORs it in directly — confirm
+}
+
+// Operand describes one operand of an instruction.
+type Operand struct {
+ Class AClass // operand class: register, constant, memory operation, etc.
+ Elms []ElmType // the elements that make up this operand
+}
+
+// A64Type is the Arm64 opcode type. An Arm64 opcode is prefixed with "A64";
+// a Go opcode is defined as a constant and is prefixed with "A".
+type A64Type uint16
+
+// ElmType is the element type. An element represents a symbol of a specific encoding form,
+// such as <Xn>, #<uimm4>, <T>.
+type ElmType struct {
+ // The natural-language description of the encoding.
+ // e.g. '''
+ // <T> Is the size specifier, encoded in size:
+ //
+ // size <T>
+ // 00 B
+ // 01 H
+ // 10 S
+ // 11 D
+ // '''
+ encodingText string
+ encodingName string // key of the corresponding entry in VarBits
+ // The AI-implemented encoding function for this element.
+ encodingFunc func(uint32) uint32
+}
+
+func (i *Inst) bin() uint32 { // bin returns FixedBits OR'ed with the bits of every VarBits entry
+ bits := i.FixedBits
+ for _, v := range i.VarBits { // map order is unspecified, which is fine because OR is order-independent
+ if !v.encoded {
+ panic("encoding incomplete") // every variable field must be encoded before the word is assembled
+ }
+ bits |= v.bits
+ }
+ return bits
+}
+
+type Icmp []Inst // Icmp is a slice of Inst with Len, Swap and Less methods for sorting
+
+func (x Icmp) Len() int { // Len returns the number of entries
+ return len(x)
+}
+
+func (x Icmp) Swap(i, j int) { // Swap exchanges entries i and j
+ x[i], x[j] = x[j], x[i]
+}
+
+func (x Icmp) Less(i, j int) bool { // Less reports whether entry i sorts before entry j
+ p1 := &x[i]
+ p2 := &x[j]
+ if p1.GoOp != p2.GoOp { // first key: Go opcode
+ return p1.GoOp < p2.GoOp
+ }
+ if len(p1.Args) != len(p2.Args) { // second key: number of operands
+ return len(p1.Args) < len(p2.Args)
+ }
+ for k := 0; k < len(p1.Args); k++ { // third key: operand classes, compared in order
+ if p1.Args[k].Class != p2.Args[k].Class {
+ return p1.Args[k].Class < p2.Args[k].Class
+ }
+ }
+ if p1.FixedBits != p2.FixedBits { // fourth key: fixed encoding bits
+ return p1.FixedBits < p2.FixedBits
+ }
+ if p1.Mask != p2.Mask { // last key: disassembly mask
+ return p1.Mask < p2.Mask
+ }
+ return false // all compared fields are equal
+}
+
+// These constants represent Arm A-profile architecture extensions. For details,
+// please refer to the Arm A-profile Architecture Reference Manual.
+// Update this table if new extensions are found when parsing the XML files.
+const (
+ FEAT_NONE uint16 = iota
+ FEAT_SVE2p1
+ // Scalable Vector AES instructions.
+ FEAT_SVE_AES
+ FEAT_SVE_B16B16
+ // Scalable Vector Bit Permutes instructions.
+ FEAT_SVE_BitPerm
+ // Scalable Vector PMULL instructions.
+ FEAT_SVE_PMULL128
+ // Scalable Vector SHA3 instructions.
+ FEAT_SVE_SHA3
+ FEAT_SVE_SM4
+)
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
cmd/compile: add new arm64 assembler data structure
These data structures will be used for SVE/SVE2.
Original author Eric Fang (@eric...@arm.com)
index 0000000..4ccef4a
--- /dev/null
+++ b/src/cmd/internal/obj/arm64/inst.go
@@ -0,0 +1,109 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package arm64
+
+import "cmd/internal/obj"
+
+// Inst describes one instruction: its opcodes, required feature, fixed and variable encoding bits, and operands.
+type Inst struct {
+ GoOp obj.As // Go opcode mnemonic
+ ArmOp A64Type // Arm64 opcode mnemonic
+ Feature uint16 // Architecture extension, such as "FEAT_SVE"
+ FixedBits uint32 // Known (fixed) bits of the encoding
+ // Unknown (variable) bit fields, keyed by field name
+ VarBits map[string]VarBits
+ Mask uint32 // Mask for disassembly, 1 for known bits, 0 for unknown bits
+ Alias bool // Whether it is an alias
+ Args []Operand // Operands, in Go order
+}
+
+type VarBits struct {
+ // lo and hi are the low and high bit indexes of this field in the binary encoding; hi is exclusive.
+ lo, hi int
+ encoded bool // If true, bits already holds the encoded value
+ bits uint32 // Encoded value of this field
+}
+
+// Operand describes one operand of an instruction.
+type Operand struct {
+ Class AClass // Operand class: register, constant, memory operation, etc.
+ Text string // The text representation of this operand, e.g. <Pg>/Z.
+ // The elements that make up this operand; only the encoding-related parts are included.
+ Elms []ElmType
+}
+
+// A64Type is the Arm64 opcode type. An Arm64 opcode is prefixed with "A64";
+// a Go opcode is defined as a constant and is prefixed with "A".
+type A64Type uint16
+
+// ElmType is the element type, an element represents a symbol of a specific encoding form,
+// such as <Xn>, #<uimm4>, <T>.
+type ElmType struct {
+ encodingName string // The key in VarBits| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Ah, I am sorry I put these CLs in the main branch.
I will cherry-pick them to dev.simd later.
I am not expecting code changes so please proceed with reviews 😊
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {
switch acl {The assembly parser will need to follow the assumption of this function when filling in progs.
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {
switch acl {The assembly parser will need to follow the assumption of this function when filling in progs.
I am thinking about just making SVE a special path in the parser as well.
That will make the implementation much easier 😕.
There might be slightly more code and a larger binary, but I assume it wouldn't be more than 1k LOC, so it should be negligible.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
// They are represented as a list of pointer to the encoding funcI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
// Offset[5:8] = arrangement or predicationThis is like Go slice notation, so 5:8 == {5,6,7}?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// They are represented as a list of pointer to the encoding funcI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
Thanks for the review!
What is your worry about this interpretation? I think OR-ing bits together is what the assembler does all the time.
// Offset[5:8] = arrangement or predicationThis is like Go slice notation, so 5:8 == {5,6,7}?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
REG_Z0 = obj.RBaseARM64 + 768 + iota
REG_Z1
REG_Z2Maybe RSP+1 as the range start point
// aclass returns the AClass of an Addr.
// Right now AClass is just stored at the lower 5 bits of the Offset field...
func aclass(a *obj.Addr) AClass {
return AClass(a.Offset & 0x1f)
}Put into any
Original author Eric Fang (@eric...@arm.com)Maybe reference CL number. Also remove the "@" before the email.
// SVE(Scalable Vector Extension) scalable vector registersAdd a space before (.
// We use an offset of 768 to avoid colliding with other registers.It is better for the registers to be consecutive. We could change others, LSL or ARNG. I still don't think they should be registers (but that can be addressed later).
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.The question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
}Why not P registers? DWARF doesn't understand them?
// The classification table below will eventually replace the classification table above.It is unclear whether this is the goal. Integer instructions can still use the old path, and there are special logic there, especially handling large constants and offsets. I think that should be evaluated separately.
AC_PREGM // Pg/MUse Go syntax. Have we decided to use "/" syntax? That seems a bit weird.
AC_MEMIMM // address with optional offset, the offset is an immediate, such as 4(Z1.D)"address with optional constant offset"?
Maybe name it AC_MEMOFF?
AC_MEMIMMEXT // address with optional offset, the offset is an immediate with extension, such as (2*VL)(Z1.D)The example doesn't look like "immediate with extension".
AC_MEMEXT // address with extend offset, such as (Z2.D.UXTW<<3)(RSP)"address with register offset with extension"?
AC_VL // VLxi pattern, one of: VLx2, VLx4.Is this multiply? Use `*` to match the assembly syntax.
AC_PREFETCH // Prefetch pattern, such as PLDL1KEEP
Limiting it to prefetch seems too special. There are other special symbolic operands (and there will probably be more in the future). Maybe AC_SPECIAL.
type Inst struct {Most types probably could be unexported.
Alias bool // Whether it is an aliasWhy the assembler needs to know this?
type A64BinarySymbolName uint16The "A64" prefix doesn't seem helpful. This is the arm64 assembler, so everything is A64. Drop them, everywhere.
ElemEncodings []func(uint32) (uint32, A64BinarySymbolName, bool)elemEncoders
Perhaps use "encoder" for encoding functions, everywhere.
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
This seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
// a64ElmBinFrom returns the binary of the stored elm in a at index, for operand of type aclass.What is "the stored elm"? "Element", or at least "elem", is probably more clear, at least in comment. (For variables, feel free to choose the name.)
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {Maybe "encodeAddr"?
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation and then work only with Inst and Operand, not both at the same time in most code.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
Please move the reviews to CL 748600
Maybe reference CL number. Also remove the "@" before the email.
Done
// SVE(Scalable Vector Extension) scalable vector registersAdd a space before (.
Done
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.The question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Initially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back; in that case I think we want the user to write them as `P` or `PN` specifically.
An imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
Why not P registers? DWARF doesn't understand them?
I think P registers are listed above?
// The classification table below will eventually replace the classification table above.It is unclear whether this is the goal. Integer instructions can still use the old path, and there are special logic there, especially handling large constants and offsets. I think that should be evaluated separately.
That's right, I have removed the comment, thanks.
Use Go syntax. Have we decided to use "/" syntax? That seems a bit weird.
Sorry, yes they should be Pg.M and Pg.Z
AC_MEMIMM // address with optional offset, the offset is an immediate, such as 4(Z1.D)"address with optional constant offset"?
Maybe name it AC_MEMOFF?
Done
AC_MEMIMMEXT // address with optional offset, the offset is an immediate with extension, such as (2*VL)(Z1.D)The example doesn't look like "immediate with extension".
`2` is the immediate here.
This is a concrete example in GNU mnemonic:
`[<Xn|SP>{, #<imm>, MUL VL}]`.
`MUL VL` is the hardware vector size extension denotation.
Actually this is the only form of this class, I changed its name to `AC_MEMOFFMULVL`
AC_MEMEXT // address with extend offset, such as (Z2.D.UXTW<<3)(RSP)"address with register offset with extension"?
Done
Is this multiply? Use `*` to match the assembly syntax.
Done
Limiting it to prefetch seems to special. There are other special symbolic operands (and probably be more in the future). Maybe AC_SPECIAL.
Hmm, you are right. I also merged it with AC_VL; they are both now AC_SPECIAL.
Most types probably could be unexported.
Done
Why the assembler needs to know this?
That probably is for the disassembler, I have removed it.
The "A64" prefix doesn't seem helpful. This is the arm64 assembler, so everything is A64. Drop them, everywhere.
Done
// They are represented as a list of pointer to the encoding funcJunyang ShaoI need to know more about this. Looking at the code that calls these functions, it looks like some operands encode into more than one set of bits, and each function generates some more bits, and they must all be OR'd together. But I don't have a lot of confidence in that interpretation, so I think what ever is really going on (including this, if my guess is right) ought to be mentioned here.
And also, all the encoding functions need to work, otherwise, it's a failed encoding.
Thanks for the review!
What is your worry about this interpretation? I think OR-ing bits together is what the assembler do all the time.
Done
ElemEncodings []func(uint32) (uint32, A64BinarySymbolName, bool)elemEncoders
Perhaps use "encoder" for encoding functions, everywhere.
Done
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
This seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Yeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
// a64ElmBinFrom returns the binary of the stored elm in a at index, for operand of type aclass.What is "the stored elm"? "Element", or at least "elem", is probably more clear, at least in comment. (For variables, feel free to choose the name.)
Chose "element"
func a64ElmBinFrom(a *obj.Addr, acl AClass, index int) uint32 {Junyang ShaoMaybe "encodeAddr"?
Done
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst. In asm7 it is scattered around various switch cases; here they are just processed in a centralized stage, which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Apparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.Junyang ShaoThe question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Initially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back, in that case I think we want to user to write them as `P` or `PN` specifically.
A imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
The question is whether they need to be distinguishable. Do the two instructions do exactly the same thing, just with different encodings? If they do exactly the same thing we can just choose one encoding. It is okay if we never encode the other form.
I think it's safe to remove them then.
Let's try that.
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
Junyang ShaoThis seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Yeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
If you need to iterate over operands, you probably could write it as an iterator. Something like
```
func operands(p *Prog) (...) {
return func(yield func(*Addr) bool) {
if p.From.Type != obj.TYPE_NONE {
if !yield(&p.From) { return }
}
for _, a := range p.RestArgs {
if !yield(&a.Addr) { return }
}
...
}
}
```
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {Junyang ShaoJust "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation them work only with Inst and Operand, not both at same time in most code.
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain), it's just like an optab entry. So this is the logic to match a prog to an Inst, and along the matching, if it's a success then it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst, in asm7 it is scattered around various of switch cases, here they are just processed in a centralized stage which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Reading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Apparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
Yeah, they were addressed in the dev.simd cherry-picks; I haven't synced that back yet... Will do :D
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Ah, I am sorry I put these CLs in the main branch.
I will cherry-pick them to dev.simd later.
I am not expecting code changes so please proceed with reviews 😊
Done
Please move the reviews to CL 748600
Done
Junyang ShaoApparently some comments are marked as resolved but the code is not updated. Please address them. Thanks.
Yeah they were addressed in the dev.simd cherrypicks, I haven't sync that back... Will do :D
Done
// SVE scalable predicate registers, with predicate-as-counter encoding.
// These are actually P registers, but encoded differently.
// In order to distinguish with P registers, define them as PN registers.Junyang ShaoThe question here is what we want from user assembly code. Do we want user to write Pn, or PNn, for those?
If the former, I think they should not be defined as registers. Instead, we should track which encoding to use internally.
If it is the latter, what is the reason user cares and chooses which one to write?
An example would be good.
Cherry MuiInitially I thought Eric kept them separate for deduplication purposes.
But I tried to merge `AC_PREG` and `AC_PNREG` in the generator, no duplication was found. I think it's safe to remove them then. It looks like `PN` reg always comes with different peer operands compared to `P` reg in instructions.
If later I find out that they are needed I will add them back, in that case I think we want to user to write them as `P` or `PN` specifically.
A imaginary example which isn't found by my generator's logic:
```
FANTOMI <Zn>.<T>, <P>.<T>
FANTOMI <Zn>.<T>, <PN>.<T>
```
Without the users specifying them explicitly, they will be indistinguishable.
The question is whether they need to be distinguishable. Are the two instructions do exactly the same thing? Just different encodings? If they do exactly the same thing we can just choose one encoding. It is okay we never encode the other form.
I think it's safe to remove them then.Let's try that.
Done
var curI int
if p.From.Type != obj.TYPE_NONE {
if curI == i {
return &p.From
}
curI++
}
for j := range p.RestArgs {
if curI == i {
return &p.RestArgs[j].Addr
}
curI++
}
if p.To.Type != obj.TYPE_NONE {
if curI == i {
return &p.To
}
}
Junyang ShaoThis seems a rather inefficient way to iterate over the operands. Depending on how it is used, maybe rewrite to a more efficient way.
(See also the comment in tryEncode below.)
Cherry MuiYeah I realize that this is very inefficient, I have ported it to its only callsite, and there shouldn't be O(n^2) iterations now.
If you need to iterate over operands, you probably could write it as an iterator. Something like
```
func operands(p *Prog) (...) {
return func(yield func(*Addr) bool) {
if p.From.Type != obj.TYPE_NONE {
if !yield(&p.From) { return }
}
for _, a := range p.RestArgs {
if !yield(&a.Addr) { return }
}
...
}
}
```
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// We use an offset of 768 to avoid colliding with other registers.Junyang ShaoIt is better for the registers to be consecutive. We could change others, LSL or ARNG. I still don't think they should be registers (but that can be addressed later).
Done
Maybe RSP+1 as the range start point
Done
// aclass returns the AClass of an Addr.
// Right now AClass is just stored at the lower 5 bits of the Offset field...
func aclass(a *obj.Addr) AClass {
return AClass(a.Offset & 0x1f)
}
Junyang Shao
Put into any
I don't think I should put `aclass` in prog anymore, so closing this comment.
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {
Junyang Shao
Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation and then work only with Inst and Operand, not both at the same time in most code.
Cherry Mui
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain); it's just like an optab entry. So this is the logic to match a prog to an Inst, and if the match succeeds, it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, and also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst. In asm7 it is scattered across various switch cases; here it is just processed in a centralized stage, which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Reading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Thanks! Working on it.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
func (i *Inst) tryEncodeA64Inst(p *obj.Prog) (uint32, bool) {
Junyang Shao
Just "tryEncode".
How is a Prog supposed to be converted to an Inst? It looks like this function expects the Inst to already be populated. But it still needs to classify each operand in the prog. That seems confusing. What is the expected workflow here?
In general, I would think we want to either work with only Prog and Addr, or do a translation and then work only with Inst and Operand, not both at the same time in most code.
Cherry Mui
The Inst (`i`) here is the pre-populated entry in the instruction table (which is generated by the generator, which will be the next CL in the chain); it's just like an optab entry. So this is the logic to match a prog to an Inst, and if the match succeeds, it returns the binary. The reason that this is a `tryEncode` instead of `encode` is documented in the CL description, and also in the comment of `elemEncoder`.
I think this layer of Inst -> Prog interop has to be somewhere to lower prog to inst. In asm7 it is scattered across various switch cases; here it is just processed in a centralized stage, which is this function. You can find an example of the workflow in the test I added (CL 747920).
I have made the addr accesses better, thanks.
Junyang Shao
Reading it again, I think what you want to do is that you want to compute the encoder functions of a Prog, and work on the Prog and Addr. The `Inst` is actually an encoder, which doesn't carry data. That is a reasonable approach.
If this is the case, we should make the name clearer. Instead of `Inst` and `Operand`, probably `encoder` and `fieldEncoder` (or `argEncoder`). So the workflow is, when a Prog comes in, we compute its encoder based on its As and operand classes, and use the encoder (which includes a list of argEncoder's) to encode the Addr's.
Either way, I think we need to make the workflow clear. Let's write an example with one instruction. And document in a comment.
Thanks! Working on it.
I have added an e2e test; now the assembler can assemble
`ZADD` (we add the prefix `Z` to all SVE instructions that take a Z, for deduplication purposes).
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |