cmd/compile: set alignment for all content-addressable symbols
Also change the linker to use the largest alignment when
deduplicating a content-addressable symbol.
There is nothing particularly special about content-addressable
symbols, it's just a place to start.
This reduces the size of the tailscaled binary by about 16K.
This happens mainly because before this CL the linker's symalign
function kicks in for all static composite literals and PCDATA symbols,
and gives them an alignment based on their size. If the size happens
to be a multiple of 32, it gets an alignment of 32.
That wastes space.
For #6853
For #36313
diff --git a/src/cmd/compile/internal/liveness/arg.go b/src/cmd/compile/internal/liveness/arg.go
index 77960f5..33e7f18 100644
--- a/src/cmd/compile/internal/liveness/arg.go
+++ b/src/cmd/compile/internal/liveness/arg.go
@@ -316,6 +316,7 @@
lsym := base.Ctxt.Lookup(lv.fn.LSym.Name + ".argliveinfo")
lsym.Set(obj.AttrContentAddressable, true)
+ lsym.Align = 1
off := objw.Uint8(lsym, 0, argOffsets[0]) // smallest offset that needs liveness info.
for idx, live := range livenessMaps {
diff --git a/src/cmd/compile/internal/liveness/plive.go b/src/cmd/compile/internal/liveness/plive.go
index 63c85ea..601cefd 100644
--- a/src/cmd/compile/internal/liveness/plive.go
+++ b/src/cmd/compile/internal/liveness/plive.go
@@ -1473,6 +1473,7 @@
// Format must match runtime/stack.go:stackObjectRecord.
x := base.Ctxt.Lookup(lv.fn.LSym.Name + ".stkobj")
x.Set(obj.AttrContentAddressable, true)
+ x.Align = 4
lv.fn.LSym.Func().StackObjects = x
off := 0
off = objw.Uintptr(x, off, uint64(len(vars)))
diff --git a/src/cmd/compile/internal/reflectdata/reflect.go b/src/cmd/compile/internal/reflectdata/reflect.go
index ea3d0b5..2b35154 100644
--- a/src/cmd/compile/internal/reflectdata/reflect.go
+++ b/src/cmd/compile/internal/reflectdata/reflect.go
@@ -184,6 +184,7 @@
ot := dnameData(s, 0, p.Path, "", nil, false, false)
objw.Global(s, int32(ot), obj.DUPOK|obj.RODATA)
s.Set(obj.AttrContentAddressable, true)
+ s.Align = 1
p.Pathsym = s
}
@@ -308,6 +309,7 @@
ot := dnameData(s, 0, name, tag, pkg, exported, embedded)
objw.Global(s, int32(ot), obj.DUPOK|obj.RODATA)
s.Set(obj.AttrContentAddressable, true)
+ s.Align = 1
return s
}
@@ -1083,6 +1085,7 @@
// Nothing writes static itabs, so they are read only.
objw.Global(lsym, int32(rttype.ITab.Size()+delta), int16(obj.DUPOK|obj.RODATA))
lsym.Set(obj.AttrContentAddressable, true)
+ lsym.Align = int16(types.PtrSize)
}
func WritePluginTable() {
@@ -1278,6 +1281,9 @@
}
objw.Global(lsym, int32(len(ptrmask)), obj.DUPOK|obj.RODATA|obj.LOCAL)
lsym.Set(obj.AttrContentAddressable, true)
+ // The runtime expects ptrmasks to be aligned
+ // as a uintptr.
+ lsym.Align = int16(types.PtrSize)
}
return lsym
}
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 33fcf97..d89ecd6 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -283,6 +283,7 @@
x := base.Ctxt.Lookup(s.curfn.LSym.Name + ".opendefer")
x.Set(obj.AttrContentAddressable, true)
+ x.Align = 1
s.curfn.LSym.Func().OpenCodedDeferInfo = x
off := 0
@@ -6781,6 +6782,7 @@
// emit argument info (locations on stack) of f for traceback.
func EmitArgInfo(f *ir.Func, abiInfo *abi.ABIParamResultInfo) *obj.LSym {
x := base.Ctxt.Lookup(fmt.Sprintf("%s.arginfo%d", f.LSym.Name, f.ABI))
+ x.Align = 1
// NOTE: do not set ContentAddressable here. This may be referenced from
// assembly code by name (in this case f is a declaration).
// Instead, set it in emitArgInfo above.
@@ -6900,6 +6902,7 @@
x := base.Ctxt.LookupInit(fmt.Sprintf("%s.wrapinfo", wsym.Name), func(x *obj.LSym) {
objw.SymPtrOff(x, 0, wsym)
x.Set(obj.AttrContentAddressable, true)
+ x.Align = 4
})
e.curfn.LSym.Func().WrapInfo = x
diff --git a/src/cmd/compile/internal/staticdata/data.go b/src/cmd/compile/internal/staticdata/data.go
index acafe9d..51d576e 100644
--- a/src/cmd/compile/internal/staticdata/data.go
+++ b/src/cmd/compile/internal/staticdata/data.go
@@ -92,6 +92,7 @@
off := dstringdata(symdata, 0, s, pos, "string")
objw.Global(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL)
symdata.Set(obj.AttrContentAddressable, true)
+ symdata.Align = 1
}
return symdata
@@ -185,6 +186,7 @@
info.Name = file
info.Size = size
objw.Global(symdata, int32(size), obj.DUPOK|obj.RODATA|obj.LOCAL)
+ symdata.Align = 1
// Note: AttrContentAddressable cannot be set here,
// because the content-addressable-handling code
// does not know about file symbols.
@@ -211,6 +213,7 @@
lsym := types.LocalPkg.Lookup(symname).LinksymABI(obj.ABI0)
off := dstringdata(lsym, 0, s, pos, "slice")
objw.Global(lsym, int32(off), obj.NOPTR|obj.LOCAL)
+ lsym.Align = 1
return lsym
}
diff --git a/src/cmd/compile/internal/staticinit/sched.go b/src/cmd/compile/internal/staticinit/sched.go
index 5e39bb5..d3acd6d 100644
--- a/src/cmd/compile/internal/staticinit/sched.go
+++ b/src/cmd/compile/internal/staticinit/sched.go
@@ -760,6 +760,7 @@
typecheck.Target.Externs = append(typecheck.Target.Externs, n)
n.Linksym().Set(obj.AttrStatic, true)
+ n.Linksym().Align = int16(t.Alignment())
return n
}
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index 53691cc..ea1483d 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -371,38 +371,8 @@
}
align := uint32(s.Align)
if s.ContentAddressable() && s.Size != 0 && align == 0 {
- // We generally assume data symbols are naturally aligned
- // (e.g. integer constants), except for strings and a few
- // compiler-emitted funcdata. If we dedup a string symbol and
- // a non-string symbol with the same content, we should keep
- // the largest alignment.
- // TODO: maybe the compiler could set the alignment for all
- // data symbols more carefully.
- switch {
- case strings.HasPrefix(s.Name, "go:string."),
- strings.HasPrefix(name, "type:.namedata."),
- strings.HasPrefix(name, "type:.importpath."),
- strings.HasSuffix(name, ".opendefer"),
- strings.HasSuffix(name, ".arginfo0"),
- strings.HasSuffix(name, ".arginfo1"),
- strings.HasSuffix(name, ".argliveinfo"):
- // These are just bytes, or varints.
- align = 1
- case strings.HasPrefix(name, "gclocals·"):
- // It has 32-bit fields.
- align = 4
- default:
- switch {
- case w.ctxt.Arch.PtrSize == 8 && s.Size%8 == 0:
- align = 8
- case s.Size%4 == 0:
- align = 4
- case s.Size%2 == 0:
- align = 2
- default:
- align = 1
- }
- }
+ // TODO: Check that alignment is set for all symbols.
+ w.ctxt.Diag("%s: is content-addressable but alignment is not set (size is %d)", s.Name, s.Size)
}
if s.Size > cutoff {
w.ctxt.Diag("%s: symbol too large (%d bytes > %d bytes)", s.Name, s.Size, cutoff)
diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go
index 1cfcde7..852c706 100644
--- a/src/cmd/internal/obj/pcln.go
+++ b/src/cmd/internal/obj/pcln.go
@@ -28,6 +28,7 @@
sym := &LSym{
Type: objabi.SRODATA,
Attribute: AttrContentAddressable | AttrPcdata,
+ Align: 1,
}
if dbg {
@@ -335,6 +336,7 @@
pcln.Pcdata[i] = &LSym{
Type: objabi.SRODATA,
Attribute: AttrContentAddressable | AttrPcdata,
+ Align: 1,
}
} else {
pcln.Pcdata[i] = funcpctab(ctxt, cursym, "pctopcdata", pctopcdata, any(uint32(i)))
diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go
index 08c50ec..cb3b680 100644
--- a/src/cmd/internal/obj/sym.go
+++ b/src/cmd/internal/obj/sym.go
@@ -147,6 +147,7 @@
name := fmt.Sprintf("$f32.%08x%s", i, suffix)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 4
+ s.Align = 4
s.WriteFloat32(ctxt, 0, f)
s.Type = typ
s.Set(AttrLocal, true)
@@ -161,6 +162,7 @@
name := fmt.Sprintf("$f64.%016x%s", i, suffix)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 8
+ s.Align = int16(ctxt.Arch.PtrSize)
s.WriteFloat64(ctxt, 0, f)
s.Type = typ
s.Set(AttrLocal, true)
@@ -174,6 +176,7 @@
name := fmt.Sprintf("$i32.%08x%s", uint64(i), suffix)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 4
+ s.Align = 4
s.WriteInt(ctxt, 0, 4, i)
s.Type = typ
s.Set(AttrLocal, true)
@@ -187,6 +190,7 @@
name := fmt.Sprintf("$i64.%016x%s", uint64(i), suffix)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 8
+ s.Align = int16(ctxt.Arch.PtrSize)
s.WriteInt(ctxt, 0, 8, i)
s.Type = typ
s.Set(AttrLocal, true)
@@ -200,6 +204,7 @@
name := fmt.Sprintf("$i128.%016x%016x%s", uint64(hi), uint64(lo), suffix)
return ctxt.LookupInit(name, func(s *LSym) {
s.Size = 16
+ s.Align = int16(ctxt.Arch.PtrSize)
if ctxt.Arch.ByteOrder == binary.LittleEndian {
s.WriteInt(ctxt, 0, 8, lo)
s.WriteInt(ctxt, 8, 8, hi)
@@ -221,6 +226,7 @@
return ctxt.LookupInit(fmt.Sprintf("gclocals·%s", str), func(lsym *LSym) {
lsym.P = data
lsym.Set(AttrContentAddressable, true)
+ lsym.Align = 4
})
}
diff --git a/src/cmd/internal/obj/x86/seh.go b/src/cmd/internal/obj/x86/seh.go
index ceca949..19ff0a4 100644
--- a/src/cmd/internal/obj/x86/seh.go
+++ b/src/cmd/internal/obj/x86/seh.go
@@ -148,6 +148,7 @@
symname := fmt.Sprintf("%d.%s", len(buf.data), hash)
return ctxt.LookupInit("go:sehuw."+symname, func(s *obj.LSym) {
s.WriteBytes(ctxt, 0, buf.data)
+ s.Align = 4
s.Type = objabi.SSEHUNWINDINFO
s.Set(obj.AttrDuplicateOK, true)
s.Set(obj.AttrLocal, true)
diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go
index 9ab5564..95a248c 100644
--- a/src/cmd/link/internal/loader/loader.go
+++ b/src/cmd/link/internal/loader/loader.go
@@ -425,6 +425,10 @@
l.objSyms[s.sym] = objSym{r.objidx, li}
addToHashMap(symAndSize{s.sym, siz})
}
+ // Use the larger alignment.
+ if align := int32(osym.Align()); align > l.SymAlign(s.sym) {
+ l.SetSymAlign(s.sym, align)
+ }
return s.sym
}
addToHashMap(symAndSize{i, siz})
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Keith, please see the code in staticinit/sched.go. Without something like that, arm64 crashes with a misaligned load. Is it intentional that the compiler emits an aligned load from a composite literal of type [32]byte? The type alignment is of course only 1, but the linker was previously using an alignment of 8, and the compiler was apparently relying on that, though I don't know where. Thanks.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
Keith, please see the code in staticinit/sched.go. Without something like that, arm64 crashes with a misaligned load. Is it intentional that the compiler emits an aligned load from a composite literal of type [32]byte? The type alignment is of course only 1, but the linker was previously using an alignment of 8, and the compiler was apparently relying on that, though I don't know where. Thanks.
arm64 should do unaligned loads no problem.
Maybe it is a sync/atomic op? I think those need alignment, but the compiler should request alignment for those appropriately.
Maybe simd is different? Not sure about that.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Keith RandallKeith, please see the code in staticinit/sched.go. Without something like that, arm64 crashes with a misaligned load. Is it intentional that the compiler emits an aligned load from a composite literal of type [32]byte? The type alignment is of course only 1, but the linker was previously using an alignment of 8, and the compiler was apparently relying on that, though I don't know where. Thanks.
arm64 should do unaligned loads no problem.
Maybe it is a sync/atomic op? I think those need alignment, but the compiler should request alignment for those appropriately.
Maybe simd is different? Not sure about that.
Sorry, I didn't give all the details. The error was actually coming from the linker, from this line:
https://go.googlesource.com/go/+/refs/heads/master/src/cmd/link/internal/arm64/asm.go#920
I haven't looked at the details, but perhaps ARM64 ELF doesn't support a PC-relative reference to an unaligned address for a 32-bit load.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Keith RandallKeith, please see the code in staticinit/sched.go. Without something like that, arm64 crashes with a misaligned load. Is it intentional that the compiler emits an aligned load from a composite literal of type [32]byte? The type alignment is of course only 1, but the linker was previously using an alignment of 8, and the compiler was apparently relying on that, though I don't know where. Thanks.
Ian Lance Taylorarm64 should do unaligned loads no problem.
Maybe it is a sync/atomic op? I think those need alignment, but the compiler should request alignment for those appropriately.
Maybe simd is different? Not sure about that.
Sorry, I didn't give all the details. The error was actually coming from the linker, from this line:
https://go.googlesource.com/go/+/refs/heads/master/src/cmd/link/internal/arm64/asm.go#920
I haven't looked at the details, but perhaps ARM64 ELF doesn't support a PC-relative reference to an unaligned address for a 32-bit load.
By the way you can recreate the problem on arm64-linux by patching in this CL series and changing staticinit/sched.go to just do
n.Linksym().Align = int16(t.Alignment())
and run make.bash.
Keith RandallKeith, please see the code in staticinit/sched.go. Without something like that, arm64 crashes with a misaligned load. Is it intentional that the compiler emits an aligned load from a composite literal of type [32]byte? The type alignment is of course only 1, but the linker was previously using an alignment of 8, and the compiler was apparently relying on that, though I don't know where. Thanks.
Ian Lance Taylorarm64 should do unaligned loads no problem.
Maybe it is a sync/atomic op? I think those need alignment, but the compiler should request alignment for those appropriately.
Maybe simd is different? Not sure about that.
Ian Lance TaylorSorry, I didn't give all the details. The error was actually coming from the linker, from this line:
https://go.googlesource.com/go/+/refs/heads/master/src/cmd/link/internal/arm64/asm.go#920
I haven't looked at the details, but perhaps ARM64 ELF doesn't support a PC-relative reference to an unaligned address for a 32-bit load.
By the way you can recreate the problem on arm64-linux by patching in this CL series and changing staticinit/sched.go to just do
n.Linksym().Align = int16(t.Alignment())and run make.bash.
This looks like it comes from here:
crypto/internal/fips140/aes/gcm/cast.go:23
It compiles to:
```
MOVD crypto/internal/fips140/aes/gcm..stmp_0(SB), R4
MOVWU crypto/internal/fips140/aes/gcm..stmp_0+8(SB), R5
crypto/internal/fips140/aes/gcm..stmp_0 SRODATAFIPS static size=12 align=0x1
0x0000 21 22 23 24 25 26 27 28 29 2a 2b 2c !"#$%&'()*+,
```
So maybe increasing the alignment of these `stmp` blobs that we load constants from would help?
This one is kind of weird because I think it is the second instruction that triggers the failure (the Reloc has an addend of 8). The first instruction doesn't fail?
We do have some code in obj that tries to deal with unaligned global variable access (e.g. https://github.com/golang/go/blob/36bca3166e18db52687a4d91ead3f98ffe6d00b8/src/cmd/internal/obj/arm64/asm7.go#L4581). Maybe it doesn't handle MOVWU for some reason.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
(Mostly note for myself, I can reproduce doing
```
GOARCH=arm64 go test -c -gcflags=-S crypto/internal/fips140/aes/gcm
```
or for that matter, just `GOARCH=arm64 go test -c strings`.
)
Just a note that increasing the alignment of the stmp blobs is basically what the current patch does. It just feels a bit weird that the type alignment is not correct. It's also possible that the same will have to be done with variables of types like [32]byte--they won't be able to have 1-byte alignment, they'll need pointer-sized alignment. Maybe. It would help to have some decisions and guidelines for what alignment the compiler currently expects.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I think the compiler proper is doing the right thing. The stmp has the expected alignment (1 byte, for [12]byte and similar).
I think the obj library is failing here, not being able to assemble a load to a global that is only 1-byte aligned. It can certainly do that in some cases, at least. Either we need to fix obj so it can always handle it, or come up with a rule about when it can and when it can't, and then modify the compiler to obey that rule.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Thanks, I think I found the problem. The function symAlign in cmd/internal/obj/arm64 does its own alignment computations based on the symbol size. I'm going to try replacing that.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
OK, I think things make sense now.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |