Cherry Zhang has uploaded this change for review.
cmd/link: support trampoline insertion on ARM64
Compared to ARM32 or PPC64, ARM64 has larger range for direct jumps.
But for very large programs it can still go over the limit. Add
trampoline insertion for ARM64.
Updates #40492.
Change-Id: Id97301dbc35fb577ba3f8d5f3316a8424d4f53c4
---
M src/cmd/link/internal/arm64/asm.go
M src/cmd/link/internal/arm64/obj.go
M src/cmd/link/link_test.go
3 files changed, 131 insertions(+), 1 deletion(-)
diff --git a/src/cmd/link/internal/arm64/asm.go b/src/cmd/link/internal/arm64/asm.go
index 68e59f2..2a4f60e 100644
--- a/src/cmd/link/internal/arm64/asm.go
+++ b/src/cmd/link/internal/arm64/asm.go
@@ -413,6 +413,30 @@
// (e.g. go version).
return true
}
+
+ case objabi.R_ARM64_GOTPCREL:
+ if target.IsExternal() {
+ // External linker will do this relocation.
+ return true
+ }
+ if targType != sym.SDYNIMPORT {
+ ldr.Errorf(s, "R_ARM64_GOTPCREL target is not SDYNIMPORT symbol: %v", ldr.SymName(targ))
+ }
+ if r.Add() != 0 {
+ ldr.Errorf(s, "R_ARM64_GOTPCREL with non-zero addend (%v)", r.Add())
+ }
+ ld.AddGotSym(target, ldr, syms, targ, 0)
+ // turn into two relocations, one for each instruction.
+ su := ldr.MakeSymbolUpdater(s)
+ r.SetType(objabi.R_ARM64_GOT)
+ r.SetSiz(4)
+ r.SetSym(syms.GOT)
+ r.SetAdd(int64(ldr.SymGot(targ)))
+ r2, _ := su.AddRel(objabi.R_ARM64_GOT)
+ r2.SetSiz(4)
+ r2.SetOff(r.Off() + 4)
+ r2.SetSym(syms.GOT)
+ r2.SetAdd(int64(ldr.SymGot(targ)))
}
return false
}
@@ -1154,3 +1178,107 @@
}
return fmt.Sprintf("%s+%d", ldr.SymExtname(s), off)
}
+
+// Convert the direct jump relocation r to refer to a trampoline if the target is too far
+func trampoline(ctxt *ld.Link, ldr *loader.Loader, ri int, rs, s loader.Sym) {
+ relocs := ldr.Relocs(s)
+ r := relocs.At(ri)
+ switch r.Type() {
+ case objabi.R_CALLARM64:
+ var t int64
+ // ldr.SymValue(rs) == 0 indicates a cross-package jump to a function that is not yet
+ // laid out. Conservatively use a trampoline. This should be rare, as we lay out packages
+ // in dependency order.
+ if ldr.SymValue(rs) != 0 {
+ t = ldr.SymValue(rs) + r.Add() - (ldr.SymValue(s) + int64(r.Off()))
+ }
+ if t >= 1<<27 || t < -1<<27 || ldr.SymValue(rs) == 0 || (*ld.FlagDebugTramp > 1 && ldr.SymPkg(s) != ldr.SymPkg(rs)) {
+ // direct call too far, need to insert trampoline.
+ // look up existing trampolines first. if we found one within the range
+ // of direct call, we can reuse it. otherwise create a new one.
+ var tramp loader.Sym
+ for i := 0; ; i++ {
+ oName := ldr.SymName(rs)
+ name := oName + fmt.Sprintf("%+x-tramp%d", r.Add(), i)
+ tramp = ldr.LookupOrCreateSym(name, int(ldr.SymVersion(rs)))
+ ldr.SetAttrReachable(tramp, true)
+ if ldr.SymType(tramp) == sym.SDYNIMPORT {
+ // don't reuse trampoline defined in other module
+ continue
+ }
+ if oName == "runtime.deferreturn" {
+ ldr.SetIsDeferReturnTramp(tramp, true)
+ }
+ if ldr.SymValue(tramp) == 0 {
+ // either the trampoline does not exist -- we need to create one,
+ // or found one the address which is not assigned -- this will be
+ // laid down immediately after the current function. use this one.
+ break
+ }
+
+ t = ldr.SymValue(tramp) - (ldr.SymValue(s) + int64(r.Off()))
+ if t >= -1<<27 && t < 1<<27 {
+ // found an existing trampoline that is not too far
+ // we can just use it
+ break
+ }
+ }
+ if ldr.SymType(tramp) == 0 {
+ // trampoline does not exist, create one
+ trampb := ldr.MakeSymbolUpdater(tramp)
+ ctxt.AddTramp(trampb)
+ if ldr.SymType(rs) == sym.SDYNIMPORT {
+ if r.Add() != 0 {
+ ctxt.Errorf(s, "nonzero addend for DYNIMPORT call: %v+%d", ldr.SymName(rs), r.Add())
+ }
+ gentrampgot(ctxt, ldr, trampb, rs)
+ } else {
+ gentramp(ctxt, ldr, trampb, rs, r.Add())
+ }
+ }
+ // modify reloc to point to tramp, which will be resolved later
+ sb := ldr.MakeSymbolUpdater(s)
+ relocs := sb.Relocs()
+ r := relocs.At(ri)
+ r.SetSym(tramp)
+ r.SetAdd(0) // clear the offset embedded in the instruction
+ }
+ default:
+ ctxt.Errorf(s, "trampoline called with non-jump reloc: %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()))
+ }
+}
+
+// generate a trampoline to target+offset.
+func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, target loader.Sym, offset int64) {
+ tramp.SetSize(12) // 3 instructions
+ P := make([]byte, tramp.Size())
+ o1 := uint32(0x90000010) // adrp x16, target
+ o2 := uint32(0x91000210) // add x16, pc-relative-offset
+ o3 := uint32(0xd61f0200) // br x16
+ ctxt.Arch.ByteOrder.PutUint32(P, o1)
+ ctxt.Arch.ByteOrder.PutUint32(P[4:], o2)
+ ctxt.Arch.ByteOrder.PutUint32(P[8:], o3)
+ tramp.SetData(P)
+
+ r, _ := tramp.AddRel(objabi.R_ADDRARM64)
+ r.SetSiz(8)
+ r.SetSym(target)
+ r.SetAdd(offset)
+}
+
+// generate a trampoline to target+offset for a DYNIMPORT symbol via GOT.
+func gentrampgot(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, target loader.Sym) {
+ tramp.SetSize(12) // 3 instructions
+ P := make([]byte, tramp.Size())
+ o1 := uint32(0x90000010) // adrp x16, target@GOT
+ o2 := uint32(0xf9400210) // ldr x16, [x16, offset]
+ o3 := uint32(0xd61f0200) // br x16
+ ctxt.Arch.ByteOrder.PutUint32(P, o1)
+ ctxt.Arch.ByteOrder.PutUint32(P[4:], o2)
+ ctxt.Arch.ByteOrder.PutUint32(P[8:], o3)
+ tramp.SetData(P)
+
+ r, _ := tramp.AddRel(objabi.R_ARM64_GOTPCREL)
+ r.SetSiz(8)
+ r.SetSym(target)
+}
diff --git a/src/cmd/link/internal/arm64/obj.go b/src/cmd/link/internal/arm64/obj.go
index 18a3253..9c74598 100644
--- a/src/cmd/link/internal/arm64/obj.go
+++ b/src/cmd/link/internal/arm64/obj.go
@@ -45,6 +45,7 @@
Minalign: minAlign,
Dwarfregsp: dwarfRegSP,
Dwarfreglr: dwarfRegLR,
+ TrampLimit: 0x7c00000, // 26-bit signed offset * 4, leave room for PLT etc.
Adddynrel: adddynrel,
Archinit: archinit,
@@ -59,6 +60,7 @@
Machoreloc1: machoreloc1,
MachorelocSize: 8,
PEreloc1: pereloc1,
+ Trampoline: trampoline,
Androiddynld: "/system/bin/linker64",
Linuxdynld: "/lib/ld-linux-aarch64.so.1",
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index 285c650..bf4c2ab 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -618,7 +618,7 @@
// threshold for trampoline generation, and essentially all cross-package
// calls will use trampolines.
switch runtime.GOARCH {
- case "arm", "ppc64", "ppc64le":
+ case "arm", "arm64", "ppc64", "ppc64le":
default:
t.Skipf("trampoline insertion is not implemented on %s", runtime.GOARCH)
}
To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.
Patch set 1:Run-TryBot +1
Attention is currently required from: Jeremy Faller, Cherry Zhang.
Patch set 1:Code-Review +2
1 comment:
File src/cmd/link/internal/arm64/asm.go:
Patch Set #1, Line 434: SymGot
This has nothing to do with your CL, but if you have time it would be nice to update the comments on Loader.SymGot / Loader.SetSymbGot (right now they refer to "pe", whereas the there are definitely used elsewhere as well).
To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Jeremy Faller.
Patch set 2:Run-TryBot +1
Attention is currently required from: Jeremy Faller.
Cherry Zhang uploaded patch set #3 to this change.
cmd/link: support trampoline insertion on ARM64
Compared to ARM32 or PPC64, ARM64 has larger range for direct jumps.
But for very large programs it can still go over the limit. Add
trampoline insertion for ARM64.
Updates #40492.
Change-Id: Id97301dbc35fb577ba3f8d5f3316a8424d4f53c4
---
M src/cmd/link/internal/arm64/asm.go
M src/cmd/link/internal/arm64/obj.go
M src/cmd/link/link_test.go
3 files changed, 135 insertions(+), 1 deletion(-)
To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Jeremy Faller.
1 comment:
File src/cmd/link/internal/arm64/asm.go:
Patch Set #1, Line 434: SymGot
This has nothing to do with your CL, but if you have time it would be nice to update the comments on […]
Done. CL 314929. Thanks.
To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.
Patchset:
Thanks.
To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.
Cherry Zhang submitted this change.
cmd/link: support trampoline insertion on ARM64
Compared to ARM32 or PPC64, ARM64 has larger range for direct jumps.
But for very large programs it can still go over the limit. Add
trampoline insertion for ARM64.
Updates #40492.
Change-Id: Id97301dbc35fb577ba3f8d5f3316a8424d4f53c4
Reviewed-on: https://go-review.googlesource.com/c/go/+/314451
Trust: Cherry Zhang <cher...@google.com>
Reviewed-by: Than McIntosh <th...@google.com>
---
M src/cmd/link/internal/arm64/asm.go
M src/cmd/link/internal/arm64/obj.go
M src/cmd/link/link_test.go
3 files changed, 135 insertions(+), 1 deletion(-)
diff --git a/src/cmd/link/internal/arm64/asm.go b/src/cmd/link/internal/arm64/asm.go
index 68e59f2..fad709d 100644
--- a/src/cmd/link/internal/arm64/asm.go
+++ b/src/cmd/link/internal/arm64/asm.go
@@ -413,6 +413,34 @@
// (e.g. go version).
return true
}
+
+ case objabi.R_ARM64_GOTPCREL:
+ if target.IsExternal() {
+ // External linker will do this relocation.
+ return true
+ }
+ if targType != sym.SDYNIMPORT {
+ ldr.Errorf(s, "R_ARM64_GOTPCREL target is not SDYNIMPORT symbol: %v", ldr.SymName(targ))
+ }
+ if r.Add() != 0 {
+ ldr.Errorf(s, "R_ARM64_GOTPCREL with non-zero addend (%v)", r.Add())
+ }
+ if target.IsElf() {
+ ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_AARCH64_GLOB_DAT))
+ } else {
+ ld.AddGotSym(target, ldr, syms, targ, 0)
+ }
+ // turn into two relocations, one for each instruction.
+ su := ldr.MakeSymbolUpdater(s)
+ r.SetType(objabi.R_ARM64_GOT)
+ r.SetSiz(4)
+ r.SetSym(syms.GOT)
+ r.SetAdd(int64(ldr.SymGot(targ)))
+ r2, _ := su.AddRel(objabi.R_ARM64_GOT)
+ r2.SetSiz(4)
+ r2.SetOff(r.Off() + 4)
+ r2.SetSym(syms.GOT)
+ r2.SetAdd(int64(ldr.SymGot(targ)))
}
return false
}
@@ -1154,3 +1182,107 @@To view, visit change 314451. To unsubscribe, or for help writing mail filters, visit settings.