Attention is currently required from: Josh Bleecher Snyder.
Keith Randall would like Josh Bleecher Snyder to review this change.
cmd/compile: implement jump tables
name old time/op new time/op delta
Switch-16 1.67ns ± 1% 1.17ns ± 1% -30.11% (p=0.000 n=10+9)
Fixes #5496
Update #34381
Change-Id: I3ff56011d02be53f605ca5fd3fb96b905517c34f
---
M src/cmd/compile/internal/ssa/gen/AMD64.rules
M src/cmd/internal/obj/link.go
A src/cmd/compile/internal/ssa/jumptable.go
M src/cmd/compile/internal/ssa/compile.go
M src/cmd/compile/internal/ssa/gen/rulegen.go
A src/cmd/compile/internal/test/switch_test.go
M src/cmd/compile/internal/ssa/rewriteAMD64.go
M src/cmd/compile/internal/ssagen/ssa.go
M src/cmd/compile/internal/ssa/gen/genericOps.go
M src/cmd/compile/internal/ssa/rewrite.go
M src/cmd/compile/internal/amd64/ssa.go
M src/cmd/internal/obj/x86/asm6.go
M src/cmd/compile/internal/ssa/check.go
M src/cmd/compile/internal/ssa/block.go
M src/cmd/compile/internal/gc/obj.go
M src/cmd/compile/internal/ssa/gen/AMD64Ops.go
M src/cmd/compile/internal/ssa/opGen.go
17 files changed, 545 insertions(+), 33 deletions(-)
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index b0e5c34..45c9c69 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1384,6 +1384,16 @@
}
}
+ case ssa.BlockAMD64JUMPTABLE:
+ // JMP *(TABLE)(INDEX*8)
+ p := s.Prog(obj.AJMP)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = b.ControlValue(0).Reg()
+ p.To.Index = b.ControlValue(1).Reg()
+ p.To.Scale = 8
+ // Save jump tables for later resolution of the target blocks.
+ s.JumpTables = append(s.JumpTables, b)
+
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index 432c003..58d80c9 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -260,6 +260,9 @@
objw.Global(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
x.Set(obj.AttrStatic, true)
}
+ for _, jt := range fn.JumpTables {
+ objw.Global(jt.Sym, int32(len(jt.Targets)*base.Ctxt.Arch.PtrSize), obj.RODATA)
+ }
}
}
diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go
index 71ca774..aef4654 100644
--- a/src/cmd/compile/internal/ssa/block.go
+++ b/src/cmd/compile/internal/ssa/block.go
@@ -157,6 +157,11 @@
return 2
}
+// ConrolValue returns the nth control value of b.
+func (b *Block) ControlValue(n int) *Value {
+ return b.Controls[n]
+}
+
// ControlValues returns a slice containing the non-nil control
// values of the block. The index of each control value will be
// the same as it is in the Controls property and can be used
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index 28edfd2..df677e6 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -100,6 +100,10 @@
if b.NumControls() != 0 {
f.Fatalf("plain/dead block %s has a control value", b)
}
+ case BlockJumpTable:
+ if b.NumControls() != 1 {
+ f.Fatalf("jumpTable block %s has no control value", b)
+ }
}
if len(b.Succs) != 2 && b.Likely != BranchUnknown {
f.Fatalf("likeliness prediction %d for block %s with %d successors", b.Likely, b, len(b.Succs))
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index f87ea5b..e418b20 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -484,6 +484,7 @@
{name: "branchelim", fn: branchelim},
{name: "late fuse", fn: fuseLate},
{name: "dse", fn: dse},
+ {name: "jumptable", fn: jumpTable},
{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
{name: "insert resched checks", fn: insertLoopReschedChecks,
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 47a6af0..cedc35e 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -2245,3 +2245,5 @@
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
+
+(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)) idx)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index e3c94e4..7a2953c 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -947,6 +947,12 @@
{name: "NEF", controls: 1},
{name: "ORD", controls: 1}, // FP, ordered comparison (parity zero)
{name: "NAN", controls: 1}, // FP, unordered comparison (parity one)
+
+ // JUMPTABLE implements jump tables.
+ // control[0] is the address of the jump table
+ // control[1] is the index into the jump table
+ // Aux is the symbol (a *obj.LSym) for the jump table (the thing control[0] is the address of)
+ {name: "JUMPTABLE", controls: 2, aux: "Sym"},
}
archs = append(archs, arch{
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 4f133b1..e04b7db 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -639,12 +639,13 @@
// First [] [always, never]
var genericBlocks = []blockData{
- {name: "Plain"}, // a single successor
- {name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
- {name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
- {name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
- {name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
- {name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
+ {name: "Plain"}, // a single successor
+ {name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
+ {name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
+ {name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
+ {name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
+ {name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
+ {name: "JumpTable", controls: 1}, // multiple successors, the integer Controls[0] selects which one
// transient block state used for dead code removal
{name: "First"}, // 2 successors, always takes the first one (second is dead)
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index fe8db4e..0f7e970 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -1838,6 +1838,8 @@
// auxType returns the Go type that this block should store in its aux field.
func (b blockData) auxType() string {
switch b.aux {
+ case "Sym":
+ return "Sym"
case "S390XCCMask", "S390XCCMaskInt8", "S390XCCMaskUint8":
return "s390x.CCMask"
case "S390XRotateParams":
diff --git a/src/cmd/compile/internal/ssa/jumptable.go b/src/cmd/compile/internal/ssa/jumptable.go
new file mode 100644
index 0000000..a89c2e5
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/jumptable.go
@@ -0,0 +1,376 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+ "cmd/compile/internal/base"
+ "cmd/internal/src"
+ "fmt"
+ "math"
+ "sort"
+)
+
+// We look for a group of blocks like this:
+//
+// b1 ----------> b2 --> t1
+// | |
+// v v
+// b3 --> t2 t3
+// |
+// v
+// t4
+//
+// Where all the b's control values are comparisons of the same value to
+// different constants. Also:
+// - None of the b's, except b1, can have any side effects.
+// For now, we forbid b2+ from having any code at all, except the comparison test.
+// - The constants must be fairly dense.
+//
+// This pattern matching handles both linear and binary search code generated
+// by the switch rewriting done in ../walk/switch.go.
+//
+// We convert that structure to a jump table.
+//
+// idx := swiched_on_value - min(constants)
+// if idx < 0 || idx > max(constants)-min(constants) { goto default }
+// jump to table[idx]
+//
+// table[i] contains the t that is branched to when the switched-on value
+// is equal to i+min(constants). Unmatched table entries are filled with default.
+func jumpTable(f *Func) {
+ switch f.Config.arch {
+ default:
+ // Most architectures can't do this (yet).
+ return
+ case "amd64":
+ }
+
+ // Find all blocks that do constant comparisons against the same value
+ // as their parent.
+ parent := map[*Block]*Block{}
+ for _, b := range f.Blocks {
+ if b.Kind != BlockIf {
+ continue
+ }
+ c := b.ControlValue(0)
+
+ // The only op in the block can be the control value.
+ if len(b.Values) != 1 {
+ continue
+ }
+ if b.Values[0] != c {
+ continue
+ }
+ if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 { // TODO: Unsigned, {32,16,8}, and maybe String.
+ continue
+ }
+ if c.Uses != 1 {
+ continue
+ }
+ x, y := c.Args[0], c.Args[1]
+ if x.Op == OpConst64 {
+ if y.Op == OpConst64 {
+ // Can't handle const/const comparisons because later
+ // parts of this pass can't tell which is the variable
+ // and which is the constant.
+ continue
+ }
+ x, y = y, x
+ } else if y.Op != OpConst64 {
+ continue
+ }
+ value := x
+
+ // Look at predecessor block, see if it is branching on the same value.
+ if len(b.Preds) != 1 {
+ continue
+ }
+ p := b.Preds[0].b
+ if p.Kind != BlockIf {
+ continue
+ }
+ // Note: don't need len(p.Values)==1.
+ c = p.ControlValue(0)
+ if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 {
+ continue
+ }
+ // Note: don't need c.Uses == 1
+ x, y = c.Args[0], c.Args[1]
+ if x.Op == OpConst64 {
+ if y.Op == OpConst64 {
+ continue
+ }
+ x, y = y, x
+ } else if y.Op != OpConst64 {
+ continue
+ }
+ if x != value {
+ // Both blocks must be comparing the same value against a constant.
+ continue
+ }
+ if p.Likely != BranchUnknown {
+ // Don't hide likely branch info by using a jump table.
+ continue
+ }
+
+ parent[b] = p
+ }
+
+ // Compress parent links to point directly to the root.
+ for {
+ changed := false
+ for b, p := range parent {
+ gp := parent[p]
+ if gp != nil {
+ parent[b] = gp
+ changed = true
+ }
+ }
+ if !changed {
+ break
+ }
+ }
+
+ // Make map from root to all the blocks in that group.
+ groups := map[*Block][]*Block{}
+ for b, p := range parent {
+ groups[p] = append(groups[p], b)
+ }
+
+ roots := make([]*Block, 0, len(groups))
+ for root, group := range groups {
+ groups[root] = append(groups[root], root) // add root to its own group
+ sort.Slice(group, func(i, j int) bool { // groups sort for determinism
+ return group[i].ID < group[j].ID
+ })
+ roots = append(roots, root)
+ }
+ // Sort groups by root ID for determinism.
+ sort.Slice(roots, func(i, j int) bool { // sort roots for determinism
+ return roots[i].ID < roots[j].ID
+ })
+
+ // getConst returns the constant compared against by a block.
+ getConst := func(b *Block) int64 {
+ v := b.ControlValue(0)
+ if v.Args[0].Op == OpConst64 {
+ return v.Args[0].AuxInt
+ }
+ return v.Args[1].AuxInt
+ }
+ // getVal returns the value that we're switching on.
+ getVal := func(b *Block) *Value {
+ v := b.ControlValue(0)
+ if v.Args[0].Op == OpConst64 {
+ return v.Args[1]
+ }
+ return v.Args[0]
+ }
+
+ // nextBlock returns the edge outgoing from b if the choice
+ // variable is equal to c.
+ nextBlock := func(b *Block, c int64) Edge {
+ d := getConst(b)
+ var r bool
+ switch b.ControlValue(0).Op {
+ case OpEq64:
+ r = c == d
+ case OpNeq64:
+ r = c != d
+ case OpLess64:
+ r = c < d
+ case OpLeq64:
+ r = c <= d
+ default:
+ // TODO: unsigned, <64 bit
+ f.Fatalf("unknown op %s", b.ControlValue(0).Op)
+ }
+ if r {
+ return b.Succs[0]
+ }
+ return b.Succs[1]
+ }
+
+ // This is the main loop, processing a group at a time.
+grouploop:
+ for _, root := range roots {
+ group := groups[root]
+ if f.pass.debug > 0 {
+ fmt.Printf("%s: processing root=%s group=%v\n", f.Name, root, group)
+ }
+
+ // TODO: keep track of more than min/max.
+ // For now, just min/max in signed domain. Later we can do
+ // unsigned domain, mask low bits, etc.
+ // Note: the constants are always int64, even if in the original
+ // source code they were uint64, or int16, or whatever.
+
+ // Figure out the range of constants we compare against.
+ var min, max int64
+ var cnt int
+ for _, b := range group {
+ c := getConst(b)
+ if cnt == 0 {
+ min = c
+ max = c
+ } else {
+ if c < min {
+ min = c
+ }
+ if c > max {
+ max = c
+ }
+ }
+ cnt++ // TODO: just include ==,!= or should we also count <,<= here?
+ }
+ if min == math.MinInt64 || max == math.MaxInt64 {
+ // We use these as sentinel values later. Abort if we compare against them.
+ continue
+ }
+ width := uint64(max - min + 1) // number of jump table entries
+
+ // Check to see if a jump table is appropriate.
+ if cnt < 4 {
+ // Not wide enough to make this worthwhile.
+ if f.pass.debug > 0 {
+ fmt.Printf(" abort: only %d equality tests\n", cnt)
+ }
+ continue
+ }
+ if width/4 > uint64(cnt) { // <25% full. TODO: what's the right number here?
+ if f.pass.debug > 0 {
+ fmt.Printf(" abort: density too small, %d out of %d\n", cnt, width)
+ }
+ continue
+ }
+
+ // groupExit returns the edge that exits the group if the
+ // choice variable is c.
+ groupExit := func(c int64) Edge {
+ b := root
+ for {
+ e := nextBlock(b, c)
+ if parent[e.b] != root {
+ // Note: branching back to the root is treated
+ // as leaving the group.
+ return e
+ }
+ b = e.b
+ }
+
+ }
+
+ // Find the places where we'll exit the group if the choice variable is
+ // not in [min,max]. We can do that with two probes, minint and maxint.
+ default_ := groupExit(math.MinInt64)
+ default2 := groupExit(math.MaxInt64)
+
+ // Skip ahead, to help detect when default_ and default2 are identical.
+ for default_.b.Kind == BlockPlain && len(default_.b.Values) == 0 {
+ default_ = default_.b.Succs[0]
+ }
+ for default2.b.Kind == BlockPlain && len(default2.b.Values) == 0 {
+ default2 = default2.b.Succs[0]
+ }
+
+ // Check that <min and >max defaults are the same.
+ if default_.b != default2.b {
+ if f.pass.debug > 0 {
+ fmt.Printf(" abort: default exits %s and %s are different\n", default_.b, default2.b)
+ }
+ continue
+ }
+ // Check for phi compatibility between the two default edges.
+ for _, v := range default_.b.Values {
+ if v.Op != OpPhi {
+ continue
+ }
+ if v.Args[default_.i] != v.Args[default2.i] {
+ // The two edges to the default block induce a different value
+ // of some phi op. Abandon the group.
+ if f.pass.debug > 0 {
+ fmt.Printf(" abort: default exits have different phi args: %v %d and %d\n", v, default_.i, default2.i)
+ }
+ continue grouploop
+ }
+ }
+ // TODO: we could use 2 different compare/branch in the bcb block below
+ // if the <min and >max destinations are different.
+ // (In that case we could do CMP X, $min; BLT default1; CMP X $max; BGT default2; LEAQ ?, JT; JMP (-8*min)(JT)(X*8))
+
+ //////////////////////////////////////////////////////////////
+ // At this point, we're committed to making the jump table. //
+ //////////////////////////////////////////////////////////////
+
+ // Build the jump table block itself.
+ jump := f.NewBlock(BlockJumpTable)
+ jump.Pos = root.Pos
+ // Add outgoing edges for each value in the table.
+ for c := min; c <= max; c++ {
+ e := groupExit(c)
+ if f.pass.debug > 0 {
+ fmt.Printf(" %d -> %s[%d]\n", c, e.b, e.i)
+ }
+ jump.Succs = append(jump.Succs, Edge{b: e.b, i: len(e.b.Preds)})
+ e.b.Preds = append(e.b.Preds, Edge{b: jump, i: len(jump.Succs) - 1})
+ for _, v := range e.b.Values {
+ if v.Op == OpPhi {
+ // Use the same phi argument for this edge as the
+ // original edge from the group to this block.
+ v.AddArg(v.Args[e.i])
+ }
+ }
+ }
+ if f.pass.debug > 0 {
+ fmt.Printf(" default_ -> %s[%d]\n", default_.b, default_.i)
+ }
+
+ // Build bounds check block.
+ // TODO: this pass is after prove, so if this comparison is obviously satisifiable (e.g. switch (x&3) { case 0: ... case 3: ... }).
+ // We might want to squash this bounds check. Or move this pass before prove.
+ bcb := f.NewBlock(BlockIf)
+ val := getVal(root)
+ minVal := f.Entry.NewValue0I(src.NoXPos, OpConst64, f.Config.Types.UInt64, min)
+ widthVal := f.Entry.NewValue0I(src.NoXPos, OpConst64, f.Config.Types.UInt64, int64(width))
+ idx := bcb.NewValue2(root.Pos, OpSub64, f.Config.Types.UInt64, val, minVal)
+ cmp := bcb.NewValue2(root.Pos, OpLess64U, f.Config.Types.Bool, idx, widthVal)
+ bcb.SetControl(cmp)
+ // bcb's true branch goes to the jump block.
+ bcb.Succs = append(bcb.Succs, Edge{b: jump, i: 0})
+ jump.Preds = append(jump.Preds, Edge{b: bcb, i: 0})
+ bcb.Likely = BranchLikely // TODO: assumes missing the table entirely is unlikely. True?
+ // bcb's false branch goes to the default block.
+ bcb.Succs = append(bcb.Succs, Edge{b: default_.b, i: len(default_.b.Preds)})
+ default_.b.Preds = append(default_.b.Preds, Edge{b: bcb, i: 1})
+ for _, v := range default_.b.Values {
+ if v.Op == OpPhi {
+ v.AddArg(v.Args[default_.i])
+ }
+ }
+
+ // The jump block uses the in-bounds index as its control value.
+ if base.Flag.Cfg.SpectreIndex {
+ idx = jump.NewValue2(root.Pos, OpSpectreIndex, f.Config.Types.UInt64, idx, widthVal)
+ }
+ jump.SetControl(idx)
+
+ // Modify the original root to unconditionally branch to the bounds check block.
+ // One of root's successors is guaranteed to be in the group. That successor block
+ // is easy to remove an edge from, because we know it has exactly 1 predecessor.
+ if parent[root.Succs[0].b] != root {
+ root.swapSuccessors()
+ }
+ // Always go to the bcb block.
+ root.Succs[0].b.Preds = root.Succs[0].b.Preds[:0] // remove incoming edge to root.Succs[0]
+ root.Succs[0] = Edge{b: bcb, i: 0} // add outgoing edge to bcb
+ bcb.Preds = append(bcb.Preds, Edge{b: root, i: 0}) // add incoming edge from root
+ root.Reset(BlockFirst) // we don't go to root.Succs[1] either
+
+ // At this point, the whole group except the root should be dead code, and the next deadcode
+ // pass will remove it all.
+
+ f.invalidateCFG()
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 091f43f..14d7bf4 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -50,6 +50,7 @@
BlockAMD64NEF
BlockAMD64ORD
BlockAMD64NAN
+ BlockAMD64JUMPTABLE
BlockARMEQ
BlockARMNE
@@ -149,6 +150,7 @@
BlockRet
BlockRetJmp
BlockExit
+ BlockJumpTable
BlockFirst
)
@@ -172,22 +174,23 @@
Block386ORD: "ORD",
Block386NAN: "NAN",
- BlockAMD64EQ: "EQ",
- BlockAMD64NE: "NE",
- BlockAMD64LT: "LT",
- BlockAMD64LE: "LE",
- BlockAMD64GT: "GT",
- BlockAMD64GE: "GE",
- BlockAMD64OS: "OS",
- BlockAMD64OC: "OC",
- BlockAMD64ULT: "ULT",
- BlockAMD64ULE: "ULE",
- BlockAMD64UGT: "UGT",
- BlockAMD64UGE: "UGE",
- BlockAMD64EQF: "EQF",
- BlockAMD64NEF: "NEF",
- BlockAMD64ORD: "ORD",
- BlockAMD64NAN: "NAN",
+ BlockAMD64EQ: "EQ",
+ BlockAMD64NE: "NE",
+ BlockAMD64LT: "LT",
+ BlockAMD64LE: "LE",
+ BlockAMD64GT: "GT",
+ BlockAMD64GE: "GE",
+ BlockAMD64OS: "OS",
+ BlockAMD64OC: "OC",
+ BlockAMD64ULT: "ULT",
+ BlockAMD64ULE: "ULE",
+ BlockAMD64UGT: "UGT",
+ BlockAMD64UGE: "UGE",
+ BlockAMD64EQF: "EQF",
+ BlockAMD64NEF: "NEF",
+ BlockAMD64ORD: "ORD",
+ BlockAMD64NAN: "NAN",
+ BlockAMD64JUMPTABLE: "JUMPTABLE",
BlockARMEQ: "EQ",
BlockARMNE: "NE",
@@ -281,13 +284,14 @@
BlockS390XCLIJ: "CLIJ",
BlockS390XCLGIJ: "CLGIJ",
- BlockPlain: "Plain",
- BlockIf: "If",
- BlockDefer: "Defer",
- BlockRet: "Ret",
- BlockRetJmp: "RetJmp",
- BlockExit: "Exit",
- BlockFirst: "First",
+ BlockPlain: "Plain",
+ BlockIf: "If",
+ BlockDefer: "Defer",
+ BlockRet: "Ret",
+ BlockRetJmp: "RetJmp",
+ BlockExit: "Exit",
+ BlockJumpTable: "JumpTable",
+ BlockFirst: "First",
}
func (k BlockKind) String() string { return blockString[k] }
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 2fe0ca6..6fedfdc 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -5,6 +5,7 @@
package ssa
import (
+ "cmd/compile/internal/base"
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj"
@@ -1943,3 +1944,7 @@
fcb.N = x < 0
return fcb.encode()
}
+
+func makeJumpTableSym(b *Block) *obj.LSym {
+ return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0c789d6..555d07b 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3,10 +3,12 @@
package ssa
-import "internal/buildcfg"
-import "math"
-import "cmd/internal/obj"
-import "cmd/compile/internal/types"
+import (
+ "cmd/compile/internal/types"
+ "cmd/internal/obj"
+ "internal/buildcfg"
+ "math"
+)
func rewriteValueAMD64(v *Value) bool {
switch v.Op {
@@ -33762,6 +33764,7 @@
return false
}
func rewriteBlockAMD64(b *Block) bool {
+ typ := &b.Func.Config.Types
switch b.Kind {
case BlockAMD64EQ:
// match: (EQ (TESTL (SHLL (MOVLconst [1]) x) y))
@@ -34296,6 +34299,19 @@
b.resetWithControl(BlockAMD64NE, v0)
return true
}
+ case BlockJumpTable:
+ // match: (JumpTable idx)
+ // result: (JUMPTABLE {makeJumpTableSym(b)} (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)) idx)
+ for {
+ idx := b.Controls[0]
+ v0 := b.NewValue0(idx.Pos, OpAMD64LEAQ, typ.Uintptr)
+ v0.Aux = symToAux(makeJumpTableSym(b))
+ v1 := b.NewValue0(idx.Pos, OpSB, typ.Uintptr)
+ v0.AddArg(v1)
+ b.resetWithControl2(BlockAMD64JUMPTABLE, v0, idx)
+ b.Aux = symToAux(makeJumpTableSym(b))
+ return true
+ }
case BlockAMD64LE:
// match: (LE (InvertFlags cmp) yes no)
// result: (GE cmp yes no)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 5a958a5..d1e26e5 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -6459,6 +6459,9 @@
// and where they would like to go.
Branches []Branch
+ // JumpTables remembers all the jump tables we've seen.
+ JumpTables []*ssa.Block
+
// bstart remembers where each block starts (indexed by block ID)
bstart []*obj.Prog
@@ -7007,6 +7010,20 @@
}
+ // Resolve jump table destinations.
+ for _, jt := range s.JumpTables {
+ // Convert from *Block targets to *Prog targets.
+ targets := make([]*obj.Prog, len(jt.Succs))
+ for i, e := range jt.Succs {
+ targets[i] = s.bstart[e.Block().ID]
+ }
+ // Add to list of jump tables to be resolved at assembly time.
+ // The assembler converts from *Prog entries to absolute addresses
+ // once it knows instruction byte offsets.
+ fi := pp.CurFunc.LSym.Func()
+ fi.JumpTables = append(fi.JumpTables, obj.JumpTable{Sym: jt.Aux.(*obj.LSym), Targets: targets})
+ }
+
if e.log { // spew to stdout
filename := ""
for p := pp.Text; p != nil; p = p.Link {
diff --git a/src/cmd/compile/internal/test/switch_test.go b/src/cmd/compile/internal/test/switch_test.go
new file mode 100644
index 0000000..3854bb0
--- /dev/null
+++ b/src/cmd/compile/internal/test/switch_test.go
@@ -0,0 +1,30 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import "testing"
+
+func BenchmarkSwitch(b *testing.B) {
+ x := 0
+ for i := 0; i < b.N; i++ {
+ switch x {
+ case 0:
+ x = 2
+ case 2:
+ x = 7
+ case 7:
+ x = 3
+ case 3:
+ x = 9
+ case 9:
+ x = 1
+ case 1:
+ x = 4
+ case 4:
+ x = 0
+ }
+ }
+ sink = x
+}
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index abb3741..89b4a7f 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -486,10 +486,16 @@
StackObjects *LSym
OpenCodedDeferInfo *LSym
ArgInfo *LSym // argument info for traceback
+ JumpTables []JumpTable
FuncInfoSym *LSym
}
+type JumpTable struct {
+ Sym *LSym
+ Targets []*Prog
+}
+
// NewFuncInfo allocates and returns a FuncInfo for LSym.
func (s *LSym) NewFuncInfo() *FuncInfo {
if s.Extra != nil {
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 6555756..3d28ede 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -2185,6 +2185,7 @@
return
}
}
+
// splice padding nops into Progs
for _, n := range nops {
pp := n.p
@@ -2229,6 +2230,14 @@
}
obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
}
+
+ // Now that we know byte offsets, we can generate jump table entries.
+ // TODO: could this live in obj instead of obj/$ARCH?
+ for _, jt := range s.Func().JumpTables {
+ for i, p := range jt.Targets {
+ jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
+ }
+ }
}
func instinit(ctxt *obj.Link) {
Attention is currently required from: Josh Bleecher Snyder.
1 comment:
Patchset:
Not sure if it is worth going with this yet, but I'm posting because it makes it easy for people to try it and see if it helps.
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
Keith Randall uploaded patch set #2 to this change.
17 files changed, 554 insertions(+), 33 deletions(-)
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
1 comment:
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #2, Line 250: if width/4 > uint64(cnt) { // <25% full. TODO: what's the right number here?
Might be interesting, if splitting those groups can help. Maybe add a Todo?
Attention is currently required from: Keith Randall.
Patch set 2:Trust +1
43 comments:
Patchset:
nice. the assembler piece of this is much easier than i expected.
this is a first round of comments. let me know for the next round whether you're interested in a +2 to try to get it in for 1.18. (it's pretty close now.)
File src/cmd/compile/internal/ssa/block.go:
Patch Set #1, Line 160: // ConrolValue returns the nth control value of b.
ControlValue
File src/cmd/compile/internal/ssa/compile.go:
Patch Set #1, Line 487: {name: "jumptable", fn: jumpTable},
can you say a few words here or in the commit message about why this particular pass location? or add a few constraints to passOrder below?
File src/cmd/compile/internal/ssa/gen/AMD64.rules:
Patch Set #2, Line 2249: (JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)) idx)
maybe put next to other block lowering, like line 212. might even be nice to put block optimizations in their own files, to make these long rules files easier to follow (separately).
it's a bit unfortunate to make the sym twice. it's tempting to reach into the LEAQ to extract the sym when we need it.
File src/cmd/compile/internal/ssa/gen/AMD64Ops.go:
Patch Set #1, Line 954: // Aux is the symbol (a *obj.LSym) for the jump table (the thing control[0] is the address of)
if you doc Aux before control[0], then the parenthetical can go with control[0] and will be easier to follow
File src/cmd/compile/internal/ssa/gen/AMD64Ops.go:
Patch Set #2, Line 952: // control[0] is the address of the jump table
suggest switching control[0] and control[1] so that control[0] is idx regardless of whether the jump table is lowered or not.
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #2, Line 28: // For now, we forbid b2+ from having any code at all, except the comparison test.
have you done some printf to find out what other kinds of values tend to reside in b2+? my experience working on short-circuit is that many of them are easy to handle.
Patch Set #2, Line 31: // This pattern matching handles both linear and binary search code generated
it's a bit unfortunate to have a bunch of code to generate this search code and then also a bunch of code to detect it. it does let us generate a jump table for (fairly specific) code that isn't a switch, but it feels like we're doing a lot more work than necessary.
Patch Set #2, Line 32: // by the switch rewriting done in ../walk/switch.go.
have you tested that it handles "integer range" switches like
    switch x {
    case 1,2,3,4:
        // ...
    case 5,6,7:
        // ...
    }

those integer range switches generate different code.
Patch Set #2, Line 34: // We convert that structure to a jump table.
it might be preferable to include the default as the final entry in the jump table.
    idx := switch_exp - min(constants)
    if idx < 0 || idx > max(constants)-min(constants) {
        idx = len(jumptable)
    }
    jump to table[idx]
ideally that'd compile to a conditional move followed by a jump, without any intervening branches. it might also provide a cleaner handling of multiple defaults.
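roughly, as a Go-level model of that scheme (every name here is made up; the real pass operates on SSA blocks and an assembler-emitted table):

    // table holds one entry per consecutive case value, plus the default handler last.
    func dispatch(table []func(), x, min int64) {
        idx := x - min
        if uint64(idx) >= uint64(len(table)-1) {
            idx = int64(len(table) - 1) // out of range: clamp to the default entry
        }
        table[idx]() // stands in for the single indirect jump through the table
    }

the clamp is the piece that could plausibly lower to a CMOV, leaving one unconditional indirect jump.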
Patch Set #2, Line 36: // idx := swiched_on_value - min(constants)
switched_on_value
but see comments below about how to name this
Patch Set #2, Line 51: // as their parent.
explain why. e.g. add that we are searching for treelike structures as in the graph above.
Patch Set #2, Line 63: if b.Values[0] != c {
consider adding a check for c.Block != b. it's redundant now, but it'll be nice to have if/when we accept other values.
Patch Set #2, Line 66: if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 { // TODO: Unsigned, {32,16,8}, and maybe String.
switch c.Op
strings get decomposed into length and pointer, starting with length, so this might just work for the top level search for strings. it'd be interesting to check.
Patch Set #2, Line 73: if x.Op == OpConst64 {
i think this would be clearer without the nested ifs, e.g.
    if x.Op == OpConst64 {
        x, y = y, x
    }
    if y.Op != OpConst64 {
        // Neither x nor y is const.
        continue
    }
    if x.Op == OpConst64 {
        // Both x and y are const.
        // Can't handle ...
        continue
    }

but do we even have const/const comparisons at this point? i'd expect previous rewrite passes to optimize those away.
Patch Set #2, Line 84: value := x
value is overloaded in this context. i don't have any great ideas. maybe exp for "switch expression"?
Patch Set #2, Line 86: // Look at predecessor block, see if it is branching on the same value.
consider moving this check (lines 86-93) earlier, since it is cheap. and it would put all the cfg checks together.
i've been bitten one too many times by ascii cfgs not matching the cfg-checking code; it's nice to have it all in one place so you can stare at it hard and convince yourself that the code actually enforces the pretty graphs. (i haven't managed to do that yet in this particular case, although it looks fine on a first pass. not sure about possible cycles.)
Patch Set #2, Line 90: p := b.Preds[0].b
are there cases in which there are intervening BlockPlains? (thinking about other possible equivalent cfgs that look superficially different.)
Patch Set #2, Line 94: // Note: don't need len(p.Values)==1.
say why. it's not obvious to me, given the compression of parent links that we do next.
Patch Set #2, Line 96: if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 {
switch c.Op
Patch Set #2, Line 99: // Note: don't need c.Uses == 1
say why
Patch Set #2, Line 101: if x.Op == OpConst64 {
as above. and maybe refactor some of this out into a helper function.
Patch Set #2, Line 121: // Compress parent links to point directly to the root.
does this do the right thing if there are cycles?
should we detect cycles in groups and remove such groups?
Patch Set #2, Line 150: // Sort groups by root ID for determinism.
duplicate of line comment on next line; delete one of them
Patch Set #2, Line 156: getConst := func(b *Block) int64 {
if you combine getConst and getVal into a single method that returns two values (const, val), you can use it above as well when checking for suitability. naming it is hard, though. optional.
Patch Set #2, Line 172: // nextBlock returns the edge outgoing from b if the choice
"choice variable" -- let's find a consistent terminology and use it, and introduce it during the top level docs. choice variable seems fine, although it doesn't lend itself to short variable names. (switch expression? jump expression?)
Patch Set #2, Line 175: d := getConst(b)
ctrl := b.ControlValue(0) and use it below
Patch Set #2, Line 244: // Not wide enough to make this worthwhile.
there are two factors: width and depth (max possible number of comparisons). it's not obvious to me which matters more. but 4 is such a low cutoff that in practice it's probably irrelevant.
Patch Set #2, Line 250: if width/4 > uint64(cnt) { // <25% full. TODO: what's the right number here?
one way to pick a number is to do whatever empirically minimizes total executable size over a range of executables.
Patch Set #2, Line 264: // Note: branching back to the root is treated
another reason to chunk out cyclical groups at some point above
Patch Set #2, Line 274: // not in [min,max]. We can do that with two probes, minint and maxint.
this is fine for now, but we'll end up having to rework this if we start to do other clever things like masking off bits. at some point (not necessarily now), it'd be nice to refactor some of this from one big function into smaller named data types (groups) with methods (exit).
Patch Set #2, Line 275: default_ := groupExit(math.MinInt64)
default1, to match default2? or defaultMin and defaultMax? or defaultLo and defaultHi?
Patch Set #2, Line 298: if v.Args[default_.i] != v.Args[default2.i] {
depending on exactly what phase we are at, one of these args could easily be a copy of the other instead of strictly identical.
Patch Set #2, Line 312: // At this point, we're committed to making the jump table. //
optional suggestion: split this loop into two. in the first part (above), build a list of groups we want to process. in the second part, process only those. that'll likely make future refactoring easier.
Patch Set #2, Line 339: // TODO: this pass is after prove, so if this comparison is obviously satisifiable (e.g. switch (x&3) { case 0: ... case 3: ... }).
i had a hard time parsing this. i think you want to remove the trailing period here and s/We/we/ on the next line.
Patch Set #2, Line 340: // We might want to squash this bounds check. Or move this pass before prove.
many such trivial bounds check removals are handled during rewrite rules. and if these relevant ones are not, we should add them there, which will be useful for other reasons as well.
Patch Set #2, Line 370: // is easy to remove an edge from, because we know it has exactly 1 predecessor.
a before/after ascii cfg chart here would be nice
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #2, Line 1949: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
i don't remember offhand. is base.Ctxt.Lookup concurrency-safe?
File src/cmd/compile/internal/ssa/rewriteAMD64.go:
Patch Set #2, Line 6: import (
is the newly grouped+sorted imports because of a change to gofmt? or because you hit save and goimports/gopls rewrote it? if the former, can we re-generate all the architectures in a separate cl?
File src/cmd/compile/internal/ssagen/ssa.go:
Patch Set #2, Line 7021: // The assembler converts from *Prog entries to absolute addresses
naive question: how does using absolute jump addresses compare to storing offsets and using PC-relative jumps?
File src/cmd/internal/obj/link.go:
Patch Set #2, Line 494: type JumpTable struct {
short doc here please
File src/cmd/internal/obj/x86/asm6.go:
Patch Set #2, Line 2235: // TODO: could this live in obj instead of obj/$ARCH?
seems like it could. may as well start with it there.
Patch Set #2, Line 2238: jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
add a few words, like
// The ith jumptable entry points to offset p.Pc in the function symbol s.
Attention is currently required from: Keith Randall.
1 comment:
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #2, Line 34: // We convert that structure to a jump table.
it might be preferable to include the default as the final entry in the jump table. […]
(and if the relevant default is already in the jump table, you can use that as the index instead of adding a final entry)
Attention is currently required from: Josh Bleecher Snyder.
38 comments:
File src/cmd/compile/internal/ssa/block.go:
Patch Set #1, Line 160: // ConrolValue returns the nth control value of b.
ControlValue
Done
File src/cmd/compile/internal/ssa/compile.go:
Patch Set #1, Line 487: {name: "jumptable", fn: jumpTable},
can you say a few words here or in the commit message about why this particular pass location? or ad […]
I put it reasonably late, but before lowering, since it is a generic pass.
I added one edge to passOrder.
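For illustration, the kind of passOrder entry meant here (the actual pair added in patch set 3 isn't visible in this thread, so this particular edge is a guess):

    // Jump tables are generic blocks that still need to be lowered,
    // so the new pass must run before "lower".
    {"jumptable", "lower"},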
File src/cmd/compile/internal/ssa/gen/AMD64.rules:
Patch Set #2, Line 2249: (JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)) idx)
maybe put next to other block lowering, like line 212. […]
Yep. It would be nice to have a block of code executed on successful rule match that could evaluate things once and put the result in variables, so the RHS could use them.
Not really worth it for just this. base.Ctxt.Lookup does the right thing on repeated accesses. We just have to build the string twice.
I thought about reaching into the LEAQ, but there's always the possibility it gets spilled/restored/whatever, and I'd rather not introduce that complication (although it would probably work, LEAQ is kinda special).
File src/cmd/compile/internal/ssa/gen/AMD64Ops.go:
Patch Set #1, Line 954: // Aux is the symbol (a *obj.LSym) for the jump table (the thing control[0] is the address of)
if you doc Aux before control[0], then the parenthetical can go with control[0] and will be easier t […]
Done
File src/cmd/compile/internal/ssa/gen/AMD64Ops.go:
Patch Set #2, Line 952: // control[0] is the address of the jump table
suggest switching control[0] and control[1] so that control[0] is idx regardless of whether the jump […]
Done
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #2, Line 28: // For now, we forbid b2+ from having any code at all, except the comparison test.
have you done some printf to find out what other kinds of values tend to reside in b2+? my experienc […]
I haven't looked at why the jump table fails due to other things being in the block. The switch table generator in walk doesn't put anything there.
Something to investigate for the future (there are lots of these in this CL).
Patch Set #2, Line 31: // This pattern matching handles both linear and binary search code generated
it's a bit unfortunate to have a bunch of code to generate this search code and then also a bunch of […]
I don't think there's a lot of work involved in detecting the output of walk's switch generator. At least big-O wise, it's pretty easy.
I looked into doing it in other ways, but it's tricky because if we abort here for some reason, we want the old binary search behavior. So either walk and this pass need the same decision procedure (hard), or we make walk introduce the jump table (new Nodes for jump tables, etc.). None of those options seemed particularly palatable.
Patch Set #2, Line 32: // by the switch rewriting done in ../walk/switch.go.
have you tested that it handles "integer range" switches like […]
Good point, I don't think that gets handled correctly at the moment. walk generates uint64(x-c) <= d for those kinds of cases.
(It's *correct*, but the jump table isn't generated in that case.)
I'll see if I can handle those in a followup CL. Definitely the kind of case this optimization should handle.
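For concreteness, a made-up example of that shape (not code from this CL):

    // For a range case like "case 5, 6, 7:", walk emits a single unsigned
    // comparison rather than three equality tests:
    func matches567(x int64) bool {
        return uint64(x-5) <= 2 // true exactly for x in {5, 6, 7}
    }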
Patch Set #2, Line 34: // We convert that structure to a jump table.
(and if the relevant default is already in the jump table, you can use that as the index instead of […]
I worry that the "not any of the cases" branch would be very predictable (hence the unlikely annotation I used on it) and thus not better than a conditional move.
Patch Set #2, Line 51: // as their parent.
explain why. e.g. add that we are searching for treelike structures as in the graph above.
Done
Patch Set #2, Line 63: if b.Values[0] != c {
consider adding a check for c.Block != b. […]
Done
Patch Set #2, Line 66: if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 { // TODO: Unsigned, {32,16,8}, and maybe String.
switch c.Op […]
Done.
Yep, it generates really nice code already for constant string cases. It knows the length and just does the CMPx directly with memory in each length case to check the contents.
Patch Set #2, Line 73: if x.Op == OpConst64 {
i think this would be clearer without the nested ifs, e.g. […]
It should get optimized away, except possibly with -N, and then this pass won't run anyway.
But I'd rather not make assumptions about input that we don't need to make. If you did -N and -d=ssa/jumptable/on or something, it might barf or generate bad code. Better to be safe.
Patch Set #2, Line 86: // Look at predecessor block, see if it is branching on the same value.
consider moving this check (lines 86-93) earlier, since it is cheap. […]
Done
Patch Set #2, Line 90: p := b.Preds[0].b
are there cases in which there are intervening BlockPlains? (thinking about other possible equivalen […]
I haven't seen any yet.
Not that I've been looking terribly hard.
Patch Set #2, Line 94: // Note: don't need len(p.Values)==1.
say why. it's not obvious to me, given the compression of parent links that we do next.
Done
Patch Set #2, Line 96: if c.Op != OpEq64 && c.Op != OpNeq64 && c.Op != OpLeq64 && c.Op != OpLess64 {
switch c. […]
Done
Patch Set #2, Line 99: // Note: don't need c.Uses == 1
say why
Done
Patch Set #2, Line 101: if x.Op == OpConst64 {
as above. and maybe refactor some of this out into a helper function.
Done
Patch Set #2, Line 121: // Compress parent links to point directly to the root.
does this do the right thing if there are cycles? […]
Cycles can't happen because then len(b.Preds)==1 for every block in the cycle. Then it is unreachable. Maybe unreachable cycles are gone by now? Maybe worth detecting something in case deadcode has been introduced since the last deadcode pass.
Patch Set #2, Line 150: // Sort groups by root ID for determinism.
duplicate of line comment on next line; delete one of them
Done
Patch Set #2, Line 156: getConst := func(b *Block) int64 {
if you combine getConst and getVal into a single method that returns two values (const, val), you ca […]
I think I'll leave them as is - the names are good. (At least, getConst is good. getVal has the naming problem discussed elsewhere.)
Patch Set #2, Line 175: d := getConst(b)
ctrl := b. […]
Done
Patch Set #2, Line 244: // Not wide enough to make this worthwhile.
there are two factors: width and depth (max possible number of comparisons). […]
Yes, this probably needs some tuning and/or better heuristics altogether. This CL is mostly introducing the mechanism. I was thinking of upping the constant just to make sure when it triggers, it is *really* obvious that it is worthwhile. We can then ratchet down as we get more evidence.
Patch Set #2, Line 250: if width/4 > uint64(cnt) { // <25% full. TODO: what's the right number here?
one way to pick a number is to do whatever empirically minimizes total executable size over a range […]
Ack
Patch Set #2, Line 264: // Note: branching back to the root is treated
another reason to chunk out cyclical groups at some point above
Edges back to the root do happen. They are different from cycles above, because the root can have more than one predecessor.
Patch Set #2, Line 274: // not in [min,max]. We can do that with two probes, minint and maxint.
this is fine for now, but we'll end up having to rework this if we start to do other clever things l […]
Ack
Patch Set #2, Line 275: default_ := groupExit(math.MinInt64)
default1, to match default2? or defaultMin and defaultMax? or defaultLo and defaultHi?
Done
Patch Set #2, Line 298: if v.Args[default_.i] != v.Args[default2.i] {
depending on exactly what phase we are at, one of these args could easily be a copy of the other ins […]
Certainly possible. But safe at least. I'll leave a TODO here.
Patch Set #2, Line 339: // TODO: this pass is after prove, so if this comparison is obviously satisifiable (e.g. switch (x&3) { case 0: ... case 3: ... }).
i had a hard time parsing this. […]
Done
Patch Set #2, Line 340: // We might want to squash this bounds check. Or move this pass before prove.
many such trivial bounds check removals are handled during rewrite rules. […]
Ack
Patch Set #2, Line 370: // is easy to remove an edge from, because we know it has exactly 1 predecessor.
a before/after ascii cfg chart here would be nice
Done
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #2, Line 1949: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
i don't remember offhand. is base.Ctxt. […]
Yes, it is. Unlike objw.Global, which isn't, and took me a while to figure out 😞
File src/cmd/compile/internal/ssa/rewriteAMD64.go:
Patch Set #2, Line 6: import (
is the newly grouped+sorted imports because of a change to gofmt? or because you hit save and goimpo […]
Yeah, this is version skew in gofmt. I'll back out the formatting in this change. Might be worth waiting for a first-in-1.19 CL to switch over.
File src/cmd/compile/internal/ssagen/ssa.go:
Patch Set #2, Line 7021: // The assembler converts from *Prog entries to absolute addresses
naive question: how does using absolute jump addresses compare to storing offsets and using PC-relat […]
Not sure exactly what you are intending. But gcc, for example, uses tables with 4-byte entries and does LEAQ to materialize a PC, a sign-extending MOVLQSX to load the offset, ADDQ, then JMP.
There's no pc-relative jump that takes a non-constant distance.
This scheme seemed easier. Fewer instructions, but larger table. I don't think there is an easy answer to which is better.
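As a rough Go-level model of the two layouts (illustrative types only; the real tables hold machine addresses or offsets written out by the assembler):

    type absTable []uintptr // 8-byte entries: absolute targets, consumed by JMP *(TABLE)(INDEX*8)
    type relTable []int32   // 4-byte entries: offsets from a base PC, gcc-style

    func absTarget(t absTable, idx int) uintptr { return t[idx] }

    func relTarget(t relTable, base uintptr, idx int) uintptr {
        // One sign-extending load and an add before the jump, but half the table
        // size, and no absolute addresses needing dynamic relocations in PIE binaries.
        return uintptr(int64(base) + int64(t[idx]))
    }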
File src/cmd/internal/obj/link.go:
Patch Set #2, Line 494: type JumpTable struct {
short doc here please
Done
File src/cmd/internal/obj/x86/asm6.go:
Patch Set #2, Line 2235: // TODO: could this live in obj instead of obj/$ARCH?
seems like it could. may as well start with it there.
The thing I worry about is that jump tables for other archs might want different layout, e.g. deltas from the current PC, or start of function, instead of absolute addresses.
In other words, if the instructions to use the table are built in amd64/ssa.go, then the code to build the table should probably also be arch-specific.
But I'm not sure if that's worrying about a future that isn't a real problem.
Patch Set #2, Line 2238: jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
add a few words, like […]
Done
Attention is currently required from: Josh Bleecher Snyder.
Keith Randall uploaded patch set #3 to this change.
17 files changed, 581 insertions(+), 29 deletions(-)
Attention is currently required from: Keith Randall.
Patch set 3:Code-Review +2Trust +1
9 comments:
Commit Message:
Patch Set #3, Line 10: Switch-16 1.67ns ± 1% 1.17ns ± 1% -30.11% (p=0.000 n=10+9)
Thinking out loud, this is the best possible scenario, because the jump table stays in the cache. I wonder whether we can measure the performance impact with a cold cache.
...which also makes me wonder whether we could/should put the jump table at the end of the function. But that's going to be hard.
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #2, Line 31: // This pattern matching handles both linear and binary search code generated
I don't think there's a lot of work involved in detecting the output of walk's switch generator. […]
Ack
Patch Set #2, Line 32: // by the switch rewriting done in ../walk/switch.go.
Good point, I don't think that gets handled correctly at the moment. […]
Yep, although only if d - c is small, otherwise we replace a single comparison with a large jump table. (This is, incidentally, part of what I meant when I commented that the heuristic to use a jump table should depend not just on the density of the cases and the width of the jump table but also the depth, i.e. the max number of comparisons required to implement without the jump table.)
Anyway, leaving for a follow-up seems fine.
Patch Set #2, Line 34: // We convert that structure to a jump table.
I worry that the "not any of the cases" branch would be very predictable (hence the unlikely annotat […]
The branch predictor is a global resource; even perfectly predictable branches have costs, even though those costs are non-local. Unless it causes a local performance regression, I'd strongly favor using a CMOV.
Certainly OK either way as a follow-up, though, if you'd like to get this in.
Patch Set #2, Line 121: // Compress parent links to point directly to the root.
Cycles can't happen because then len(b.Preds)==1 for every block in the cycle. […]
Add a short comment to that effect? And maybe a TODO?
Patch Set #2, Line 274: // not in [min,max]. We can do that with two probes, minint and maxint.
Ack
(In particular, the thing I'd be most excited to see in a follow-up is probably masking off some bits of the type hash and using a jump table for small type switches.)
File src/cmd/compile/internal/ssa/jumptable.go:
Patch Set #3, Line 337: // v _| _|
Nit: Why _| instead of \ here? Makes me nervous that I'm misreading the diagram somehow.
Edit: Oh, I see, those are arrows. I'd rather just use | and - and \, and trust the user to understand. Or use a few words, like "all edges are directional and run top-left to bottom-right". Or go unicode and use ↘, ↓, and →.
File src/cmd/compile/internal/ssagen/ssa.go:
Patch Set #2, Line 7021: // The assembler converts from *Prog entries to absolute addresses
Not sure exactly what you are intending. […]
Yeah, I was thinking of gcc's approach. I was mainly curious how you'd decided; I agree that there's no obvious correct approach.
File src/cmd/internal/obj/x86/asm6.go:
Patch Set #2, Line 2235: // TODO: could this live in obj instead of obj/$ARCH?
The thing I worry about is that jump tables for other archs might want different layout, e.g. […]
I'd rather start naively idealistic and then have to refactor as we encounter issues with other architectures than pessimize from the start. But that's probably as much a personality characteristic as a technical judgement. :)
Attention is currently required from: Keith Randall.
1 comment:
Patchset:
Might also add a 'const go118UseJumpTables = true' early exit to the jumptable pass, since this is happening fairly late in the cycle.
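Something like this at the top of the pass, say (placement and wording are just a sketch):

    const go118UseJumpTables = true

    func jumpTable(f *Func) {
        if !go118UseJumpTables {
            return // cheap kill switch if problems show up late in the release
        }
        // ... rest of the pass unchanged ...
    }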
1 comment:
Patchset:
I think I'm going to leave this for next cycle. No hurry, and there's definitely more to do.
Attention is currently required from: Keith Randall.
Patch set 3:-Code-Review
1 comment:
Patchset:
I think I'm going to leave this for next cycle. No hurry, and there's definitely more to do.
Ack. I'll remove the +2 for now to avoid accidental submission.
Attention is currently required from: Keith Randall.
Keith Randall uploaded patch set #4 to this change.
cmd/compile: implement jump tables
Performance is kind of hard to exactly quantify.
One big difference between jump tables and the old binary search
scheme is that there's only 1 branch statement instead of O(n) of
them. That can be both a blessing and a curse, and can make evaluating
jump tables very hard to do.
The single branch can become a choke point for the hardware branch
predictor. A branch table jump must fit all of its state in a single
branch predictor entry (technically, a branch target predictor entry).
With binary search that predictor state can be spread among lots of
entries. In cases where the case selection is repetitive and thus
predictable, binary search can perform better.
The big win for a jump table is that it doesn't consume so much of the
branch predictor's resources. But that benefit is essentially never
observed in microbenchmarks, because the branch predictor can easily
keep state for all the binary search branches in a microbenchmark. So
that benefit is really hard to measure.
So predictable switch microbenchmarks are ~useless - they will almost
always favor the binary search scheme. Fully unpredictable switch
microbenchmarks are better, as they are aren't lying to us quite so
much. In a perfectly unpredictable situation, a jump table will expect
to incur 1-1/N branch mispredicts, where a binary search would incur
lg(N)/2 of them. That makes the crossover point at about N=4. But of
course switches in real programs are seldom fully unpredictable, so
we'll use a higher crossover point.
Beyond the branch predictor, jump tables tend to execute more
instructions per switch but have no additional instructions per case,
which also argues for a larger crossover.
As far as code size goes, with this CL cmd/go has a slightly smaller
code segment and a slightly larger overall size (from the jump tables
themselves which live in the data segment).
This is a case where some FDO (feedback-directed optimization) would
be really nice to have. #28262
Some large-program benchmarks might help make the case for this
CL. Especially if we can turn on branch mispredict counters so we can
see how much using jump tables can free up branch prediction resources
that can be gainfully used elsewhere in the program.
name old time/op new time/op delta
Switch8Predictable-8 1.68ns ± 6% 1.28ns ± 2% -23.63% (p=0.000 n=10+10)
Switch8Unpredictable-8 9.44ns ±11% 7.21ns ±11% -23.61% (p=0.000 n=10+10)
Switch32Predictable-8 2.20ns ± 4% 1.40ns ± 0% -36.13% (p=0.000 n=10+8)
Switch32Unpredictable-8 10.3ns ± 2% 7.4ns ± 4% -27.45% (p=0.000 n=10+10)
Fixes #5496
Update #34381
Change-Id: I3ff56011d02be53f605ca5fd3fb96b905517c34f
---
M src/cmd/compile/internal/amd64/ssa.go
M src/cmd/compile/internal/gc/obj.go
M src/cmd/compile/internal/ir/node.go
M src/cmd/compile/internal/ir/node_gen.go
M src/cmd/compile/internal/ir/op_string.go
M src/cmd/compile/internal/ir/stmt.go
M src/cmd/compile/internal/ssa/block.go
M src/cmd/compile/internal/ssa/check.go
M src/cmd/compile/internal/ssa/gen/AMD64.rules
M src/cmd/compile/internal/ssa/gen/AMD64Ops.go
M src/cmd/compile/internal/ssa/gen/genericOps.go
M src/cmd/compile/internal/ssa/gen/rulegen.go
M src/cmd/compile/internal/ssa/opGen.go
M src/cmd/compile/internal/ssa/rewrite.go
M src/cmd/compile/internal/ssa/rewriteAMD64.go
M src/cmd/compile/internal/ssagen/ssa.go
A src/cmd/compile/internal/test/switch_test.go
M src/cmd/compile/internal/walk/stmt.go
M src/cmd/compile/internal/walk/switch.go
M src/cmd/internal/obj/link.go
M src/cmd/internal/obj/x86/asm6.go
M src/cmd/internal/sys/arch.go
22 files changed, 484 insertions(+), 40 deletions(-)
Attention is currently required from: Keith Randall.
Keith Randall uploaded patch set #5 to this change.
Switch8Predictable 1.89ns ± 2% 1.27ns ± 3% -32.58% (p=0.000 n=9+10)
Switch8Unpredictable 9.33ns ± 1% 7.50ns ± 1% -19.60% (p=0.000 n=10+9)
Switch32Predictable 2.20ns ± 2% 1.64ns ± 1% -25.39% (p=0.000 n=10+9)
Switch32Unpredictable 10.0ns ± 2% 7.6ns ± 2% -24.04% (p=0.000 n=10+10)
22 files changed, 483 insertions(+), 40 deletions(-)
Attention is currently required from: Cherry Mui.
Patch set 5:Trust +1
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
Patch set 5:Code-Review +2
8 comments:
Commit Message:
Patch Set #5, Line 31: are aren't
Typo.
Patchset:
Might also add a 'const go118UseJumpTables = true' early exit to the jumptable pass, since this is h […]
This may be a good idea for 1.19 also.
Patchset:
Do we want to disable jump table if -spectre=ret is specified, which I think is to avoid indirect jumps?
File src/cmd/compile/internal/ssa/block.go:
Patch Set #5, Line 160: // ControlValue returns the nth control value of b.
Any reason to define a method instead of directly using b.Controls[n]?
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #5, Line 1955: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
Will this ensure uniqueness if the function being compiled isn't actually in the current package? (E.g. generated functions? Instantiation?)
File src/cmd/compile/internal/walk/switch.go:
Patch Set #5, Line 244: jumpTableCases
This should be minCases? jumpTableCases doesn't seem to appear anywhere else.
Also next line.
Patch Set #5, Line 262: constant.BinaryOp(constant.MakeInt64(int64(len(cc))), token.MUL, constant.MakeInt64(minDensity))
We could directly do len(cc)*minDensity here. Or do we want to avoid overflow here? If it overflows, we probably shouldn't use a jump table anyway.
File src/cmd/internal/obj/x86/asm6.go:
Patch Set #2, Line 2235: // TODO: could this live in obj instead of obj/$ARCH?
I'd rather start naively idealistic and then have to refactor as we encounter issues with other arch […]
I'm fine with starting here. But would it be better to use relative PCs if possible? That would reduce dynamic relocations for PIE binaries.
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder.
8 comments:
Commit Message:
Patch Set #5, Line 31: are aren't
Typo.
Done
Patchset:
This may be a good idea for 1.19 also.
Done
Patchset:
Do we want to disable jump tables if -spectre=ret is specified, which I think is meant to avoid indirect ju […]
-spectre=ret is really targeted at disabling the return address predictor, not the indirect jump (or call) predictor. For instance, every interface call is also an indirect jump.
I think there is a spectre issue here, but it's no worse than other indirect jumps we use.
That said, if we do add spectre mitigation for indirect jumps and jump tables turn out to be a problem, it is easy to just disable them.
File src/cmd/compile/internal/ssa/block.go:
Patch Set #5, Line 160: // ControlValue returns the nth control value of b.
Any reason to define a method instead of directly using b. […]
Not really. Removed and used b.Controls[n].
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #5, Line 1955: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
Will this ensure uniqueness if the function being compiled isn't actually in the current package? (E […]
Maybe I should use the function's LSym, which is what we use for, e.g., arginfo symbols.
File src/cmd/compile/internal/walk/switch.go:
Patch Set #5, Line 244: jumpTableCases
This should be minCases? jumpTableCases doesn't seem to appear anywhere else. […]
Done
Patch Set #5, Line 262: constant.BinaryOp(constant.MakeInt64(int64(len(cc))), token.MUL, constant.MakeInt64(minDensity))
We could directly do len(cc)*minDensity here. […]
Done
File src/cmd/internal/obj/x86/asm6.go:
Patch Set #2, Line 2235: // TODO: could this live in obj instead of obj/$ARCH?
I'm fine with starting here. […]
We could do relative PCs, but it might take an extra instruction or 2. Relative PCs might let us use 4-byte instead of 8-byte entries, which would be good.
I'm going to leave it as is for now.
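For what a relative-PC variant could look like, here is a minimal hypothetical sketch (not part of this CL; the function name and placement are assumptions): entries would hold 4-byte offsets from the start of the containing function symbol instead of the 8-byte absolute addresses written by WriteAddr in asm6.go below, trading an extra add at the jump site for smaller tables and no per-entry address relocation.

// Hypothetical sketch only, mirroring the shape of the WriteAddr loop.
func emitRelativeJumpTable(ctxt *obj.Link, s *obj.LSym, jt obj.JumpTable) {
	for i, p := range jt.Targets {
		// p.Pc is already a byte offset within the function symbol s,
		// so a 4-byte entry is enough for any realistic function size.
		jt.Sym.WriteInt(ctxt, int64(i)*4, 4, p.Pc)
	}
}

The jump sequence would then need to materialize the function's address and add the loaded offset before the indirect JMP.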
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder.
Keith Randall uploaded patch set #6 to this change.
cmd/compile: implement jump tables
Performance is kind of hard to exactly quantify.
One big difference between jump tables and the old binary search
scheme is that there's only 1 branch statement instead of O(n) of
them. That can be both a blessing and a curse, and can make evaluating
jump tables very hard to do.
The single branch can become a choke point for the hardware branch
predictor. A branch table jump must fit all of its state in a single
branch predictor entry (technically, a branch target predictor entry).
With binary search that predictor state can be spread among lots of
entries. In cases where the case selection is repetitive and thus
predictable, binary search can perform better.
The big win for a jump table is that it doesn't consume so much of the
branch predictor's resources. But that benefit is essentially never
observed in microbenchmarks, because the branch predictor can easily
keep state for all the binary search branches in a microbenchmark. So
that benefit is really hard to measure.
So predictable switch microbenchmarks are ~useless - they will almost
always favor the binary search scheme. Fully unpredictable switch
microbenchmarks are better, as they aren't lying to us quite so much.
M src/cmd/compile/internal/ssa/check.go
M src/cmd/compile/internal/ssa/config.go
M src/cmd/compile/internal/ssa/export_test.go
M src/cmd/compile/internal/ssa/gen/AMD64.rules
M src/cmd/compile/internal/ssa/gen/AMD64Ops.go
M src/cmd/compile/internal/ssa/gen/genericOps.go
M src/cmd/compile/internal/ssa/gen/rulegen.go
M src/cmd/compile/internal/ssa/opGen.go
M src/cmd/compile/internal/ssa/rewrite.go
M src/cmd/compile/internal/ssa/rewriteAMD64.go
M src/cmd/compile/internal/ssagen/ssa.go
A src/cmd/compile/internal/test/switch_test.go
M src/cmd/compile/internal/walk/stmt.go
M src/cmd/compile/internal/walk/switch.go
M src/cmd/internal/obj/link.go
M src/cmd/internal/obj/x86/asm6.go
M src/cmd/internal/sys/arch.go
23 files changed, 489 insertions(+), 40 deletions(-)
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
Patch set 6:Code-Review +2
2 comments:
Patchset:
-spectre=ret is really targeted at disabling the return address predictor, not the indirect jump (or […]
I think -spectre=ret turns indirect jumps into retpolines. The code at https://cs.opensource.google/go/go/+/master:src/cmd/internal/obj/x86/asm6.go;l=2075 doesn't seem to handle indexed jumps.
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #5, Line 1955: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
Maybe I should use the function's LSym, which is what we use for, e.g., arginfo symbols.
If the function being compiled is DUPOK, the table also needs to be DUPOK, right?
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder, Cherry Mui.
1 comment:
File src/cmd/compile/internal/ssa/rewrite.go:
Patch Set #5, Line 1955: return base.Ctxt.Lookup(fmt.Sprintf("%s.%s.jump%d", types.LocalPkg.Prefix, b.Func.Name, b.ID))
If the function being compiled is DUPOK, the table also needs to be DUPOK, right?
Yes, good catch.
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder, Cherry Mui.
Keith Randall uploaded patch set #7 to this change.
23 files changed, 491 insertions(+), 40 deletions(-)
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
Patch set 7:Code-Review +2
1 comment:
Patchset:
LGTM.
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Josh Bleecher Snyder, Keith Randall.
Patch set 7:Code-Review +1
Keith Randall submitted this change.
Reviewed-on: https://go-review.googlesource.com/c/go/+/357330
Run-TryBot: Keith Randall <k...@golang.org>
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Cherry Mui <cher...@google.com>
Reviewed-by: Keith Randall <k...@google.com>
---
M src/cmd/compile/internal/amd64/ssa.go
M src/cmd/compile/internal/gc/obj.go
M src/cmd/compile/internal/ir/node.go
M src/cmd/compile/internal/ir/node_gen.go
M src/cmd/compile/internal/ir/op_string.go
M src/cmd/compile/internal/ir/stmt.go
M src/cmd/compile/internal/ssa/check.go
M src/cmd/compile/internal/ssa/config.go
M src/cmd/compile/internal/ssa/export_test.go
M src/cmd/compile/internal/ssa/gen/AMD64.rules
M src/cmd/compile/internal/ssa/gen/AMD64Ops.go
M src/cmd/compile/internal/ssa/gen/genericOps.go
M src/cmd/compile/internal/ssa/gen/rulegen.go
M src/cmd/compile/internal/ssa/opGen.go
M src/cmd/compile/internal/ssa/rewrite.go
M src/cmd/compile/internal/ssa/rewriteAMD64.go
M src/cmd/compile/internal/ssagen/ssa.go
A src/cmd/compile/internal/test/switch_test.go
M src/cmd/compile/internal/walk/stmt.go
M src/cmd/compile/internal/walk/switch.go
M src/cmd/internal/obj/link.go
M src/cmd/internal/obj/x86/asm6.go
M src/cmd/internal/sys/arch.go
23 files changed, 496 insertions(+), 40 deletions(-)
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 98f9074..7049d4e 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1400,6 +1400,16 @@
}
}
+ case ssa.BlockAMD64JUMPTABLE:
+ // JMP *(TABLE)(INDEX*8)
+ p := s.Prog(obj.AJMP)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = b.Controls[1].Reg()
+ p.To.Index = b.Controls[0].Reg()
+ p.To.Scale = 8
+ // Save jump tables for later resolution of the target blocks.
+ s.JumpTables = append(s.JumpTables, b)
+
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index 74e4c0a..fe8b6e9 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -271,6 +271,9 @@
objw.Global(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
x.Set(obj.AttrStatic, true)
}
+ for _, jt := range fn.JumpTables {
+ objw.Global(jt.Sym, int32(len(jt.Targets)*base.Ctxt.Arch.PtrSize), obj.RODATA)
+ }
}
}
diff --git a/src/cmd/compile/internal/ir/node.go b/src/cmd/compile/internal/ir/node.go
index 24908f3..9ccb8e3 100644
--- a/src/cmd/compile/internal/ir/node.go
+++ b/src/cmd/compile/internal/ir/node.go
@@ -310,6 +310,7 @@
ORESULT // result of a function call; Xoffset is stack offset
OINLMARK // start of an inlined body, with file/line of caller. Xoffset is an index into the inline tree.
OLINKSYMOFFSET // offset within a name
+ OJUMPTABLE // A jump table structure for implementing dense expression switches
// opcodes for generics
ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter)
diff --git a/src/cmd/compile/internal/ir/node_gen.go b/src/cmd/compile/internal/ir/node_gen.go
index 22ff885..8d6fc8c 100644
--- a/src/cmd/compile/internal/ir/node_gen.go
+++ b/src/cmd/compile/internal/ir/node_gen.go
@@ -712,6 +712,28 @@
editNodes(n.Targs, edit)
}
+func (n *JumpTableStmt) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
+func (n *JumpTableStmt) copy() Node {
+ c := *n
+ c.init = copyNodes(c.init)
+ return &c
+}
+func (n *JumpTableStmt) doChildren(do func(Node) bool) bool {
+ if doNodes(n.init, do) {
+ return true
+ }
+ if n.Idx != nil && do(n.Idx) {
+ return true
+ }
+ return false
+}
+func (n *JumpTableStmt) editChildren(edit func(Node) Node) {
+ editNodes(n.init, edit)
+ if n.Idx != nil {
+ n.Idx = edit(n.Idx).(Node)
+ }
+}
+
func (n *KeyExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *KeyExpr) copy() Node {
c := *n
diff --git a/src/cmd/compile/internal/ir/op_string.go b/src/cmd/compile/internal/ir/op_string.go
index 14eb840..8927f18 100644
--- a/src/cmd/compile/internal/ir/op_string.go
+++ b/src/cmd/compile/internal/ir/op_string.go
@@ -154,19 +154,20 @@
_ = x[ORESULT-143]
_ = x[OINLMARK-144]
_ = x[OLINKSYMOFFSET-145]
- _ = x[ODYNAMICDOTTYPE-146]
- _ = x[ODYNAMICDOTTYPE2-147]
- _ = x[ODYNAMICTYPE-148]
- _ = x[OTAILCALL-149]
- _ = x[OGETG-150]
- _ = x[OGETCALLERPC-151]
- _ = x[OGETCALLERSP-152]
- _ = x[OEND-153]
+ _ = x[OJUMPTABLE-146]
+ _ = x[ODYNAMICDOTTYPE-147]
+ _ = x[ODYNAMICDOTTYPE2-148]
+ _ = x[ODYNAMICTYPE-149]
+ _ = x[OTAILCALL-150]
+ _ = x[OGETG-151]
+ _ = x[OGETCALLERPC-152]
+ _ = x[OGETCALLERSP-153]
+ _ = x[OEND-154]
}
-const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
+const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETJUMPTABLEDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
-var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 875, 890, 901, 909, 913, 924, 935, 938}
+var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 870, 884, 899, 910, 918, 922, 933, 944, 947}
func (i Op) String() string {
if i >= Op(len(_Op_index)-1) {
diff --git a/src/cmd/compile/internal/ir/stmt.go b/src/cmd/compile/internal/ir/stmt.go
index 80bd205..0e76f17 100644
--- a/src/cmd/compile/internal/ir/stmt.go
+++ b/src/cmd/compile/internal/ir/stmt.go
@@ -8,6 +8,7 @@
"cmd/compile/internal/base"
"cmd/compile/internal/types"
"cmd/internal/src"
+ "go/constant"
)
// A Decl is a declaration of a const, type, or var. (A declared func is a Func.)
@@ -262,6 +263,37 @@
return n
}
+// A JumpTableStmt is used to implement switches. Its semantics are:
+// tmp := jt.Idx
+// if tmp == Cases[0] goto Targets[0]
+// if tmp == Cases[1] goto Targets[1]
+// ...
+// if tmp == Cases[n] goto Targets[n]
+// Note that a JumpTableStmt is more like a multiway-goto than
+// a multiway-if. In particular, the case bodies are just
+// labels to jump to, not full Nodes lists.
+type JumpTableStmt struct {
+ miniStmt
+
+ // Value used to index the jump table.
+ // We support only integer types that
+ // are at most the size of a uintptr.
+ Idx Node
+
+ // If Idx is equal to Cases[i], jump to Targets[i].
+ // Cases entries must be distinct and in increasing order.
+ // The length of Cases and Targets must be equal.
+ Cases []constant.Value
+ Targets []*types.Sym
+}
+
+func NewJumpTableStmt(pos src.XPos, idx Node) *JumpTableStmt {
+ n := &JumpTableStmt{Idx: idx}
+ n.pos = pos
+ n.op = OJUMPTABLE
+ return n
+}
+
// An InlineMarkStmt is a marker placed just before an inlined body.
type InlineMarkStmt struct {
miniStmt
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index 28edfd2..df677e6 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -100,6 +100,10 @@
if b.NumControls() != 0 {
f.Fatalf("plain/dead block %s has a control value", b)
}
+ case BlockJumpTable:
+ if b.NumControls() != 1 {
+ f.Fatalf("jumpTable block %s has no control value", b)
+ }
}
if len(b.Succs) != 2 && b.Likely != BranchUnknown {
f.Fatalf("likeliness prediction %d for block %s with %d successors", b.Likely, b, len(b.Succs))
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index f112881..ddf2190 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -168,6 +168,9 @@
// MyImportPath provides the import name (roughly, the package) for the function being compiled.
MyImportPath() string
+
+ // LSym returns the linker symbol of the function being compiled.
+ LSym() string
}
// NewConfig returns a new configuration object for the given architecture.
diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go
index c4e87ec..87d1b41 100644
--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -102,6 +102,9 @@
func (d TestFrontend) MyImportPath() string {
return "my/import/path"
}
+func (d TestFrontend) LSym() string {
+ return "my/import/path.function"
+}
var testTypes Types
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 1fd36bf..81fdeba 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -517,6 +517,8 @@
(If cond yes no) => (NE (TESTB cond cond) yes no)
+(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+
// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem)
(AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index becee87..fc42fa5 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -1001,6 +1001,12 @@
{name: "NEF", controls: 1},
{name: "ORD", controls: 1}, // FP, ordered comparison (parity zero)
{name: "NAN", controls: 1}, // FP, unordered comparison (parity one)
+
+ // JUMPTABLE implements jump tables.
+ // Aux is the symbol (an *obj.LSym) for the jump table.
+ // control[0] is the index into the jump table.
+ // control[1] is the address of the jump table (the address of the symbol stored in Aux).
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index db52b53..0357fdb 100644
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 248060d..4d615a0 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -5,6 +5,7 @@
package ssa
import (
+ "cmd/compile/internal/base"
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj"
@@ -1954,3 +1955,9 @@
fcb.N = x < 0
return fcb.encode()
}
+
+func makeJumpTableSym(b *Block) *obj.LSym {
+ s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
+ s.Set(obj.AttrDuplicateOK, true)
+ return s
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 67ccc99..36e6978 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -36873,6 +36873,7 @@
return false
}
func rewriteBlockAMD64(b *Block) bool {
+ typ := &b.Func.Config.Types
switch b.Kind {
case BlockAMD64EQ:
// match: (EQ (TESTL (SHLL (MOVLconst [1]) x) y))
@@ -37455,6 +37456,19 @@
b.resetWithControl(BlockAMD64NE, v0)
return true
}
+ case BlockJumpTable:
+ // match: (JumpTable idx)
+ // result: (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+ for {
+ idx := b.Controls[0]
+ v0 := b.NewValue0(b.Pos, OpAMD64LEAQ, typ.Uintptr)
+ v0.Aux = symToAux(makeJumpTableSym(b))
+ v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
+ v0.AddArg(v1)
+ b.resetWithControl2(BlockAMD64JUMPTABLE, idx, v0)
+ b.Aux = symToAux(makeJumpTableSym(b))
+ return true
+ }
case BlockAMD64LE:
// match: (LE (InvertFlags cmp) yes no)
// result: (GE cmp yes no)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 7da145e..7b6b69f 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -1861,6 +1861,84 @@
}
s.startBlock(bEnd)
+ case ir.OJUMPTABLE:
+ n := n.(*ir.JumpTableStmt)
+
+ // Make blocks we'll need.
+ jt := s.f.NewBlock(ssa.BlockJumpTable)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+
+ // The only thing that needs evaluating is the index we're looking up.
+ idx := s.expr(n.Idx)
+ unsigned := idx.Type.IsUnsigned()
+
+ // Extend so we can do everything in uintptr arithmetic.
+ t := types.Types[types.TUINTPTR]
+ idx = s.conv(nil, idx, idx.Type, t)
+
+ // The ending condition for the current block decides whether we'll use
+ // the jump table at all.
+ // We check that min <= idx <= max and jump around the jump table
+ // if that test fails.
+ // We implement min <= idx <= max with 0 <= idx-min <= max-min, because
+ // we'll need idx-min anyway as the control value for the jump table.
+ var min, max uint64
+ if unsigned {
+ min, _ = constant.Uint64Val(n.Cases[0])
+ max, _ = constant.Uint64Val(n.Cases[len(n.Cases)-1])
+ } else {
+ mn, _ := constant.Int64Val(n.Cases[0])
+ mx, _ := constant.Int64Val(n.Cases[len(n.Cases)-1])
+ min = uint64(mn)
+ max = uint64(mx)
+ }
+ // Compare idx-min with max-min, to see if we can use the jump table.
+ idx = s.newValue2(s.ssaOp(ir.OSUB, t), t, idx, s.uintptrConstant(min))
+ width := s.uintptrConstant(max - min)
+ cmp := s.newValue2(s.ssaOp(ir.OLE, t), types.Types[types.TBOOL], idx, width)
+ b := s.endBlock()
+ b.Kind = ssa.BlockIf
+ b.SetControl(cmp)
+ b.AddEdgeTo(jt) // in range - use jump table
+ b.AddEdgeTo(bEnd) // out of range - no case in the jump table will trigger
+ b.Likely = ssa.BranchLikely // TODO: assumes missing the table entirely is unlikely. True?
+
+ // Build jump table block.
+ s.startBlock(jt)
+ jt.Pos = n.Pos()
+ if base.Flag.Cfg.SpectreIndex {
+ idx = s.newValue2(ssa.OpSpectreSliceIndex, t, idx, width)
+ }
+ jt.SetControl(idx)
+
+ // Figure out where we should go for each index in the table.
+ table := make([]*ssa.Block, max-min+1)
+ for i := range table {
+ table[i] = bEnd // default target
+ }
+ for i := range n.Targets {
+ c := n.Cases[i]
+ lab := s.label(n.Targets[i])
+ if lab.target == nil {
+ lab.target = s.f.NewBlock(ssa.BlockPlain)
+ }
+ var val uint64
+ if unsigned {
+ val, _ = constant.Uint64Val(c)
+ } else {
+ vl, _ := constant.Int64Val(c)
+ val = uint64(vl)
+ }
+ // Overwrite the default target.
+ table[val-min] = lab.target
+ }
+ for _, t := range table {
+ jt.AddEdgeTo(t)
+ }
+ s.endBlock()
+
+ s.startBlock(bEnd)
+
case ir.OVARDEF:
n := n.(*ir.UnaryExpr)
if !s.canSSA(n.X) {
@@ -2351,6 +2429,13 @@
return x
}
+func (s *state) uintptrConstant(v uint64) *ssa.Value {
+ if s.config.PtrSize == 4 {
+ return s.newValue0I(ssa.OpConst32, types.Types[types.TUINTPTR], int64(v))
+ }
+ return s.newValue0I(ssa.OpConst64, types.Types[types.TUINTPTR], int64(v))
+}
+
func (s *state) conv(n ir.Node, v *ssa.Value, ft, tt *types.Type) *ssa.Value {
if ft.IsBoolean() && tt.IsKind(types.TUINT8) {
// Bool -> uint8 is generated internally when indexing into runtime.staticbyte.
@@ -6440,6 +6525,9 @@
// and where they would like to go.
Branches []Branch
+ // JumpTables remembers all the jump tables we've seen.
+ JumpTables []*ssa.Block
+
// bstart remembers where each block starts (indexed by block ID)
bstart []*obj.Prog
@@ -7052,6 +7140,20 @@
}
+ // Resolve jump table destinations.
+ for _, jt := range s.JumpTables {
+ // Convert from *Block targets to *Prog targets.
+ targets := make([]*obj.Prog, len(jt.Succs))
+ for i, e := range jt.Succs {
+ targets[i] = s.bstart[e.Block().ID]
+ }
+ // Add to list of jump tables to be resolved at assembly time.
+ // The assembler converts from *Prog entries to absolute addresses
+ // once it knows instruction byte offsets.
+ fi := pp.CurFunc.LSym.Func()
+ fi.JumpTables = append(fi.JumpTables, obj.JumpTable{Sym: jt.Aux.(*obj.LSym), Targets: targets})
+ }
+
if e.log { // spew to stdout
filename := ""
for p := pp.Text; p != nil; p = p.Link {
@@ -7705,6 +7807,10 @@
return base.Ctxt.Pkgpath
}
+func (e *ssafn) LSym() string {
+ return e.curfn.LSym.Name
+}
+
func clobberBase(n ir.Node) ir.Node {
if n.Op() == ir.ODOT {
n := n.(*ir.SelectorExpr)
diff --git a/src/cmd/compile/internal/test/switch_test.go b/src/cmd/compile/internal/test/switch_test.go
new file mode 100644
index 0000000..6f7bfcf
--- /dev/null
+++ b/src/cmd/compile/internal/test/switch_test.go
@@ -0,0 +1,94 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "math/bits"
+ "testing"
+)
+
+func BenchmarkSwitch8Predictable(b *testing.B) {
+ benchmarkSwitch8(b, true)
+}
+func BenchmarkSwitch8Unpredictable(b *testing.B) {
+ benchmarkSwitch8(b, false)
+}
+func benchmarkSwitch8(b *testing.B, predictable bool) {
+ n := 0
+ rng := newRNG()
+ for i := 0; i < b.N; i++ {
+ rng = rng.next(predictable)
+ switch rng.value() & 7 {
+ case 0:
+ n += 1
+ case 1:
+ n += 2
+ case 2:
+ n += 3
+ case 3:
+ n += 4
+ case 4:
+ n += 5
+ case 5:
+ n += 6
+ case 6:
+ n += 7
+ case 7:
+ n += 8
+ }
+ }
+ sink = n
+}
+
+func BenchmarkSwitch32Predictable(b *testing.B) {
+ benchmarkSwitch32(b, true)
+}
+func BenchmarkSwitch32Unpredictable(b *testing.B) {
+ benchmarkSwitch32(b, false)
+}
+func benchmarkSwitch32(b *testing.B, predictable bool) {
+ n := 0
+ rng := newRNG()
+ for i := 0; i < b.N; i++ {
+ rng = rng.next(predictable)
+ switch rng.value() & 31 {
+ case 0, 1, 2:
+ n += 1
+ case 4, 5, 6:
+ n += 2
+ case 8, 9, 10:
+ n += 3
+ case 12, 13, 14:
+ n += 4
+ case 16, 17, 18:
+ n += 5
+ case 20, 21, 22:
+ n += 6
+ case 24, 25, 26:
+ n += 7
+ case 28, 29, 30:
+ n += 8
+ default:
+ n += 9
+ }
+ }
+ sink = n
+}
+
+// A simple random number generator used to make switches conditionally predictable.
+type rng uint64
+
+func newRNG() rng {
+ return 1
+}
+func (r rng) next(predictable bool) rng {
+ if predictable {
+ return r + 1
+ }
+ return rng(bits.RotateLeft64(uint64(r), 13) * 0x3c374d)
+}
+func (r rng) value() uint64 {
+ return uint64(r)
+}
diff --git a/src/cmd/compile/internal/walk/stmt.go b/src/cmd/compile/internal/walk/stmt.go
index 4f38cb2..8a42dbf 100644
--- a/src/cmd/compile/internal/walk/stmt.go
+++ b/src/cmd/compile/internal/walk/stmt.go
@@ -85,6 +85,7 @@
ir.OFALL,
ir.OGOTO,
ir.OLABEL,
+ ir.OJUMPTABLE,
ir.ODCL,
ir.ODCLCONST,
ir.ODCLTYPE,
diff --git a/src/cmd/compile/internal/walk/switch.go b/src/cmd/compile/internal/walk/switch.go
index 3705c5b..a4003ec 100644
--- a/src/cmd/compile/internal/walk/switch.go
+++ b/src/cmd/compile/internal/walk/switch.go
@@ -11,6 +11,7 @@
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
+ "cmd/compile/internal/ssagen"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/src"
@@ -223,6 +224,9 @@
}
func (s *exprSwitch) search(cc []exprClause, out *ir.Nodes) {
+ if s.tryJumpTable(cc, out) {
+ return
+ }
binarySearch(len(cc), out,
func(i int) ir.Node {
return ir.NewBinaryExpr(base.Pos, ir.OLE, s.exprname, cc[i-1].hi)
@@ -235,6 +239,49 @@
)
}
+// Try to implement the clauses with a jump table. Returns true if successful.
+func (s *exprSwitch) tryJumpTable(cc []exprClause, out *ir.Nodes) bool {
+ const go119UseJumpTables = true
+ const minCases = 8 // have at least minCases cases in the switch
+ const minDensity = 4 // use at least 1 out of every minDensity entries
+
+ if !go119UseJumpTables || !ssagen.Arch.LinkArch.CanJumpTable {
+ return false
+ }
+ if len(cc) < minCases {
+ return false // not enough cases for it to be worth it
+ }
+ if cc[0].lo.Val().Kind() != constant.Int {
+ return false // e.g. float
+ }
+ if s.exprname.Type().Size() > int64(types.PtrSize) {
+ return false // 64-bit switches on 32-bit archs
+ }
+ min := cc[0].lo.Val()
+ max := cc[len(cc)-1].hi.Val()
+ width := constant.BinaryOp(constant.BinaryOp(max, token.SUB, min), token.ADD, constant.MakeInt64(1))
+ limit := constant.MakeInt64(int64(len(cc)) * minDensity)
+ if constant.Compare(width, token.GTR, limit) {
+ // We disable jump tables if we use less than a minimum fraction of the entries.
+ // i.e. for switch x {case 0: case 1000: case 2000:} we don't want to use a jump table.
+ return false
+ }
+ jt := ir.NewJumpTableStmt(base.Pos, s.exprname)
+ for _, c := range cc {
+ jmp := c.jmp.(*ir.BranchStmt)
+ if jmp.Op() != ir.OGOTO || jmp.Label == nil {
+ panic("bad switch case body")
+ }
+ for i := c.lo.Val(); constant.Compare(i, token.LEQ, c.hi.Val()); i = constant.BinaryOp(i, token.ADD, constant.MakeInt64(1)) {
+ jt.Cases = append(jt.Cases, i)
+ jt.Targets = append(jt.Targets, jmp.Label)
+ }
+ }
+ out.Append(jt)
+ // TODO: handle the size portion of string switches using a jump table.
+ return true
+}
+
func (c *exprClause) test(exprname ir.Node) ir.Node {
// Integer range.
if c.hi != c.lo {
@@ -562,7 +609,7 @@
// then cases before i will be tested; otherwise, cases i and later.
//
// leaf(i, nif) should setup nif (an OIF node) to test case i. In
-// particular, it should set nif.Left and nif.Nbody.
+// particular, it should set nif.Cond and nif.Body.
func binarySearch(n int, out *ir.Nodes, less func(i int) ir.Node, leaf func(i int, nif *ir.IfStmt)) {
const binarySearchMin = 4 // minimum number of cases for binary search
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index a3eba73..dc06a3a 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -495,10 +495,20 @@
ArgInfo *LSym // argument info for traceback
ArgLiveInfo *LSym // argument liveness info for traceback
WrapInfo *LSym // for wrapper, info of wrapped function
+ JumpTables []JumpTable
FuncInfoSym *LSym
}
+// JumpTable represents a table used for implementing multi-way
+// computed branching, used typically for implementing switches.
+// Sym is the table itself, and Targets is a list of target
+// instructions to go to for the computed branch index.
+type JumpTable struct {
+ Sym *LSym
+ Targets []*Prog
+}
+
// NewFuncInfo allocates and returns a FuncInfo for LSym.
func (s *LSym) NewFuncInfo() *FuncInfo {
if s.Extra != nil {
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 0e4c87dd..b625845 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -2230,6 +2230,16 @@
}
obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
}
+
+ // Now that we know byte offsets, we can generate jump table entries.
+ // TODO: could this live in obj instead of obj/$ARCH?
+ for _, jt := range s.Func().JumpTables {
+ for i, p := range jt.Targets {
+ // The ith jumptable entry points to the p.Pc'th
+ // byte in the function symbol s.
+ jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
+ }
+ }
}
func instinit(ctxt *obj.Link) {
diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go
index ea76b59..84ed35b 100644
--- a/src/cmd/internal/sys/arch.go
+++ b/src/cmd/internal/sys/arch.go
@@ -52,6 +52,10 @@
// can combine adjacent loads into a single larger, possibly unaligned, load.
// Note that currently the optimizations must be able to handle little endian byte order.
CanMergeLoads bool
+
+ // CanJumpTable reports whether the backend can handle
+ // compiling a jump table.
+ CanJumpTable bool
}
// InFamily reports whether a is a member of any of the specified
@@ -85,6 +89,7 @@
MinLC: 1,
Alignment: 1,
CanMergeLoads: true,
+ CanJumpTable: true,
}
var ArchARM = &Arch{
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
1 comment:
Patchset:
This CL seems to result in an ICE during ./make.bash when GO_GCFLAGS='-N -l' is used to disable optimizations and inlining. This problem is detected by the linux-amd64-noopt builder (https://build.golang.org/log/f48227f2d7e082bb160424c9daeaf7e27dadd340).
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
1 comment:
Patchset:
This CL seems to result in an ICE during ./make. […]
I believe the fix is CL 400455.
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.
1 comment:
Patchset:
RELNOTE=yes
To view, visit change 357330. To unsubscribe, or for help writing mail filters, visit settings.