Congratulations on opening your first change. Thank you for your contribution!
Next steps:
A maintainer will review your change and provide feedback. See
https://go.dev/doc/contribute#review for more info and tips to get your
patch through code review.
Most changes in the Go project go through a few rounds of revision. This can be
surprising to people new to the project. The careful, iterative review process
is our way of helping mentor contributors and ensuring that their contributions
have a lasting impact.
During May-July and Nov-Jan the Go project is in a code freeze, during which
little code gets reviewed or merged. If a reviewer responds with a comment like
R=go1.11 or adds a tag like "wait-release", it means that this CL will be
reviewed as part of the next development cycle. See https://go.dev/s/release
for more details.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Raj Barik has uploaded this change for review.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M api/go1.19.txt
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
M src/cmd/dist/buildtool.go
A src/internal/profile/graph.go
A src/internal/profile/legacy_java_profile.go
A src/pgo/inline/inline_hot.go
A src/pgo/inline/inline_hot_test.go
11 files changed, 2,443 insertions(+), 7 deletions(-)
diff --git a/api/go1.19.txt b/api/go1.19.txt
index 523f752..100c2af 100644
--- a/api/go1.19.txt
+++ b/api/go1.19.txt
@@ -244,6 +244,13 @@
pkg os/exec, method (*Cmd) Environ() []string #50599
pkg os/exec, type Cmd struct, Err error #43724
pkg os/exec, var ErrDot error #43724
+pkg pgo/inline, func A() #43724
+pkg pgo/inline, func D(uint) int #43724
+pkg pgo/inline, func N(uint) *BS #43724
+pkg pgo/inline, func T(uint64) uint #43724
+pkg pgo/inline, type BS struct #43724
+pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
+pkg pgo/inline, method (*BS) S(uint) *BS #43724
pkg regexp/syntax, const ErrNestingDepth = "expression nests too deeply" #51684
pkg regexp/syntax, const ErrNestingDepth ErrorCode #51684
pkg runtime/debug, func SetMemoryLimit(int64) int64 #48409
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index f2728d9..7420d6d 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -121,6 +121,9 @@
TraceProfile string "help:\"write an execution trace to `file`\""
TrimPath string "help:\"remove `prefix` from recorded source file paths\""
WB bool "help:\"enable write barrier\"" // TODO: remove
+ ProfileUse string "help:\"read profile from `file`\""
+ InlineHotThreshold string "help:\"threshold percentage for determining hot methods and callsites for inlining\""
+ InlineHotBudget int "help:\"inline budget for hot methods\""
// Configuration derived from flags; not a flag itself.
Cfg struct {
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index c9493bf..1929b2f 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -16,6 +16,7 @@
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
"cmd/compile/internal/noder"
+ "cmd/compile/internal/pgo"
"cmd/compile/internal/pkginit"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/ssa"
@@ -29,6 +30,7 @@
"flag"
"fmt"
"internal/buildcfg"
+ "internal/profile"
"log"
"os"
"runtime"
@@ -232,10 +234,28 @@
typecheck.AllImportedBodies()
}
+ // Read cpu profile file and build cross-package pprof-graph and per-package weighted-call-graph.
+ base.Timer.Start("fe", "profileuse")
+ if base.Flag.ProfileUse != "" {
+ if pgo.PProfGraph == nil {
+ pgo.PProfGraph = pgo.BuildPProfGraph(base.Flag.ProfileUse, &profile.Options{
+ CallTree: false,
+ SampleValue: func(v []int64) int64 { return v[1] },
+ })
+ }
+ pgo.WeightedCG = pgo.BuildWeightedCallGraph()
+ }
+
// Inlining
base.Timer.Start("fe", "inlining")
if base.Flag.LowerL != 0 {
+ if pgo.WeightedCG != nil {
+ inline.InlinePrologue()
+ }
inline.InlinePackage()
+ if pgo.WeightedCG != nil {
+ inline.InlineEpilogue()
+ }
}
noder.MakeWrappers(typecheck.Target) // must happen after inlining
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 817f2fd..078eeb7 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -29,11 +29,13 @@
import (
"fmt"
"go/constant"
+ "strconv"
"strings"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
+ "cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/obj"
@@ -42,7 +44,8 @@
// Inlining budget parameters, gathered in one place
const (
- inlineMaxBudget = 80
+ inlineMaxBudget = 80
+ // Budget increased due to hotness.
inlineExtraAppendCost = 0
// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
inlineExtraCallCost = 57 // 57 was benchmarked to provided most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
@@ -51,9 +54,87 @@
inlineBigFunctionNodes = 5000 // Functions with this many nodes are considered "big".
inlineBigFunctionMaxCost = 20 // Max cost of inlinee when inlining into a "big" function.
+
)
-// InlinePackage finds functions that can be inlined and clones them before walk expands them.
+var (
+ // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
+ listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+ // List of all hot call sites.
+ candHotEdgeMap = make(map[string]struct{})
+
+ // List of inlined call sites.
+ inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+ // Threshold for Hot callsite inlining.
+ inlineHotThresholdPercent = float64(2)
+
+ // Budget increased due to hotness.
+ inlineHotMaxBudget int32 = 160
+)
+
+// InlinePrologue records the hot callsites from ir-graph.
+func InlinePrologue() {
+ if s, err := strconv.ParseFloat(base.Flag.InlineHotThreshold, 64); err == nil {
+ inlineHotThresholdPercent = s
+ if base.Flag.LowerM != 0 {
+ fmt.Printf("hot-thres=%v\n", inlineHotThresholdPercent)
+ }
+ }
+
+ if base.Flag.InlineHotBudget != 0 {
+ inlineHotMaxBudget = int32(base.Flag.InlineHotBudget)
+ }
+
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+ nodeweight := pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
+ if nodeweight > inlineHotThresholdPercent {
+ n.HotNode = true
+ }
+ for _, e := range pgo.WeightedCG.OutEdges[n] {
+ if e.Weight != 0 {
+ weightpercent := pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
+ if weightpercent > inlineHotThresholdPercent {
+ splits := strings.Split(e.CallSite, ":")
+ line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+ lineno := fmt.Sprintf("%v", line2)
+ canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
+ if _, ok := candHotEdgeMap[canonicalName]; !ok {
+ candHotEdgeMap[canonicalName] = struct{}{}
+ }
+ }
+ }
+ }
+ }
+ }
+ })
+ if base.Flag.LowerM > 4 {
+ fmt.Printf("hot-cg before inline in dot format:")
+ pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
+ }
+}
+
+// InlineEpilogue updates IRGraph after inlining.
+func InlineEpilogue() {
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+ pgo.RedirectEdges(n, inlinedCallSites)
+ }
+ }
+ })
+ if base.Flag.LowerM > 4 {
+ fmt.Printf("hot-cg after inline in dot:")
+ pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
+ }
+}
+
+// InlinePackage finds functions that can be inlined and clones them.
func InlinePackage() {
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
numfns := numNonClosures(list)
@@ -81,6 +162,9 @@
base.Fatalf("CanInline no nname %+v", fn)
}
+ // Initialize an empty list of hot callsites for this caller.
+ listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
var reason string // reason, if any, that the function was not inlined
if base.Flag.LowerM > 1 || logopt.Enabled() {
defer func() {
@@ -181,10 +265,14 @@
budget: inlineMaxBudget,
extraCallCost: cc,
}
+ savefn := ir.CurFunc
+ ir.CurFunc = fn
if visitor.tooHairy(fn) {
reason = visitor.reason
+ ir.CurFunc = savefn
return
}
+ ir.CurFunc = savefn
n.Func.Inl = &ir.Inline{
Cost: inlineMaxBudget - visitor.budget,
@@ -252,6 +340,19 @@
return true
}
if v.budget < 0 {
+ if pgo.WeightedCG != nil {
+ // Find the existing node in WeightedCallGraph.
+ if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
+ // If the cost of hot function is greater than inlineHotMaxBudget,
+ // the inliner won't inline this function.
+ if inlineMaxBudget-v.budget < inlineHotMaxBudget && n.HotNode == true {
+ if base.Flag.LowerM > 1 {
+ fmt.Printf("hot-node enabled increased budget for func=%v\n", ir.PkgFuncName(fn))
+ }
+ return false
+ }
+ }
+ }
v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
return true
}
@@ -315,6 +416,23 @@
}
}
+ // Determine if callee edge is a hot callee or not.
+ if pgo.WeightedCG != nil && ir.CurFunc != nil {
+ if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
+ lineno := fmt.Sprintf("%v", ir.Line(n))
+ splits := strings.Split(lineno, ":")
+ l, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+ linenum := fmt.Sprintf("%v", l)
+ canonicalName := ir.PkgFuncName(ir.CurFunc) + "-" + linenum + "-" + ir.PkgFuncName(fn)
+ if _, o := candHotEdgeMap[canonicalName]; o {
+ if base.Flag.LowerM > 1 {
+ fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(ir.CurFunc))
+ }
+ listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
+ }
+ }
+ }
+
if ir.IsIntrinsicCall(n) {
// Treat like any other node.
break
@@ -464,10 +582,12 @@
v.budget--
- // When debugging, don't stop early, to get full cost of inlining this function
- if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
- v.reason = "too expensive"
- return true
+ if pgo.WeightedCG == nil {
+ // When debugging, don't stop early, to get full cost of inlining this function
+ if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
+ v.reason = "too expensive"
+ return true
+ }
}
return ir.DoChildren(n, v.do)
@@ -716,7 +836,18 @@
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
}
- return n
+
+ // If the callsite is hot and it is under the inlineHotMaxBudget budget, then inline it, or else bail.
+ if _, ok := listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
+ if fn.Inl.Cost > inlineHotMaxBudget {
+ return n
+ }
+ if base.Flag.LowerM > 1 {
+ fmt.Printf("hot-budget check allows inlining at %v\n", ir.Line(n))
+ }
+ } else {
+ return n
+ }
}
if fn == ir.CurFunc {
@@ -817,7 +948,12 @@
fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
}
+ if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
+ inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
+ }
+
res := InlineCall(n, fn, inlIndex)
+
if res == nil {
base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
}
diff --git a/src/cmd/compile/internal/pgo/irgraph.go b/src/cmd/compile/internal/pgo/irgraph.go
new file mode 100644
index 0000000..d27a5e1
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/irgraph.go
@@ -0,0 +1,493 @@
+package pgo
+
+import (
+ "cmd/compile/internal/base"
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/typecheck"
+ "cmd/compile/internal/types"
+ "fmt"
+ "internal/profile"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+)
+
+// IRGraph is the key datastrcture that is built from pprof profile. It is essentially a call graph with nodes pointing to IRs of functions and edges carrying weights and callsite information. The graph is bidirectional that helps in removing nodes efficiently. The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.
+type IRGraph struct {
+ // Nodes of the graph
+ IRNodes map[string]*IRNode
+ OutEdges IREdgeMap
+ InEdges IREdgeMap
+}
+
+// IRNode represents a node in the IRGraph.
+type IRNode struct {
+ // Pointer to the IR of the Function represented by this node.
+ AST *ir.Func
+ // Flat weight of the IRNode, obtained from pprof.
+ Flat int64
+ // Cumulative weight of the IRNode.
+ Cum int64
+ // Is this function a recursive function.
+ Recursive bool
+ // Is this function a hot node?
+ HotNode bool
+}
+
+// IREdgeMap maps an IRNode to its successors.
+type IREdgeMap map[*IRNode][]*IREdge
+
+// IREdge represents a call edge in the IRGraph with source, destination, weight, callsite, and line number information.
+type IREdge struct {
+ Src, Dst *IRNode
+ DstNode *ir.Node
+ Weight int64
+ CallSite string
+}
+
+// NodeMapInfo represents a hash key to identify unique call-edges in pprof and in IR. Used for deduplication of call edges found in pprof.
+type NodeMapInfo struct {
+ FuncName string
+ DstName string
+ CallSite int
+}
+
+// Weights capture both node weight and edge weight.
+type Weights struct {
+ NWeight int64
+ NTotalWeight int64
+ EWeight int64
+}
+
+// CallSiteInfo captures call site information and its static callee.
+type CallSiteInfo struct {
+ Line string
+ Caller *ir.Func
+}
+
+var (
+ // Aggregated NodeWeights and EdgeWeights across profiles. This helps us determine the percentage threshold for hot/cold partitioning.
+ GlobalTotalNodeWeight = int64(0)
+ GlobalTotalEdgeWeight = int64(0)
+
+ // Global node and their aggregated weight information.
+ GlobalNodeMap = make(map[NodeMapInfo]*Weights)
+
+ // WeightedCG represents the IRGraph built from pprof profile, which we will update as part of inlining.
+ WeightedCG *IRGraph = nil
+
+ // Original cross-package PProf Graph.
+ PProfGraph *profile.Graph = nil
+)
+
+// BuildPProfGraph generates a pprof-graph from cpu-profile.
+func BuildPProfGraph(profileFile string, opt *profile.Options) *profile.Graph {
+
+ if PProfGraph != nil {
+ return PProfGraph
+ }
+ // open the pprof profile file.
+ f, err := os.Open(profileFile)
+ if err != nil {
+ log.Fatal("failed to open file " + profileFile)
+ return nil
+ }
+ defer f.Close()
+ p, err := profile.Parse(f)
+ if err != nil {
+ log.Fatal("failed to Parse profile file.")
+ return nil
+ }
+ // Build the graph using google's pprof package.
+ pProfGraph := profile.New(p, opt)
+
+ // Build various global maps from pprof profile.
+ preprocessPProfGraph(pProfGraph)
+
+ return pProfGraph
+}
+
+// BuildWeightedCallGraph generates a weighted callgraph from the pprof profile for the current package.
+func BuildWeightedCallGraph() *IRGraph {
+
+ // Bail if there is no pprof-graph available.
+ if PProfGraph == nil {
+ return nil
+ }
+
+ // Create package-level call graph with weights from pprof profile and IR.
+ weightedCG := createIRGraph()
+
+ if weightedCG != nil && base.Flag.LowerM > 1 {
+ log.Println("weighted call graph created successfully!")
+ }
+
+ return weightedCG
+}
+
+// preprocessPProfGraph builds various maps from profiles. It builds GlobalNodeMap and other information based on the name and callsite to compute node and edge weights which will be used later on to create edges of WeightedCG.
+func preprocessPProfGraph(pProfGraph *profile.Graph) {
+ nweight := make(map[string]int64)
+ nTotalWeight := make(map[string]int64)
+
+ // Accummulate weights for the same nodes.
+ for _, n := range pProfGraph.Nodes {
+ canonicalName := n.Info.Name
+ if _, ok := nweight[canonicalName]; ok {
+ nweight[canonicalName] += n.FlatValue()
+ nTotalWeight[canonicalName] += n.CumValue()
+ } else {
+ nweight[canonicalName] = n.FlatValue()
+ nTotalWeight[canonicalName] = n.CumValue()
+ }
+ }
+
+ // Process PProfGraph and build various node and edge maps which will be consumed by AST walk.
+ for _, n := range pProfGraph.Nodes {
+ GlobalTotalNodeWeight += n.FlatValue()
+ canonicalName := n.Info.Name
+ // Create the key to the NodeMap.
+ nodeinfo := NodeMapInfo{
+ FuncName: canonicalName,
+ CallSite: n.Info.Lineno,
+ }
+ // If there are no outgoing edges, we still need to create the node [for cold sites] with no callee information.
+ if len(n.Out) == 0 {
+ nodeinfo.DstName = ""
+ nodeinfo.CallSite = -1
+
+ weights := new(Weights)
+ weights.NWeight = nweight[canonicalName]
+ weights.NTotalWeight = nTotalWeight[canonicalName]
+ weights.EWeight = 0
+
+ GlobalNodeMap[nodeinfo] = weights
+ }
+
+ for _, e := range n.Out {
+ GlobalTotalEdgeWeight += e.WeightValue()
+ nodeinfo.DstName = e.Dest.Info.Name
+ if w, ok := GlobalNodeMap[nodeinfo]; ok {
+ w.EWeight += e.WeightValue()
+ } else {
+ weights := new(Weights)
+ weights.NWeight = nweight[canonicalName]
+ weights.NTotalWeight = nTotalWeight[canonicalName]
+ weights.EWeight = e.WeightValue()
+ GlobalNodeMap[nodeinfo] = weights
+ }
+ }
+ }
+}
+
+// createIRGraph builds the IRGraph by visting all the ir.Func in decl list of a package.
+func createIRGraph() *IRGraph {
+ var g IRGraph
+ // Bottomup walk over the function to create IRGraph.
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, n := range list {
+ g.Visit(n, recursive)
+ }
+ })
+ return &g
+}
+
+// Visit traverses the body of each ir.Func and use GlobalNodeMap to determine if we need to add an edge from ir.Func and any node in the ir.Func body.
+func (g *IRGraph) Visit(fn *ir.Func, recursive bool) {
+ if g.IRNodes == nil {
+ g.IRNodes = make(map[string]*IRNode)
+ }
+ if g.OutEdges == nil {
+ g.OutEdges = make(map[*IRNode][]*IREdge)
+ }
+ if g.InEdges == nil {
+ g.InEdges = make(map[*IRNode][]*IREdge)
+ }
+ name := ir.PkgFuncName(fn)
+ node := new(IRNode)
+ node.AST = fn
+ node.Recursive = recursive
+ if g.IRNodes[name] == nil {
+ g.IRNodes[name] = node
+ }
+ // Create the hash key of the GlobalNodeMap.
+ nodeinfo := NodeMapInfo{
+ FuncName: name,
+ DstName: "",
+ CallSite: -1,
+ }
+ // If the node exists, then update its node weight.
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ g.IRNodes[name].Flat = weights.NWeight
+ g.IRNodes[name].Cum = weights.NTotalWeight
+ }
+
+ // Recursively walk over the body of the function to create IRGraph edges.
+ g.createIRGraphEdge(fn, g.IRNodes[name], name)
+}
+
+// addEdge adds an edge between node1 and new node that points to f based on pprof graph using GlobalNodeMap. node1 represents the caller. Callee as f. CallSite n.
+func (g *IRGraph) addEdge(node1 *IRNode, f *ir.Func, n *ir.Node, name string, line string) {
+
+ splits := strings.Split(line, ":")
+ line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+
+ // Create an IRNode for Callee.
+ node2 := new(IRNode)
+ node2.AST = f
+ name2 := ir.PkgFuncName(f)
+
+ // Create a hash key using NodeMapInfo with Caller FuncName and Callee fname.
+ nodeinfo := NodeMapInfo{
+ FuncName: name,
+ DstName: name2,
+ CallSite: int(line2),
+ }
+
+ // A new callee node? If so create the node with node weight.
+ // Remove this TODO.
+ if g.IRNodes[name2] == nil {
+ g.IRNodes[name2] = node2
+ nodeinfo2 := NodeMapInfo{
+ FuncName: name2,
+ DstName: "",
+ CallSite: -1,
+ }
+ if weights, ok := GlobalNodeMap[nodeinfo2]; ok {
+ g.IRNodes[name2].Flat = weights.NWeight
+ g.IRNodes[name2].Cum = weights.NTotalWeight
+ }
+ }
+
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ node1.Flat = weights.NWeight
+ node1.Cum = weights.NTotalWeight
+
+ // Add edge in the IRGraph from caller to callee [callee is an interface type here which can have multiple targets].
+ info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: weights.EWeight, CallSite: line}
+ g.OutEdges[node1] = append(g.OutEdges[node1], info)
+ g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+ } else {
+ nodeinfo.DstName = ""
+ nodeinfo.CallSite = -1
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ node1.Flat = weights.NWeight
+ node1.Cum = weights.NTotalWeight
+ info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
+ g.OutEdges[node1] = append(g.OutEdges[node1], info)
+ g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+ } else {
+ info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
+ g.OutEdges[node1] = append(g.OutEdges[node1], info)
+ g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+ }
+ }
+}
+
+// createIRGraphEdge traverses the nodes in the body of ir.Func and add edges between node1 which points to the ir.Func and the nodes in the body.
+func (g *IRGraph) createIRGraphEdge(fn *ir.Func, node1 *IRNode, name string) {
+ var doNode func(ir.Node) bool
+ doNode = func(n ir.Node) bool {
+ switch n.Op() {
+ default:
+ ir.DoChildren(n, doNode)
+ case ir.OCALLFUNC:
+ call := n.(*ir.CallExpr)
+ line := ir.Line(n)
+ // Find the callee function from the call site and add the edge.
+ f := inlCallee(call.X)
+ if f != nil {
+ g.addEdge(node1, f, &n, name, line)
+ }
+ case ir.OCALLMETH:
+ call := n.(*ir.CallExpr)
+ // Find the callee method from the call site and add the edge.
+ fn2 := ir.MethodExprName(call.X).Func
+ line := ir.Line(n)
+ g.addEdge(node1, fn2, &n, name, line)
+ }
+ return false
+ }
+ doNode(fn)
+}
+
+// WeightInPercentage converts profile weights to a percentage.
+func WeightInPercentage(value int64, total int64) float64 {
+ var ratio float64
+ // percentage is computed at the (weight/totalweights) * 100
+ // e.g. if edge weight is 30 and the sum of all the edges weight is 126
+ // the percentage will be 23.8%
+ if total != 0 {
+ ratio = (float64(value) / float64(total)) * 100
+ }
+ return ratio
+}
+
+// PrintWeightedCallGraphDOT prints IRGraph in DOT format..
+func PrintWeightedCallGraphDOT(threshold float64) {
+ fmt.Printf("digraph G {\n")
+ fmt.Printf("forcelabels=true;\n")
+
+ // List of functions in this package.
+ funcs := make(map[string]struct{})
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ funcs[name] = struct{}{}
+ }
+ })
+
+ // Determine nodes of DOT.
+ nodes := make(map[string]*ir.Func)
+ for name, _ := range funcs {
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+ for _, e := range WeightedCG.OutEdges[n] {
+ if e.Weight != 0 {
+ p := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+ if p > threshold {
+ nodes[ir.PkgFuncName(e.Src.AST)] = e.Src.AST
+ nodes[ir.PkgFuncName(e.Dst.AST)] = e.Dst.AST
+ }
+ }
+ }
+ if nodeweight > threshold {
+ nodes[ir.PkgFuncName(n.AST)] = n.AST
+ }
+ }
+ }
+
+ // Print nodes.
+ for name, ast := range nodes {
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+ if ast.Inl != nil {
+ fmt.Printf("\"%v\" [label=\"%v,freq=%.2f,inl_cost=%d\"];\n", ir.PkgFuncName(ast), ir.PkgFuncName(ast), nodeweight, ast.Inl.Cost)
+ } else {
+ fmt.Printf("\"%v\" [label=\"%v,freq=%.2f\"];\n", ir.PkgFuncName(ast), ir.PkgFuncName(ast), nodeweight)
+ }
+ }
+ }
+ // Print edges.
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ for _, e := range WeightedCG.OutEdges[n] {
+ if e.Weight != 0 {
+ p := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+ if p > threshold {
+ fmt.Printf("edge [color=red, style=solid];\n")
+ } else {
+ fmt.Printf("edge [color=black, style=solid];\n")
+ }
+
+ fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", ir.PkgFuncName(n.AST), ir.PkgFuncName(e.Dst.AST), p)
+ }
+ }
+ }
+ }
+ })
+ fmt.Printf("}\n")
+}
+
+// redirectEdges deletes the cur node out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdges(g *IRGraph, parent *IRNode, cur *IRNode) {
+ for _, outEdge := range g.OutEdges[cur] {
+ outEdge.Src = parent
+ g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+ }
+ delete(g.OutEdges, cur)
+}
+
+// RedirectEdges deletes and redirects out-edges from node cur based on inlining information via inlinedCallSites.
+func RedirectEdges(cur *IRNode, inlinedCallSites map[CallSiteInfo]struct{}) {
+ g := WeightedCG
+ for i, outEdge := range g.OutEdges[cur] {
+ if _, found := inlinedCallSites[CallSiteInfo{outEdge.CallSite, cur.AST}]; !found {
+ for _, InEdge := range g.InEdges[cur] {
+ if _, ok := inlinedCallSites[CallSiteInfo{InEdge.CallSite, InEdge.Src.AST}]; ok {
+ weight := calculateweight(g, InEdge.Src, cur)
+ redirectEdge(g, InEdge.Src, cur, outEdge, weight, i)
+ }
+ }
+ } else {
+ remove(g, cur, i, outEdge.Dst.AST.Nname)
+ }
+ }
+ removeall(g, cur)
+}
+
+// calculateweight calculates the weight of the new redirected edge.
+func calculateweight(g *IRGraph, parent *IRNode, cur *IRNode) int64 {
+ sum := int64(0)
+ pw := int64(0)
+ for _, InEdge := range g.InEdges[cur] {
+ sum = sum + InEdge.Weight
+ if InEdge.Src == parent {
+ pw = InEdge.Weight
+ }
+ }
+ weight := int64(0)
+ if sum != 0 {
+ weight = pw / sum
+ } else {
+ weight = pw
+ }
+ return weight
+}
+
+// redirectEdge deletes the cur-node's out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdge(g *IRGraph, parent *IRNode, cur *IRNode, outEdge *IREdge, weight int64, idx int) {
+ outEdge.Src = parent
+ outEdge.Weight = weight * outEdge.Weight
+ g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+ remove(g, cur, idx, outEdge.Dst.AST.Nname)
+}
+
+// remove deletes the cur-node's out-edges at index idx.
+func remove(g *IRGraph, cur *IRNode, idx int, name *ir.Name) {
+ if len(g.OutEdges[cur]) >= 2 {
+ g.OutEdges[cur][idx] = &IREdge{CallSite: "removed"}
+ } else {
+ delete(g.OutEdges, cur)
+ }
+}
+
+// removeall deletes all cur-node's out-edges that marked to be removed .
+func removeall(g *IRGraph, cur *IRNode) {
+ for i := len(g.OutEdges[cur]) - 1; i >= 0; i-- {
+ if g.OutEdges[cur][i].CallSite == "removed" {
+ g.OutEdges[cur][i] = g.OutEdges[cur][len(g.OutEdges[cur])-1]
+ g.OutEdges[cur] = g.OutEdges[cur][:len(g.OutEdges[cur])-1]
+ }
+ }
+}
+
+// inlCallee is same as the implementation for inl.go with one change. The change is that we do not invoke CanInline on a closure.
+func inlCallee(fn ir.Node) *ir.Func {
+ fn = ir.StaticValue(fn)
+ switch fn.Op() {
+ case ir.OMETHEXPR:
+ fn := fn.(*ir.SelectorExpr)
+ n := ir.MethodExprName(fn)
+ // Check that receiver type matches fn.X.
+ // TODO(mdempsky): Handle implicit dereference
+ // of pointer receiver argument?
+ if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) {
+ return nil
+ }
+ return n.Func
+ case ir.ONAME:
+ fn := fn.(*ir.Name)
+ if fn.Class == ir.PFUNC {
+ return fn.Func
+ }
+ case ir.OCLOSURE:
+ fn := fn.(*ir.ClosureExpr)
+ c := fn.Func
+ return c
+ }
+ return nil
+}
diff --git a/src/cmd/compile/internal/test/pgo_inl_test.go b/src/cmd/compile/internal/test/pgo_inl_test.go
new file mode 100644
index 0000000..d5ca156
--- /dev/null
+++ b/src/cmd/compile/internal/test/pgo_inl_test.go
@@ -0,0 +1,154 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bufio"
+ "bytes"
+ "internal/testenv"
+ "io"
+ "io/ioutil"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "testing"
+)
+
+// TestIntendedInlining tests that specific functions are inlined.
+// This allows refactoring for code clarity and re-use without fear that
+// changes to the compiler will cause silent performance regressions.
+func TestPGOIntendedInlining(t *testing.T) {
+ testenv.MustHaveGoRun(t)
+ t.Parallel()
+
+ // Make a temporary directory to work in.
+ tmpdir, err := ioutil.TempDir("", "TestCode")
+ if err != nil {
+ t.Fatalf("Failed to create temporary directory: %v", err)
+ }
+ //defer os.RemoveAll(tmpdir)
+
+ want := map[string][]string{
+ "pgo/inline": {
+ "(*BS).NS",
+ },
+ }
+
+ // The functions which are not expected to be inlined are as follows.
+ wantNot := map[string][]string{
+ "pgo/inline": {
+ // The calling edge main->A is hot and the cost of A is large than
+ // inlineHotCalleeMaxBudget.
+ "A",
+ // The calling edge BenchmarkA" -> benchmarkB is cold
+ // and the cost of A is large than inlineMaxBudget.
+ "benchmarkB",
+ },
+ }
+
+ must := map[string]bool{
+ "(*BS).NS": true,
+ }
+
+ notInlinedReason := make(map[string]string)
+ pkgs := make([]string, 0, len(want))
+ for pname, fnames := range want {
+ pkgs = append(pkgs, pname)
+ for _, fname := range fnames {
+ fullName := pname + "." + fname
+ if _, ok := notInlinedReason[fullName]; ok {
+ t.Errorf("duplicate func: %s", fullName)
+ }
+ notInlinedReason[fullName] = "unknown reason"
+ }
+ }
+
+ // If the compiler emit "cannot inline for function A", the entry A
+ // in expectedNotInlinedList will be removed.
+ expectedNotInlinedList := make(map[string]struct{})
+ for pname, fnames := range wantNot {
+ for _, fname := range fnames {
+ fullName := pname + "." + fname
+ expectedNotInlinedList[fullName] = struct{}{}
+ }
+ }
+
+ args := append([]string{"test", "-o", filepath.Join(tmpdir, "inline_hot.test"), "-bench=.", "-cpuprofile", filepath.Join(tmpdir, "inline_hot.pprof")}, pkgs...)
+ gotool := testenv.GoToolPath(t)
+ cmd := exec.Command(gotool, args...)
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+ err = cmd.Run()
+ if err != nil {
+ t.Fatalf("Failed: %v:\nOut: %s\nStderr: %s\n", err, &stdout, &stderr)
+ }
+
+ args = append([]string{"test", "-run=nope", "-tags=", "-timeout=9m0s", "-gcflags=-m -m -profileuse " + filepath.Join(tmpdir, "inline_hot.pprof")}, pkgs...)
+ cmd = testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), args...))
+
+ pr, pw := io.Pipe()
+ cmd.Stdout = pw
+ cmd.Stderr = pw
+ cmdErr := make(chan error, 1)
+ go func() {
+ cmdErr <- cmd.Run()
+ pw.Close()
+ }()
+ scanner := bufio.NewScanner(pr)
+ curPkg := ""
+ canInline := regexp.MustCompile(`: can inline ([^ ]*)`)
+ haveInlined := regexp.MustCompile(`: inlining call to ([^ ]*)`)
+ cannotInline := regexp.MustCompile(`: cannot inline ([^ ]*): (.*)`)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if strings.HasPrefix(line, "# ") {
+ curPkg = line[2:]
+ splits := strings.Split(curPkg, " ")
+ curPkg = splits[0]
+ continue
+ }
+ if m := haveInlined.FindStringSubmatch(line); m != nil {
+ fname := m[1]
+ delete(notInlinedReason, curPkg+"."+fname)
+ continue
+ }
+ if m := canInline.FindStringSubmatch(line); m != nil {
+ fname := m[1]
+ fullname := curPkg + "." + fname
+ // If function must be inlined somewhere, being inlinable is not enough
+ if _, ok := must[fullname]; !ok {
+ delete(notInlinedReason, fullname)
+ continue
+ }
+ }
+ if m := cannotInline.FindStringSubmatch(line); m != nil {
+ fname, reason := m[1], m[2]
+ fullName := curPkg + "." + fname
+ if _, ok := notInlinedReason[fullName]; ok {
+ // cmd/compile gave us a reason why
+ notInlinedReason[fullName] = reason
+ }
+ delete(expectedNotInlinedList, fullName)
+ continue
+ }
+ }
+ if err := <-cmdErr; err != nil {
+ t.Fatal(err)
+ }
+ if err := scanner.Err(); err != nil {
+ t.Fatal(err)
+ }
+ for fullName, reason := range notInlinedReason {
+ t.Errorf("%s was not inlined: %s", fullName, reason)
+ }
+
+ // If the list expectedNotInlinedList is not empty, it indicates
+ // the functions in the expectedNotInlinedList are marked with caninline.
+ for fullName, _ := range expectedNotInlinedList {
+ t.Errorf("%s was expected not inlined", fullName)
+ }
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 0725039..a7901d9 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -64,6 +64,7 @@
"internal/goexperiment",
"internal/goversion",
"internal/pkgbits",
+ "internal/profile",
"internal/race",
"internal/saferio",
"internal/unsafeheader",
diff --git a/src/internal/profile/graph.go b/src/internal/profile/graph.go
new file mode 100644
index 0000000..4fcd081
--- /dev/null
+++ b/src/internal/profile/graph.go
@@ -0,0 +1,1173 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package graph collects a set of samples into a directed graph.
+
+// Original file location: https://github.com/google/pprof/tree/main/internal/graph/graph.go
+package profile
+
+import (
+ "fmt"
+ "math"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+var (
+ // Removes package name and method arguments for Java method names.
+ // See tests for examples.
+ javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
+ // Removes package name and method arguments for Go function names.
+ // See tests for examples.
+ goRegExp = regexp.MustCompile(`^(?:[\w\-\.]+\/)+(.+)`)
+ // Removes potential module versions in a package path.
+ goVerRegExp = regexp.MustCompile(`^(.*?)/v(?:[2-9]|[1-9][0-9]+)([./].*)$`)
+ // Strips C++ namespace prefix from a C++ function / method name.
+ // NOTE: Make sure to keep the template parameters in the name. Normally,
+ // template parameters are stripped from the C++ names but when
+ // -symbolize=demangle=templates flag is used, they will not be.
+ // See tests for examples.
+ cppRegExp = regexp.MustCompile(`^(?:[_a-zA-Z]\w*::)+(_*[A-Z]\w*::~?[_a-zA-Z]\w*(?:<.*>)?)`)
+ cppAnonymousPrefixRegExp = regexp.MustCompile(`^\(anonymous namespace\)::`)
+)
+
+// This was obtained from https://github.com/google/pprof/blob/main/internal/graph/dotgraph.go.
+const maxNodelets = 4 // Number of nodelets for labels (both numeric and non)
+
+// Options encodes the options for constructing a graph
+type Options struct {
+ SampleValue func(s []int64) int64 // Function to compute the value of a sample
+ SampleMeanDivisor func(s []int64) int64 // Function to compute the divisor for mean graphs, or nil
+ FormatTag func(int64, string) string // Function to format a sample tag value into a string
+ ObjNames bool // Always preserve obj filename
+ OrigFnNames bool // Preserve original (eg mangled) function names
+
+ CallTree bool // Build a tree instead of a graph
+ DropNegative bool // Drop nodes with overall negative values
+
+ KeptNodes NodeSet // If non-nil, only use nodes in this set
+}
+
+// Nodes is an ordered collection of graph nodes.
+type Nodes []*Node
+
+// Node is an entry on a profiling report. It represents a unique
+// program location.
+type Node struct {
+ // Info describes the source location associated to this node.
+ Info NodeInfo
+
+ // Function represents the function that this node belongs to. On
+ // graphs with sub-function resolution (eg line number or
+ // addresses), two nodes in a NodeMap that are part of the same
+ // function have the same value of Node.Function. If the Node
+ // represents the whole function, it points back to itself.
+ Function *Node
+
+ // Values associated to this node. Flat is exclusive to this node,
+ // Cum includes all descendents.
+ Flat, FlatDiv, Cum, CumDiv int64
+
+ // In and out Contains the nodes immediately reaching or reached by
+ // this node.
+ In, Out EdgeMap
+
+ // LabelTags provide additional information about subsets of a sample.
+ LabelTags TagMap
+
+ // NumericTags provide additional values for subsets of a sample.
+ // Numeric tags are optionally associated to a label tag. The key
+ // for NumericTags is the name of the LabelTag they are associated
+ // to, or "" for numeric tags not associated to a label tag.
+ NumericTags map[string]TagMap
+}
+
+// Graph summarizes a performance profile into a format that is
+// suitable for visualization.
+type Graph struct {
+ Nodes Nodes
+}
+
+// FlatValue returns the exclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) FlatValue() int64 {
+ if n.FlatDiv == 0 {
+ return n.Flat
+ }
+ return n.Flat / n.FlatDiv
+}
+
+// CumValue returns the inclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) CumValue() int64 {
+ if n.CumDiv == 0 {
+ return n.Cum
+ }
+ return n.Cum / n.CumDiv
+}
+
+// AddToEdge increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) {
+ n.AddToEdgeDiv(to, 0, v, residual, inline)
+}
+
+// AddToEdgeDiv increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) {
+ if n.Out[to] != to.In[n] {
+ panic(fmt.Errorf("asymmetric edges %v %v", *n, *to))
+ }
+
+ if e := n.Out[to]; e != nil {
+ e.WeightDiv += dv
+ e.Weight += v
+ if residual {
+ e.Residual = true
+ }
+ if !inline {
+ e.Inline = false
+ }
+ return
+ }
+
+ info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline}
+ n.Out[to] = info
+ to.In[n] = info
+}
+
+// NodeInfo contains the attributes for a node.
+type NodeInfo struct {
+ Name string
+ OrigName string
+ Address uint64
+ File string
+ StartLine, Lineno int
+ Objfile string
+}
+
+// PrintableName calls the Node's Formatter function with a single space separator.
+func (i *NodeInfo) PrintableName() string {
+ return strings.Join(i.NameComponents(), " ")
+}
+
+// NameComponents returns the components of the printable name to be used for a node.
+func (i *NodeInfo) NameComponents() []string {
+ var name []string
+ if i.Address != 0 {
+ name = append(name, fmt.Sprintf("%016x", i.Address))
+ }
+ if fun := i.Name; fun != "" {
+ name = append(name, fun)
+ }
+
+ switch {
+ case i.Lineno != 0:
+ // User requested line numbers, provide what we have.
+ name = append(name, fmt.Sprintf("%s:%d", i.File, i.Lineno))
+ case i.File != "":
+ // User requested file name, provide it.
+ name = append(name, i.File)
+ case i.Name != "":
+ // User requested function name. It was already included.
+ case i.Objfile != "":
+ // Only binary name is available
+ name = append(name, "["+filepath.Base(i.Objfile)+"]")
+ default:
+ // Do not leave it empty if there is no information at all.
+ name = append(name, "<unknown>")
+ }
+ return name
+}
+
+// NodeMap maps from a node info struct to a node. It is used to merge
+// report entries with the same info.
+type NodeMap map[NodeInfo]*Node
+
+// NodeSet is a collection of node info structs.
+type NodeSet map[NodeInfo]bool
+
+// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set
+// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo
+// works as a unique identifier; however, in a tree multiple nodes may share
+// identical NodeInfos. A *Node does uniquely identify a node so we can use that
+// instead. Though a *Node also uniquely identifies a node in a graph,
+// currently, during trimming, graphs are rebuilt from scratch using only the
+// NodeSet, so there would not be the required context of the initial graph to
+// allow for the use of *Node.
+type NodePtrSet map[*Node]bool
+
+// FindOrInsertNode takes the info for a node and either returns a matching node
+// from the node map if one exists, or adds one to the map if one does not.
+// If kept is non-nil, nodes are only added if they can be located on it.
+func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node {
+ if kept != nil {
+ if _, ok := kept[info]; !ok {
+ return nil
+ }
+ }
+
+ if n, ok := nm[info]; ok {
+ return n
+ }
+
+ n := &Node{
+ Info: info,
+ In: make(EdgeMap),
+ Out: make(EdgeMap),
+ LabelTags: make(TagMap),
+ NumericTags: make(map[string]TagMap),
+ }
+ nm[info] = n
+ if info.Address == 0 && info.Lineno == 0 {
+ // This node represents the whole function, so point Function
+ // back to itself.
+ n.Function = n
+ return n
+ }
+ // Find a node that represents the whole function.
+ info.Address = 0
+ info.Lineno = 0
+ n.Function = nm.FindOrInsertNode(info, nil)
+ return n
+}
+
+// EdgeMap is used to represent the incoming/outgoing edges from a node.
+type EdgeMap map[*Node]*Edge
+
+// Edge contains any attributes to be represented about edges in a graph.
+type Edge struct {
+ Src, Dest *Node
+ // The summary weight of the edge
+ Weight, WeightDiv int64
+
+ // residual edges connect nodes that were connected through a
+ // separate node, which has been removed from the report.
+ Residual bool
+ // An inline edge represents a call that was inlined into the caller.
+ Inline bool
+}
+
+// WeightValue returns the weight value for this edge, normalizing if a
+// divisor is available.
+func (e *Edge) WeightValue() int64 {
+ if e.WeightDiv == 0 {
+ return e.Weight
+ }
+ return e.Weight / e.WeightDiv
+}
+
+// Tag represent sample annotations
+type Tag struct {
+ Name string
+ Unit string // Describe the value, "" for non-numeric tags
+ Value int64
+ Flat, FlatDiv int64
+ Cum, CumDiv int64
+}
+
+// FlatValue returns the exclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) FlatValue() int64 {
+ if t.FlatDiv == 0 {
+ return t.Flat
+ }
+ return t.Flat / t.FlatDiv
+}
+
+// CumValue returns the inclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) CumValue() int64 {
+ if t.CumDiv == 0 {
+ return t.Cum
+ }
+ return t.Cum / t.CumDiv
+}
+
+// TagMap is a collection of tags, classified by their name.
+type TagMap map[string]*Tag
+
+// SortTags sorts a slice of tags based on their weight.
+func SortTags(t []*Tag, flat bool) []*Tag {
+ ts := tags{t, flat}
+ sort.Sort(ts)
+ return ts.t
+}
+
+// New summarizes performance data from a profile into a graph.
+func New(prof *Profile, o *Options) *Graph {
+ if o.CallTree {
+ return newTree(prof, o)
+ }
+ g, _ := newGraph(prof, o)
+ return g
+}
+
+// newGraph computes a graph from a profile. It returns the graph, and
+// a map from the profile location indices to the corresponding graph
+// nodes.
+func newGraph(prof *Profile, o *Options) (*Graph, map[uint64]Nodes) {
+ nodes, locationMap := CreateNodes(prof, o)
+ seenNode := make(map[*Node]bool)
+ seenEdge := make(map[nodePair]bool)
+ for _, sample := range prof.Sample {
+ var w, dw int64
+ w = o.SampleValue(sample.Value)
+ if o.SampleMeanDivisor != nil {
+ dw = o.SampleMeanDivisor(sample.Value)
+ }
+ if dw == 0 && w == 0 {
+ continue
+ }
+ for k := range seenNode {
+ delete(seenNode, k)
+ }
+ for k := range seenEdge {
+ delete(seenEdge, k)
+ }
+ var parent *Node
+ // A residual edge goes over one or more nodes that were not kept.
+ residual := false
+
+ labels := joinLabels(sample)
+ // Group the sample frames, based on a global map.
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ l := sample.Location[i]
+ locNodes := locationMap[l.ID]
+ for ni := len(locNodes) - 1; ni >= 0; ni-- {
+ n := locNodes[ni]
+ if n == nil {
+ residual = true
+ continue
+ }
+ // Add cum weight to all nodes in stack, avoiding double counting.
+ if _, ok := seenNode[n]; !ok {
+ seenNode[n] = true
+ n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+ }
+ // Update edge weights for all edges in stack, avoiding double counting.
+ if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent {
+ seenEdge[nodePair{n, parent}] = true
+ parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1)
+ }
+ parent = n
+ residual = false
+ }
+ }
+ if parent != nil && !residual {
+ // Add flat weight to leaf node.
+ parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+ }
+ }
+
+ return selectNodesForGraph(nodes, o.DropNegative), locationMap
+}
+
+func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph {
+ // Collect nodes into a graph.
+ gNodes := make(Nodes, 0, len(nodes))
+ for _, n := range nodes {
+ if n == nil {
+ continue
+ }
+ if n.Cum == 0 && n.Flat == 0 {
+ continue
+ }
+ if dropNegative && isNegative(n) {
+ continue
+ }
+ gNodes = append(gNodes, n)
+ }
+ return &Graph{gNodes}
+}
+
+type nodePair struct {
+ src, dest *Node
+}
+
+func newTree(prof *Profile, o *Options) (g *Graph) {
+ parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample))
+ for _, sample := range prof.Sample {
+ var w, dw int64
+ w = o.SampleValue(sample.Value)
+ if o.SampleMeanDivisor != nil {
+ dw = o.SampleMeanDivisor(sample.Value)
+ }
+ if dw == 0 && w == 0 {
+ continue
+ }
+ var parent *Node
+ labels := joinLabels(sample)
+ // Group the sample frames, based on a per-node map.
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ l := sample.Location[i]
+ lines := l.Line
+ if len(lines) == 0 {
+ lines = []Line{{}} // Create empty line to include location info.
+ }
+ for lidx := len(lines) - 1; lidx >= 0; lidx-- {
+ nodeMap := parentNodeMap[parent]
+ if nodeMap == nil {
+ nodeMap = make(NodeMap)
+ parentNodeMap[parent] = nodeMap
+ }
+ n := nodeMap.findOrInsertLine(l, lines[lidx], o)
+ if n == nil {
+ continue
+ }
+ n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+ if parent != nil {
+ parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1)
+ }
+ parent = n
+ }
+ }
+ if parent != nil {
+ parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+ }
+ }
+
+ nodes := make(Nodes, len(prof.Location))
+ for _, nm := range parentNodeMap {
+ nodes = append(nodes, nm.nodes()...)
+ }
+ return selectNodesForGraph(nodes, o.DropNegative)
+}
+
+// ShortenFunctionName returns a shortened version of a function's name.
+func ShortenFunctionName(f string) string {
+ f = cppAnonymousPrefixRegExp.ReplaceAllString(f, "")
+ f = goVerRegExp.ReplaceAllString(f, `${1}${2}`)
+ for _, re := range []*regexp.Regexp{goRegExp, javaRegExp, cppRegExp} {
+ if matches := re.FindStringSubmatch(f); len(matches) >= 2 {
+ return strings.Join(matches[1:], "")
+ }
+ }
+ return f
+}
+
+// TrimTree trims a Graph in forest form, keeping only the nodes in kept. This
+// will not work correctly if even a single node has multiple parents.
+func (g *Graph) TrimTree(kept NodePtrSet) {
+ // Creates a new list of nodes
+ oldNodes := g.Nodes
+ g.Nodes = make(Nodes, 0, len(kept))
+
+ for _, cur := range oldNodes {
+ // A node may not have multiple parents
+ if len(cur.In) > 1 {
+ panic("TrimTree only works on trees")
+ }
+
+ // If a node should be kept, add it to the new list of nodes
+ if _, ok := kept[cur]; ok {
+ g.Nodes = append(g.Nodes, cur)
+ continue
+ }
+
+ // If a node has no parents, then delete all of the in edges of its
+ // children to make them each roots of their own trees.
+ if len(cur.In) == 0 {
+ for _, outEdge := range cur.Out {
+ delete(outEdge.Dest.In, cur)
+ }
+ continue
+ }
+
+ // Get the parent. This works since at this point cur.In must contain only
+ // one element.
+ if len(cur.In) != 1 {
+ panic("Get parent assertion failed. cur.In expected to be of length 1.")
+ }
+ var parent *Node
+ for _, edge := range cur.In {
+ parent = edge.Src
+ }
+
+ parentEdgeInline := parent.Out[cur].Inline
+
+ // Remove the edge from the parent to this node
+ delete(parent.Out, cur)
+
+ // Reconfigure every edge from the current node to now begin at the parent.
+ for _, outEdge := range cur.Out {
+ child := outEdge.Dest
+
+ delete(child.In, cur)
+ child.In[parent] = outEdge
+ parent.Out[child] = outEdge
+
+ outEdge.Src = parent
+ outEdge.Residual = true
+ // If the edge from the parent to the current node and the edge from the
+ // current node to the child are both inline, then this resulting residual
+ // edge should also be inline
+ outEdge.Inline = parentEdgeInline && outEdge.Inline
+ }
+ }
+ g.RemoveRedundantEdges()
+}
+
+func joinLabels(s *Sample) string {
+ if len(s.Label) == 0 {
+ return ""
+ }
+
+ var labels []string
+ for key, vals := range s.Label {
+ for _, v := range vals {
+ labels = append(labels, key+":"+v)
+ }
+ }
+ sort.Strings(labels)
+ return strings.Join(labels, `\n`)
+}
+
+// isNegative returns true if the node is considered as "negative" for the
+// purposes of drop_negative.
+func isNegative(n *Node) bool {
+ switch {
+ case n.Flat < 0:
+ return true
+ case n.Flat == 0 && n.Cum < 0:
+ return true
+ default:
+ return false
+ }
+}
+
+// CreateNodes creates graph nodes for all locations in a profile. It
+// returns set of all nodes, plus a mapping of each location to the
+// set of corresponding nodes (one per location.Line).
+func CreateNodes(prof *Profile, o *Options) (Nodes, map[uint64]Nodes) {
+ locations := make(map[uint64]Nodes, len(prof.Location))
+ nm := make(NodeMap, len(prof.Location))
+ for _, l := range prof.Location {
+ lines := l.Line
+ if len(lines) == 0 {
+ lines = []Line{{}} // Create empty line to include location info.
+ }
+ nodes := make(Nodes, len(lines))
+ for ln := range lines {
+ nodes[ln] = nm.findOrInsertLine(l, lines[ln], o)
+ }
+ locations[l.ID] = nodes
+ }
+ return nm.nodes(), locations
+}
+
+func (nm NodeMap) nodes() Nodes {
+ nodes := make(Nodes, 0, len(nm))
+ for _, n := range nm {
+ nodes = append(nodes, n)
+ }
+ return nodes
+}
+
+func (nm NodeMap) findOrInsertLine(l *Location, li Line, o *Options) *Node {
+ var objfile string
+ if m := l.Mapping; m != nil && m.File != "" {
+ objfile = m.File
+ }
+
+ if ni := nodeInfo(l, li, objfile, o); ni != nil {
+ return nm.FindOrInsertNode(*ni, o.KeptNodes)
+ }
+ return nil
+}
+
+func nodeInfo(l *Location, line Line, objfile string, o *Options) *NodeInfo {
+ if line.Function == nil {
+ return &NodeInfo{Address: l.Address, Objfile: objfile}
+ }
+ ni := &NodeInfo{
+ Address: l.Address,
+ Lineno: int(line.Line),
+ Name: line.Function.Name,
+ }
+ if fname := line.Function.Filename; fname != "" {
+ ni.File = filepath.Clean(fname)
+ }
+ if o.OrigFnNames {
+ ni.OrigName = line.Function.SystemName
+ }
+ if o.ObjNames || (ni.Name == "" && ni.OrigName == "") {
+ ni.Objfile = objfile
+ ni.StartLine = int(line.Function.StartLine)
+ }
+ return ni
+}
+
+type tags struct {
+ t []*Tag
+ flat bool
+}
+
+func (t tags) Len() int { return len(t.t) }
+func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] }
+func (t tags) Less(i, j int) bool {
+ if !t.flat {
+ if t.t[i].Cum != t.t[j].Cum {
+ return abs64(t.t[i].Cum) > abs64(t.t[j].Cum)
+ }
+ }
+ if t.t[i].Flat != t.t[j].Flat {
+ return abs64(t.t[i].Flat) > abs64(t.t[j].Flat)
+ }
+ return t.t[i].Name < t.t[j].Name
+}
+
+// Sum adds the flat and cum values of a set of nodes.
+func (ns Nodes) Sum() (flat int64, cum int64) {
+ for _, n := range ns {
+ flat += n.Flat
+ cum += n.Cum
+ }
+ return
+}
+
+func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) {
+ // Update sample value
+ if flat {
+ n.FlatDiv += dw
+ n.Flat += w
+ } else {
+ n.CumDiv += dw
+ n.Cum += w
+ }
+
+ // Add string tags
+ if labels != "" {
+ t := n.LabelTags.findOrAddTag(labels, "", 0)
+ if flat {
+ t.FlatDiv += dw
+ t.Flat += w
+ } else {
+ t.CumDiv += dw
+ t.Cum += w
+ }
+ }
+
+ numericTags := n.NumericTags[labels]
+ if numericTags == nil {
+ numericTags = TagMap{}
+ n.NumericTags[labels] = numericTags
+ }
+ // Add numeric tags
+ if format == nil {
+ format = defaultLabelFormat
+ }
+ for k, nvals := range numLabel {
+ units := numUnit[k]
+ for i, v := range nvals {
+ var t *Tag
+ if len(units) > 0 {
+ t = numericTags.findOrAddTag(format(v, units[i]), units[i], v)
+ } else {
+ t = numericTags.findOrAddTag(format(v, k), k, v)
+ }
+ if flat {
+ t.FlatDiv += dw
+ t.Flat += w
+ } else {
+ t.CumDiv += dw
+ t.Cum += w
+ }
+ }
+ }
+}
+
+func defaultLabelFormat(v int64, key string) string {
+ return strconv.FormatInt(v, 10)
+}
+
+func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag {
+ l := m[label]
+ if l == nil {
+ l = &Tag{
+ Name: label,
+ Unit: unit,
+ Value: value,
+ }
+ m[label] = l
+ }
+ return l
+}
+
+// String returns a text representation of a graph, for debugging purposes.
+func (g *Graph) String() string {
+ var s []string
+
+ nodeIndex := make(map[*Node]int, len(g.Nodes))
+
+ for i, n := range g.Nodes {
+ nodeIndex[n] = i + 1
+ }
+
+ for i, n := range g.Nodes {
+ name := n.Info.PrintableName()
+ var in, out []int
+
+ for _, from := range n.In {
+ in = append(in, nodeIndex[from.Src])
+ }
+ for _, to := range n.Out {
+ out = append(out, nodeIndex[to.Dest])
+ }
+ s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out))
+ }
+ return strings.Join(s, "\n")
+}
+
+// DiscardLowFrequencyNodes returns a set of the nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet {
+ return makeNodeSet(g.Nodes, nodeCutoff)
+}
+
+// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet {
+ cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff)
+ kept := make(NodePtrSet, len(cutNodes))
+ for _, n := range cutNodes {
+ kept[n] = true
+ }
+ return kept
+}
+
+func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet {
+ cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff)
+ kept := make(NodeSet, len(cutNodes))
+ for _, n := range cutNodes {
+ kept[n.Info] = true
+ }
+ return kept
+}
+
+// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater
+// than or equal to cutoff.
+func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes {
+ cutoffNodes := make(Nodes, 0, len(nodes))
+ for _, n := range nodes {
+ if abs64(n.Cum) < nodeCutoff {
+ continue
+ }
+ cutoffNodes = append(cutoffNodes, n)
+ }
+ return cutoffNodes
+}
+
+// TrimLowFrequencyTags removes tags that have less than
+// the specified weight.
+func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) {
+ // Remove nodes with value <= total*nodeFraction
+ for _, n := range g.Nodes {
+ n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff)
+ for s, nt := range n.NumericTags {
+ n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff)
+ }
+ }
+}
+
+func trimLowFreqTags(tags TagMap, minValue int64) TagMap {
+ kept := TagMap{}
+ for s, t := range tags {
+ if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue {
+ kept[s] = t
+ }
+ }
+ return kept
+}
+
+// TrimLowFrequencyEdges removes edges that have less than
+// the specified weight. Returns the number of edges removed
+func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int {
+ var droppedEdges int
+ for _, n := range g.Nodes {
+ for src, e := range n.In {
+ if abs64(e.Weight) < edgeCutoff {
+ delete(n.In, src)
+ delete(src.Out, n)
+ droppedEdges++
+ }
+ }
+ }
+ return droppedEdges
+}
+
+// SortNodes sorts the nodes in a graph based on a specific heuristic.
+func (g *Graph) SortNodes(cum bool, visualMode bool) {
+ // Sort nodes based on requested mode
+ switch {
+ case visualMode:
+ // Specialized sort to produce a more visually-interesting graph
+ g.Nodes.Sort(EntropyOrder)
+ case cum:
+ g.Nodes.Sort(CumNameOrder)
+ default:
+ g.Nodes.Sort(FlatNameOrder)
+ }
+}
+
+// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph.
+func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet {
+ set := make(NodePtrSet)
+ for _, node := range g.selectTopNodes(maxNodes, visualMode) {
+ set[node] = true
+ }
+ return set
+}
+
+// SelectTopNodes returns a set of the top maxNodes nodes in a graph.
+func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet {
+ return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0)
+}
+
+// selectTopNodes returns a slice of the top maxNodes nodes in a graph.
+func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes {
+ if maxNodes > 0 {
+ if visualMode {
+ var count int
+ // If generating a visual graph, count tags as nodes. Update
+ // maxNodes to account for them.
+ for i, n := range g.Nodes {
+ tags := countTags(n)
+ if tags > maxNodelets {
+ tags = maxNodelets
+ }
+ if count += tags + 1; count >= maxNodes {
+ maxNodes = i + 1
+ break
+ }
+ }
+ }
+ }
+ if maxNodes > len(g.Nodes) {
+ maxNodes = len(g.Nodes)
+ }
+ return g.Nodes[:maxNodes]
+}
+
+// countTags counts the tags with flat count. This underestimates the
+// number of tags being displayed, but in practice is close enough.
+func countTags(n *Node) int {
+ count := 0
+ for _, e := range n.LabelTags {
+ if e.Flat != 0 {
+ count++
+ }
+ }
+ for _, t := range n.NumericTags {
+ for _, e := range t {
+ if e.Flat != 0 {
+ count++
+ }
+ }
+ }
+ return count
+}
+
+// RemoveRedundantEdges removes residual edges if the destination can
+// be reached through another path. This is done to simplify the graph
+// while preserving connectivity.
+func (g *Graph) RemoveRedundantEdges() {
+ // Walk the nodes and outgoing edges in reverse order to prefer
+ // removing edges with the lowest weight.
+ for i := len(g.Nodes); i > 0; i-- {
+ n := g.Nodes[i-1]
+ in := n.In.Sort()
+ for j := len(in); j > 0; j-- {
+ e := in[j-1]
+ if !e.Residual {
+ // Do not remove edges heavier than a non-residual edge, to
+ // avoid potential confusion.
+ break
+ }
+ if isRedundantEdge(e) {
+ delete(e.Src.Out, e.Dest)
+ delete(e.Dest.In, e.Src)
+ }
+ }
+ }
+}
+
+// isRedundantEdge determines if there is a path that allows e.Src
+// to reach e.Dest after removing e.
+func isRedundantEdge(e *Edge) bool {
+ src, n := e.Src, e.Dest
+ seen := map[*Node]bool{n: true}
+ queue := Nodes{n}
+ for len(queue) > 0 {
+ n := queue[0]
+ queue = queue[1:]
+ for _, ie := range n.In {
+ if e == ie || seen[ie.Src] {
+ continue
+ }
+ if ie.Src == src {
+ return true
+ }
+ seen[ie.Src] = true
+ queue = append(queue, ie.Src)
+ }
+ }
+ return false
+}
+
+// nodeSorter is a mechanism used to allow a report to be sorted
+// in different ways.
+type nodeSorter struct {
+ rs Nodes
+ less func(l, r *Node) bool
+}
+
+func (s nodeSorter) Len() int { return len(s.rs) }
+func (s nodeSorter) Swap(i, j int) { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] }
+func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) }
+
+// Sort reorders a slice of nodes based on the specified ordering
+// criteria. The result is sorted in decreasing order for (absolute)
+// numeric quantities, alphabetically for text, and increasing for
+// addresses.
+func (ns Nodes) Sort(o NodeOrder) error {
+ var s nodeSorter
+
+ switch o {
+ case FlatNameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+ return iv > jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case FlatCumNameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case NameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.Name, r.Info.Name; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case FileOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.File, r.Info.File; iv != jv {
+ return iv < jv
+ }
+ if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case AddressOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.Address, r.Info.Address; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case CumNameOrder, EntropyOrder:
+ // Hold scoring for score-based ordering
+ var score map[*Node]int64
+ scoreOrder := func(l, r *Node) bool {
+ if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ return compareNodes(l, r)
+ }
+
+ switch o {
+ case CumNameOrder:
+ score = make(map[*Node]int64, len(ns))
+ for _, n := range ns {
+ score[n] = n.Cum
+ }
+ s = nodeSorter{ns, scoreOrder}
+ case EntropyOrder:
+ score = make(map[*Node]int64, len(ns))
+ for _, n := range ns {
+ score[n] = entropyScore(n)
+ }
+ s = nodeSorter{ns, scoreOrder}
+ }
+ default:
+ return fmt.Errorf("report: unrecognized sort ordering: %d", o)
+ }
+ sort.Sort(s)
+ return nil
+}
+
+// compareNodes compares two nodes to provide a deterministic ordering
+// between them. Two nodes cannot have the same Node.Info value.
+func compareNodes(l, r *Node) bool {
+ return fmt.Sprint(l.Info) < fmt.Sprint(r.Info)
+}
+
+// entropyScore computes a score for a node representing how important
+// it is to include this node on a graph visualization. It is used to
+// sort the nodes and select which ones to display if we have more
+// nodes than desired in the graph. This number is computed by looking
+// at the flat and cum weights of the node and the incoming/outgoing
+// edges. The fundamental idea is to penalize nodes that have a simple
+// fallthrough from their incoming to the outgoing edge.
+func entropyScore(n *Node) int64 {
+ score := float64(0)
+
+ if len(n.In) == 0 {
+ score++ // Favor entry nodes
+ } else {
+ score += edgeEntropyScore(n, n.In, 0)
+ }
+
+ if len(n.Out) == 0 {
+ score++ // Favor leaf nodes
+ } else {
+ score += edgeEntropyScore(n, n.Out, n.Flat)
+ }
+
+ return int64(score*float64(n.Cum)) + n.Flat
+}
+
+// edgeEntropyScore computes the entropy value for a set of edges
+// coming in or out of a node. Entropy (as defined in information
+// theory) refers to the amount of information encoded by the set of
+// edges. A set of edges that have a more interesting distribution of
+// samples gets a higher score.
+func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 {
+ score := float64(0)
+ total := self
+ for _, e := range edges {
+ if e.Weight > 0 {
+ total += abs64(e.Weight)
+ }
+ }
+ if total != 0 {
+ for _, e := range edges {
+ frac := float64(abs64(e.Weight)) / float64(total)
+ score += -frac * math.Log2(frac)
+ }
+ if self > 0 {
+ frac := float64(abs64(self)) / float64(total)
+ score += -frac * math.Log2(frac)
+ }
+ }
+ return score
+}
+
+// NodeOrder sets the ordering for a Sort operation
+type NodeOrder int
+
+// Sorting options for node sort.
+const (
+ FlatNameOrder NodeOrder = iota
+ FlatCumNameOrder
+ CumNameOrder
+ NameOrder
+ FileOrder
+ AddressOrder
+ EntropyOrder
+)
+
+// Sort returns a slice of the edges in the map, in a consistent
+// order. The sort order is first based on the edge weight
+// (higher-to-lower) and then by the node names to avoid flakiness.
+func (e EdgeMap) Sort() []*Edge {
+ el := make(edgeList, 0, len(e))
+ for _, w := range e {
+ el = append(el, w)
+ }
+
+ sort.Sort(el)
+ return el
+}
+
+// Sum returns the total weight for a set of nodes.
+func (e EdgeMap) Sum() int64 {
+ var ret int64
+ for _, edge := range e {
+ ret += edge.Weight
+ }
+ return ret
+}
+
+type edgeList []*Edge
+
+func (el edgeList) Len() int {
+ return len(el)
+}
+
+func (el edgeList) Less(i, j int) bool {
+ if el[i].Weight != el[j].Weight {
+ return abs64(el[i].Weight) > abs64(el[j].Weight)
+ }
+
+ from1 := el[i].Src.Info.PrintableName()
+ from2 := el[j].Src.Info.PrintableName()
+ if from1 != from2 {
+ return from1 < from2
+ }
+
+ to1 := el[i].Dest.Info.PrintableName()
+ to2 := el[j].Dest.Info.PrintableName()
+
+ return to1 < to2
+}
+
+func (el edgeList) Swap(i, j int) {
+ el[i], el[j] = el[j], el[i]
+}
+
+func abs64(i int64) int64 {
+ if i < 0 {
+ return -i
+ }
+ return i
+}
diff --git a/src/internal/profile/legacy_java_profile.go b/src/internal/profile/legacy_java_profile.go
new file mode 100644
index 0000000..4f9f2a7
--- /dev/null
+++ b/src/internal/profile/legacy_java_profile.go
@@ -0,0 +1,317 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file implements parsers to convert java legacy profiles into
+// the profile.proto format.
+
+// Original file location: https://github.com/google/pprof/blob/main/profile/legacy_java_profile.go
+
+package profile
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+var (
+ attributeRx = regexp.MustCompile(`([\w ]+)=([\w ]+)`)
+ javaSampleRx = regexp.MustCompile(` *(\d+) +(\d+) +@ +([ x0-9a-f]*)`)
+ javaLocationRx = regexp.MustCompile(`^\s*0x([[:xdigit:]]+)\s+(.*)\s*$`)
+ javaLocationFileLineRx = regexp.MustCompile(`^(.*)\s+\((.+):(-?[[:digit:]]+)\)$`)
+ javaLocationPathRx = regexp.MustCompile(`^(.*)\s+\((.*)\)$`)
+)
+
+// javaCPUProfile returns a new Profile from profilez data.
+// b is the profile bytes after the header, period is the profiling
+// period, and parse is a function to parse 8-byte chunks from the
+// profile in its native endianness.
+func javaCPUProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
+ p := &Profile{
+ Period: period * 1000,
+ PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
+ SampleType: []*ValueType{{Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}},
+ }
+ var err error
+ var locs map[uint64]*Location
+ if b, locs, err = parseCPUSamples(b, parse, false, p); err != nil {
+ return nil, err
+ }
+
+ if err = parseJavaLocations(b, locs, p); err != nil {
+ return nil, err
+ }
+
+ // Strip out addresses for better merge.
+ if err = p.Aggregate(true, true, true, true, false); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+// parseJavaProfile returns a new profile from heapz or contentionz
+// data. b is the profile bytes after the header.
+func parseJavaProfile(b []byte) (*Profile, error) {
+ h := bytes.SplitAfterN(b, []byte("\n"), 2)
+ if len(h) < 2 {
+ return nil, errUnrecognized
+ }
+
+ p := &Profile{
+ PeriodType: &ValueType{},
+ }
+ header := string(bytes.TrimSpace(h[0]))
+
+ var err error
+ var pType string
+ switch header {
+ case "--- heapz 1 ---":
+ pType = "heap"
+ case "--- contentionz 1 ---":
+ pType = "contention"
+ default:
+ return nil, errUnrecognized
+ }
+
+ if b, err = parseJavaHeader(pType, h[1], p); err != nil {
+ return nil, err
+ }
+ var locs map[uint64]*Location
+ if b, locs, err = parseJavaSamples(pType, b, p); err != nil {
+ return nil, err
+ }
+ if err = parseJavaLocations(b, locs, p); err != nil {
+ return nil, err
+ }
+
+ // Strip out addresses for better merge.
+ if err = p.Aggregate(true, true, true, true, false); err != nil {
+ return nil, err
+ }
+
+ return p, nil
+}
+
+// parseJavaHeader parses the attribute section on a java profile and
+// populates a profile. Returns the remainder of the buffer after all
+// attributes.
+func parseJavaHeader(pType string, b []byte, p *Profile) ([]byte, error) {
+ nextNewLine := bytes.IndexByte(b, byte('\n'))
+ for nextNewLine != -1 {
+ line := string(bytes.TrimSpace(b[0:nextNewLine]))
+ if line != "" {
+ h := attributeRx.FindStringSubmatch(line)
+ if h == nil {
+ // Not a valid attribute, exit.
+ return b, nil
+ }
+
+ attribute, value := strings.TrimSpace(h[1]), strings.TrimSpace(h[2])
+ var err error
+ switch pType + "/" + attribute {
+ case "heap/format", "cpu/format", "contention/format":
+ if value != "java" {
+ return nil, errUnrecognized
+ }
+ case "heap/resolution":
+ p.SampleType = []*ValueType{
+ {Type: "inuse_objects", Unit: "count"},
+ {Type: "inuse_space", Unit: value},
+ }
+ case "contention/resolution":
+ p.SampleType = []*ValueType{
+ {Type: "contentions", Unit: "count"},
+ {Type: "delay", Unit: value},
+ }
+ case "contention/sampling period":
+ p.PeriodType = &ValueType{
+ Type: "contentions", Unit: "count",
+ }
+ if p.Period, err = strconv.ParseInt(value, 0, 64); err != nil {
+ return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+ }
+ case "contention/ms since reset":
+ millis, err := strconv.ParseInt(value, 0, 64)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+ }
+ p.DurationNanos = millis * 1000 * 1000
+ default:
+ return nil, errUnrecognized
+ }
+ }
+ // Grab next line.
+ b = b[nextNewLine+1:]
+ nextNewLine = bytes.IndexByte(b, byte('\n'))
+ }
+ return b, nil
+}
+
+// parseJavaSamples parses the samples from a java profile and
+// populates the Samples in a profile. Returns the remainder of the
+// buffer after the samples.
+func parseJavaSamples(pType string, b []byte, p *Profile) ([]byte, map[uint64]*Location, error) {
+ nextNewLine := bytes.IndexByte(b, byte('\n'))
+ locs := make(map[uint64]*Location)
+ for nextNewLine != -1 {
+ line := string(bytes.TrimSpace(b[0:nextNewLine]))
+ if line != "" {
+ sample := javaSampleRx.FindStringSubmatch(line)
+ if sample == nil {
+ // Not a valid sample, exit.
+ return b, locs, nil
+ }
+
+ // Java profiles have data/fields inverted compared to other
+ // profile types.
+ var err error
+ value1, value2, value3 := sample[2], sample[1], sample[3]
+ addrs := parseHexAddresses(value3)
+ if err != nil {
+ return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+ }
+
+ var sloc []*Location
+ for _, addr := range addrs {
+ loc := locs[addr]
+ if locs[addr] == nil {
+ loc = &Location{
+ Address: addr,
+ }
+ p.Location = append(p.Location, loc)
+ locs[addr] = loc
+ }
+ sloc = append(sloc, loc)
+ }
+ s := &Sample{
+ Value: make([]int64, 2),
+ Location: sloc,
+ }
+
+ if s.Value[0], err = strconv.ParseInt(value1, 0, 64); err != nil {
+ return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+ if s.Value[1], err = strconv.ParseInt(value2, 0, 64); err != nil {
+ return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+
+ switch pType {
+ case "heap":
+ const javaHeapzSamplingRate = 524288 // 512K
+ if s.Value[0] == 0 {
+ return nil, nil, fmt.Errorf("parsing sample %s: second value must be non-zero", line)
+ }
+ s.NumLabel = map[string][]int64{"bytes": {s.Value[1] / s.Value[0]}}
+ s.Value[0], s.Value[1] = scaleHeapSample(s.Value[0], s.Value[1], javaHeapzSamplingRate)
+ case "contention":
+ if period := p.Period; period != 0 {
+ s.Value[0] = s.Value[0] * p.Period
+ s.Value[1] = s.Value[1] * p.Period
+ }
+ }
+ p.Sample = append(p.Sample, s)
+ }
+ // Grab next line.
+ b = b[nextNewLine+1:]
+ nextNewLine = bytes.IndexByte(b, byte('\n'))
+ }
+ return b, locs, nil
+}
+
+// parseJavaLocations parses the location information in a java
+// profile and populates the Locations in a profile. It uses the
+// location addresses from the profile as both the ID of each
+// location.
+func parseJavaLocations(b []byte, locs map[uint64]*Location, p *Profile) error {
+ r := bytes.NewBuffer(b)
+ fns := make(map[string]*Function)
+ for {
+ line, err := r.ReadString('\n')
+ if err != nil {
+ if err != io.EOF {
+ return err
+ }
+ if line == "" {
+ break
+ }
+ }
+
+ if line = strings.TrimSpace(line); line == "" {
+ continue
+ }
+
+ jloc := javaLocationRx.FindStringSubmatch(line)
+ if len(jloc) != 3 {
+ continue
+ }
+ addr, err := strconv.ParseUint(jloc[1], 16, 64)
+ if err != nil {
+ return fmt.Errorf("parsing sample %s: %v", line, err)
+ }
+ loc := locs[addr]
+ if loc == nil {
+ // Unused/unseen
+ continue
+ }
+ var lineFunc, lineFile string
+ var lineNo int64
+
+ if fileLine := javaLocationFileLineRx.FindStringSubmatch(jloc[2]); len(fileLine) == 4 {
+ // Found a line of the form: "function (file:line)"
+ lineFunc, lineFile = fileLine[1], fileLine[2]
+ if n, err := strconv.ParseInt(fileLine[3], 10, 64); err == nil && n > 0 {
+ lineNo = n
+ }
+ } else if filePath := javaLocationPathRx.FindStringSubmatch(jloc[2]); len(filePath) == 3 {
+ // If there's not a file:line, it's a shared library path.
+ // The path isn't interesting, so just give the .so.
+ lineFunc, lineFile = filePath[1], filepath.Base(filePath[2])
+ } else if strings.Contains(jloc[2], "generated stub/JIT") {
+ lineFunc = "STUB"
+ } else {
+ // Treat whole line as the function name. This is used by the
+ // java agent for internal states such as "GC" or "VM".
+ lineFunc = jloc[2]
+ }
+ fn := fns[lineFunc]
+
+ if fn == nil {
+ fn = &Function{
+ Name: lineFunc,
+ SystemName: lineFunc,
+ Filename: lineFile,
+ }
+ fns[lineFunc] = fn
+ p.Function = append(p.Function, fn)
+ }
+ loc.Line = []Line{
+ {
+ Function: fn,
+ Line: lineNo,
+ },
+ }
+ loc.Address = 0
+ }
+
+ p.remapLocationIDs()
+ p.remapFunctionIDs()
+ p.remapMappingIDs()
+
+ return nil
+}
diff --git a/src/pgo/inline/inline_hot.go b/src/pgo/inline/inline_hot.go
new file mode 100644
index 0000000..d2e0bf7
--- /dev/null
+++ b/src/pgo/inline/inline_hot.go
@@ -0,0 +1,81 @@
+package main
+
+import (
+ "time"
+)
+
+type BS struct {
+ length uint
+ s []uint64
+}
+
+const wSize = uint(64)
+const lWSize = uint(6)
+
+func D(i uint) int {
+ return int((i + (wSize - 1)) >> lWSize)
+}
+
+func N(length uint) (bs *BS) {
+ bs = &BS{
+ length,
+ make([]uint64, D(length)),
+ }
+
+ return bs
+}
+
+func (b *BS) S(i uint) *BS {
+ b.s[i>>lWSize] |= 1 << (i & (wSize - 1))
+ return b
+}
+
+var jn = [...]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+func T(v uint64) uint {
+ return uint(jn[((v&-v)*0x03f79d71b4ca8b09)>>58])
+}
+
+func (b *BS) NS(i uint) (uint, bool) {
+ x := int(i >> lWSize)
+ if x >= len(b.s) {
+ return 0, false
+ }
+ w := b.s[x]
+ w = w >> (i & (wSize - 1))
+ if w != 0 {
+ return i + T(w), true
+ }
+ x = x + 1
+ for x < len(b.s) {
+ if b.s[x] != 0 {
+ return uint(x)*wSize + T(b.s[x]), true
+ }
+ x = x + 1
+
+ }
+ return 0, false
+}
+
+func A() {
+ s := N(100000)
+ for i := 0; i < 1000; i += 30 {
+ s.S(uint(i))
+ }
+ for j := 0; j < 1000; j++ {
+ c := uint(0)
+ for i, e := s.NS(0); e; i, e = s.NS(i + 1) {
+ c++
+ }
+ }
+}
+
+func main() {
+ time.Sleep(time.Second)
+ A()
+}
diff --git a/src/pgo/inline/inline_hot_test.go b/src/pgo/inline/inline_hot_test.go
new file mode 100644
index 0000000..f906ee7
--- /dev/null
+++ b/src/pgo/inline/inline_hot_test.go
@@ -0,0 +1,42 @@
+package main
+
+import "testing"
+
+func BenchmarkA(b *testing.B) {
+ benchmarkB(b)
+}
+func benchmarkB(b *testing.B) {
+
+ for i := 0; true; {
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ }
+}
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
9 comments:
Patchset:
Thanks for sharing the CL!
Haven't read the detail. Just some high-level comments. Thanks.
File api/go1.19.txt:
pkg pgo/inline, func A() #43724
pkg pgo/inline, func D(uint) int #43724
pkg pgo/inline, func N(uint) *BS #43724
pkg pgo/inline, func T(uint64) uint #43724
pkg pgo/inline, type BS struct #43724
pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
pkg pgo/inline, method (*BS) S(uint) *BS #43724
Undo. Let's not add this. (See the comment at pgo/inline/inline_hot.go)
File src/cmd/compile/internal/base/flag.go:
Patch Set #1, Line 124: ProfileUse
Maybe PGOProfile (-pgoprofile for the flag)? Not sure.
Patch Set #1, Line 125: methods
Maybe say functions instead of methods? I think it applies to non-method functions.
"callsites" -> "call sites"
File src/cmd/compile/internal/gc/main.go:
&profile.Options{
CallTree: false,
SampleValue: func(v []int64) int64 { return v[1] },
}
It is unclear to me what the options mean here, especially the function for SampleValues. Can we make Options completely internal to the cmd/compile/internal/pgo package? (see also the comment at internal/profile/graph.go)
File src/cmd/compile/internal/pgo/irgraph.go:
Add copyright header. Thanks.
File src/internal/profile/graph.go:
I think this can live in the cmd/compile/internal/pgo package (unless/until we have a use case outside of the compiler). Also, it may be better to have a re-implementation instead of a copy from pprof. Then we can have APIs that is more suitable for us. For example, do we need the "Options" type? Unless the compiler needs to build multiple graphs with different options, I think it can be completely internal. Also unused options can be removed.
File src/internal/profile/legacy_java_profile.go:
I don't think we need to support Java profiles. Could we not add this?
File src/pgo/inline/inline_hot.go:
This is just an example, right? I don't think we need to add this package to the source tree.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
Raj Barik uploaded patch set #2 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
9 files changed, 2,124 insertions(+), 7 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.
4 comments:
File src/cmd/compile/internal/base/flag.go:
Patch Set #1, Line 124: ProfileUse
Maybe PGOProfile (-pgoprofile for the flag)? Not sure.
Done
Patch Set #1, Line 125: methods
Maybe say functions instead of methods? I think it applies to non-method functions. […]
Done
File src/cmd/compile/internal/gc/main.go:
&profile.Options{
CallTree: false,
SampleValue: func(v []int64) int64 { return v[1] },
}
It is unclear to me what the options mean here, especially the function for SampleValues. […]
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Add copyright header. Thanks.
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
33 comments:
File src/cmd/compile/internal/base/flag.go:
InlineHotThreshold string "help:\"threshold percentage for determining hot functions and call sites for inlining\""
InlineHotBudget int "help:\"inline budget for hot functions\""
As we discussed, maybe make them debug flags under -d, see cmd/compile/internal/base/debug.go
File src/cmd/compile/internal/gc/main.go:
Patch Set #3, Line 239: if pgo.PProfGraph == nil {
The check doesn't seem necessary.
File src/cmd/compile/internal/inline/inl.go:
Patch Set #3, Line 48: // Budget increased due to hotness.
This comment should be on the line above? Maybe say this is the non-PGO inline budget.
Patch Set #3, Line 61: // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
Mutable global states make the code hard to reasoning. Can we make them local? Or just using the single global state from the pgo package?
160 doesn't seem large. If a function is hot, I think we can increase the budget more than that. Maybe 1000?
Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
At least for now, I think nothing will need the IRGraph after inlining, right? Then delete. We could add that in the future when we need it.
Patch Set #3, Line 137: // InlinePackage finds functions that can be inlined and clones them.
Keep the original comment.
savefn := ir.CurFunc
ir.CurFunc = fn
This needs some explanation.
Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget
Can we start with a higher budget, instead of checking here when it goes negative? Maybe increase the budget at line 265 above?
Patch Set #3, Line 348: == true
"== true" is unnecessary.
Patch Set #3, Line 422: ir.CurFunc
Maybe we want to add the current function in hairyVisitor instead of using the global variable.
File src/cmd/compile/internal/pgo/graph.go:
This seems building a more general-purpose graph than we need. Can we just build what we need? Like, I don't think we need all the Options.
Patch Set #3, Line 34: javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
I think we don't need the non-Go ones.
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 20: The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.
I'm not sure we need to update the IRGraph (see the other comment at inline.go). Even if we do, it probably doesn't need to be mentioned here.
Dst *IRNode
DstNode *ir.Node
What is the difference between Dst and DstNode?
Also, I think we want to use ir.Node by value instead of by pointer.
Patch Set #3, Line 53: pprof
Let's say "profile" for things that not specific to pprof format.
Patch Set #3, Line 53: NodeMapInfo
Maybe NodeMapKey to be clear.
FuncName string
DstName string
CallSite int
Maybe have some comment. Looking at this it is unclear what the fields are.
NWeight int64
NTotalWeight int64
Maybe just call them NFlat and NCum.
Patch Set #3, Line 69: Line string
I think we don't want to use the string form of the line number. Can we just use number? In the proposal we mentioned that we don't want it to be file name sensitive (so it tolerates code moving and file renaming). So ir.Line probably isn't what we want.
Patch Set #3, Line 82: = nil
"= nil" isn't necessary. Also next one.
Patch Set #3, Line 112: pProfGraph := New(p, opt)
This builds a graph doe the whole binary. Can we build just the part we need, that is, the package we're compiling and its dependencies? Like, if we're compiling package A and the profile contains a package B which has no relation with A, we don't need to build the graph of B. Maybe we could use Options.KeptNodes?
Patch Set #3, Line 117: return pProfGraph
It seems we can just assign it to the global variable, without retuning it. Having both the global variable and the return value looks redundant.
Patch Set #3, Line 132: log.Println("weighted call graph created successfully!")
This information doesn't seem very helpful.
Patch Set #3, Line 135: return weightedCG
Same here.
Patch Set #3, Line 141: nTotalWeight
Name it nCumWeight, to be consistent with pprof naming and also clear what it actually means.
Patch Set #3, Line 143: // Accummulate weights for the same nodes.
Does the graph have duplicate nodes? Can we build a graph without duplicate in the first place?
else {
nweight[canonicalName] = n.FlatValue()
nTotalWeight[canonicalName] = n.CumValue()
}
I think we don't need the conditional. The two += assignments should do the right thing even if the entry did not exist.
Patch Set #3, Line 167: nodeinfo.CallSite = -1
Why?
Patch Set #3, Line 239: node1 represents the caller. Callee as f
Maybe just name them caller, callee, to be clear. Also in the function body. Using 1 and 2 isn't very clear.
Patch Set #3, Line 276: [callee is an interface type here which can have multiple targets].
Not sure why/how this matters here.
nodeinfo.DstName = ""
nodeinfo.CallSite = -1
if weights, ok := GlobalNodeMap[nodeinfo]; ok {
node1.Flat = weights.NWeight
node1.Cum = weights.NTotalWeight
info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
g.OutEdges[node1] = append(g.OutEdges[node1], info)
g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
} else {
info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
g.OutEdges[node1] = append(g.OutEdges[node1], info)
g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
}
}
This else branch needs some comments.
Patch Set #3, Line 298: node1
Same here. Maybe say caller instead of node1.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
Raj Barik uploaded patch set #6 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,110 insertions(+), 5 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.
34 comments:
File api/go1.19.txt:
pkg pgo/inline, func A() #43724
pkg pgo/inline, func D(uint) int #43724
pkg pgo/inline, func N(uint) *BS #43724
pkg pgo/inline, func T(uint64) uint #43724
pkg pgo/inline, type BS struct #43724
pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
pkg pgo/inline, method (*BS) S(uint) *BS #43724
Undo. Let's not add this. (See the comment at pgo/inline/inline_hot. […]
Done
File src/cmd/compile/internal/base/flag.go:
InlineHotThreshold string "help:\"threshold percentage for determining hot functions and call sites for inlining\""
InlineHotBudget int "help:\"inline budget for hot functions\""
As we discussed, maybe make them debug flags under -d, see cmd/compile/internal/base/debug. […]
Done
File src/cmd/compile/internal/gc/main.go:
Patch Set #3, Line 239: if pgo.PProfGraph == nil {
The check doesn't seem necessary.
Done
File src/cmd/compile/internal/inline/inl.go:
Patch Set #3, Line 48: // Budget increased due to hotness.
This comment should be on the line above? Maybe say this is the non-PGO inline budget.
Done
Patch Set #3, Line 61: // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
Mutable global states make the code hard to reasoning. […]
Done
160 doesn't seem large. If a function is hot, I think we can increase the budget more than that. […]
Let's discuss this in our meeting. Raising this number too high would increase code size due to inlining. We should discuss pro and cons.
Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
At least for now, I think nothing will need the IRGraph after inlining, right? Then delete. […]
I assigned nil to both graphs. It might be a good idea keep the updated graph, just for debugging purposes.
Patch Set #3, Line 137: // InlinePackage finds functions that can be inlined and clones them.
Keep the original comment.
Done
savefn := ir.CurFunc
ir.CurFunc = fn
This needs some explanation.
I updated this code to keep the function as part of the HairyVisitor. We would have liked to use ir.CurFunc to do this.
Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget
Can we start with a higher budget, instead of checking here when it goes negative? Maybe increase th […]
Need to understand what you mean.
Patch Set #3, Line 348: == true
"== true" is unnecessary.
Done
Patch Set #3, Line 422: ir.CurFunc
Maybe we want to add the current function in hairyVisitor instead of using the global variable.
Done
File src/cmd/compile/internal/pgo/graph.go:
This seems building a more general-purpose graph than we need. […]
The `graph.go` was obtained from https://github.com/google/pprof/tree/main/internal/graph/. There are many functionalities we do not need from it or we may extend certain capabilities. What is your recommendation moving forward?
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 20: The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.
I'm not sure we need to update the IRGraph (see the other comment at inline.go). […]
Done
Dst *IRNode
DstNode *ir.Node
What is the difference between Dst and DstNode? […]
Done
Patch Set #3, Line 53: NodeMapInfo
Maybe NodeMapKey to be clear.
Done
Patch Set #3, Line 53: pprof
Let's say "profile" for things that not specific to pprof format.
Done
FuncName string
DstName string
CallSite int
Maybe have some comment. Looking at this it is unclear what the fields are.
Done
NWeight int64
NTotalWeight int64
Maybe just call them NFlat and NCum.
Done
Patch Set #3, Line 69: Line string
I think we don't want to use the string form of the line number. […]
Let's discuss this in our meeting.
Patch Set #3, Line 82: = nil
"= nil" isn't necessary. Also next one.
Done
Patch Set #3, Line 112: pProfGraph := New(p, opt)
This builds a graph doe the whole binary. […]
This is interesting and needs modifications to `graph.go`, which I am not sure if we should do. KeptNodes is used for some other purposes. We may have to add functionalities to filter nodes based on current package.
Patch Set #3, Line 117: return pProfGraph
It seems we can just assign it to the global variable, without retuning it. […]
Done
Patch Set #3, Line 132: log.Println("weighted call graph created successfully!")
This information doesn't seem very helpful.
Done
Patch Set #3, Line 135: return weightedCG
Same here.
Done
Patch Set #3, Line 141: nTotalWeight
Name it nCumWeight, to be consistent with pprof naming and also clear what it actually means.
Done
else {
nweight[canonicalName] = n.FlatValue()
nTotalWeight[canonicalName] = n.CumValue()
}
I think we don't need the conditional. […]
Done
Patch Set #3, Line 167: nodeinfo.CallSite = -1
Why?
Done
Patch Set #3, Line 239: node1 represents the caller. Callee as f
Maybe just name them caller, callee, to be clear. Also in the function body. […]
Done
Patch Set #3, Line 276: [callee is an interface type here which can have multiple targets].
Not sure why/how this matters here.
Done
Patch Set #3, Line 298: node1
Same here. Maybe say caller instead of node1.
Done
File src/internal/profile/graph.go:
I think this can live in the cmd/compile/internal/pgo package (unless/until we have a use case outsi […]
Done
File src/internal/profile/legacy_java_profile.go:
I don't think we need to support Java profiles. […]
Done
File src/pgo/inline/inline_hot.go:
This is just an example, right? I don't think we need to add this package to the source tree.
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
17 comments:
File src/cmd/compile/internal/inline/inl.go:
Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
I assigned nil to both graphs. […]
Okay, thanks. As we discussed, only update the graph if a debug flag is set.
Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget
Need to understand what you mean.
As discussed offline, start with a higher budget, by changing inlineMaxBudget at line 263 (as of Patch Set 6) to inlineHotMaxBudget, if the function is a hot callee.
File src/cmd/compile/internal/inline/inl.go:
Undo adding the blank line.
line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
lineno := fmt.Sprintf("%v", line2)
This looks redundant parsing the string to integer then convert back to string.
Patch Set #6, Line 101: canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
Can we make it keyed on a struct with caller, callee and the line number, without making a string?
Patch Set #6, Line 583: if pgo.WeightedCG == nil {
I think if we start with inlineHotMaxBudget in the first place then we don't need this change here.
Patch Set #6, Line 839: if _, ok := pgo.ListOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
Only do the check if PGO is enabled.
if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
}
Only do this update in debug mode.
File src/cmd/compile/internal/pgo/graph.go:
The `graph.go` was obtained from https://github.com/google/pprof/tree/main/internal/graph/. […]
As discussed, we should make this code more suitable to our need, without keeping exact copy of pprof/internal/graph. Let's simplify this code. Thanks.
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 69: Line string
Let's discuss this in our meeting.
Reading more of the code, I think we really should move away from using ir.Line. It seems in many places in the code we just need the number, but the code starting with ir.Line, which makes a string, then parse back to number, then sometimes convert to string again, which seems more complex than it should be.
Patch Set #3, Line 143: // Accummulate weights for the same nodes.
Does the graph have duplicate nodes? Can we build a graph without duplicate in the first place?
Ack
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #6, Line 36: Recursive bool
This field doesn't seem to be read anywhere? Maybe remove?
Patch Set #6, Line 38: HotNode bool
This field is defined here but only set and used in the inliner. Maybe move it to the inliner (perhaps as a map)?
Patch Set #6, Line 56: CallSite int
It is still not quite clear what CallSite is. Looks like a line number?
Should this commented-out code be deleted?
callee? (Also, remove quotes)
Patch Set #6, Line 300: line := ir.Line(n)
That we convert the line numbers to string here then parse it back to numbers in addEdge looks quite convoluted. Can we just use the number (for now, even not the relative line number as we eventually want)?
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
1 comment:
File src/cmd/compile/internal/inline/inl.go:
Patch Set #6, Line 97: inlineHotThresholdPercent
Using the same threshold for nodes and edges doesn't seem to work well. For a reasonable large program, the edge percentages are really really small. I think we need a better way to select hot call sites.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
4 comments:
File src/cmd/compile/internal/test/pgo_inl_test.go:
// TestIntendedInlining tests that specific functions are inlined.
// This allows refactoring for code clarity and re-use without fear that
// changes to the compiler will cause silent performance regressions.
This comment needs update.
Profiling doesn't work well on some platforms https://cs.opensource.google/go/go/+/master:src/runtime/pprof/pprof_test.go;l=400 . Maybe we want to skip the test there?
Alternatively, maybe we could use a pre-collected profile (which is pretty small).
Patch Set #6, Line 90: "-run=nope
You can pass "-c" so it only builds the test but does not run it. (Maybe also pass "-o" to make sure the output is in the temp directory.) Also "-tags" and "-timeout" seems not necessary.
File src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go:
Add copyright header.
It doesn't seem necessary to have two files. I think we can put everything in inline_hot_test.go ?
Also, this file seems to contain more things than necessary (e.g. time.Sleep). I think we only need A and BS.NS? Other things (D, N, jn, etc.) probably can be simplified, to make it clear what we're testing.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
1 comment:
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #6, Line 328: threshold
For debugging purposes, I think we should probably print the nodes below the threshold as well. This can be helpful e.g. if we want to adjust the threshold.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
Raj Barik uploaded patch set #7 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
Raj Barik uploaded patch set #8 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
Handled most of the concerns from Cherry
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,143 insertions(+), 44 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.
Raj Barik uploaded patch set #9 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.
16 comments:
File src/cmd/compile/internal/inline/inl.go:
Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
Okay, thanks. As we discussed, only update the graph if a debug flag is set.
Done
Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget
As discussed offline, start with a higher budget, by changing inlineMaxBudget at line 263 (as of Pat […]
Done
File src/cmd/compile/internal/inline/inl.go:
Undo adding the blank line.
Done
Patch Set #6, Line 97: inlineHotThresholdPercent
Using the same threshold for nodes and edges doesn't seem to work well. […]
Separated them out! Take a look.
line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
lineno := fmt.Sprintf("%v", line2)
This looks redundant parsing the string to integer then convert back to string.
Done
Patch Set #6, Line 101: canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
Can we make it keyed on a struct with caller, callee and the line number, without making a string?
Done
Patch Set #6, Line 583: if pgo.WeightedCG == nil {
I think if we start with inlineHotMaxBudget in the first place then we don't need this change here.
Done
Patch Set #6, Line 839: if _, ok := pgo.ListOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
Only do the check if PGO is enabled.
Done
if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
}
Only do this update in debug mode.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 69: Line string
Reading more of the code, I think we really should move away from using ir.Line. […]
changed to integer. should discuss more tomorrow.
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #6, Line 36: Recursive bool
This field doesn't seem to be read anywhere? Maybe remove?
Done
Patch Set #6, Line 38: HotNode bool
This field is defined here but only set and used in the inliner. […]
Done
Patch Set #6, Line 56: CallSite int
It is still not quite clear what CallSite is. […]
changed to integer universally. We should discuss if this is semantically correct.
Should this commented-out code be deleted?
Done
callee? (Also, remove quotes)
Done
Patch Set #6, Line 300: line := ir.Line(n)
That we convert the line numbers to string here then parse it back to numbers in addEdge looks quite […]
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui, Raj Barik.
1 comment:
Patchset:
Did you mean to push a new version? The latest push you did today was identical to the patch set 9.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Raj Barik.
Patch set 10:Run-TryBot +1
8 comments:
Commit Message:
Patch Set #10, Line 7: Enables
Minor: lowercase "enable", and without the "s".
Add "For #55022" to reference the issue.
Patchset:
Thanks. Looks mostly okay. We can iterate and make improvements from here.
File src/cmd/compile/internal/inline/inl.go:
Let's discuss this in our meeting. […]
Ack
File src/cmd/compile/internal/inline/inl.go:
Patch Set #6, Line 97: inlineHotThresholdPercent
Separated them out! Take a look.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #6, Line 56: CallSite int
changed to integer universally. We should discuss if this is semantically correct.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Add a "WORK IN PROGRESS" comment at the top, as we discussed.
File src/cmd/compile/internal/test/pgo_inl_test.go:
As discussed offline, we may want to go with a checked-in profile for the test. Maybe remove this test for now and we can add it back later.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Raj Barik.
1 comment:
Patchset:
29 of 30 TryBots failed. […]
This needs to be fixed before checking it in. This means, for bootstrapping from Go 1.17, we need to not use strings.Cut in internal/profile, but the old strings.Index.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
5 comments:
Patchset:
Some cleanups in the tests...
File src/cmd/compile/internal/inline/inl.go:
Patch Set #6, Line 97: inlineHotThresholdPercent
Separated them out! Take a look.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 69: Line string
changed to integer. should discuss more tomorrow.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #6, Line 56: CallSite int
changed to integer universally. We should discuss if this is semantically correct.
Done
Patch Set #6, Line 328: threshold
For debugging purposes, I think we should probably print the nodes below the threshold as well. […]
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
Raj Barik uploaded patch set #12 to this change.
cmd/compile: enable PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
Raj Barik uploaded patch set #13 to this change.
cmd/compile: enable PGO in Go and performs profile-guided inlining (for issue #55022)
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
8 comments:
Commit Message:
Patch Set #10, Line 7: Enables
Minor: lowercase "enable", and without the "s".
Done
Patchset:
This needs to be fixed before checking it in. This means, for bootstrapping from Go 1. […]
Should this file [perhaps all files in this directory] be synced to files at https://github.com/google/pprof/blob/main/profile ? They do not use Cut.
File src/cmd/compile/internal/pgo/graph.go:
As discussed, we should make this code more suitable to our need, without keeping exact copy of ppro […]
Done
Patch Set #3, Line 34: javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
I think we don't need the non-Go ones.
Done
File src/cmd/compile/internal/pgo/irgraph.go:
nodeinfo.DstName = ""
nodeinfo.CallSite = -1
if weights, ok := GlobalNodeMap[nodeinfo]; ok {
node1.Flat = weights.NWeight
node1.Cum = weights.NTotalWeight
info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
g.OutEdges[node1] = append(g.OutEdges[node1], info)
g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
} else {
info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
g.OutEdges[node1] = append(g.OutEdges[node1], info)
g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
}
}
This else branch needs some comments.
Done
File src/cmd/compile/internal/test/pgo_inl_test.go:
// TestIntendedInlining tests that specific functions are inlined.
// This allows refactoring for code clarity and re-use without fear that
// changes to the compiler will cause silent performance regressions.
This comment needs update.
Done
Profiling doesn't work well on some platforms https://cs.opensource. […]
Done
File src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go:
Add copyright header. […]
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Raj Barik.
Patch set 13:Run-TryBot +1
1 comment:
Patchset:
Should this file [perhaps all files in this directory] be synced to files at https://github. […]
No, at least not in this CL. Just do the minimal change to fix the build.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Raj Barik.
3 comments:
Commit Message:
Add "For #55022" to reference the issue.
To clarify, the commit message should look like (and Gerrit recognizes #NNNN as an issue link; you don't have to do anything to make that a link):
cmd/compile: enable PGO in Go and perform profile-guided inlining
For #55022
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 112: pProfGraph := New(p, opt)
This is interesting and needs modifications to `graph.go`, which I am not sure if we should do. […]
We can do this in a follow-up CL
File src/cmd/compile/internal/test/pgo_inl_test.go:
As discussed offline, we may want to go with a checked-in profile for the test. […]
I think it would be okay to do this in a follow-up CL.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cherry Mui, Raj Barik.
Raj Barik uploaded patch set #14 to this change.
The following approvals got outdated and were removed: Run-TryBot+1 by Cherry Mui, TryBot-Result-1 by Gopher Robot
cmd/compile: Enables PGO in Go and performs profile-guided inlining
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,022 insertions(+), 45 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cherry Mui, Raj Barik.
Raj Barik uploaded patch set #15 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,023 insertions(+), 45 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
3 comments:
Commit Message:
To clarify, the commit message should look like (and Gerrit recognizes #NNNN as an issue link; you d […]
Done
File src/cmd/compile/internal/pgo/irgraph.go:
Patch Set #3, Line 112: pProfGraph := New(p, opt)
We can do this in a follow-up CL
Done
File src/cmd/compile/internal/test/pgo_inl_test.go:
I think it would be okay to do this in a follow-up CL.
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
2 comments:
File src/cmd/compile/internal/pgo/irgraph.go:
Add a "WORK IN PROGRESS" comment at the top, as we discussed.
Done
File src/cmd/compile/internal/test/pgo_inl_test.go:
Patch Set #6, Line 90: "-run=nope
You can pass "-c" so it only builds the test but does not run it. […]
Ack
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
Raj Barik uploaded patch set #17 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,025 insertions(+), 45 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui.
Raj Barik uploaded patch set #18 to this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,025 insertions(+), 45 deletions(-)
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Raj Barik.
Patch set 18:Run-TryBot +1
Attention is currently required from: Austin Clements, Raj Barik.
Patch set 18:Code-Review +2
1 comment:
Patchset:
Thanks. LGTM for the first round. There are things we can improve. We can iterate and improve from here.
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Austin Clements, Cherry Mui, Raj Barik.
Patchset:
No, at least not in this CL. Just do the minimal change to fix the build.
Done
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.
Michael Pratt submitted this change.
cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022
Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
Reviewed-on: https://go-review.googlesource.com/c/go/+/429863
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Cherry Mui <cher...@google.com>
Reviewed-by: Michael Pratt <mpr...@google.com>
Run-TryBot: Cherry Mui <cher...@google.com>
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,030 insertions(+), 45 deletions(-)
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index 32a45d7..ba21491 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -16,35 +16,39 @@
// The -d option takes a comma-separated list of settings.
// Each setting is name=value; for ints, name is short for name=1.
type DebugFlags struct {
- Append int `help:"print information about append compilation"`
- Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
- Closure int `help:"print information about closure compilation"`
- DclStack int `help:"run internal dclstack check"`
- Defer int `help:"print information about defer compilation"`
- DisableNil int `help:"disable nil checks"`
- DumpPtrs int `help:"show Node pointers values in dump output"`
- DwarfInl int `help:"print information about DWARF inlined function creation"`
- Export int `help:"print export data"`
- GCProg int `help:"print dump of GC programs"`
- InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
- Libfuzzer int `help:"enable coverage instrumentation for libfuzzer"`
- LocationLists int `help:"print information about DWARF location list creation"`
- Nil int `help:"print information about nil checks"`
- NoOpenDefer int `help:"disable open-coded defers"`
- NoRefName int `help:"do not include referenced symbol names in object file"`
- PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
- Panic int `help:"show all compiler panics"`
- Reshape int `help:"print information about expression reshaping"`
- Shapify int `help:"print information about shaping recursive types"`
- Slice int `help:"print information about slice compilation"`
- SoftFloat int `help:"force compiler to emit soft-float code"`
- SyncFrames int `help:"how many writer stack frames to include at sync points in unified export data"`
- TypeAssert int `help:"print information about type assertion inlining"`
- TypecheckInl int `help:"eager typechecking of inline function bodies"`
- Unified int `help:"enable unified IR construction"`
- WB int `help:"print information about write barriers"`
- ABIWrap int `help:"print information about ABI wrapper generation"`
- MayMoreStack string `help:"call named function before all stack growth checks"`
+ Append int `help:"print information about append compilation"`
+ Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
+ Closure int `help:"print information about closure compilation"`
+ DclStack int `help:"run internal dclstack check"`
+ Defer int `help:"print information about defer compilation"`
+ DisableNil int `help:"disable nil checks"`
+ DumpPtrs int `help:"show Node pointers values in dump output"`
+ DwarfInl int `help:"print information about DWARF inlined function creation"`
+ Export int `help:"print export data"`
+ GCProg int `help:"print dump of GC programs"`
+ InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
+ Libfuzzer int `help:"enable coverage instrumentation for libfuzzer"`
+ LocationLists int `help:"print information about DWARF location list creation"`
+ Nil int `help:"print information about nil checks"`
+ NoOpenDefer int `help:"disable open-coded defers"`
+ NoRefName int `help:"do not include referenced symbol names in object file"`
+ PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
+ Panic int `help:"show all compiler panics"`
+ Reshape int `help:"print information about expression reshaping"`
+ Shapify int `help:"print information about shaping recursive types"`
+ Slice int `help:"print information about slice compilation"`
+ SoftFloat int `help:"force compiler to emit soft-float code"`
+ SyncFrames int `help:"how many writer stack frames to include at sync points in unified export data"`
+ TypeAssert int `help:"print information about type assertion inlining"`
+ TypecheckInl int `help:"eager typechecking of inline function bodies"`
+ Unified int `help:"enable unified IR construction"`
+ WB int `help:"print information about write barriers"`
+ ABIWrap int `help:"print information about ABI wrapper generation"`
+ MayMoreStack string `help:"call named function before all stack growth checks"`
+ InlineHotFuncThreshold string `help:"threshold percentage for determining functions as hot candidates for inlining"`
+ InlineHotCallSiteThreshold string `help:"threshold percentage for determining call sites as hot candidates for inlining"`
+ InlineHotBudget int `help:"inline budget for hot functions"`
+ PGOInline int `help:"debug profile-guided inlining"`
Any bool // set when any of the debug flags have been set
}
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index 3e9d86c..e6df6b6 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -124,6 +124,7 @@
TrimPath string "help:\"remove `prefix` from recorded source file paths\""
WB bool "help:\"enable write barrier\"" // TODO: remove
AltComparable bool "help:\"enable alternative comparable semantics\"" // experiment - remove eventually
+ PgoProfile string "help:\"read profile from `file`\""
// Configuration derived from flags; not a flag itself.
Cfg struct {
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 2fbf2f4..5633f1f 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -17,6 +17,7 @@
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
"cmd/compile/internal/noder"
+ "cmd/compile/internal/pgo"
"cmd/compile/internal/pkginit"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/ssa"
@@ -249,10 +250,26 @@
typecheck.AllImportedBodies()
}
+ // Read profile file and build profile-graph and weighted-call-graph.
+ base.Timer.Start("fe", "pgoprofile")
+ if base.Flag.PgoProfile != "" {
+ pgo.BuildProfileGraph(base.Flag.PgoProfile)
+ pgo.BuildWeightedCallGraph()
+ }
+
// Inlining
base.Timer.Start("fe", "inlining")
if base.Flag.LowerL != 0 {
+ if pgo.WeightedCG != nil {
+ inline.InlinePrologue()
+ }
inline.InlinePackage()
+ if pgo.WeightedCG != nil {
+ inline.InlineEpilogue()
+ // Delete the graphs as no other optimization uses this currently.
+ pgo.WeightedCG = nil
+ pgo.ProfileGraph = nil
+ }
}
noder.MakeWrappers(typecheck.Target) // must happen after inlining
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 5e14a87..4d942d0 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -29,11 +29,13 @@
import (
"fmt"
"go/constant"
+ "strconv"
"strings"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
+ "cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/obj"
@@ -53,6 +55,91 @@
inlineBigFunctionMaxCost = 20 // Max cost of inlinee when inlining into a "big" function.
)
+var (
+ // List of all hot ndes.
+ candHotNodeMap = make(map[*pgo.IRNode]struct{})
+
+ // List of all hot call sites.
+ candHotEdgeMap = make(map[pgo.CallSiteInfo]struct{})
+
+ // List of inlined call sites.
+ inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+ // Threshold in percentage for hot function inlining.
+ inlineHotFuncThresholdPercent = float64(2)
+
+ // Threshold in percentage for hot callsite inlining.
+ inlineHotCallSiteThresholdPercent = float64(0.1)
+
+ // Budget increased due to hotness.
+ inlineHotMaxBudget int32 = 160
+)
+
+// InlinePrologue records the hot callsites from ir-graph.
+func InlinePrologue() {
+ if s, err := strconv.ParseFloat(base.Debug.InlineHotFuncThreshold, 64); err == nil {
+ inlineHotFuncThresholdPercent = s
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-node-thres=%v\n", inlineHotFuncThresholdPercent)
+ }
+ }
+
+ if s, err := strconv.ParseFloat(base.Debug.InlineHotCallSiteThreshold, 64); err == nil {
+ inlineHotCallSiteThresholdPercent = s
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-callsite-thres=%v\n", inlineHotCallSiteThresholdPercent)
+ }
+ }
+
+ if base.Debug.InlineHotBudget != 0 {
+ inlineHotMaxBudget = int32(base.Debug.InlineHotBudget)
+ }
+
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+ nodeweight := pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
+ if nodeweight > inlineHotFuncThresholdPercent {
+ candHotNodeMap[n] = struct{}{}
+ }
+ for _, e := range pgo.WeightedCG.OutEdges[n] {
+ if e.Weight != 0 {
+ edgeweightpercent := pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
+ if edgeweightpercent > inlineHotCallSiteThresholdPercent {
+ csi := pgo.CallSiteInfo{Line: e.CallSite, Caller: n.AST, Callee: e.Dst.AST}
+ if _, ok := candHotEdgeMap[csi]; !ok {
+ candHotEdgeMap[csi] = struct{}{}
+ }
+ }
+ }
+ }
+ }
+ }
+ })
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-cg before inline in dot format:")
+ pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
+ }
+}
+
+// InlineEpilogue updates IRGraph after inlining.
+func InlineEpilogue() {
+ if base.Debug.PGOInline > 0 {
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+ pgo.RedirectEdges(n, inlinedCallSites)
+ }
+ }
+ })
+ // Print the call-graph after inlining. This is a debugging feature.
+ fmt.Printf("hot-cg after inline in dot:")
+ pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
+ }
+}
+
// InlinePackage finds functions that can be inlined and clones them before walk expands them.
func InlinePackage() {
ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
@@ -81,6 +168,9 @@
base.Fatalf("CanInline no nname %+v", fn)
}
+ // Initialize an empty list of hot callsites for this caller.
+ pgo.ListOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
var reason string // reason, if any, that the function was not inlined
if base.Flag.LowerM > 1 || logopt.Enabled() {
defer func() {
@@ -168,6 +258,19 @@
cc = 1 // this appears to yield better performance than 0.
}
+ // Update the budget for profile-guided inlining.
+ budget := int32(inlineMaxBudget)
+ if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil {
+ if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
+ if _, ok := candHotNodeMap[n]; ok {
+ budget = int32(inlineHotMaxBudget)
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
+ }
+ }
+ }
+ }
+
// At this point in the game the function we're looking at may
// have "stale" autos, vars that still appear in the Dcl list, but
// which no longer have any uses in the function body (due to
@@ -178,7 +281,9 @@
// list. See issue 25249 for more context.
visitor := hairyVisitor{
- budget: inlineMaxBudget,
+ curFunc: fn,
+ budget: budget,
+ maxBudget: budget,
extraCallCost: cc,
}
if visitor.tooHairy(fn) {
@@ -187,7 +292,7 @@
}
n.Func.Inl = &ir.Inline{
- Cost: inlineMaxBudget - visitor.budget,
+ Cost: budget - visitor.budget,
Dcl: pruneUnusedAutos(n.Defn.(*ir.Func).Dcl, &visitor),
Body: inlcopylist(fn.Body),
@@ -195,12 +300,12 @@
}
if base.Flag.LowerM > 1 {
- fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, inlineMaxBudget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
+ fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, budget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
} else if base.Flag.LowerM != 0 {
fmt.Printf("%v: can inline %v\n", ir.Line(fn), n)
}
if logopt.Enabled() {
- logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", inlineMaxBudget-visitor.budget))
+ logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", budget-visitor.budget))
}
}
@@ -239,7 +344,10 @@
// hairyVisitor visits a function body to determine its inlining
// hairiness and whether or not it can be inlined.
type hairyVisitor struct {
+ // This is needed to access the current caller in the doNode function.
+ curFunc *ir.Func
budget int32
+ maxBudget int32
reason string
extraCallCost int32
usedLocals ir.NameSet
@@ -252,7 +360,7 @@
return true
}
if v.budget < 0 {
- v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
+ v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", v.maxBudget-v.budget, v.maxBudget)
return true
}
return false
@@ -330,6 +438,20 @@
}
}
+ // Determine if the callee edge is a for hot callee or not.
+ if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil && v.curFunc != nil {
+ if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
+ ln := pgo.ConvertLine2Int(ir.Line(n))
+ csi := pgo.CallSiteInfo{Line: ln, Caller: v.curFunc, Callee: fn}
+ if _, o := candHotEdgeMap[csi]; o {
+ pgo.ListOfHotCallSites[pgo.CallSiteInfo{Line: ln, Caller: v.curFunc}] = struct{}{}
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(v.curFunc))
+ }
+ }
+ }
+ }
+
if ir.IsIntrinsicCall(n) {
// Treat like any other node.
break
@@ -750,13 +872,29 @@
return n
}
if fn.Inl.Cost > maxCost {
- // The inlined function body is too big. Typically we use this check to restrict
- // inlining into very big functions. See issue 26546 and 17566.
- if logopt.Enabled() {
- logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
- fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
+ // If the callsite is hot and it is under the inlineHotMaxBudget budget, then try to inline it, or else bail.
+ ln := pgo.ConvertLine2Int(ir.Line(n))
+ csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
+ if _, ok := pgo.ListOfHotCallSites[csi]; ok {
+ if fn.Inl.Cost > inlineHotMaxBudget {
+ if logopt.Enabled() {
+ logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
+ fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), inlineHotMaxBudget))
+ }
+ return n
+ }
+ if base.Debug.PGOInline > 0 {
+ fmt.Printf("hot-budget check allows inlining for callsite at %v\n", ir.Line(n))
+ }
+ } else {
+ // The inlined function body is too big. Typically we use this check to restrict
+ // inlining into very big functions. See issue 26546 and 17566.
+ if logopt.Enabled() {
+ logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
+ fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
+ }
+ return n
}
- return n
}
if fn == ir.CurFunc {
@@ -899,7 +1037,16 @@
fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
}
+ if base.Debug.PGOInline > 0 {
+ ln := pgo.ConvertLine2Int(ir.Line(n))
+ csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
+ if _, ok := inlinedCallSites[csi]; !ok {
+ inlinedCallSites[csi] = struct{}{}
+ }
+ }
+
res := InlineCall(n, fn, inlIndex)
+
if res == nil {
base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
}
diff --git a/src/cmd/compile/internal/pgo/graph.go b/src/cmd/compile/internal/pgo/graph.go
new file mode 100644
index 0000000..d7b9432
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/graph.go
@@ -0,0 +1,1033 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package graph collects a set of samples into a directed graph.
+
+// Original file location: https://github.com/google/pprof/tree/main/internal/graph/graph.go
+package pgo
+
+import (
+ "fmt"
+ "internal/profile"
+ "math"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+const maxNodelets = 4 // Number of nodelets for labels (both numeric and non)
+
+// Options encodes the options for constructing a graph
+type Options struct {
+ SampleValue func(s []int64) int64 // Function to compute the value of a sample
+ SampleMeanDivisor func(s []int64) int64 // Function to compute the divisor for mean graphs, or nil
+ FormatTag func(int64, string) string // Function to format a sample tag value into a string
+ ObjNames bool // Always preserve obj filename
+ OrigFnNames bool // Preserve original (eg mangled) function names
+
+ CallTree bool // Build a tree instead of a graph
+ DropNegative bool // Drop nodes with overall negative values
+
+ KeptNodes NodeSet // If non-nil, only use nodes in this set
+}
+
+// Nodes is an ordered collection of graph nodes.
+type Nodes []*Node
+
+// Node is an entry on a profiling report. It represents a unique
+// program location.
+type Node struct {
+ // Info describes the source location associated to this node.
+ Info NodeInfo
+
+ // Function represents the function that this node belongs to. On
+ // graphs with sub-function resolution (eg line number or
+ // addresses), two nodes in a NodeMap that are part of the same
+ // function have the same value of Node.Function. If the Node
+ // represents the whole function, it points back to itself.
+ Function *Node
+
+ // Values associated to this node. Flat is exclusive to this node,
+ // Cum includes all descendents.
+ Flat, FlatDiv, Cum, CumDiv int64
+
+ // In and out Contains the nodes immediately reaching or reached by
+ // this node.
+ In, Out EdgeMap
+
+ // LabelTags provide additional information about subsets of a sample.
+ LabelTags TagMap
+
+ // NumericTags provide additional values for subsets of a sample.
+ // Numeric tags are optionally associated to a label tag. The key
+ // for NumericTags is the name of the LabelTag they are associated
+ // to, or "" for numeric tags not associated to a label tag.
+ NumericTags map[string]TagMap
+}
+
+// Graph summarizes a performance profile into a format that is
+// suitable for visualization.
+type Graph struct {
+ Nodes Nodes
+}
+
+// FlatValue returns the exclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) FlatValue() int64 {
+ if n.FlatDiv == 0 {
+ return n.Flat
+ }
+ return n.Flat / n.FlatDiv
+}
+
+// CumValue returns the inclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) CumValue() int64 {
+ if n.CumDiv == 0 {
+ return n.Cum
+ }
+ return n.Cum / n.CumDiv
+}
+
+// AddToEdge increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) {
+ n.AddToEdgeDiv(to, 0, v, residual, inline)
+}
+
+// AddToEdgeDiv increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) {
+ if n.Out[to] != to.In[n] {
+ panic(fmt.Errorf("asymmetric edges %v %v", *n, *to))
+ }
+
+ if e := n.Out[to]; e != nil {
+ e.WeightDiv += dv
+ e.Weight += v
+ if residual {
+ e.Residual = true
+ }
+ if !inline {
+ e.Inline = false
+ }
+ return
+ }
+
+ info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline}
+ n.Out[to] = info
+ to.In[n] = info
+}
+
+// NodeInfo contains the attributes for a node.
+type NodeInfo struct {
+ Name string
+ OrigName string
+ Address uint64
+ File string
+ StartLine, Lineno int
+ Objfile string
+}
+
+// PrintableName calls the Node's Formatter function with a single space separator.
+func (i *NodeInfo) PrintableName() string {
+ return strings.Join(i.NameComponents(), " ")
+}
+
+// NameComponents returns the components of the printable name to be used for a node.
+func (i *NodeInfo) NameComponents() []string {
+ var name []string
+ if i.Address != 0 {
+ name = append(name, fmt.Sprintf("%016x", i.Address))
+ }
+ if fun := i.Name; fun != "" {
+ name = append(name, fun)
+ }
+
+ switch {
+ case i.Lineno != 0:
+ // User requested line numbers, provide what we have.
+ name = append(name, fmt.Sprintf("%s:%d", i.File, i.Lineno))
+ case i.File != "":
+ // User requested file name, provide it.
+ name = append(name, i.File)
+ case i.Name != "":
+ // User requested function name. It was already included.
+ case i.Objfile != "":
+ // Only binary name is available
+ name = append(name, "["+filepath.Base(i.Objfile)+"]")
+ default:
+ // Do not leave it empty if there is no information at all.
+ name = append(name, "<unknown>")
+ }
+ return name
+}
+
+// NodeMap maps from a node info struct to a node. It is used to merge
+// report entries with the same info.
+type NodeMap map[NodeInfo]*Node
+
+// NodeSet is a collection of node info structs.
+type NodeSet map[NodeInfo]bool
+
+// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set
+// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo
+// works as a unique identifier; however, in a tree multiple nodes may share
+// identical NodeInfos. A *Node does uniquely identify a node so we can use that
+// instead. Though a *Node also uniquely identifies a node in a graph,
+// currently, during trimming, graphs are rebuilt from scratch using only the
+// NodeSet, so there would not be the required context of the initial graph to
+// allow for the use of *Node.
+type NodePtrSet map[*Node]bool
+
+// FindOrInsertNode takes the info for a node and either returns a matching node
+// from the node map if one exists, or adds one to the map if one does not.
+// If kept is non-nil, nodes are only added if they can be located on it.
+func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node {
+ if kept != nil {
+ if _, ok := kept[info]; !ok {
+ return nil
+ }
+ }
+
+ if n, ok := nm[info]; ok {
+ return n
+ }
+
+ n := &Node{
+ Info: info,
+ In: make(EdgeMap),
+ Out: make(EdgeMap),
+ LabelTags: make(TagMap),
+ NumericTags: make(map[string]TagMap),
+ }
+ nm[info] = n
+ if info.Address == 0 && info.Lineno == 0 {
+ // This node represents the whole function, so point Function
+ // back to itself.
+ n.Function = n
+ return n
+ }
+ // Find a node that represents the whole function.
+ info.Address = 0
+ info.Lineno = 0
+ n.Function = nm.FindOrInsertNode(info, nil)
+ return n
+}
+
+// EdgeMap is used to represent the incoming/outgoing edges from a node.
+type EdgeMap map[*Node]*Edge
+
+// Edge contains any attributes to be represented about edges in a graph.
+type Edge struct {
+ Src, Dest *Node
+ // The summary weight of the edge
+ Weight, WeightDiv int64
+
+ // residual edges connect nodes that were connected through a
+ // separate node, which has been removed from the report.
+ Residual bool
+ // An inline edge represents a call that was inlined into the caller.
+ Inline bool
+}
+
+// WeightValue returns the weight value for this edge, normalizing if a
+// divisor is available.
+func (e *Edge) WeightValue() int64 {
+ if e.WeightDiv == 0 {
+ return e.Weight
+ }
+ return e.Weight / e.WeightDiv
+}
+
+// Tag represent sample annotations
+type Tag struct {
+ Name string
+ Unit string // Describe the value, "" for non-numeric tags
+ Value int64
+ Flat, FlatDiv int64
+ Cum, CumDiv int64
+}
+
+// FlatValue returns the exclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) FlatValue() int64 {
+ if t.FlatDiv == 0 {
+ return t.Flat
+ }
+ return t.Flat / t.FlatDiv
+}
+
+// CumValue returns the inclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) CumValue() int64 {
+ if t.CumDiv == 0 {
+ return t.Cum
+ }
+ return t.Cum / t.CumDiv
+}
+
+// TagMap is a collection of tags, classified by their name.
+type TagMap map[string]*Tag
+
+// SortTags sorts a slice of tags based on their weight.
+func SortTags(t []*Tag, flat bool) []*Tag {
+ ts := tags{t, flat}
+ sort.Sort(ts)
+ return ts.t
+}
+
+// New summarizes performance data from a profile into a graph.
+func New(prof *profile.Profile, o *Options) *Graph {
+ if o.CallTree {
+ return newTree(prof, o)
+ }
+ g, _ := newGraph(prof, o)
+ return g
+}
+
+// newGraph computes a graph from a profile. It returns the graph, and
+// a map from the profile location indices to the corresponding graph
+// nodes.
+func newGraph(prof *profile.Profile, o *Options) (*Graph, map[uint64]Nodes) {
+ nodes, locationMap := CreateNodes(prof, o)
+ seenNode := make(map[*Node]bool)
+ seenEdge := make(map[nodePair]bool)
+ for _, sample := range prof.Sample {
+ var w, dw int64
+ w = o.SampleValue(sample.Value)
+ if o.SampleMeanDivisor != nil {
+ dw = o.SampleMeanDivisor(sample.Value)
+ }
+ if dw == 0 && w == 0 {
+ continue
+ }
+ for k := range seenNode {
+ delete(seenNode, k)
+ }
+ for k := range seenEdge {
+ delete(seenEdge, k)
+ }
+ var parent *Node
+ // A residual edge goes over one or more nodes that were not kept.
+ residual := false
+
+ labels := joinLabels(sample)
+ // Group the sample frames, based on a global map.
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ l := sample.Location[i]
+ locNodes := locationMap[l.ID]
+ for ni := len(locNodes) - 1; ni >= 0; ni-- {
+ n := locNodes[ni]
+ if n == nil {
+ residual = true
+ continue
+ }
+ // Add cum weight to all nodes in stack, avoiding double counting.
+ if _, ok := seenNode[n]; !ok {
+ seenNode[n] = true
+ n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+ }
+ // Update edge weights for all edges in stack, avoiding double counting.
+ if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent {
+ seenEdge[nodePair{n, parent}] = true
+ parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1)
+ }
+ parent = n
+ residual = false
+ }
+ }
+ if parent != nil && !residual {
+ // Add flat weight to leaf node.
+ parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+ }
+ }
+
+ return selectNodesForGraph(nodes, o.DropNegative), locationMap
+}
+
+func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph {
+ // Collect nodes into a graph.
+ gNodes := make(Nodes, 0, len(nodes))
+ for _, n := range nodes {
+ if n == nil {
+ continue
+ }
+ if n.Cum == 0 && n.Flat == 0 {
+ continue
+ }
+ if dropNegative && isNegative(n) {
+ continue
+ }
+ gNodes = append(gNodes, n)
+ }
+ return &Graph{gNodes}
+}
+
+type nodePair struct {
+ src, dest *Node
+}
+
+func newTree(prof *profile.Profile, o *Options) (g *Graph) {
+ parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample))
+ for _, sample := range prof.Sample {
+ var w, dw int64
+ w = o.SampleValue(sample.Value)
+ if o.SampleMeanDivisor != nil {
+ dw = o.SampleMeanDivisor(sample.Value)
+ }
+ if dw == 0 && w == 0 {
+ continue
+ }
+ var parent *Node
+ labels := joinLabels(sample)
+ // Group the sample frames, based on a per-node map.
+ for i := len(sample.Location) - 1; i >= 0; i-- {
+ l := sample.Location[i]
+ lines := l.Line
+ if len(lines) == 0 {
+ lines = []profile.Line{{}} // Create empty line to include location info.
+ }
+ for lidx := len(lines) - 1; lidx >= 0; lidx-- {
+ nodeMap := parentNodeMap[parent]
+ if nodeMap == nil {
+ nodeMap = make(NodeMap)
+ parentNodeMap[parent] = nodeMap
+ }
+ n := nodeMap.findOrInsertLine(l, lines[lidx], o)
+ if n == nil {
+ continue
+ }
+ n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+ if parent != nil {
+ parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1)
+ }
+ parent = n
+ }
+ }
+ if parent != nil {
+ parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+ }
+ }
+
+ nodes := make(Nodes, len(prof.Location))
+ for _, nm := range parentNodeMap {
+ nodes = append(nodes, nm.nodes()...)
+ }
+ return selectNodesForGraph(nodes, o.DropNegative)
+}
+
+func joinLabels(s *profile.Sample) string {
+ if len(s.Label) == 0 {
+ return ""
+ }
+
+ var labels []string
+ for key, vals := range s.Label {
+ for _, v := range vals {
+ labels = append(labels, key+":"+v)
+ }
+ }
+ sort.Strings(labels)
+ return strings.Join(labels, `\n`)
+}
+
+// isNegative returns true if the node is considered as "negative" for the
+// purposes of drop_negative.
+func isNegative(n *Node) bool {
+ switch {
+ case n.Flat < 0:
+ return true
+ case n.Flat == 0 && n.Cum < 0:
+ return true
+ default:
+ return false
+ }
+}
+
+// CreateNodes creates graph nodes for all locations in a profile. It
+// returns set of all nodes, plus a mapping of each location to the
+// set of corresponding nodes (one per location.Line).
+func CreateNodes(prof *profile.Profile, o *Options) (Nodes, map[uint64]Nodes) {
+ locations := make(map[uint64]Nodes, len(prof.Location))
+ nm := make(NodeMap, len(prof.Location))
+ for _, l := range prof.Location {
+ lines := l.Line
+ if len(lines) == 0 {
+ lines = []profile.Line{{}} // Create empty line to include location info.
+ }
+ nodes := make(Nodes, len(lines))
+ for ln := range lines {
+ nodes[ln] = nm.findOrInsertLine(l, lines[ln], o)
+ }
+ locations[l.ID] = nodes
+ }
+ return nm.nodes(), locations
+}
+
+func (nm NodeMap) nodes() Nodes {
+ nodes := make(Nodes, 0, len(nm))
+ for _, n := range nm {
+ nodes = append(nodes, n)
+ }
+ return nodes
+}
+
+func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node {
+ var objfile string
+ if m := l.Mapping; m != nil && m.File != "" {
+ objfile = m.File
+ }
+
+ if ni := nodeInfo(l, li, objfile, o); ni != nil {
+ return nm.FindOrInsertNode(*ni, o.KeptNodes)
+ }
+ return nil
+}
+
+func nodeInfo(l *profile.Location, line profile.Line, objfile string, o *Options) *NodeInfo {
+ if line.Function == nil {
+ return &NodeInfo{Address: l.Address, Objfile: objfile}
+ }
+ ni := &NodeInfo{
+ Address: l.Address,
+ Lineno: int(line.Line),
+ Name: line.Function.Name,
+ }
+ if fname := line.Function.Filename; fname != "" {
+ ni.File = filepath.Clean(fname)
+ }
+ if o.OrigFnNames {
+ ni.OrigName = line.Function.SystemName
+ }
+ if o.ObjNames || (ni.Name == "" && ni.OrigName == "") {
+ ni.Objfile = objfile
+ ni.StartLine = int(line.Function.StartLine)
+ }
+ return ni
+}
+
+type tags struct {
+ t []*Tag
+ flat bool
+}
+
+func (t tags) Len() int { return len(t.t) }
+func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] }
+func (t tags) Less(i, j int) bool {
+ if !t.flat {
+ if t.t[i].Cum != t.t[j].Cum {
+ return abs64(t.t[i].Cum) > abs64(t.t[j].Cum)
+ }
+ }
+ if t.t[i].Flat != t.t[j].Flat {
+ return abs64(t.t[i].Flat) > abs64(t.t[j].Flat)
+ }
+ return t.t[i].Name < t.t[j].Name
+}
+
+// Sum adds the flat and cum values of a set of nodes.
+func (ns Nodes) Sum() (flat int64, cum int64) {
+ for _, n := range ns {
+ flat += n.Flat
+ cum += n.Cum
+ }
+ return
+}
+
+func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) {
+ // Update sample value
+ if flat {
+ n.FlatDiv += dw
+ n.Flat += w
+ } else {
+ n.CumDiv += dw
+ n.Cum += w
+ }
+
+ // Add string tags
+ if labels != "" {
+ t := n.LabelTags.findOrAddTag(labels, "", 0)
+ if flat {
+ t.FlatDiv += dw
+ t.Flat += w
+ } else {
+ t.CumDiv += dw
+ t.Cum += w
+ }
+ }
+
+ numericTags := n.NumericTags[labels]
+ if numericTags == nil {
+ numericTags = TagMap{}
+ n.NumericTags[labels] = numericTags
+ }
+ // Add numeric tags
+ if format == nil {
+ format = defaultLabelFormat
+ }
+ for k, nvals := range numLabel {
+ units := numUnit[k]
+ for i, v := range nvals {
+ var t *Tag
+ if len(units) > 0 {
+ t = numericTags.findOrAddTag(format(v, units[i]), units[i], v)
+ } else {
+ t = numericTags.findOrAddTag(format(v, k), k, v)
+ }
+ if flat {
+ t.FlatDiv += dw
+ t.Flat += w
+ } else {
+ t.CumDiv += dw
+ t.Cum += w
+ }
+ }
+ }
+}
+
+func defaultLabelFormat(v int64, key string) string {
+ return strconv.FormatInt(v, 10)
+}
+
+func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag {
+ l := m[label]
+ if l == nil {
+ l = &Tag{
+ Name: label,
+ Unit: unit,
+ Value: value,
+ }
+ m[label] = l
+ }
+ return l
+}
+
+// String returns a text representation of a graph, for debugging purposes.
+func (g *Graph) String() string {
+ var s []string
+
+ nodeIndex := make(map[*Node]int, len(g.Nodes))
+
+ for i, n := range g.Nodes {
+ nodeIndex[n] = i + 1
+ }
+
+ for i, n := range g.Nodes {
+ name := n.Info.PrintableName()
+ var in, out []int
+
+ for _, from := range n.In {
+ in = append(in, nodeIndex[from.Src])
+ }
+ for _, to := range n.Out {
+ out = append(out, nodeIndex[to.Dest])
+ }
+ s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out))
+ }
+ return strings.Join(s, "\n")
+}
+
+// DiscardLowFrequencyNodes returns a set of the nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet {
+ return makeNodeSet(g.Nodes, nodeCutoff)
+}
+
+// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet {
+ cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff)
+ kept := make(NodePtrSet, len(cutNodes))
+ for _, n := range cutNodes {
+ kept[n] = true
+ }
+ return kept
+}
+
+func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet {
+ cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff)
+ kept := make(NodeSet, len(cutNodes))
+ for _, n := range cutNodes {
+ kept[n.Info] = true
+ }
+ return kept
+}
+
+// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater
+// than or equal to cutoff.
+func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes {
+ cutoffNodes := make(Nodes, 0, len(nodes))
+ for _, n := range nodes {
+ if abs64(n.Cum) < nodeCutoff {
+ continue
+ }
+ cutoffNodes = append(cutoffNodes, n)
+ }
+ return cutoffNodes
+}
+
+// TrimLowFrequencyTags removes tags that have less than
+// the specified weight.
+func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) {
+ // Remove nodes with value <= total*nodeFraction
+ for _, n := range g.Nodes {
+ n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff)
+ for s, nt := range n.NumericTags {
+ n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff)
+ }
+ }
+}
+
+func trimLowFreqTags(tags TagMap, minValue int64) TagMap {
+ kept := TagMap{}
+ for s, t := range tags {
+ if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue {
+ kept[s] = t
+ }
+ }
+ return kept
+}
+
+// TrimLowFrequencyEdges removes edges that have less than
+// the specified weight. Returns the number of edges removed
+func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int {
+ var droppedEdges int
+ for _, n := range g.Nodes {
+ for src, e := range n.In {
+ if abs64(e.Weight) < edgeCutoff {
+ delete(n.In, src)
+ delete(src.Out, n)
+ droppedEdges++
+ }
+ }
+ }
+ return droppedEdges
+}
+
+// SortNodes sorts the nodes in a graph based on a specific heuristic.
+func (g *Graph) SortNodes(cum bool, visualMode bool) {
+ // Sort nodes based on requested mode
+ switch {
+ case visualMode:
+ // Specialized sort to produce a more visually-interesting graph
+ g.Nodes.Sort(EntropyOrder)
+ case cum:
+ g.Nodes.Sort(CumNameOrder)
+ default:
+ g.Nodes.Sort(FlatNameOrder)
+ }
+}
+
+// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph.
+func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet {
+ set := make(NodePtrSet)
+ for _, node := range g.selectTopNodes(maxNodes, visualMode) {
+ set[node] = true
+ }
+ return set
+}
+
+// SelectTopNodes returns a set of the top maxNodes nodes in a graph.
+func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet {
+ return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0)
+}
+
+// selectTopNodes returns a slice of the top maxNodes nodes in a graph.
+func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes {
+ if maxNodes > 0 {
+ if visualMode {
+ var count int
+ // If generating a visual graph, count tags as nodes. Update
+ // maxNodes to account for them.
+ for i, n := range g.Nodes {
+ tags := countTags(n)
+ if tags > maxNodelets {
+ tags = maxNodelets
+ }
+ if count += tags + 1; count >= maxNodes {
+ maxNodes = i + 1
+ break
+ }
+ }
+ }
+ }
+ if maxNodes > len(g.Nodes) {
+ maxNodes = len(g.Nodes)
+ }
+ return g.Nodes[:maxNodes]
+}
+
+// countTags counts the tags with flat count. This underestimates the
+// number of tags being displayed, but in practice is close enough.
+func countTags(n *Node) int {
+ count := 0
+ for _, e := range n.LabelTags {
+ if e.Flat != 0 {
+ count++
+ }
+ }
+ for _, t := range n.NumericTags {
+ for _, e := range t {
+ if e.Flat != 0 {
+ count++
+ }
+ }
+ }
+ return count
+}
+
+// nodeSorter is a mechanism used to allow a report to be sorted
+// in different ways.
+type nodeSorter struct {
+ rs Nodes
+ less func(l, r *Node) bool
+}
+
+func (s nodeSorter) Len() int { return len(s.rs) }
+func (s nodeSorter) Swap(i, j int) { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] }
+func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) }
+
+// Sort reorders a slice of nodes based on the specified ordering
+// criteria. The result is sorted in decreasing order for (absolute)
+// numeric quantities, alphabetically for text, and increasing for
+// addresses.
+func (ns Nodes) Sort(o NodeOrder) error {
+ var s nodeSorter
+
+ switch o {
+ case FlatNameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+ return iv > jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case FlatCumNameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case NameOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.Name, r.Info.Name; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case FileOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.File, r.Info.File; iv != jv {
+ return iv < jv
+ }
+ if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case AddressOrder:
+ s = nodeSorter{ns,
+ func(l, r *Node) bool {
+ if iv, jv := l.Info.Address, r.Info.Address; iv != jv {
+ return iv < jv
+ }
+ return compareNodes(l, r)
+ },
+ }
+ case CumNameOrder, EntropyOrder:
+ // Hold scoring for score-based ordering
+ var score map[*Node]int64
+ scoreOrder := func(l, r *Node) bool {
+ if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv {
+ return iv > jv
+ }
+ if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+ return iv < jv
+ }
+ if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+ return iv > jv
+ }
+ return compareNodes(l, r)
+ }
+
+ switch o {
+ case CumNameOrder:
+ score = make(map[*Node]int64, len(ns))
+ for _, n := range ns {
+ score[n] = n.Cum
+ }
+ s = nodeSorter{ns, scoreOrder}
+ case EntropyOrder:
+ score = make(map[*Node]int64, len(ns))
+ for _, n := range ns {
+ score[n] = entropyScore(n)
+ }
+ s = nodeSorter{ns, scoreOrder}
+ }
+ default:
+ return fmt.Errorf("report: unrecognized sort ordering: %d", o)
+ }
+ sort.Sort(s)
+ return nil
+}
+
+// compareNodes compares two nodes to provide a deterministic ordering
+// between them. Two nodes cannot have the same Node.Info value.
+func compareNodes(l, r *Node) bool {
+ return fmt.Sprint(l.Info) < fmt.Sprint(r.Info)
+}
+
+// entropyScore computes a score for a node representing how important
+// it is to include this node on a graph visualization. It is used to
+// sort the nodes and select which ones to display if we have more
+// nodes than desired in the graph. This number is computed by looking
+// at the flat and cum weights of the node and the incoming/outgoing
+// edges. The fundamental idea is to penalize nodes that have a simple
+// fallthrough from their incoming to the outgoing edge.
+func entropyScore(n *Node) int64 {
+ score := float64(0)
+
+ if len(n.In) == 0 {
+ score++ // Favor entry nodes
+ } else {
+ score += edgeEntropyScore(n, n.In, 0)
+ }
+
+ if len(n.Out) == 0 {
+ score++ // Favor leaf nodes
+ } else {
+ score += edgeEntropyScore(n, n.Out, n.Flat)
+ }
+
+ return int64(score*float64(n.Cum)) + n.Flat
+}
+
+// edgeEntropyScore computes the entropy value for a set of edges
+// coming in or out of a node. Entropy (as defined in information
+// theory) refers to the amount of information encoded by the set of
+// edges. A set of edges that have a more interesting distribution of
+// samples gets a higher score.
+func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 {
+ score := float64(0)
+ total := self
+ for _, e := range edges {
+ if e.Weight > 0 {
+ total += abs64(e.Weight)
+ }
+ }
+ if total != 0 {
+ for _, e := range edges {
+ frac := float64(abs64(e.Weight)) / float64(total)
+ score += -frac * math.Log2(frac)
+ }
+ if self > 0 {
+ frac := float64(abs64(self)) / float64(total)
+ score += -frac * math.Log2(frac)
+ }
+ }
+ return score
+}
+
+// NodeOrder sets the ordering for a Sort operation
+type NodeOrder int
+
+// Sorting options for node sort.
+const (
+ FlatNameOrder NodeOrder = iota
+ FlatCumNameOrder
+ CumNameOrder
+ NameOrder
+ FileOrder
+ AddressOrder
+ EntropyOrder
+)
+
+// Sort returns a slice of the edges in the map, in a consistent
+// order. The sort order is first based on the edge weight
+// (higher-to-lower) and then by the node names to avoid flakiness.
+func (e EdgeMap) Sort() []*Edge {
+ el := make(edgeList, 0, len(e))
+ for _, w := range e {
+ el = append(el, w)
+ }
+
+ sort.Sort(el)
+ return el
+}
+
+// Sum returns the total weight for a set of nodes.
+func (e EdgeMap) Sum() int64 {
+ var ret int64
+ for _, edge := range e {
+ ret += edge.Weight
+ }
+ return ret
+}
+
+type edgeList []*Edge
+
+func (el edgeList) Len() int {
+ return len(el)
+}
+
+func (el edgeList) Less(i, j int) bool {
+ if el[i].Weight != el[j].Weight {
+ return abs64(el[i].Weight) > abs64(el[j].Weight)
+ }
+
+ from1 := el[i].Src.Info.PrintableName()
+ from2 := el[j].Src.Info.PrintableName()
+ if from1 != from2 {
+ return from1 < from2
+ }
+
+ to1 := el[i].Dest.Info.PrintableName()
+ to2 := el[j].Dest.Info.PrintableName()
+
+ return to1 < to2
+}
+
+func (el edgeList) Swap(i, j int) {
+ el[i], el[j] = el[j], el[i]
+}
+
+func abs64(i int64) int64 {
+ if i < 0 {
+ return -i
+ }
+ return i
+}
diff --git a/src/cmd/compile/internal/pgo/irgraph.go b/src/cmd/compile/internal/pgo/irgraph.go
new file mode 100644
index 0000000..9823882
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/irgraph.go
@@ -0,0 +1,480 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WORK IN PROGRESS
+
+package pgo
+
+import (
+ "cmd/compile/internal/ir"
+ "cmd/compile/internal/typecheck"
+ "cmd/compile/internal/types"
+ "fmt"
+ "internal/profile"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+)
+
+// IRGraph is the key datastrcture that is built from profile. It is essentially a call graph with nodes pointing to IRs of functions and edges carrying weights and callsite information. The graph is bidirectional that helps in removing nodes efficiently.
+type IRGraph struct {
+ // Nodes of the graph
+ IRNodes map[string]*IRNode
+ OutEdges IREdgeMap
+ InEdges IREdgeMap
+}
+
+// IRNode represents a node in the IRGraph.
+type IRNode struct {
+ // Pointer to the IR of the Function represented by this node.
+ AST *ir.Func
+ // Flat weight of the IRNode, obtained from profile.
+ Flat int64
+ // Cumulative weight of the IRNode.
+ Cum int64
+}
+
+// IREdgeMap maps an IRNode to its successors.
+type IREdgeMap map[*IRNode][]*IREdge
+
+// IREdge represents a call edge in the IRGraph with source, destination, weight, callsite, and line number information.
+type IREdge struct {
+ // Source and destination of the edge in IRNode.
+ Src, Dst *IRNode
+ Weight int64
+ CallSite int
+}
+
+// NodeMapKey represents a hash key to identify unique call-edges in profile and in IR. Used for deduplication of call edges found in profile.
+type NodeMapKey struct {
+ CallerName string
+ CalleeName string
+ CallSite int
+}
+
+// Weights capture both node weight and edge weight.
+type Weights struct {
+ NFlat int64
+ NCum int64
+ EWeight int64
+}
+
+// CallSiteInfo captures call-site information and its caller/callee.
+type CallSiteInfo struct {
+ Line int
+ Caller *ir.Func
+ Callee *ir.Func
+}
+
+var (
+ // Aggregated NodeWeights and EdgeWeights across profiles. This helps us determine the percentage threshold for hot/cold partitioning.
+ GlobalTotalNodeWeight = int64(0)
+ GlobalTotalEdgeWeight = int64(0)
+
+ // Global node and their aggregated weight information.
+ GlobalNodeMap = make(map[NodeMapKey]*Weights)
+
+ // WeightedCG represents the IRGraph built from profile, which we will update as part of inlining.
+ WeightedCG *IRGraph
+
+ // Original profile-graph.
+ ProfileGraph *Graph
+
+ // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
+ ListOfHotCallSites = make(map[CallSiteInfo]struct{})
+)
+
+// BuildProfileGraph generates a profile-graph from the profile.
+func BuildProfileGraph(profileFile string) {
+
+ // if possible, we should cache the profile-graph.
+ if ProfileGraph != nil {
+ return
+ }
+
+ // open the profile file.
+ f, err := os.Open(profileFile)
+ if err != nil {
+ log.Fatal("failed to open file " + profileFile)
+ return
+ }
+ defer f.Close()
+ p, err := profile.Parse(f)
+ if err != nil {
+ log.Fatal("failed to Parse profile file.")
+ return
+ }
+ // Build the options.
+ opt := &Options{
+ CallTree: false,
+ SampleValue: func(v []int64) int64 { return v[1] },
+ }
+ // Build the graph using profile package.
+ ProfileGraph = New(p, opt)
+
+ // Build various global maps from profile.
+ preprocessProfileGraph()
+
+}
+
+// BuildWeightedCallGraph generates a weighted callgraph from the profile for the current package.
+func BuildWeightedCallGraph() {
+
+ // Bail if there is no profile-graph available.
+ if ProfileGraph == nil {
+ return
+ }
+
+ // Create package-level call graph with weights from profile and IR.
+ WeightedCG = createIRGraph()
+}
+
+// ConvertLine2Int converts ir.Line string to integer.
+func ConvertLine2Int(line string) int {
+ splits := strings.Split(line, ":")
+ cs, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+ return int(cs)
+}
+
+// preprocessProfileGraph builds various maps from the profile-graph. It builds GlobalNodeMap and other information based on the name and callsite to compute node and edge weights which will be used later on to create edges for WeightedCG.
+func preprocessProfileGraph() {
+ nFlat := make(map[string]int64)
+ nCum := make(map[string]int64)
+
+ // Accummulate weights for the same node.
+ for _, n := range ProfileGraph.Nodes {
+ canonicalName := n.Info.Name
+ nFlat[canonicalName] += n.FlatValue()
+ nCum[canonicalName] += n.CumValue()
+ }
+
+ // Process ProfileGraph and build various node and edge maps which will be consumed by AST walk.
+ for _, n := range ProfileGraph.Nodes {
+ GlobalTotalNodeWeight += n.FlatValue()
+ canonicalName := n.Info.Name
+ // Create the key to the NodeMapKey.
+ nodeinfo := NodeMapKey{
+ CallerName: canonicalName,
+ CallSite: n.Info.Lineno,
+ }
+
+ for _, e := range n.Out {
+ GlobalTotalEdgeWeight += e.WeightValue()
+ nodeinfo.CalleeName = e.Dest.Info.Name
+ if w, ok := GlobalNodeMap[nodeinfo]; ok {
+ w.EWeight += e.WeightValue()
+ } else {
+ weights := new(Weights)
+ weights.NFlat = nFlat[canonicalName]
+ weights.NCum = nCum[canonicalName]
+ weights.EWeight = e.WeightValue()
+ GlobalNodeMap[nodeinfo] = weights
+ }
+ }
+ }
+}
+
+// createIRGraph builds the IRGraph by visting all the ir.Func in decl list of a package.
+func createIRGraph() *IRGraph {
+ var g IRGraph
+ // Bottomup walk over the function to create IRGraph.
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, n := range list {
+ g.Visit(n, recursive)
+ }
+ })
+ return &g
+}
+
+// Visit traverses the body of each ir.Func and use GlobalNodeMap to determine if we need to add an edge from ir.Func and any node in the ir.Func body.
+func (g *IRGraph) Visit(fn *ir.Func, recursive bool) {
+ if g.IRNodes == nil {
+ g.IRNodes = make(map[string]*IRNode)
+ }
+ if g.OutEdges == nil {
+ g.OutEdges = make(map[*IRNode][]*IREdge)
+ }
+ if g.InEdges == nil {
+ g.InEdges = make(map[*IRNode][]*IREdge)
+ }
+ name := ir.PkgFuncName(fn)
+ node := new(IRNode)
+ node.AST = fn
+ if g.IRNodes[name] == nil {
+ g.IRNodes[name] = node
+ }
+ // Create the key for the NodeMapKey.
+ nodeinfo := NodeMapKey{
+ CallerName: name,
+ CalleeName: "",
+ CallSite: -1,
+ }
+ // If the node exists, then update its node weight.
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ g.IRNodes[name].Flat = weights.NFlat
+ g.IRNodes[name].Cum = weights.NCum
+ }
+
+ // Recursively walk over the body of the function to create IRGraph edges.
+ g.createIRGraphEdge(fn, g.IRNodes[name], name)
+}
+
+// addEdge adds an edge between caller and new node that points to `callee` based on the profile-graph and GlobalNodeMap.
+func (g *IRGraph) addEdge(caller *IRNode, callee *ir.Func, n *ir.Node, callername string, line int) {
+
+ // Create an IRNode for the callee.
+ calleenode := new(IRNode)
+ calleenode.AST = callee
+ calleename := ir.PkgFuncName(callee)
+
+ // Create key for NodeMapKey.
+ nodeinfo := NodeMapKey{
+ CallerName: callername,
+ CalleeName: calleename,
+ CallSite: line,
+ }
+
+ // Create the callee node with node weight.
+ if g.IRNodes[calleename] == nil {
+ g.IRNodes[calleename] = calleenode
+ nodeinfo2 := NodeMapKey{
+ CallerName: calleename,
+ CalleeName: "",
+ CallSite: -1,
+ }
+ if weights, ok := GlobalNodeMap[nodeinfo2]; ok {
+ g.IRNodes[calleename].Flat = weights.NFlat
+ g.IRNodes[calleename].Cum = weights.NCum
+ }
+ }
+
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ caller.Flat = weights.NFlat
+ caller.Cum = weights.NCum
+
+ // Add edge in the IRGraph from caller to callee.
+ info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: weights.EWeight, CallSite: line}
+ g.OutEdges[caller] = append(g.OutEdges[caller], info)
+ g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+ } else {
+ nodeinfo.CalleeName = ""
+ nodeinfo.CallSite = -1
+ if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+ caller.Flat = weights.NFlat
+ caller.Cum = weights.NCum
+ info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: 0, CallSite: line}
+ g.OutEdges[caller] = append(g.OutEdges[caller], info)
+ g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+ } else {
+ info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: 0, CallSite: line}
+ g.OutEdges[caller] = append(g.OutEdges[caller], info)
+ g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+ }
+ }
+}
+
+// createIRGraphEdge traverses the nodes in the body of ir.Func and add edges between callernode which points to the ir.Func and the nodes in the body.
+func (g *IRGraph) createIRGraphEdge(fn *ir.Func, callernode *IRNode, name string) {
+ var doNode func(ir.Node) bool
+ doNode = func(n ir.Node) bool {
+ switch n.Op() {
+ default:
+ ir.DoChildren(n, doNode)
+ case ir.OCALLFUNC:
+ call := n.(*ir.CallExpr)
+ line := ConvertLine2Int(ir.Line(n))
+ // Find the callee function from the call site and add the edge.
+ f := inlCallee(call.X)
+ if f != nil {
+ g.addEdge(callernode, f, &n, name, line)
+ }
+ case ir.OCALLMETH:
+ call := n.(*ir.CallExpr)
+ // Find the callee method from the call site and add the edge.
+ fn2 := ir.MethodExprName(call.X).Func
+ line := ConvertLine2Int(ir.Line(n))
+ g.addEdge(callernode, fn2, &n, name, line)
+ }
+ return false
+ }
+ doNode(fn)
+}
+
+// WeightInPercentage converts profile weights to a percentage.
+func WeightInPercentage(value int64, total int64) float64 {
+ var ratio float64
+ if total != 0 {
+ ratio = (float64(value) / float64(total)) * 100
+ }
+ return ratio
+}
+
+// PrintWeightedCallGraphDOT prints IRGraph in DOT format.
+func PrintWeightedCallGraphDOT(nodeThreshold float64, edgeThreshold float64) {
+ fmt.Printf("\ndigraph G {\n")
+ fmt.Printf("forcelabels=true;\n")
+
+ // List of functions in this package.
+ funcs := make(map[string]struct{})
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ funcs[name] = struct{}{}
+ }
+ })
+
+ // Determine nodes of DOT.
+ nodes := make(map[string]*ir.Func)
+ for name, _ := range funcs {
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ for _, e := range WeightedCG.OutEdges[n] {
+ if _, ok := nodes[ir.PkgFuncName(e.Src.AST)]; !ok {
+ nodes[ir.PkgFuncName(e.Src.AST)] = e.Src.AST
+ }
+ if _, ok := nodes[ir.PkgFuncName(e.Dst.AST)]; !ok {
+ nodes[ir.PkgFuncName(e.Dst.AST)] = e.Dst.AST
+ }
+ }
+ if _, ok := nodes[ir.PkgFuncName(n.AST)]; !ok {
+ nodes[ir.PkgFuncName(n.AST)] = n.AST
+ }
+ }
+ }
+
+ // Print nodes.
+ for name, ast := range nodes {
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+ color := "black"
+ if nodeweight > nodeThreshold {
+ color = "red"
+ }
+ if ast.Inl != nil {
+ fmt.Printf("\"%v\" [color=%v,label=\"%v,freq=%.2f,inl_cost=%d\"];\n", ir.PkgFuncName(ast), color, ir.PkgFuncName(ast), nodeweight, ast.Inl.Cost)
+ } else {
+ fmt.Printf("\"%v\" [color=%v, label=\"%v,freq=%.2f\"];\n", ir.PkgFuncName(ast), color, ir.PkgFuncName(ast), nodeweight)
+ }
+ }
+ }
+ // Print edges.
+ ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+ for _, f := range list {
+ name := ir.PkgFuncName(f)
+ if n, ok := WeightedCG.IRNodes[name]; ok {
+ for _, e := range WeightedCG.OutEdges[n] {
+ edgepercent := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+ if edgepercent > edgeThreshold {
+ fmt.Printf("edge [color=red, style=solid];\n")
+ } else {
+ fmt.Printf("edge [color=black, style=solid];\n")
+ }
+
+ fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", ir.PkgFuncName(n.AST), ir.PkgFuncName(e.Dst.AST), edgepercent)
+ }
+ }
+ }
+ })
+ fmt.Printf("}\n")
+}
+
+// redirectEdges deletes the cur node out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdges(g *IRGraph, parent *IRNode, cur *IRNode) {
+ for _, outEdge := range g.OutEdges[cur] {
+ outEdge.Src = parent
+ g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+ }
+ delete(g.OutEdges, cur)
+}
+
+// RedirectEdges deletes and redirects out-edges from node cur based on inlining information via inlinedCallSites.
+func RedirectEdges(cur *IRNode, inlinedCallSites map[CallSiteInfo]struct{}) {
+ g := WeightedCG
+ for i, outEdge := range g.OutEdges[cur] {
+ if _, found := inlinedCallSites[CallSiteInfo{Line: outEdge.CallSite, Caller: cur.AST}]; !found {
+ for _, InEdge := range g.InEdges[cur] {
+ if _, ok := inlinedCallSites[CallSiteInfo{Line: InEdge.CallSite, Caller: InEdge.Src.AST}]; ok {
+ weight := calculateweight(g, InEdge.Src, cur)
+ redirectEdge(g, InEdge.Src, cur, outEdge, weight, i)
+ }
+ }
+ } else {
+ remove(g, cur, i, outEdge.Dst.AST.Nname)
+ }
+ }
+ removeall(g, cur)
+}
+
+// calculateweight calculates the weight of the new redirected edge.
+func calculateweight(g *IRGraph, parent *IRNode, cur *IRNode) int64 {
+ sum := int64(0)
+ pw := int64(0)
+ for _, InEdge := range g.InEdges[cur] {
+ sum = sum + InEdge.Weight
+ if InEdge.Src == parent {
+ pw = InEdge.Weight
+ }
+ }
+ weight := int64(0)
+ if sum != 0 {
+ weight = pw / sum
+ } else {
+ weight = pw
+ }
+ return weight
+}
+
+// redirectEdge deletes the cur-node's out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdge(g *IRGraph, parent *IRNode, cur *IRNode, outEdge *IREdge, weight int64, idx int) {
+ outEdge.Src = parent
+ outEdge.Weight = weight * outEdge.Weight
+ g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+ remove(g, cur, idx, outEdge.Dst.AST.Nname)
+}
+
+// remove deletes the cur-node's out-edges at index idx.
+func remove(g *IRGraph, cur *IRNode, idx int, name *ir.Name) {
+ if len(g.OutEdges[cur]) >= 2 {
+ g.OutEdges[cur][idx] = &IREdge{CallSite: -1}
+ } else {
+ delete(g.OutEdges, cur)
+ }
+}
+
+// removeall deletes all cur-node's out-edges that marked to be removed .
+func removeall(g *IRGraph, cur *IRNode) {
+ for i := len(g.OutEdges[cur]) - 1; i >= 0; i-- {
+ if g.OutEdges[cur][i].CallSite == -1 {
+ g.OutEdges[cur][i] = g.OutEdges[cur][len(g.OutEdges[cur])-1]
+ g.OutEdges[cur] = g.OutEdges[cur][:len(g.OutEdges[cur])-1]
+ }
+ }
+}
+
+// inlCallee is same as the implementation for inl.go with one change. The change is that we do not invoke CanInline on a closure.
+func inlCallee(fn ir.Node) *ir.Func {
+ fn = ir.StaticValue(fn)
+ switch fn.Op() {
+ case ir.OMETHEXPR:
+ fn := fn.(*ir.SelectorExpr)
+ n := ir.MethodExprName(fn)
+ // Check that receiver type matches fn.X.
+ // TODO(mdempsky): Handle implicit dereference
+ // of pointer receiver argument?
+ if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) {
+ return nil
+ }
+ return n.Func
+ case ir.ONAME:
+ fn := fn.(*ir.Name)
+ if fn.Class == ir.PFUNC {
+ return fn.Func
+ }
+ case ir.OCLOSURE:
+ fn := fn.(*ir.ClosureExpr)
+ c := fn.Func
+ return c
+ }
+ return nil
+}
diff --git a/src/cmd/compile/internal/test/pgo_inl_test.go b/src/cmd/compile/internal/test/pgo_inl_test.go
new file mode 100644
index 0000000..eeeeae9
--- /dev/null
+++ b/src/cmd/compile/internal/test/pgo_inl_test.go
@@ -0,0 +1,148 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bufio"
+ "fmt"
+ "internal/testenv"
+ "io"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "regexp"
+ "strings"
+ "testing"
+)
+
+// TestPGOIntendedInlining tests that specific functions are inlined.
+func TestPGOIntendedInlining(t *testing.T) {
+ testenv.MustHaveGoRun(t)
+ t.Parallel()
+
+ // Make a temporary directory to work in.
+ tmpdir, err := ioutil.TempDir("", "TestCode")
+ if err != nil {
+ t.Fatalf("Failed to create temporary directory: %v", err)
+ }
+ defer os.RemoveAll(tmpdir)
+
+ want := map[string][]string{
+ "cmd/compile/internal/test/testdata/pgo/inline": {
+ "(*BS).NS",
+ },
+ }
+
+ // The functions which are not expected to be inlined are as follows.
+ wantNot := map[string][]string{
+ "cmd/compile/internal/test/testdata/pgo/inline": {
+ // The calling edge main->A is hot and the cost of A is large than
+ // inlineHotCalleeMaxBudget.
+ "A",
+ // The calling edge BenchmarkA" -> benchmarkB is cold
+ // and the cost of A is large than inlineMaxBudget.
+ "benchmarkB",
+ },
+ }
+
+ must := map[string]bool{
+ "(*BS).NS": true,
+ }
+
+ notInlinedReason := make(map[string]string)
+ pkgs := make([]string, 0, len(want))
+ for pname, fnames := range want {
+ pkgs = append(pkgs, pname)
+ for _, fname := range fnames {
+ fullName := pname + "." + fname
+ if _, ok := notInlinedReason[fullName]; ok {
+ t.Errorf("duplicate func: %s", fullName)
+ }
+ notInlinedReason[fullName] = "unknown reason"
+ }
+ }
+
+ // If the compiler emit "cannot inline for function A", the entry A
+ // in expectedNotInlinedList will be removed.
+ expectedNotInlinedList := make(map[string]struct{})
+ for pname, fnames := range wantNot {
+ for _, fname := range fnames {
+ fullName := pname + "." + fname
+ expectedNotInlinedList[fullName] = struct{}{}
+ }
+ }
+
+ // go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline
+ curdir, err1 := os.Getwd()
+ if err1 != nil {
+ t.Fatal(err1)
+ }
+ gcflag_option := "-gcflags=-m -m -pgoprofile %s/testdata/pgo/inline/inline_hot.pprof"
+ gcflag := fmt.Sprintf(gcflag_option, curdir)
+ args := append([]string{"test", "-run=nope", gcflag}, pkgs...)
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), args...))
+
+ pr, pw := io.Pipe()
+ cmd.Stdout = pw
+ cmd.Stderr = pw
+ cmdErr := make(chan error, 1)
+ go func() {
+ cmdErr <- cmd.Run()
+ pw.Close()
+ }()
+ scanner := bufio.NewScanner(pr)
+ curPkg := ""
+ canInline := regexp.MustCompile(`: can inline ([^ ]*)`)
+ haveInlined := regexp.MustCompile(`: inlining call to ([^ ]*)`)
+ cannotInline := regexp.MustCompile(`: cannot inline ([^ ]*): (.*)`)
+ for scanner.Scan() {
+ line := scanner.Text()
+ if strings.HasPrefix(line, "# ") {
+ curPkg = line[2:]
+ splits := strings.Split(curPkg, " ")
+ curPkg = splits[0]
+ continue
+ }
+ if m := haveInlined.FindStringSubmatch(line); m != nil {
+ fname := m[1]
+ delete(notInlinedReason, curPkg+"."+fname)
+ continue
+ }
+ if m := canInline.FindStringSubmatch(line); m != nil {
+ fname := m[1]
+ fullname := curPkg + "." + fname
+ // If function must be inlined somewhere, being inlinable is not enough
+ if _, ok := must[fullname]; !ok {
+ delete(notInlinedReason, fullname)
+ continue
+ }
+ }
+ if m := cannotInline.FindStringSubmatch(line); m != nil {
+ fname, reason := m[1], m[2]
+ fullName := curPkg + "." + fname
+ if _, ok := notInlinedReason[fullName]; ok {
+ // cmd/compile gave us a reason why
+ notInlinedReason[fullName] = reason
+ }
+ delete(expectedNotInlinedList, fullName)
+ continue
+ }
+ }
+ if err := <-cmdErr; err != nil {
+ t.Fatal(err)
+ }
+ if err := scanner.Err(); err != nil {
+ t.Fatal(err)
+ }
+ for fullName, reason := range notInlinedReason {
+ t.Errorf("%s was not inlined: %s", fullName, reason)
+ }
+
+ // If the list expectedNotInlinedList is not empty, it indicates
+ // the functions in the expectedNotInlinedList are marked with caninline.
+ for fullName, _ := range expectedNotInlinedList {
+ t.Errorf("%s was expected not inlined", fullName)
+ }
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
new file mode 100644
index 0000000..c1d2a53
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
@@ -0,0 +1,86 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated, then a new inline_hot.pprof file should be generated via "go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline".
+package main
+
+import (
+ "time"
+)
+
+type BS struct {
+ length uint
+ s []uint64
+}
+
+const wSize = uint(64)
+const lWSize = uint(6)
+
+func D(i uint) int {
+ return int((i + (wSize - 1)) >> lWSize)
+}
+
+func N(length uint) (bs *BS) {
+ bs = &BS{
+ length,
+ make([]uint64, D(length)),
+ }
+
+ return bs
+}
+
+func (b *BS) S(i uint) *BS {
+ b.s[i>>lWSize] |= 1 << (i & (wSize - 1))
+ return b
+}
+
+var jn = [...]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+func T(v uint64) uint {
+ return uint(jn[((v&-v)*0x03f79d71b4ca8b09)>>58])
+}
+
+func (b *BS) NS(i uint) (uint, bool) {
+ x := int(i >> lWSize)
+ if x >= len(b.s) {
+ return 0, false
+ }
+ w := b.s[x]
+ w = w >> (i & (wSize - 1))
+ if w != 0 {
+ return i + T(w), true
+ }
+ x = x + 1
+ for x < len(b.s) {
+ if b.s[x] != 0 {
+ return uint(x)*wSize + T(b.s[x]), true
+ }
+ x = x + 1
+
+ }
+ return 0, false
+}
+
+func A() {
+ s := N(100000)
+ for i := 0; i < 1000; i += 30 {
+ s.S(uint(i))
+ }
+ for j := 0; j < 1000; j++ {
+ c := uint(0)
+ for i, e := s.NS(0); e; i, e = s.NS(i + 1) {
+ c++
+ }
+ }
+}
+
+func main() {
+ time.Sleep(time.Second)
+ A()
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
new file mode 100644
index 0000000..45ccb61
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
Binary files differ
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
new file mode 100644
index 0000000..024d340
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
@@ -0,0 +1,47 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated, then a new inline_hot.pprof file should be generated via "go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline".
+package main
+
+import "testing"
+
+func BenchmarkA(b *testing.B) {
+ benchmarkB(b)
+}
+func benchmarkB(b *testing.B) {
+
+ for i := 0; true; {
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ A()
+ i = i + 1
+ if i >= b.N {
+ break
+ }
+ }
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 2a4490b..394a416 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -66,6 +66,7 @@
"internal/goroot",
"internal/goversion",
"internal/pkgbits",
+ "internal/profile",
"internal/race",
"internal/saferio",
"internal/platform",
diff --git a/src/internal/profile/legacy_profile.go b/src/internal/profile/legacy_profile.go
index 0ac350a..b102c95 100644
--- a/src/internal/profile/legacy_profile.go
+++ b/src/internal/profile/legacy_profile.go
@@ -722,7 +722,7 @@
var l string
var err error
// Parse text of the form "attribute = value" before the samples.
- const delimiter = "="
+ const delimiter = '='
for {
l, err = r.ReadString('\n')
if err != nil {
@@ -746,10 +746,13 @@
break
}
- key, val, ok := strings.Cut(l, delimiter)
- if !ok {
+ index := strings.IndexByte(l, delimiter)
+ if index < 0 {
break
}
+ key := l[:index]
+ val := l[index+1:]
+
key, val = strings.TrimSpace(key), strings.TrimSpace(val)
var err error
switch key {
@@ -1023,7 +1026,7 @@
var attrs []string
var r *strings.Replacer
- const delimiter = "="
+ const delimiter = '='
for {
l, err := b.ReadString('\n')
if err != nil {
@@ -1046,7 +1049,10 @@
if err == errUnrecognized {
// Recognize assignments of the form: attr=value, and replace
// $attr with value on subsequent mappings.
- if attr, value, ok := strings.Cut(l, delimiter); ok {
+ idx := strings.IndexByte(l, delimiter)
+ if idx >= 0 {
+ attr := l[:idx]
+ value := l[idx+1:]
attrs = append(attrs, "$"+strings.TrimSpace(attr), strings.TrimSpace(value))
r = strings.NewReplacer(attrs...)
}
To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.