Groups

[go] cmd/compile: Enables PGO in Go and performs profile-guided inlining

82 views

Skip to first unread message

Gopher Robot (Gerrit)

unread,

Sep 9, 2022, 3:31:38 PM9/9/22

to Raj Barik, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Congratulations on opening your first change. Thank you for your contribution!

Next steps:
A maintainer will review your change and provide feedback. See
https://go.dev/doc/contribute#review for more info and tips to get your
patch through code review.

Most changes in the Go project go through a few rounds of revision. This can be
surprising to people new to the project. The careful, iterative review process
is our way of helping mentor contributors and ensuring that their contributions
have a lasting impact.

During May-July and Nov-Jan the Go project is in a code freeze, during which
little code gets reviewed or merged. If a reviewer responds with a comment like
R=go1.11 or adds a tag like "wait-release", it means that this CL will be
reviewed as part of the next development cycle. See https://go.dev/s/release
for more details.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Sep 10, 2022, 6:52:20 PM9/10/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Raj Barik has uploaded this change for review.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M api/go1.19.txt
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
M src/cmd/dist/buildtool.go
A src/internal/profile/graph.go
A src/internal/profile/legacy_java_profile.go
A src/pgo/inline/inline_hot.go
A src/pgo/inline/inline_hot_test.go
11 files changed, 2,443 insertions(+), 7 deletions(-)

diff --git a/api/go1.19.txt b/api/go1.19.txt
index 523f752..100c2af 100644
--- a/api/go1.19.txt
+++ b/api/go1.19.txt
@@ -244,6 +244,13 @@
 pkg os/exec, method (*Cmd) Environ() []string #50599
 pkg os/exec, type Cmd struct, Err error #43724
 pkg os/exec, var ErrDot error #43724
+pkg pgo/inline, func A() #43724
+pkg pgo/inline, func D(uint) int #43724
+pkg pgo/inline, func N(uint) *BS #43724
+pkg pgo/inline, func T(uint64) uint #43724
+pkg pgo/inline, type BS struct #43724
+pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
+pkg pgo/inline, method (*BS) S(uint) *BS #43724
 pkg regexp/syntax, const ErrNestingDepth = "expression nests too deeply" #51684
 pkg regexp/syntax, const ErrNestingDepth ErrorCode #51684
 pkg runtime/debug, func SetMemoryLimit(int64) int64 #48409
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index f2728d9..7420d6d 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -121,6 +121,9 @@
 	TraceProfile       string       "help:\"write an execution trace to `file`\""
 	TrimPath           string       "help:\"remove `prefix` from recorded source file paths\""
 	WB                 bool         "help:\"enable write barrier\"" // TODO: remove
+	ProfileUse         string       "help:\"read profile from `file`\""
+	InlineHotThreshold string       "help:\"threshold percentage for determining hot methods and callsites for inlining\""
+	InlineHotBudget    int          "help:\"inline budget for hot methods\""
 
 	// Configuration derived from flags; not a flag itself.
 	Cfg struct {
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index c9493bf..1929b2f 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -16,6 +16,7 @@
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/logopt"
 	"cmd/compile/internal/noder"
+	"cmd/compile/internal/pgo"
 	"cmd/compile/internal/pkginit"
 	"cmd/compile/internal/reflectdata"
 	"cmd/compile/internal/ssa"
@@ -29,6 +30,7 @@
 	"flag"
 	"fmt"
 	"internal/buildcfg"
+	"internal/profile"
 	"log"
 	"os"
 	"runtime"
@@ -232,10 +234,28 @@
 		typecheck.AllImportedBodies()
 	}
 
+	// Read cpu profile file and build cross-package pprof-graph and per-package weighted-call-graph.
+	base.Timer.Start("fe", "profileuse")
+	if base.Flag.ProfileUse != "" {
+		if pgo.PProfGraph == nil {
+			pgo.PProfGraph = pgo.BuildPProfGraph(base.Flag.ProfileUse, &profile.Options{
+				CallTree:    false,
+				SampleValue: func(v []int64) int64 { return v[1] },
+			})
+		}
+		pgo.WeightedCG = pgo.BuildWeightedCallGraph()
+	}
+
 	// Inlining
 	base.Timer.Start("fe", "inlining")
 	if base.Flag.LowerL != 0 {
+		if pgo.WeightedCG != nil {
+			inline.InlinePrologue()
+		}
 		inline.InlinePackage()
+		if pgo.WeightedCG != nil {
+			inline.InlineEpilogue()
+		}
 	}
 	noder.MakeWrappers(typecheck.Target) // must happen after inlining
 
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 817f2fd..078eeb7 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -29,11 +29,13 @@
 import (
 	"fmt"
 	"go/constant"
+	"strconv"
 	"strings"
 
 	"cmd/compile/internal/base"
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/logopt"
+	"cmd/compile/internal/pgo"
 	"cmd/compile/internal/typecheck"
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
@@ -42,7 +44,8 @@
 
 // Inlining budget parameters, gathered in one place
 const (
-	inlineMaxBudget       = 80
+	inlineMaxBudget = 80
+	// Budget increased due to hotness.
 	inlineExtraAppendCost = 0
 	// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
 	inlineExtraCallCost  = 57              // 57 was benchmarked to provided most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
@@ -51,9 +54,87 @@
 
 	inlineBigFunctionNodes   = 5000 // Functions with this many nodes are considered "big".
 	inlineBigFunctionMaxCost = 20   // Max cost of inlinee when inlining into a "big" function.
+
 )
 
-// InlinePackage finds functions that can be inlined and clones them before walk expands them.
+var (
+	// Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
+	listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+	// List of all hot call sites.
+	candHotEdgeMap = make(map[string]struct{})
+
+	// List of inlined call sites.
+	inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+	// Threshold for Hot callsite inlining.
+	inlineHotThresholdPercent = float64(2)
+
+	// Budget increased due to hotness.
+	inlineHotMaxBudget int32 = 160
+)
+
+// InlinePrologue records the hot callsites from ir-graph.
+func InlinePrologue() {
+	if s, err := strconv.ParseFloat(base.Flag.InlineHotThreshold, 64); err == nil {
+		inlineHotThresholdPercent = s
+		if base.Flag.LowerM != 0 {
+			fmt.Printf("hot-thres=%v\n", inlineHotThresholdPercent)
+		}
+	}
+
+	if base.Flag.InlineHotBudget != 0 {
+		inlineHotMaxBudget = int32(base.Flag.InlineHotBudget)
+	}
+
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+				nodeweight := pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
+				if nodeweight > inlineHotThresholdPercent {
+					n.HotNode = true
+				}
+				for _, e := range pgo.WeightedCG.OutEdges[n] {
+					if e.Weight != 0 {
+						weightpercent := pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
+						if weightpercent > inlineHotThresholdPercent {
+							splits := strings.Split(e.CallSite, ":")
+							line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+							lineno := fmt.Sprintf("%v", line2)
+							canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
+							if _, ok := candHotEdgeMap[canonicalName]; !ok {
+								candHotEdgeMap[canonicalName] = struct{}{}
+							}
+						}
+					}
+				}
+			}
+		}
+	})
+	if base.Flag.LowerM > 4 {
+		fmt.Printf("hot-cg before inline in dot format:")
+		pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
+	}
+}
+
+// InlineEpilogue updates IRGraph after inlining.
+func InlineEpilogue() {
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+				pgo.RedirectEdges(n, inlinedCallSites)
+			}
+		}
+	})
+	if base.Flag.LowerM > 4 {
+		fmt.Printf("hot-cg after inline in dot:")
+		pgo.PrintWeightedCallGraphDOT(inlineHotThresholdPercent)
+	}
+}
+
+// InlinePackage finds functions that can be inlined and clones them.
 func InlinePackage() {
 	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
 		numfns := numNonClosures(list)
@@ -81,6 +162,9 @@
 		base.Fatalf("CanInline no nname %+v", fn)
 	}
 
+	// Initialize an empty list of hot callsites for this caller.
+	listOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
 	var reason string // reason, if any, that the function was not inlined
 	if base.Flag.LowerM > 1 || logopt.Enabled() {
 		defer func() {
@@ -181,10 +265,14 @@
 		budget:        inlineMaxBudget,
 		extraCallCost: cc,
 	}
+	savefn := ir.CurFunc
+	ir.CurFunc = fn
 	if visitor.tooHairy(fn) {
 		reason = visitor.reason
+		ir.CurFunc = savefn
 		return
 	}
+	ir.CurFunc = savefn
 
 	n.Func.Inl = &ir.Inline{
 		Cost: inlineMaxBudget - visitor.budget,
@@ -252,6 +340,19 @@
 		return true
 	}
 	if v.budget < 0 {
+		if pgo.WeightedCG != nil {
+			// Find the existing node in WeightedCallGraph.
+			if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
+				// If the cost of hot function is greater than inlineHotMaxBudget,
+				// the inliner won't inline this function.
+				if inlineMaxBudget-v.budget < inlineHotMaxBudget && n.HotNode == true {
+					if base.Flag.LowerM > 1 {
+						fmt.Printf("hot-node enabled increased budget for func=%v\n", ir.PkgFuncName(fn))
+					}
+					return false
+				}
+			}
+		}
 		v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
 		return true
 	}
@@ -315,6 +416,23 @@
 			}
 		}
 
+		// Determine if callee edge is a hot callee or not.
+		if pgo.WeightedCG != nil && ir.CurFunc != nil {
+			if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
+				lineno := fmt.Sprintf("%v", ir.Line(n))
+				splits := strings.Split(lineno, ":")
+				l, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+				linenum := fmt.Sprintf("%v", l)
+				canonicalName := ir.PkgFuncName(ir.CurFunc) + "-" + linenum + "-" + ir.PkgFuncName(fn)
+				if _, o := candHotEdgeMap[canonicalName]; o {
+					if base.Flag.LowerM > 1 {
+						fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(ir.CurFunc))
+					}
+					listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
+				}
+			}
+		}
+
 		if ir.IsIntrinsicCall(n) {
 			// Treat like any other node.
 			break
@@ -464,10 +582,12 @@
 
 	v.budget--
 
-	// When debugging, don't stop early, to get full cost of inlining this function
-	if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
-		v.reason = "too expensive"
-		return true
+	if pgo.WeightedCG == nil {
+		// When debugging, don't stop early, to get full cost of inlining this function
+		if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
+			v.reason = "too expensive"
+			return true
+		}
 	}
 
 	return ir.DoChildren(n, v.do)
@@ -716,7 +836,18 @@
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
 				fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
 		}
-		return n
+
+		// If the callsite is hot and it is under the inlineHotMaxBudget budget, then inline it, or else bail.
+		if _, ok := listOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
+			if fn.Inl.Cost > inlineHotMaxBudget {
+				return n
+			}
+			if base.Flag.LowerM > 1 {
+				fmt.Printf("hot-budget check allows inlining at %v\n", ir.Line(n))
+			}
+		} else {
+			return n
+		}
 	}
 
 	if fn == ir.CurFunc {
@@ -817,7 +948,12 @@
 		fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
 	}
 
+	if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
+		inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
+	}
+
 	res := InlineCall(n, fn, inlIndex)
+
 	if res == nil {
 		base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
 	}
diff --git a/src/cmd/compile/internal/pgo/irgraph.go b/src/cmd/compile/internal/pgo/irgraph.go
new file mode 100644
index 0000000..d27a5e1
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/irgraph.go
@@ -0,0 +1,493 @@
+package pgo
+
+import (
+	"cmd/compile/internal/base"
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/typecheck"
+	"cmd/compile/internal/types"
+	"fmt"
+	"internal/profile"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// IRGraph is the key datastrcture that is built from pprof profile. It is essentially a call graph with nodes pointing to IRs of functions and edges carrying weights and callsite information. The graph is bidirectional that helps in removing nodes efficiently. The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.
+type IRGraph struct {
+	// Nodes of the graph
+	IRNodes  map[string]*IRNode
+	OutEdges IREdgeMap
+	InEdges  IREdgeMap
+}
+
+// IRNode represents a node in the IRGraph.
+type IRNode struct {
+	// Pointer to the IR of the Function represented by this node.
+	AST *ir.Func
+	// Flat weight of the IRNode, obtained from pprof.
+	Flat int64
+	// Cumulative weight of the IRNode.
+	Cum int64
+	// Is this function a recursive function.
+	Recursive bool
+	// Is this function a hot node?
+	HotNode bool
+}
+
+// IREdgeMap maps an IRNode to its successors.
+type IREdgeMap map[*IRNode][]*IREdge
+
+// IREdge represents a call edge in the IRGraph with source, destination, weight, callsite, and line number information.
+type IREdge struct {
+	Src, Dst *IRNode
+	DstNode  *ir.Node
+	Weight   int64
+	CallSite string
+}
+
+// NodeMapInfo represents a hash key to identify unique call-edges in pprof and in IR. Used for deduplication of call edges found in pprof.
+type NodeMapInfo struct {
+	FuncName string
+	DstName  string
+	CallSite int
+}
+
+// Weights capture both node weight and edge weight.
+type Weights struct {
+	NWeight      int64
+	NTotalWeight int64
+	EWeight      int64
+}
+
+// CallSiteInfo captures call site information and its static callee.
+type CallSiteInfo struct {
+	Line   string
+	Caller *ir.Func
+}
+
+var (
+	// Aggregated NodeWeights and EdgeWeights across profiles. This helps us determine the percentage threshold for hot/cold partitioning.
+	GlobalTotalNodeWeight = int64(0)
+	GlobalTotalEdgeWeight = int64(0)
+
+	// Global node and their aggregated weight information.
+	GlobalNodeMap = make(map[NodeMapInfo]*Weights)
+
+	// WeightedCG represents the IRGraph built from pprof profile, which we will update as part of inlining.
+	WeightedCG *IRGraph = nil
+
+	// Original cross-package PProf Graph.
+	PProfGraph *profile.Graph = nil
+)
+
+// BuildPProfGraph generates a pprof-graph from cpu-profile.
+func BuildPProfGraph(profileFile string, opt *profile.Options) *profile.Graph {
+
+	if PProfGraph != nil {
+		return PProfGraph
+	}
+	// open the pprof profile file.
+	f, err := os.Open(profileFile)
+	if err != nil {
+		log.Fatal("failed to open file " + profileFile)
+		return nil
+	}
+	defer f.Close()
+	p, err := profile.Parse(f)
+	if err != nil {
+		log.Fatal("failed to Parse profile file.")
+		return nil
+	}
+	// Build the graph using google's pprof package.
+	pProfGraph := profile.New(p, opt)
+
+	// Build various global maps from pprof profile.
+	preprocessPProfGraph(pProfGraph)
+
+	return pProfGraph
+}
+
+// BuildWeightedCallGraph generates a weighted callgraph from the pprof profile for the current package.
+func BuildWeightedCallGraph() *IRGraph {
+
+	// Bail if there is no pprof-graph available.
+	if PProfGraph == nil {
+		return nil
+	}
+
+	// Create package-level call graph with weights from pprof profile and IR.
+	weightedCG := createIRGraph()
+
+	if weightedCG != nil && base.Flag.LowerM > 1 {
+		log.Println("weighted call graph created successfully!")
+	}
+
+	return weightedCG
+}
+
+// preprocessPProfGraph builds various maps from profiles. It builds GlobalNodeMap and other information based on the name and callsite to compute node and edge weights which will be used later on to create edges of WeightedCG.
+func preprocessPProfGraph(pProfGraph *profile.Graph) {
+	nweight := make(map[string]int64)
+	nTotalWeight := make(map[string]int64)
+
+	// Accummulate weights for the same nodes.
+	for _, n := range pProfGraph.Nodes {
+		canonicalName := n.Info.Name
+		if _, ok := nweight[canonicalName]; ok {
+			nweight[canonicalName] += n.FlatValue()
+			nTotalWeight[canonicalName] += n.CumValue()
+		} else {
+			nweight[canonicalName] = n.FlatValue()
+			nTotalWeight[canonicalName] = n.CumValue()
+		}
+	}
+
+	// Process PProfGraph and build various node and edge maps which will be consumed by AST walk.
+	for _, n := range pProfGraph.Nodes {
+		GlobalTotalNodeWeight += n.FlatValue()
+		canonicalName := n.Info.Name
+		// Create the key to the NodeMap.
+		nodeinfo := NodeMapInfo{
+			FuncName: canonicalName,
+			CallSite: n.Info.Lineno,
+		}
+		// If there are no outgoing edges, we still need to create the node [for cold sites] with no callee information.
+		if len(n.Out) == 0 {
+			nodeinfo.DstName = ""
+			nodeinfo.CallSite = -1
+
+			weights := new(Weights)
+			weights.NWeight = nweight[canonicalName]
+			weights.NTotalWeight = nTotalWeight[canonicalName]
+			weights.EWeight = 0
+
+			GlobalNodeMap[nodeinfo] = weights
+		}
+
+		for _, e := range n.Out {
+			GlobalTotalEdgeWeight += e.WeightValue()
+			nodeinfo.DstName = e.Dest.Info.Name
+			if w, ok := GlobalNodeMap[nodeinfo]; ok {
+				w.EWeight += e.WeightValue()
+			} else {
+				weights := new(Weights)
+				weights.NWeight = nweight[canonicalName]
+				weights.NTotalWeight = nTotalWeight[canonicalName]
+				weights.EWeight = e.WeightValue()
+				GlobalNodeMap[nodeinfo] = weights
+			}
+		}
+	}
+}
+
+// createIRGraph builds the IRGraph by visting all the ir.Func in decl list of a package.
+func createIRGraph() *IRGraph {
+	var g IRGraph
+	// Bottomup walk over the function to create IRGraph.
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, n := range list {
+			g.Visit(n, recursive)
+		}
+	})
+	return &g
+}
+
+// Visit traverses the body of each ir.Func and use GlobalNodeMap to determine if we need to add an edge from ir.Func and any node in the ir.Func body.
+func (g *IRGraph) Visit(fn *ir.Func, recursive bool) {
+	if g.IRNodes == nil {
+		g.IRNodes = make(map[string]*IRNode)
+	}
+	if g.OutEdges == nil {
+		g.OutEdges = make(map[*IRNode][]*IREdge)
+	}
+	if g.InEdges == nil {
+		g.InEdges = make(map[*IRNode][]*IREdge)
+	}
+	name := ir.PkgFuncName(fn)
+	node := new(IRNode)
+	node.AST = fn
+	node.Recursive = recursive
+	if g.IRNodes[name] == nil {
+		g.IRNodes[name] = node
+	}
+	// Create the hash key of the GlobalNodeMap.
+	nodeinfo := NodeMapInfo{
+		FuncName: name,
+		DstName:  "",
+		CallSite: -1,
+	}
+	// If the node exists, then update its node weight.
+	if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+		g.IRNodes[name].Flat = weights.NWeight
+		g.IRNodes[name].Cum = weights.NTotalWeight
+	}
+
+	// Recursively walk over the body of the function to create IRGraph edges.
+	g.createIRGraphEdge(fn, g.IRNodes[name], name)
+}
+
+// addEdge adds an edge between node1 and new node that points to f based on pprof graph using GlobalNodeMap. node1 represents the caller. Callee as f. CallSite n.
+func (g *IRGraph) addEdge(node1 *IRNode, f *ir.Func, n *ir.Node, name string, line string) {
+
+	splits := strings.Split(line, ":")
+	line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+
+	// Create an IRNode for Callee.
+	node2 := new(IRNode)
+	node2.AST = f
+	name2 := ir.PkgFuncName(f)
+
+	// Create a hash key using NodeMapInfo with Caller FuncName and Callee fname.
+	nodeinfo := NodeMapInfo{
+		FuncName: name,
+		DstName:  name2,
+		CallSite: int(line2),
+	}
+
+	// A new callee node? If so create the node with node weight.
+	// Remove this TODO.
+	if g.IRNodes[name2] == nil {
+		g.IRNodes[name2] = node2
+		nodeinfo2 := NodeMapInfo{
+			FuncName: name2,
+			DstName:  "",
+			CallSite: -1,
+		}
+		if weights, ok := GlobalNodeMap[nodeinfo2]; ok {
+			g.IRNodes[name2].Flat = weights.NWeight
+			g.IRNodes[name2].Cum = weights.NTotalWeight
+		}
+	}
+
+	if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+		node1.Flat = weights.NWeight
+		node1.Cum = weights.NTotalWeight
+
+		// Add edge in the IRGraph from caller to callee [callee is an interface type here which can have multiple targets].
+		info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: weights.EWeight, CallSite: line}
+		g.OutEdges[node1] = append(g.OutEdges[node1], info)
+		g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+	} else {
+		nodeinfo.DstName = ""
+		nodeinfo.CallSite = -1
+		if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+			node1.Flat = weights.NWeight
+			node1.Cum = weights.NTotalWeight
+			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
+			g.OutEdges[node1] = append(g.OutEdges[node1], info)
+			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+		} else {
+			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
+			g.OutEdges[node1] = append(g.OutEdges[node1], info)
+			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
+		}
+	}
+}
+
+// createIRGraphEdge traverses the nodes in the body of ir.Func and add edges between node1 which points to the ir.Func and the nodes in the body.
+func (g *IRGraph) createIRGraphEdge(fn *ir.Func, node1 *IRNode, name string) {
+	var doNode func(ir.Node) bool
+	doNode = func(n ir.Node) bool {
+		switch n.Op() {
+		default:
+			ir.DoChildren(n, doNode)
+		case ir.OCALLFUNC:
+			call := n.(*ir.CallExpr)
+			line := ir.Line(n)
+			// Find the callee function from the call site and add the edge.
+			f := inlCallee(call.X)
+			if f != nil {
+				g.addEdge(node1, f, &n, name, line)
+			}
+		case ir.OCALLMETH:
+			call := n.(*ir.CallExpr)
+			// Find the callee method from the call site and add the edge.
+			fn2 := ir.MethodExprName(call.X).Func
+			line := ir.Line(n)
+			g.addEdge(node1, fn2, &n, name, line)
+		}
+		return false
+	}
+	doNode(fn)
+}
+
+// WeightInPercentage converts profile weights to a percentage.
+func WeightInPercentage(value int64, total int64) float64 {
+	var ratio float64
+	// percentage is computed at the (weight/totalweights) * 100
+	// e.g. if edge weight is 30 and the sum of all the edges weight is 126
+	// the percentage will be 23.8%
+	if total != 0 {
+		ratio = (float64(value) / float64(total)) * 100
+	}
+	return ratio
+}
+
+// PrintWeightedCallGraphDOT prints IRGraph in DOT format..
+func PrintWeightedCallGraphDOT(threshold float64) {
+	fmt.Printf("digraph G {\n")
+	fmt.Printf("forcelabels=true;\n")
+
+	// List of functions in this package.
+	funcs := make(map[string]struct{})
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			funcs[name] = struct{}{}
+		}
+	})
+
+	// Determine nodes of DOT.
+	nodes := make(map[string]*ir.Func)
+	for name, _ := range funcs {
+		if n, ok := WeightedCG.IRNodes[name]; ok {
+			nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+			for _, e := range WeightedCG.OutEdges[n] {
+				if e.Weight != 0 {
+					p := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+					if p > threshold {
+						nodes[ir.PkgFuncName(e.Src.AST)] = e.Src.AST
+						nodes[ir.PkgFuncName(e.Dst.AST)] = e.Dst.AST
+					}
+				}
+			}
+			if nodeweight > threshold {
+				nodes[ir.PkgFuncName(n.AST)] = n.AST
+			}
+		}
+	}
+
+	// Print nodes.
+	for name, ast := range nodes {
+		if n, ok := WeightedCG.IRNodes[name]; ok {
+			nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+			if ast.Inl != nil {
+				fmt.Printf("\"%v\" [label=\"%v,freq=%.2f,inl_cost=%d\"];\n", ir.PkgFuncName(ast), ir.PkgFuncName(ast), nodeweight, ast.Inl.Cost)
+			} else {
+				fmt.Printf("\"%v\" [label=\"%v,freq=%.2f\"];\n", ir.PkgFuncName(ast), ir.PkgFuncName(ast), nodeweight)
+			}
+		}
+	}
+	// Print edges.
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			if n, ok := WeightedCG.IRNodes[name]; ok {
+				for _, e := range WeightedCG.OutEdges[n] {
+					if e.Weight != 0 {
+						p := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+						if p > threshold {
+							fmt.Printf("edge [color=red, style=solid];\n")
+						} else {
+							fmt.Printf("edge [color=black, style=solid];\n")
+						}
+
+						fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", ir.PkgFuncName(n.AST), ir.PkgFuncName(e.Dst.AST), p)
+					}
+				}
+			}
+		}
+	})
+	fmt.Printf("}\n")
+}
+
+// redirectEdges deletes the cur node out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdges(g *IRGraph, parent *IRNode, cur *IRNode) {
+	for _, outEdge := range g.OutEdges[cur] {
+		outEdge.Src = parent
+		g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+	}
+	delete(g.OutEdges, cur)
+}
+
+// RedirectEdges deletes and redirects out-edges from node cur based on inlining information via inlinedCallSites.
+func RedirectEdges(cur *IRNode, inlinedCallSites map[CallSiteInfo]struct{}) {
+	g := WeightedCG
+	for i, outEdge := range g.OutEdges[cur] {
+		if _, found := inlinedCallSites[CallSiteInfo{outEdge.CallSite, cur.AST}]; !found {
+			for _, InEdge := range g.InEdges[cur] {
+				if _, ok := inlinedCallSites[CallSiteInfo{InEdge.CallSite, InEdge.Src.AST}]; ok {
+					weight := calculateweight(g, InEdge.Src, cur)
+					redirectEdge(g, InEdge.Src, cur, outEdge, weight, i)
+				}
+			}
+		} else {
+			remove(g, cur, i, outEdge.Dst.AST.Nname)
+		}
+	}
+	removeall(g, cur)
+}
+
+// calculateweight calculates the weight of the new redirected edge.
+func calculateweight(g *IRGraph, parent *IRNode, cur *IRNode) int64 {
+	sum := int64(0)
+	pw := int64(0)
+	for _, InEdge := range g.InEdges[cur] {
+		sum = sum + InEdge.Weight
+		if InEdge.Src == parent {
+			pw = InEdge.Weight
+		}
+	}
+	weight := int64(0)
+	if sum != 0 {
+		weight = pw / sum
+	} else {
+		weight = pw
+	}
+	return weight
+}
+
+// redirectEdge deletes the cur-node's out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdge(g *IRGraph, parent *IRNode, cur *IRNode, outEdge *IREdge, weight int64, idx int) {
+	outEdge.Src = parent
+	outEdge.Weight = weight * outEdge.Weight
+	g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+	remove(g, cur, idx, outEdge.Dst.AST.Nname)
+}
+
+// remove deletes the cur-node's out-edges at index idx.
+func remove(g *IRGraph, cur *IRNode, idx int, name *ir.Name) {
+	if len(g.OutEdges[cur]) >= 2 {
+		g.OutEdges[cur][idx] = &IREdge{CallSite: "removed"}
+	} else {
+		delete(g.OutEdges, cur)
+	}
+}
+
+// removeall deletes all cur-node's out-edges that marked to be removed .
+func removeall(g *IRGraph, cur *IRNode) {
+	for i := len(g.OutEdges[cur]) - 1; i >= 0; i-- {
+		if g.OutEdges[cur][i].CallSite == "removed" {
+			g.OutEdges[cur][i] = g.OutEdges[cur][len(g.OutEdges[cur])-1]
+			g.OutEdges[cur] = g.OutEdges[cur][:len(g.OutEdges[cur])-1]
+		}
+	}
+}
+
+// inlCallee is same as the implementation for inl.go with one change. The change is that we do not invoke CanInline on a closure.
+func inlCallee(fn ir.Node) *ir.Func {
+	fn = ir.StaticValue(fn)
+	switch fn.Op() {
+	case ir.OMETHEXPR:
+		fn := fn.(*ir.SelectorExpr)
+		n := ir.MethodExprName(fn)
+		// Check that receiver type matches fn.X.
+		// TODO(mdempsky): Handle implicit dereference
+		// of pointer receiver argument?
+		if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) {
+			return nil
+		}
+		return n.Func
+	case ir.ONAME:
+		fn := fn.(*ir.Name)
+		if fn.Class == ir.PFUNC {
+			return fn.Func
+		}
+	case ir.OCLOSURE:
+		fn := fn.(*ir.ClosureExpr)
+		c := fn.Func
+		return c
+	}
+	return nil
+}
diff --git a/src/cmd/compile/internal/test/pgo_inl_test.go b/src/cmd/compile/internal/test/pgo_inl_test.go
new file mode 100644
index 0000000..d5ca156
--- /dev/null
+++ b/src/cmd/compile/internal/test/pgo_inl_test.go
@@ -0,0 +1,154 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+	"bufio"
+	"bytes"
+	"internal/testenv"
+	"io"
+	"io/ioutil"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+)
+
+// TestIntendedInlining tests that specific functions are inlined.
+// This allows refactoring for code clarity and re-use without fear that
+// changes to the compiler will cause silent performance regressions.
+func TestPGOIntendedInlining(t *testing.T) {
+	testenv.MustHaveGoRun(t)
+	t.Parallel()
+
+	// Make a temporary directory to work in.
+	tmpdir, err := ioutil.TempDir("", "TestCode")
+	if err != nil {
+		t.Fatalf("Failed to create temporary directory: %v", err)
+	}
+	//defer os.RemoveAll(tmpdir)
+
+	want := map[string][]string{
+		"pgo/inline": {
+			"(*BS).NS",
+		},
+	}
+
+	// The functions which are not expected to be inlined are as follows.
+	wantNot := map[string][]string{
+		"pgo/inline": {
+			// The calling edge main->A is hot and the cost of A is large than
+			// inlineHotCalleeMaxBudget.
+			"A",
+			// The calling edge BenchmarkA" -> benchmarkB is cold
+			// and the cost of A is large than inlineMaxBudget.
+			"benchmarkB",
+		},
+	}
+
+	must := map[string]bool{
+		"(*BS).NS": true,
+	}
+
+	notInlinedReason := make(map[string]string)
+	pkgs := make([]string, 0, len(want))
+	for pname, fnames := range want {
+		pkgs = append(pkgs, pname)
+		for _, fname := range fnames {
+			fullName := pname + "." + fname
+			if _, ok := notInlinedReason[fullName]; ok {
+				t.Errorf("duplicate func: %s", fullName)
+			}
+			notInlinedReason[fullName] = "unknown reason"
+		}
+	}
+
+	// If the compiler emit "cannot inline for function A", the entry A
+	// in expectedNotInlinedList will be removed.
+	expectedNotInlinedList := make(map[string]struct{})
+	for pname, fnames := range wantNot {
+		for _, fname := range fnames {
+			fullName := pname + "." + fname
+			expectedNotInlinedList[fullName] = struct{}{}
+		}
+	}
+
+	args := append([]string{"test", "-o", filepath.Join(tmpdir, "inline_hot.test"), "-bench=.", "-cpuprofile", filepath.Join(tmpdir, "inline_hot.pprof")}, pkgs...)
+	gotool := testenv.GoToolPath(t)
+	cmd := exec.Command(gotool, args...)
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	err = cmd.Run()
+	if err != nil {
+		t.Fatalf("Failed: %v:\nOut: %s\nStderr: %s\n", err, &stdout, &stderr)
+	}
+
+	args = append([]string{"test", "-run=nope", "-tags=", "-timeout=9m0s", "-gcflags=-m -m -profileuse " + filepath.Join(tmpdir, "inline_hot.pprof")}, pkgs...)
+	cmd = testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), args...))
+
+	pr, pw := io.Pipe()
+	cmd.Stdout = pw
+	cmd.Stderr = pw
+	cmdErr := make(chan error, 1)
+	go func() {
+		cmdErr <- cmd.Run()
+		pw.Close()
+	}()
+	scanner := bufio.NewScanner(pr)
+	curPkg := ""
+	canInline := regexp.MustCompile(`: can inline ([^ ]*)`)
+	haveInlined := regexp.MustCompile(`: inlining call to ([^ ]*)`)
+	cannotInline := regexp.MustCompile(`: cannot inline ([^ ]*): (.*)`)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.HasPrefix(line, "# ") {
+			curPkg = line[2:]
+			splits := strings.Split(curPkg, " ")
+			curPkg = splits[0]
+			continue
+		}
+		if m := haveInlined.FindStringSubmatch(line); m != nil {
+			fname := m[1]
+			delete(notInlinedReason, curPkg+"."+fname)
+			continue
+		}
+		if m := canInline.FindStringSubmatch(line); m != nil {
+			fname := m[1]
+			fullname := curPkg + "." + fname
+			// If function must be inlined somewhere, being inlinable is not enough
+			if _, ok := must[fullname]; !ok {
+				delete(notInlinedReason, fullname)
+				continue
+			}
+		}
+		if m := cannotInline.FindStringSubmatch(line); m != nil {
+			fname, reason := m[1], m[2]
+			fullName := curPkg + "." + fname
+			if _, ok := notInlinedReason[fullName]; ok {
+				// cmd/compile gave us a reason why
+				notInlinedReason[fullName] = reason
+			}
+			delete(expectedNotInlinedList, fullName)
+			continue
+		}
+	}
+	if err := <-cmdErr; err != nil {
+		t.Fatal(err)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatal(err)
+	}
+	for fullName, reason := range notInlinedReason {
+		t.Errorf("%s was not inlined: %s", fullName, reason)
+	}
+
+	// If the list expectedNotInlinedList is not empty, it indicates
+	// the functions in the expectedNotInlinedList are marked with caninline.
+	for fullName, _ := range expectedNotInlinedList {
+		t.Errorf("%s was expected not inlined", fullName)
+	}
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 0725039..a7901d9 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -64,6 +64,7 @@
 	"internal/goexperiment",
 	"internal/goversion",
 	"internal/pkgbits",
+	"internal/profile",
 	"internal/race",
 	"internal/saferio",
 	"internal/unsafeheader",
diff --git a/src/internal/profile/graph.go b/src/internal/profile/graph.go
new file mode 100644
index 0000000..4fcd081
--- /dev/null
+++ b/src/internal/profile/graph.go
@@ -0,0 +1,1173 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package graph collects a set of samples into a directed graph.
+
+// Original file location: https://github.com/google/pprof/tree/main/internal/graph/graph.go
+package profile
+
+import (
+	"fmt"
+	"math"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+var (
+	// Removes package name and method arguments for Java method names.
+	// See tests for examples.
+	javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
+	// Removes package name and method arguments for Go function names.
+	// See tests for examples.
+	goRegExp = regexp.MustCompile(`^(?:[\w\-\.]+\/)+(.+)`)
+	// Removes potential module versions in a package path.
+	goVerRegExp = regexp.MustCompile(`^(.*?)/v(?:[2-9]|[1-9][0-9]+)([./].*)$`)
+	// Strips C++ namespace prefix from a C++ function / method name.
+	// NOTE: Make sure to keep the template parameters in the name. Normally,
+	// template parameters are stripped from the C++ names but when
+	// -symbolize=demangle=templates flag is used, they will not be.
+	// See tests for examples.
+	cppRegExp                = regexp.MustCompile(`^(?:[_a-zA-Z]\w*::)+(_*[A-Z]\w*::~?[_a-zA-Z]\w*(?:<.*>)?)`)
+	cppAnonymousPrefixRegExp = regexp.MustCompile(`^\(anonymous namespace\)::`)
+)
+
+// This was obtained from https://github.com/google/pprof/blob/main/internal/graph/dotgraph.go.
+const maxNodelets = 4 // Number of nodelets for labels (both numeric and non)
+
+// Options encodes the options for constructing a graph
+type Options struct {
+	SampleValue       func(s []int64) int64      // Function to compute the value of a sample
+	SampleMeanDivisor func(s []int64) int64      // Function to compute the divisor for mean graphs, or nil
+	FormatTag         func(int64, string) string // Function to format a sample tag value into a string
+	ObjNames          bool                       // Always preserve obj filename
+	OrigFnNames       bool                       // Preserve original (eg mangled) function names
+
+	CallTree     bool // Build a tree instead of a graph
+	DropNegative bool // Drop nodes with overall negative values
+
+	KeptNodes NodeSet // If non-nil, only use nodes in this set
+}
+
+// Nodes is an ordered collection of graph nodes.
+type Nodes []*Node
+
+// Node is an entry on a profiling report. It represents a unique
+// program location.
+type Node struct {
+	// Info describes the source location associated to this node.
+	Info NodeInfo
+
+	// Function represents the function that this node belongs to. On
+	// graphs with sub-function resolution (eg line number or
+	// addresses), two nodes in a NodeMap that are part of the same
+	// function have the same value of Node.Function. If the Node
+	// represents the whole function, it points back to itself.
+	Function *Node
+
+	// Values associated to this node. Flat is exclusive to this node,
+	// Cum includes all descendents.
+	Flat, FlatDiv, Cum, CumDiv int64
+
+	// In and out Contains the nodes immediately reaching or reached by
+	// this node.
+	In, Out EdgeMap
+
+	// LabelTags provide additional information about subsets of a sample.
+	LabelTags TagMap
+
+	// NumericTags provide additional values for subsets of a sample.
+	// Numeric tags are optionally associated to a label tag. The key
+	// for NumericTags is the name of the LabelTag they are associated
+	// to, or "" for numeric tags not associated to a label tag.
+	NumericTags map[string]TagMap
+}
+
+// Graph summarizes a performance profile into a format that is
+// suitable for visualization.
+type Graph struct {
+	Nodes Nodes
+}
+
+// FlatValue returns the exclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) FlatValue() int64 {
+	if n.FlatDiv == 0 {
+		return n.Flat
+	}
+	return n.Flat / n.FlatDiv
+}
+
+// CumValue returns the inclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) CumValue() int64 {
+	if n.CumDiv == 0 {
+		return n.Cum
+	}
+	return n.Cum / n.CumDiv
+}
+
+// AddToEdge increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) {
+	n.AddToEdgeDiv(to, 0, v, residual, inline)
+}
+
+// AddToEdgeDiv increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) {
+	if n.Out[to] != to.In[n] {
+		panic(fmt.Errorf("asymmetric edges %v %v", *n, *to))
+	}
+
+	if e := n.Out[to]; e != nil {
+		e.WeightDiv += dv
+		e.Weight += v
+		if residual {
+			e.Residual = true
+		}
+		if !inline {
+			e.Inline = false
+		}
+		return
+	}
+
+	info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline}
+	n.Out[to] = info
+	to.In[n] = info
+}
+
+// NodeInfo contains the attributes for a node.
+type NodeInfo struct {
+	Name              string
+	OrigName          string
+	Address           uint64
+	File              string
+	StartLine, Lineno int
+	Objfile           string
+}
+
+// PrintableName calls the Node's Formatter function with a single space separator.
+func (i *NodeInfo) PrintableName() string {
+	return strings.Join(i.NameComponents(), " ")
+}
+
+// NameComponents returns the components of the printable name to be used for a node.
+func (i *NodeInfo) NameComponents() []string {
+	var name []string
+	if i.Address != 0 {
+		name = append(name, fmt.Sprintf("%016x", i.Address))
+	}
+	if fun := i.Name; fun != "" {
+		name = append(name, fun)
+	}
+
+	switch {
+	case i.Lineno != 0:
+		// User requested line numbers, provide what we have.
+		name = append(name, fmt.Sprintf("%s:%d", i.File, i.Lineno))
+	case i.File != "":
+		// User requested file name, provide it.
+		name = append(name, i.File)
+	case i.Name != "":
+		// User requested function name. It was already included.
+	case i.Objfile != "":
+		// Only binary name is available
+		name = append(name, "["+filepath.Base(i.Objfile)+"]")
+	default:
+		// Do not leave it empty if there is no information at all.
+		name = append(name, "<unknown>")
+	}
+	return name
+}
+
+// NodeMap maps from a node info struct to a node. It is used to merge
+// report entries with the same info.
+type NodeMap map[NodeInfo]*Node
+
+// NodeSet is a collection of node info structs.
+type NodeSet map[NodeInfo]bool
+
+// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set
+// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo
+// works as a unique identifier; however, in a tree multiple nodes may share
+// identical NodeInfos. A *Node does uniquely identify a node so we can use that
+// instead. Though a *Node also uniquely identifies a node in a graph,
+// currently, during trimming, graphs are rebuilt from scratch using only the
+// NodeSet, so there would not be the required context of the initial graph to
+// allow for the use of *Node.
+type NodePtrSet map[*Node]bool
+
+// FindOrInsertNode takes the info for a node and either returns a matching node
+// from the node map if one exists, or adds one to the map if one does not.
+// If kept is non-nil, nodes are only added if they can be located on it.
+func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node {
+	if kept != nil {
+		if _, ok := kept[info]; !ok {
+			return nil
+		}
+	}
+
+	if n, ok := nm[info]; ok {
+		return n
+	}
+
+	n := &Node{
+		Info:        info,
+		In:          make(EdgeMap),
+		Out:         make(EdgeMap),
+		LabelTags:   make(TagMap),
+		NumericTags: make(map[string]TagMap),
+	}
+	nm[info] = n
+	if info.Address == 0 && info.Lineno == 0 {
+		// This node represents the whole function, so point Function
+		// back to itself.
+		n.Function = n
+		return n
+	}
+	// Find a node that represents the whole function.
+	info.Address = 0
+	info.Lineno = 0
+	n.Function = nm.FindOrInsertNode(info, nil)
+	return n
+}
+
+// EdgeMap is used to represent the incoming/outgoing edges from a node.
+type EdgeMap map[*Node]*Edge
+
+// Edge contains any attributes to be represented about edges in a graph.
+type Edge struct {
+	Src, Dest *Node
+	// The summary weight of the edge
+	Weight, WeightDiv int64
+
+	// residual edges connect nodes that were connected through a
+	// separate node, which has been removed from the report.
+	Residual bool
+	// An inline edge represents a call that was inlined into the caller.
+	Inline bool
+}
+
+// WeightValue returns the weight value for this edge, normalizing if a
+// divisor is available.
+func (e *Edge) WeightValue() int64 {
+	if e.WeightDiv == 0 {
+		return e.Weight
+	}
+	return e.Weight / e.WeightDiv
+}
+
+// Tag represent sample annotations
+type Tag struct {
+	Name          string
+	Unit          string // Describe the value, "" for non-numeric tags
+	Value         int64
+	Flat, FlatDiv int64
+	Cum, CumDiv   int64
+}
+
+// FlatValue returns the exclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) FlatValue() int64 {
+	if t.FlatDiv == 0 {
+		return t.Flat
+	}
+	return t.Flat / t.FlatDiv
+}
+
+// CumValue returns the inclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) CumValue() int64 {
+	if t.CumDiv == 0 {
+		return t.Cum
+	}
+	return t.Cum / t.CumDiv
+}
+
+// TagMap is a collection of tags, classified by their name.
+type TagMap map[string]*Tag
+
+// SortTags sorts a slice of tags based on their weight.
+func SortTags(t []*Tag, flat bool) []*Tag {
+	ts := tags{t, flat}
+	sort.Sort(ts)
+	return ts.t
+}
+
+// New summarizes performance data from a profile into a graph.
+func New(prof *Profile, o *Options) *Graph {
+	if o.CallTree {
+		return newTree(prof, o)
+	}
+	g, _ := newGraph(prof, o)
+	return g
+}
+
+// newGraph computes a graph from a profile. It returns the graph, and
+// a map from the profile location indices to the corresponding graph
+// nodes.
+func newGraph(prof *Profile, o *Options) (*Graph, map[uint64]Nodes) {
+	nodes, locationMap := CreateNodes(prof, o)
+	seenNode := make(map[*Node]bool)
+	seenEdge := make(map[nodePair]bool)
+	for _, sample := range prof.Sample {
+		var w, dw int64
+		w = o.SampleValue(sample.Value)
+		if o.SampleMeanDivisor != nil {
+			dw = o.SampleMeanDivisor(sample.Value)
+		}
+		if dw == 0 && w == 0 {
+			continue
+		}
+		for k := range seenNode {
+			delete(seenNode, k)
+		}
+		for k := range seenEdge {
+			delete(seenEdge, k)
+		}
+		var parent *Node
+		// A residual edge goes over one or more nodes that were not kept.
+		residual := false
+
+		labels := joinLabels(sample)
+		// Group the sample frames, based on a global map.
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			l := sample.Location[i]
+			locNodes := locationMap[l.ID]
+			for ni := len(locNodes) - 1; ni >= 0; ni-- {
+				n := locNodes[ni]
+				if n == nil {
+					residual = true
+					continue
+				}
+				// Add cum weight to all nodes in stack, avoiding double counting.
+				if _, ok := seenNode[n]; !ok {
+					seenNode[n] = true
+					n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+				}
+				// Update edge weights for all edges in stack, avoiding double counting.
+				if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent {
+					seenEdge[nodePair{n, parent}] = true
+					parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1)
+				}
+				parent = n
+				residual = false
+			}
+		}
+		if parent != nil && !residual {
+			// Add flat weight to leaf node.
+			parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+		}
+	}
+
+	return selectNodesForGraph(nodes, o.DropNegative), locationMap
+}
+
+func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph {
+	// Collect nodes into a graph.
+	gNodes := make(Nodes, 0, len(nodes))
+	for _, n := range nodes {
+		if n == nil {
+			continue
+		}
+		if n.Cum == 0 && n.Flat == 0 {
+			continue
+		}
+		if dropNegative && isNegative(n) {
+			continue
+		}
+		gNodes = append(gNodes, n)
+	}
+	return &Graph{gNodes}
+}
+
+type nodePair struct {
+	src, dest *Node
+}
+
+func newTree(prof *Profile, o *Options) (g *Graph) {
+	parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample))
+	for _, sample := range prof.Sample {
+		var w, dw int64
+		w = o.SampleValue(sample.Value)
+		if o.SampleMeanDivisor != nil {
+			dw = o.SampleMeanDivisor(sample.Value)
+		}
+		if dw == 0 && w == 0 {
+			continue
+		}
+		var parent *Node
+		labels := joinLabels(sample)
+		// Group the sample frames, based on a per-node map.
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			l := sample.Location[i]
+			lines := l.Line
+			if len(lines) == 0 {
+				lines = []Line{{}} // Create empty line to include location info.
+			}
+			for lidx := len(lines) - 1; lidx >= 0; lidx-- {
+				nodeMap := parentNodeMap[parent]
+				if nodeMap == nil {
+					nodeMap = make(NodeMap)
+					parentNodeMap[parent] = nodeMap
+				}
+				n := nodeMap.findOrInsertLine(l, lines[lidx], o)
+				if n == nil {
+					continue
+				}
+				n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+				if parent != nil {
+					parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1)
+				}
+				parent = n
+			}
+		}
+		if parent != nil {
+			parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+		}
+	}
+
+	nodes := make(Nodes, len(prof.Location))
+	for _, nm := range parentNodeMap {
+		nodes = append(nodes, nm.nodes()...)
+	}
+	return selectNodesForGraph(nodes, o.DropNegative)
+}
+
+// ShortenFunctionName returns a shortened version of a function's name.
+func ShortenFunctionName(f string) string {
+	f = cppAnonymousPrefixRegExp.ReplaceAllString(f, "")
+	f = goVerRegExp.ReplaceAllString(f, `${1}${2}`)
+	for _, re := range []*regexp.Regexp{goRegExp, javaRegExp, cppRegExp} {
+		if matches := re.FindStringSubmatch(f); len(matches) >= 2 {
+			return strings.Join(matches[1:], "")
+		}
+	}
+	return f
+}
+
+// TrimTree trims a Graph in forest form, keeping only the nodes in kept. This
+// will not work correctly if even a single node has multiple parents.
+func (g *Graph) TrimTree(kept NodePtrSet) {
+	// Creates a new list of nodes
+	oldNodes := g.Nodes
+	g.Nodes = make(Nodes, 0, len(kept))
+
+	for _, cur := range oldNodes {
+		// A node may not have multiple parents
+		if len(cur.In) > 1 {
+			panic("TrimTree only works on trees")
+		}
+
+		// If a node should be kept, add it to the new list of nodes
+		if _, ok := kept[cur]; ok {
+			g.Nodes = append(g.Nodes, cur)
+			continue
+		}
+
+		// If a node has no parents, then delete all of the in edges of its
+		// children to make them each roots of their own trees.
+		if len(cur.In) == 0 {
+			for _, outEdge := range cur.Out {
+				delete(outEdge.Dest.In, cur)
+			}
+			continue
+		}
+
+		// Get the parent. This works since at this point cur.In must contain only
+		// one element.
+		if len(cur.In) != 1 {
+			panic("Get parent assertion failed. cur.In expected to be of length 1.")
+		}
+		var parent *Node
+		for _, edge := range cur.In {
+			parent = edge.Src
+		}
+
+		parentEdgeInline := parent.Out[cur].Inline
+
+		// Remove the edge from the parent to this node
+		delete(parent.Out, cur)
+
+		// Reconfigure every edge from the current node to now begin at the parent.
+		for _, outEdge := range cur.Out {
+			child := outEdge.Dest
+
+			delete(child.In, cur)
+			child.In[parent] = outEdge
+			parent.Out[child] = outEdge
+
+			outEdge.Src = parent
+			outEdge.Residual = true
+			// If the edge from the parent to the current node and the edge from the
+			// current node to the child are both inline, then this resulting residual
+			// edge should also be inline
+			outEdge.Inline = parentEdgeInline && outEdge.Inline
+		}
+	}
+	g.RemoveRedundantEdges()
+}
+
+func joinLabels(s *Sample) string {
+	if len(s.Label) == 0 {
+		return ""
+	}
+
+	var labels []string
+	for key, vals := range s.Label {
+		for _, v := range vals {
+			labels = append(labels, key+":"+v)
+		}
+	}
+	sort.Strings(labels)
+	return strings.Join(labels, `\n`)
+}
+
+// isNegative returns true if the node is considered as "negative" for the
+// purposes of drop_negative.
+func isNegative(n *Node) bool {
+	switch {
+	case n.Flat < 0:
+		return true
+	case n.Flat == 0 && n.Cum < 0:
+		return true
+	default:
+		return false
+	}
+}
+
+// CreateNodes creates graph nodes for all locations in a profile. It
+// returns set of all nodes, plus a mapping of each location to the
+// set of corresponding nodes (one per location.Line).
+func CreateNodes(prof *Profile, o *Options) (Nodes, map[uint64]Nodes) {
+	locations := make(map[uint64]Nodes, len(prof.Location))
+	nm := make(NodeMap, len(prof.Location))
+	for _, l := range prof.Location {
+		lines := l.Line
+		if len(lines) == 0 {
+			lines = []Line{{}} // Create empty line to include location info.
+		}
+		nodes := make(Nodes, len(lines))
+		for ln := range lines {
+			nodes[ln] = nm.findOrInsertLine(l, lines[ln], o)
+		}
+		locations[l.ID] = nodes
+	}
+	return nm.nodes(), locations
+}
+
+func (nm NodeMap) nodes() Nodes {
+	nodes := make(Nodes, 0, len(nm))
+	for _, n := range nm {
+		nodes = append(nodes, n)
+	}
+	return nodes
+}
+
+func (nm NodeMap) findOrInsertLine(l *Location, li Line, o *Options) *Node {
+	var objfile string
+	if m := l.Mapping; m != nil && m.File != "" {
+		objfile = m.File
+	}
+
+	if ni := nodeInfo(l, li, objfile, o); ni != nil {
+		return nm.FindOrInsertNode(*ni, o.KeptNodes)
+	}
+	return nil
+}
+
+func nodeInfo(l *Location, line Line, objfile string, o *Options) *NodeInfo {
+	if line.Function == nil {
+		return &NodeInfo{Address: l.Address, Objfile: objfile}
+	}
+	ni := &NodeInfo{
+		Address: l.Address,
+		Lineno:  int(line.Line),
+		Name:    line.Function.Name,
+	}
+	if fname := line.Function.Filename; fname != "" {
+		ni.File = filepath.Clean(fname)
+	}
+	if o.OrigFnNames {
+		ni.OrigName = line.Function.SystemName
+	}
+	if o.ObjNames || (ni.Name == "" && ni.OrigName == "") {
+		ni.Objfile = objfile
+		ni.StartLine = int(line.Function.StartLine)
+	}
+	return ni
+}
+
+type tags struct {
+	t    []*Tag
+	flat bool
+}
+
+func (t tags) Len() int      { return len(t.t) }
+func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] }
+func (t tags) Less(i, j int) bool {
+	if !t.flat {
+		if t.t[i].Cum != t.t[j].Cum {
+			return abs64(t.t[i].Cum) > abs64(t.t[j].Cum)
+		}
+	}
+	if t.t[i].Flat != t.t[j].Flat {
+		return abs64(t.t[i].Flat) > abs64(t.t[j].Flat)
+	}
+	return t.t[i].Name < t.t[j].Name
+}
+
+// Sum adds the flat and cum values of a set of nodes.
+func (ns Nodes) Sum() (flat int64, cum int64) {
+	for _, n := range ns {
+		flat += n.Flat
+		cum += n.Cum
+	}
+	return
+}
+
+func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) {
+	// Update sample value
+	if flat {
+		n.FlatDiv += dw
+		n.Flat += w
+	} else {
+		n.CumDiv += dw
+		n.Cum += w
+	}
+
+	// Add string tags
+	if labels != "" {
+		t := n.LabelTags.findOrAddTag(labels, "", 0)
+		if flat {
+			t.FlatDiv += dw
+			t.Flat += w
+		} else {
+			t.CumDiv += dw
+			t.Cum += w
+		}
+	}
+
+	numericTags := n.NumericTags[labels]
+	if numericTags == nil {
+		numericTags = TagMap{}
+		n.NumericTags[labels] = numericTags
+	}
+	// Add numeric tags
+	if format == nil {
+		format = defaultLabelFormat
+	}
+	for k, nvals := range numLabel {
+		units := numUnit[k]
+		for i, v := range nvals {
+			var t *Tag
+			if len(units) > 0 {
+				t = numericTags.findOrAddTag(format(v, units[i]), units[i], v)
+			} else {
+				t = numericTags.findOrAddTag(format(v, k), k, v)
+			}
+			if flat {
+				t.FlatDiv += dw
+				t.Flat += w
+			} else {
+				t.CumDiv += dw
+				t.Cum += w
+			}
+		}
+	}
+}
+
+func defaultLabelFormat(v int64, key string) string {
+	return strconv.FormatInt(v, 10)
+}
+
+func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag {
+	l := m[label]
+	if l == nil {
+		l = &Tag{
+			Name:  label,
+			Unit:  unit,
+			Value: value,
+		}
+		m[label] = l
+	}
+	return l
+}
+
+// String returns a text representation of a graph, for debugging purposes.
+func (g *Graph) String() string {
+	var s []string
+
+	nodeIndex := make(map[*Node]int, len(g.Nodes))
+
+	for i, n := range g.Nodes {
+		nodeIndex[n] = i + 1
+	}
+
+	for i, n := range g.Nodes {
+		name := n.Info.PrintableName()
+		var in, out []int
+
+		for _, from := range n.In {
+			in = append(in, nodeIndex[from.Src])
+		}
+		for _, to := range n.Out {
+			out = append(out, nodeIndex[to.Dest])
+		}
+		s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out))
+	}
+	return strings.Join(s, "\n")
+}
+
+// DiscardLowFrequencyNodes returns a set of the nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet {
+	return makeNodeSet(g.Nodes, nodeCutoff)
+}
+
+// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet {
+	cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff)
+	kept := make(NodePtrSet, len(cutNodes))
+	for _, n := range cutNodes {
+		kept[n] = true
+	}
+	return kept
+}
+
+func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet {
+	cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff)
+	kept := make(NodeSet, len(cutNodes))
+	for _, n := range cutNodes {
+		kept[n.Info] = true
+	}
+	return kept
+}
+
+// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater
+// than or equal to cutoff.
+func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes {
+	cutoffNodes := make(Nodes, 0, len(nodes))
+	for _, n := range nodes {
+		if abs64(n.Cum) < nodeCutoff {
+			continue
+		}
+		cutoffNodes = append(cutoffNodes, n)
+	}
+	return cutoffNodes
+}
+
+// TrimLowFrequencyTags removes tags that have less than
+// the specified weight.
+func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) {
+	// Remove nodes with value <= total*nodeFraction
+	for _, n := range g.Nodes {
+		n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff)
+		for s, nt := range n.NumericTags {
+			n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff)
+		}
+	}
+}
+
+func trimLowFreqTags(tags TagMap, minValue int64) TagMap {
+	kept := TagMap{}
+	for s, t := range tags {
+		if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue {
+			kept[s] = t
+		}
+	}
+	return kept
+}
+
+// TrimLowFrequencyEdges removes edges that have less than
+// the specified weight. Returns the number of edges removed
+func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int {
+	var droppedEdges int
+	for _, n := range g.Nodes {
+		for src, e := range n.In {
+			if abs64(e.Weight) < edgeCutoff {
+				delete(n.In, src)
+				delete(src.Out, n)
+				droppedEdges++
+			}
+		}
+	}
+	return droppedEdges
+}
+
+// SortNodes sorts the nodes in a graph based on a specific heuristic.
+func (g *Graph) SortNodes(cum bool, visualMode bool) {
+	// Sort nodes based on requested mode
+	switch {
+	case visualMode:
+		// Specialized sort to produce a more visually-interesting graph
+		g.Nodes.Sort(EntropyOrder)
+	case cum:
+		g.Nodes.Sort(CumNameOrder)
+	default:
+		g.Nodes.Sort(FlatNameOrder)
+	}
+}
+
+// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph.
+func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet {
+	set := make(NodePtrSet)
+	for _, node := range g.selectTopNodes(maxNodes, visualMode) {
+		set[node] = true
+	}
+	return set
+}
+
+// SelectTopNodes returns a set of the top maxNodes nodes in a graph.
+func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet {
+	return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0)
+}
+
+// selectTopNodes returns a slice of the top maxNodes nodes in a graph.
+func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes {
+	if maxNodes > 0 {
+		if visualMode {
+			var count int
+			// If generating a visual graph, count tags as nodes. Update
+			// maxNodes to account for them.
+			for i, n := range g.Nodes {
+				tags := countTags(n)
+				if tags > maxNodelets {
+					tags = maxNodelets
+				}
+				if count += tags + 1; count >= maxNodes {
+					maxNodes = i + 1
+					break
+				}
+			}
+		}
+	}
+	if maxNodes > len(g.Nodes) {
+		maxNodes = len(g.Nodes)
+	}
+	return g.Nodes[:maxNodes]
+}
+
+// countTags counts the tags with flat count. This underestimates the
+// number of tags being displayed, but in practice is close enough.
+func countTags(n *Node) int {
+	count := 0
+	for _, e := range n.LabelTags {
+		if e.Flat != 0 {
+			count++
+		}
+	}
+	for _, t := range n.NumericTags {
+		for _, e := range t {
+			if e.Flat != 0 {
+				count++
+			}
+		}
+	}
+	return count
+}
+
+// RemoveRedundantEdges removes residual edges if the destination can
+// be reached through another path. This is done to simplify the graph
+// while preserving connectivity.
+func (g *Graph) RemoveRedundantEdges() {
+	// Walk the nodes and outgoing edges in reverse order to prefer
+	// removing edges with the lowest weight.
+	for i := len(g.Nodes); i > 0; i-- {
+		n := g.Nodes[i-1]
+		in := n.In.Sort()
+		for j := len(in); j > 0; j-- {
+			e := in[j-1]
+			if !e.Residual {
+				// Do not remove edges heavier than a non-residual edge, to
+				// avoid potential confusion.
+				break
+			}
+			if isRedundantEdge(e) {
+				delete(e.Src.Out, e.Dest)
+				delete(e.Dest.In, e.Src)
+			}
+		}
+	}
+}
+
+// isRedundantEdge determines if there is a path that allows e.Src
+// to reach e.Dest after removing e.
+func isRedundantEdge(e *Edge) bool {
+	src, n := e.Src, e.Dest
+	seen := map[*Node]bool{n: true}
+	queue := Nodes{n}
+	for len(queue) > 0 {
+		n := queue[0]
+		queue = queue[1:]
+		for _, ie := range n.In {
+			if e == ie || seen[ie.Src] {
+				continue
+			}
+			if ie.Src == src {
+				return true
+			}
+			seen[ie.Src] = true
+			queue = append(queue, ie.Src)
+		}
+	}
+	return false
+}
+
+// nodeSorter is a mechanism used to allow a report to be sorted
+// in different ways.
+type nodeSorter struct {
+	rs   Nodes
+	less func(l, r *Node) bool
+}
+
+func (s nodeSorter) Len() int           { return len(s.rs) }
+func (s nodeSorter) Swap(i, j int)      { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] }
+func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) }
+
+// Sort reorders a slice of nodes based on the specified ordering
+// criteria. The result is sorted in decreasing order for (absolute)
+// numeric quantities, alphabetically for text, and increasing for
+// addresses.
+func (ns Nodes) Sort(o NodeOrder) error {
+	var s nodeSorter
+
+	switch o {
+	case FlatNameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+					return iv < jv
+				}
+				if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+					return iv > jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case FlatCumNameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case NameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.Name, r.Info.Name; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case FileOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.File, r.Info.File; iv != jv {
+					return iv < jv
+				}
+				if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case AddressOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.Address, r.Info.Address; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case CumNameOrder, EntropyOrder:
+		// Hold scoring for score-based ordering
+		var score map[*Node]int64
+		scoreOrder := func(l, r *Node) bool {
+			if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv {
+				return iv > jv
+			}
+			if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+				return iv < jv
+			}
+			if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+				return iv > jv
+			}
+			return compareNodes(l, r)
+		}
+
+		switch o {
+		case CumNameOrder:
+			score = make(map[*Node]int64, len(ns))
+			for _, n := range ns {
+				score[n] = n.Cum
+			}
+			s = nodeSorter{ns, scoreOrder}
+		case EntropyOrder:
+			score = make(map[*Node]int64, len(ns))
+			for _, n := range ns {
+				score[n] = entropyScore(n)
+			}
+			s = nodeSorter{ns, scoreOrder}
+		}
+	default:
+		return fmt.Errorf("report: unrecognized sort ordering: %d", o)
+	}
+	sort.Sort(s)
+	return nil
+}
+
+// compareNodes compares two nodes to provide a deterministic ordering
+// between them. Two nodes cannot have the same Node.Info value.
+func compareNodes(l, r *Node) bool {
+	return fmt.Sprint(l.Info) < fmt.Sprint(r.Info)
+}
+
+// entropyScore computes a score for a node representing how important
+// it is to include this node on a graph visualization. It is used to
+// sort the nodes and select which ones to display if we have more
+// nodes than desired in the graph. This number is computed by looking
+// at the flat and cum weights of the node and the incoming/outgoing
+// edges. The fundamental idea is to penalize nodes that have a simple
+// fallthrough from their incoming to the outgoing edge.
+func entropyScore(n *Node) int64 {
+	score := float64(0)
+
+	if len(n.In) == 0 {
+		score++ // Favor entry nodes
+	} else {
+		score += edgeEntropyScore(n, n.In, 0)
+	}
+
+	if len(n.Out) == 0 {
+		score++ // Favor leaf nodes
+	} else {
+		score += edgeEntropyScore(n, n.Out, n.Flat)
+	}
+
+	return int64(score*float64(n.Cum)) + n.Flat
+}
+
+// edgeEntropyScore computes the entropy value for a set of edges
+// coming in or out of a node. Entropy (as defined in information
+// theory) refers to the amount of information encoded by the set of
+// edges. A set of edges that have a more interesting distribution of
+// samples gets a higher score.
+func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 {
+	score := float64(0)
+	total := self
+	for _, e := range edges {
+		if e.Weight > 0 {
+			total += abs64(e.Weight)
+		}
+	}
+	if total != 0 {
+		for _, e := range edges {
+			frac := float64(abs64(e.Weight)) / float64(total)
+			score += -frac * math.Log2(frac)
+		}
+		if self > 0 {
+			frac := float64(abs64(self)) / float64(total)
+			score += -frac * math.Log2(frac)
+		}
+	}
+	return score
+}
+
+// NodeOrder sets the ordering for a Sort operation
+type NodeOrder int
+
+// Sorting options for node sort.
+const (
+	FlatNameOrder NodeOrder = iota
+	FlatCumNameOrder
+	CumNameOrder
+	NameOrder
+	FileOrder
+	AddressOrder
+	EntropyOrder
+)
+
+// Sort returns a slice of the edges in the map, in a consistent
+// order. The sort order is first based on the edge weight
+// (higher-to-lower) and then by the node names to avoid flakiness.
+func (e EdgeMap) Sort() []*Edge {
+	el := make(edgeList, 0, len(e))
+	for _, w := range e {
+		el = append(el, w)
+	}
+
+	sort.Sort(el)
+	return el
+}
+
+// Sum returns the total weight for a set of nodes.
+func (e EdgeMap) Sum() int64 {
+	var ret int64
+	for _, edge := range e {
+		ret += edge.Weight
+	}
+	return ret
+}
+
+type edgeList []*Edge
+
+func (el edgeList) Len() int {
+	return len(el)
+}
+
+func (el edgeList) Less(i, j int) bool {
+	if el[i].Weight != el[j].Weight {
+		return abs64(el[i].Weight) > abs64(el[j].Weight)
+	}
+
+	from1 := el[i].Src.Info.PrintableName()
+	from2 := el[j].Src.Info.PrintableName()
+	if from1 != from2 {
+		return from1 < from2
+	}
+
+	to1 := el[i].Dest.Info.PrintableName()
+	to2 := el[j].Dest.Info.PrintableName()
+
+	return to1 < to2
+}
+
+func (el edgeList) Swap(i, j int) {
+	el[i], el[j] = el[j], el[i]
+}
+
+func abs64(i int64) int64 {
+	if i < 0 {
+		return -i
+	}
+	return i
+}
diff --git a/src/internal/profile/legacy_java_profile.go b/src/internal/profile/legacy_java_profile.go
new file mode 100644
index 0000000..4f9f2a7
--- /dev/null
+++ b/src/internal/profile/legacy_java_profile.go
@@ -0,0 +1,317 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file implements parsers to convert java legacy profiles into
+// the profile.proto format.
+
+// Original file location: https://github.com/google/pprof/blob/main/profile/legacy_java_profile.go
+
+package profile
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+var (
+	attributeRx            = regexp.MustCompile(`([\w ]+)=([\w ]+)`)
+	javaSampleRx           = regexp.MustCompile(` *(\d+) +(\d+) +@ +([ x0-9a-f]*)`)
+	javaLocationRx         = regexp.MustCompile(`^\s*0x([[:xdigit:]]+)\s+(.*)\s*$`)
+	javaLocationFileLineRx = regexp.MustCompile(`^(.*)\s+\((.+):(-?[[:digit:]]+)\)$`)
+	javaLocationPathRx     = regexp.MustCompile(`^(.*)\s+\((.*)\)$`)
+)
+
+// javaCPUProfile returns a new Profile from profilez data.
+// b is the profile bytes after the header, period is the profiling
+// period, and parse is a function to parse 8-byte chunks from the
+// profile in its native endianness.
+func javaCPUProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
+	p := &Profile{
+		Period:     period * 1000,
+		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
+		SampleType: []*ValueType{{Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}},
+	}
+	var err error
+	var locs map[uint64]*Location
+	if b, locs, err = parseCPUSamples(b, parse, false, p); err != nil {
+		return nil, err
+	}
+
+	if err = parseJavaLocations(b, locs, p); err != nil {
+		return nil, err
+	}
+
+	// Strip out addresses for better merge.
+	if err = p.Aggregate(true, true, true, true, false); err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+// parseJavaProfile returns a new profile from heapz or contentionz
+// data. b is the profile bytes after the header.
+func parseJavaProfile(b []byte) (*Profile, error) {
+	h := bytes.SplitAfterN(b, []byte("\n"), 2)
+	if len(h) < 2 {
+		return nil, errUnrecognized
+	}
+
+	p := &Profile{
+		PeriodType: &ValueType{},
+	}
+	header := string(bytes.TrimSpace(h[0]))
+
+	var err error
+	var pType string
+	switch header {
+	case "--- heapz 1 ---":
+		pType = "heap"
+	case "--- contentionz 1 ---":
+		pType = "contention"
+	default:
+		return nil, errUnrecognized
+	}
+
+	if b, err = parseJavaHeader(pType, h[1], p); err != nil {
+		return nil, err
+	}
+	var locs map[uint64]*Location
+	if b, locs, err = parseJavaSamples(pType, b, p); err != nil {
+		return nil, err
+	}
+	if err = parseJavaLocations(b, locs, p); err != nil {
+		return nil, err
+	}
+
+	// Strip out addresses for better merge.
+	if err = p.Aggregate(true, true, true, true, false); err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+// parseJavaHeader parses the attribute section on a java profile and
+// populates a profile. Returns the remainder of the buffer after all
+// attributes.
+func parseJavaHeader(pType string, b []byte, p *Profile) ([]byte, error) {
+	nextNewLine := bytes.IndexByte(b, byte('\n'))
+	for nextNewLine != -1 {
+		line := string(bytes.TrimSpace(b[0:nextNewLine]))
+		if line != "" {
+			h := attributeRx.FindStringSubmatch(line)
+			if h == nil {
+				// Not a valid attribute, exit.
+				return b, nil
+			}
+
+			attribute, value := strings.TrimSpace(h[1]), strings.TrimSpace(h[2])
+			var err error
+			switch pType + "/" + attribute {
+			case "heap/format", "cpu/format", "contention/format":
+				if value != "java" {
+					return nil, errUnrecognized
+				}
+			case "heap/resolution":
+				p.SampleType = []*ValueType{
+					{Type: "inuse_objects", Unit: "count"},
+					{Type: "inuse_space", Unit: value},
+				}
+			case "contention/resolution":
+				p.SampleType = []*ValueType{
+					{Type: "contentions", Unit: "count"},
+					{Type: "delay", Unit: value},
+				}
+			case "contention/sampling period":
+				p.PeriodType = &ValueType{
+					Type: "contentions", Unit: "count",
+				}
+				if p.Period, err = strconv.ParseInt(value, 0, 64); err != nil {
+					return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+				}
+			case "contention/ms since reset":
+				millis, err := strconv.ParseInt(value, 0, 64)
+				if err != nil {
+					return nil, fmt.Errorf("failed to parse attribute %s: %v", line, err)
+				}
+				p.DurationNanos = millis * 1000 * 1000
+			default:
+				return nil, errUnrecognized
+			}
+		}
+		// Grab next line.
+		b = b[nextNewLine+1:]
+		nextNewLine = bytes.IndexByte(b, byte('\n'))
+	}
+	return b, nil
+}
+
+// parseJavaSamples parses the samples from a java profile and
+// populates the Samples in a profile. Returns the remainder of the
+// buffer after the samples.
+func parseJavaSamples(pType string, b []byte, p *Profile) ([]byte, map[uint64]*Location, error) {
+	nextNewLine := bytes.IndexByte(b, byte('\n'))
+	locs := make(map[uint64]*Location)
+	for nextNewLine != -1 {
+		line := string(bytes.TrimSpace(b[0:nextNewLine]))
+		if line != "" {
+			sample := javaSampleRx.FindStringSubmatch(line)
+			if sample == nil {
+				// Not a valid sample, exit.
+				return b, locs, nil
+			}
+
+			// Java profiles have data/fields inverted compared to other
+			// profile types.
+			var err error
+			value1, value2, value3 := sample[2], sample[1], sample[3]
+			addrs := parseHexAddresses(value3)
+			if err != nil {
+				return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
+			}
+
+			var sloc []*Location
+			for _, addr := range addrs {
+				loc := locs[addr]
+				if locs[addr] == nil {
+					loc = &Location{
+						Address: addr,
+					}
+					p.Location = append(p.Location, loc)
+					locs[addr] = loc
+				}
+				sloc = append(sloc, loc)
+			}
+			s := &Sample{
+				Value:    make([]int64, 2),
+				Location: sloc,
+			}
+
+			if s.Value[0], err = strconv.ParseInt(value1, 0, 64); err != nil {
+				return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+			}
+			if s.Value[1], err = strconv.ParseInt(value2, 0, 64); err != nil {
+				return nil, nil, fmt.Errorf("parsing sample %s: %v", line, err)
+			}
+
+			switch pType {
+			case "heap":
+				const javaHeapzSamplingRate = 524288 // 512K
+				if s.Value[0] == 0 {
+					return nil, nil, fmt.Errorf("parsing sample %s: second value must be non-zero", line)
+				}
+				s.NumLabel = map[string][]int64{"bytes": {s.Value[1] / s.Value[0]}}
+				s.Value[0], s.Value[1] = scaleHeapSample(s.Value[0], s.Value[1], javaHeapzSamplingRate)
+			case "contention":
+				if period := p.Period; period != 0 {
+					s.Value[0] = s.Value[0] * p.Period
+					s.Value[1] = s.Value[1] * p.Period
+				}
+			}
+			p.Sample = append(p.Sample, s)
+		}
+		// Grab next line.
+		b = b[nextNewLine+1:]
+		nextNewLine = bytes.IndexByte(b, byte('\n'))
+	}
+	return b, locs, nil
+}
+
+// parseJavaLocations parses the location information in a java
+// profile and populates the Locations in a profile. It uses the
+// location addresses from the profile as both the ID of each
+// location.
+func parseJavaLocations(b []byte, locs map[uint64]*Location, p *Profile) error {
+	r := bytes.NewBuffer(b)
+	fns := make(map[string]*Function)
+	for {
+		line, err := r.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+			if line == "" {
+				break
+			}
+		}
+
+		if line = strings.TrimSpace(line); line == "" {
+			continue
+		}
+
+		jloc := javaLocationRx.FindStringSubmatch(line)
+		if len(jloc) != 3 {
+			continue
+		}
+		addr, err := strconv.ParseUint(jloc[1], 16, 64)
+		if err != nil {
+			return fmt.Errorf("parsing sample %s: %v", line, err)
+		}
+		loc := locs[addr]
+		if loc == nil {
+			// Unused/unseen
+			continue
+		}
+		var lineFunc, lineFile string
+		var lineNo int64
+
+		if fileLine := javaLocationFileLineRx.FindStringSubmatch(jloc[2]); len(fileLine) == 4 {
+			// Found a line of the form: "function (file:line)"
+			lineFunc, lineFile = fileLine[1], fileLine[2]
+			if n, err := strconv.ParseInt(fileLine[3], 10, 64); err == nil && n > 0 {
+				lineNo = n
+			}
+		} else if filePath := javaLocationPathRx.FindStringSubmatch(jloc[2]); len(filePath) == 3 {
+			// If there's not a file:line, it's a shared library path.
+			// The path isn't interesting, so just give the .so.
+			lineFunc, lineFile = filePath[1], filepath.Base(filePath[2])
+		} else if strings.Contains(jloc[2], "generated stub/JIT") {
+			lineFunc = "STUB"
+		} else {
+			// Treat whole line as the function name. This is used by the
+			// java agent for internal states such as "GC" or "VM".
+			lineFunc = jloc[2]
+		}
+		fn := fns[lineFunc]
+
+		if fn == nil {
+			fn = &Function{
+				Name:       lineFunc,
+				SystemName: lineFunc,
+				Filename:   lineFile,
+			}
+			fns[lineFunc] = fn
+			p.Function = append(p.Function, fn)
+		}
+		loc.Line = []Line{
+			{
+				Function: fn,
+				Line:     lineNo,
+			},
+		}
+		loc.Address = 0
+	}
+
+	p.remapLocationIDs()
+	p.remapFunctionIDs()
+	p.remapMappingIDs()
+
+	return nil
+}
diff --git a/src/pgo/inline/inline_hot.go b/src/pgo/inline/inline_hot.go
new file mode 100644
index 0000000..d2e0bf7
--- /dev/null
+++ b/src/pgo/inline/inline_hot.go
@@ -0,0 +1,81 @@
+package main
+
+import (
+	"time"
+)
+
+type BS struct {
+	length uint
+	s      []uint64
+}
+
+const wSize = uint(64)
+const lWSize = uint(6)
+
+func D(i uint) int {
+	return int((i + (wSize - 1)) >> lWSize)
+}
+
+func N(length uint) (bs *BS) {
+	bs = &BS{
+		length,
+		make([]uint64, D(length)),
+	}
+
+	return bs
+}
+
+func (b *BS) S(i uint) *BS {
+	b.s[i>>lWSize] |= 1 << (i & (wSize - 1))
+	return b
+}
+
+var jn = [...]byte{
+	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+func T(v uint64) uint {
+	return uint(jn[((v&-v)*0x03f79d71b4ca8b09)>>58])
+}
+
+func (b *BS) NS(i uint) (uint, bool) {
+	x := int(i >> lWSize)
+	if x >= len(b.s) {
+		return 0, false
+	}
+	w := b.s[x]
+	w = w >> (i & (wSize - 1))
+	if w != 0 {
+		return i + T(w), true
+	}
+	x = x + 1
+	for x < len(b.s) {
+		if b.s[x] != 0 {
+			return uint(x)*wSize + T(b.s[x]), true
+		}
+		x = x + 1
+
+	}
+	return 0, false
+}
+
+func A() {
+	s := N(100000)
+	for i := 0; i < 1000; i += 30 {
+		s.S(uint(i))
+	}
+	for j := 0; j < 1000; j++ {
+		c := uint(0)
+		for i, e := s.NS(0); e; i, e = s.NS(i + 1) {
+			c++
+		}
+	}
+}
+
+func main() {
+	time.Sleep(time.Second)
+	A()
+}
diff --git a/src/pgo/inline/inline_hot_test.go b/src/pgo/inline/inline_hot_test.go
new file mode 100644
index 0000000..f906ee7
--- /dev/null
+++ b/src/pgo/inline/inline_hot_test.go
@@ -0,0 +1,42 @@
+package main
+
+import "testing"
+
+func BenchmarkA(b *testing.B) {
+	benchmarkB(b)
+}
+func benchmarkB(b *testing.B) {
+
+	for i := 0; true; {
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+	}
+}

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Sep 12, 2022, 4:29:29 PM9/12/22

to Raj Barik, goph...@pubsubhelper.golang.org, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

9 comments:

Patchset:
- Patch Set #1:
  Thanks for sharing the CL!
  Haven't read the detail. Just some high-level comments. Thanks.
File api/go1.19.txt:
- Patch Set #1, Line 247:
```
pkg pgo/inline, func A() #43724
```

- ```
pkg pgo/inline, func D(uint) int #43724
```

- ```
pkg pgo/inline, func N(uint) *BS #43724
```

pkg pgo/inline, func T(uint64) uint #43724

- ```
pkg pgo/inline, type BS struct #43724
```

pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724

pkg pgo/inline, method (*BS) S(uint) *BS #43724

- Undo. Let's not add this. (See the comment at pgo/inline/inline_hot.go)
File src/cmd/compile/internal/base/flag.go:
- Patch Set #1, Line 124: ProfileUse
  Maybe PGOProfile (-pgoprofile for the flag)? Not sure.
- Patch Set #1, Line 125: methods
  Maybe say functions instead of methods? I think it applies to non-method functions.
  "callsites" -> "call sites"
File src/cmd/compile/internal/gc/main.go:
- Patch Set #1, Line 241:
```
&profile.Options{
				CallTree:    false,
```


				SampleValue: func(v []int64) int64 { return v[1] },
			}

- It is unclear to me what the options mean here, especially the function for SampleValues. Can we make Options completely internal to the cmd/compile/internal/pgo package? (see also the comment at internal/profile/graph.go)
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #1:
  Add copyright header. Thanks.
File src/internal/profile/graph.go:
- Patch Set #1:
  I think this can live in the cmd/compile/internal/pgo package (unless/until we have a use case outside of the compiler). Also, it may be better to have a re-implementation instead of a copy from pprof. Then we can have APIs that is more suitable for us. For example, do we need the "Options" type? Unless the compiler needs to build multiple graphs with different options, I think it can be completely internal. Also unused options can be removed.
File src/internal/profile/legacy_java_profile.go:
- Patch Set #1:
  I don't think we need to support Java profiles. Could we not add this?
File src/pgo/inline/inline_hot.go:
- Patch Set #1:
  This is just an example, right? I don't think we need to add this package to the source tree.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Sep 13, 2022, 7:05:30 PM9/13/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

Raj Barik uploaded patch set #2 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---

M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go

A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
9 files changed, 2,124 insertions(+), 7 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Sep 13, 2022, 7:13:43 PM9/13/22

to goph...@pubsubhelper.golang.org, Austin Clements, Cherry Mui, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.

4 comments:

File src/cmd/compile/internal/base/flag.go:

- Patch Set #1, Line 124: ProfileUse
  Maybe PGOProfile (-pgoprofile for the flag)? Not sure.

- Done

- Patch Set #1, Line 125: methods

- Maybe say functions instead of methods? I think it applies to non-method functions. […]
  Done
File src/cmd/compile/internal/gc/main.go:

Patch Set #1, Line 241:

&profile.Options{
				CallTree:    false,
				SampleValue: func(v []int64) int64 { return v[1] },
			}

- It is unclear to me what the options mean here, especially the function for SampleValues. […]
  Done
File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #1:
  Add copyright header. Thanks.

- Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Sep 22, 2022, 5:40:32 PM9/22/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

33 comments:

File src/cmd/compile/internal/base/flag.go:

Patch Set #3, Line 125:

	InlineHotThreshold string       "help:\"threshold percentage for determining hot functions and call sites for inlining\""
	InlineHotBudget    int          "help:\"inline budget for hot functions\""

As we discussed, maybe make them debug flags under -d, see cmd/compile/internal/base/debug.go

File src/cmd/compile/internal/gc/main.go:
- Patch Set #3, Line 239: if pgo.PProfGraph == nil {
  The check doesn't seem necessary.
File src/cmd/compile/internal/inline/inl.go:
- Patch Set #3, Line 48: // Budget increased due to hotness.
  This comment should be on the line above? Maybe say this is the non-PGO inline budget.
- Patch Set #3, Line 61: // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
  Mutable global states make the code hard to reasoning. Can we make them local? Or just using the single global state from the pgo package?
- Patch Set #3, Line 74: 160
  160 doesn't seem large. If a function is hot, I think we can increase the budget more than that. Maybe 1000?
- Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
  At least for now, I think nothing will need the IRGraph after inlining, right? Then delete. We could add that in the future when we need it.
- Patch Set #3, Line 137: // InlinePackage finds functions that can be inlined and clones them.
  Keep the original comment.
- Patch Set #3, Line 268:
```
	savefn := ir.CurFunc
	ir.CurFunc = fn
```
  This needs some explanation.
- Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget
  Can we start with a higher budget, instead of checking here when it goes negative? Maybe increase the budget at line 265 above?
- Patch Set #3, Line 348: == true
  "== true" is unnecessary.
- Patch Set #3, Line 422: ir.CurFunc
  Maybe we want to add the current function in hairyVisitor instead of using the global variable.
File src/cmd/compile/internal/pgo/graph.go:
- Patch Set #3:
  This seems building a more general-purpose graph than we need. Can we just build what we need? Like, I don't think we need all the Options.
- Patch Set #3, Line 34: javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
  I think we don't need the non-Go ones.
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #3, Line 20: The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.
  I'm not sure we need to update the IRGraph (see the other comment at inline.go). Even if we do, it probably doesn't need to be mentioned here.
- Patch Set #3, Line 47:
```
Dst *IRNode
	DstNode  *ir.Node
```
  What is the difference between Dst and DstNode?
  Also, I think we want to use ir.Node by value instead of by pointer.
- Patch Set #3, Line 53: pprof
  Let's say "profile" for things that not specific to pprof format.
- Patch Set #3, Line 53: NodeMapInfo
  Maybe NodeMapKey to be clear.
- Patch Set #3, Line 55:
```
	FuncName string
	DstName  string
	CallSite int
```
  Maybe have some comment. Looking at this it is unclear what the fields are.
- Patch Set #3, Line 62:
```
	NWeight      int64
	NTotalWeight int64
```
  Maybe just call them NFlat and NCum.
- Patch Set #3, Line 69: Line string
  I think we don't want to use the string form of the line number. Can we just use number? In the proposal we mentioned that we don't want it to be file name sensitive (so it tolerates code moving and file renaming). So ir.Line probably isn't what we want.
- Patch Set #3, Line 82: = nil
  "= nil" isn't necessary. Also next one.
- Patch Set #3, Line 112: pProfGraph := New(p, opt)
  This builds a graph doe the whole binary. Can we build just the part we need, that is, the package we're compiling and its dependencies? Like, if we're compiling package A and the profile contains a package B which has no relation with A, we don't need to build the graph of B. Maybe we could use Options.KeptNodes?
- Patch Set #3, Line 117: return pProfGraph
  It seems we can just assign it to the global variable, without retuning it. Having both the global variable and the return value looks redundant.
- Patch Set #3, Line 132: log.Println("weighted call graph created successfully!")
  This information doesn't seem very helpful.
- Patch Set #3, Line 135: return weightedCG
  Same here.
- Patch Set #3, Line 141: nTotalWeight
  Name it nCumWeight, to be consistent with pprof naming and also clear what it actually means.
- Patch Set #3, Line 143: // Accummulate weights for the same nodes.
  Does the graph have duplicate nodes? Can we build a graph without duplicate in the first place?
- Patch Set #3, Line 149:
```
else {
			nweight[canonicalName] = n.FlatValue()
			nTotalWeight[canonicalName] = n.CumValue()
		}
```
  I think we don't need the conditional. The two += assignments should do the right thing even if the entry did not exist.
- Patch Set #3, Line 167: nodeinfo.CallSite = -1
  Why?
- Patch Set #3, Line 239: node1 represents the caller. Callee as f
  Maybe just name them caller, callee, to be clear. Also in the function body. Using 1 and 2 isn't very clear.
- Patch Set #3, Line 276: [callee is an interface type here which can have multiple targets].
  Not sure why/how this matters here.
- Patch Set #3, Line 281:
```
		nodeinfo.DstName = ""
		nodeinfo.CallSite = -1
```


		if weights, ok := GlobalNodeMap[nodeinfo]; ok {

			node1.Flat = weights.NWeight
			node1.Cum = weights.NTotalWeight


			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}

			g.OutEdges[node1] = append(g.OutEdges[node1], info)

			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)

- ```
		} else {
```


			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}

			g.OutEdges[node1] = append(g.OutEdges[node1], info)

			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
		}
	}

- This else branch needs some comments.
- Patch Set #3, Line 298: node1
  Same here. Maybe say caller instead of node1.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Sep 30, 2022, 12:06:48 PM9/30/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

Raj Barik uploaded patch set #6 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---

M src/cmd/compile/internal/base/debug.go


M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go

10 files changed, 2,110 insertions(+), 5 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Sep 30, 2022, 12:28:12 PM9/30/22

to goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Cherry Mui, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.

34 comments:

File api/go1.19.txt:

Patch Set #1, Line 247:

pkg pgo/inline, func A() #43724
pkg pgo/inline, func D(uint) int #43724
pkg pgo/inline, func N(uint) *BS #43724
pkg pgo/inline, func T(uint64) uint #43724
pkg pgo/inline, type BS struct #43724
pkg pgo/inline, method (*BS) NS(uint) (uint, bool) #43724
pkg pgo/inline, method (*BS) S(uint) *BS #43724

- Undo. Let's not add this. (See the comment at pgo/inline/inline_hot. […]
  Done
File src/cmd/compile/internal/base/flag.go:

Patch Set #3, Line 125:

	InlineHotThreshold string       "help:\"threshold percentage for determining hot functions and call sites for inlining\""
	InlineHotBudget    int          "help:\"inline budget for hot functions\""

- As we discussed, maybe make them debug flags under -d, see cmd/compile/internal/base/debug. […]
  Done
File src/cmd/compile/internal/gc/main.go:

- Patch Set #3, Line 239: if pgo.PProfGraph == nil {
  The check doesn't seem necessary.

- Done
File src/cmd/compile/internal/inline/inl.go:

- Patch Set #3, Line 48: // Budget increased due to hotness.
  This comment should be on the line above? Maybe say this is the non-PGO inline budget.

- Done

- Patch Set #3, Line 61: // Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.

- Mutable global states make the code hard to reasoning. […]
  Done

- Patch Set #3, Line 74: 160

- 160 doesn't seem large. If a function is hot, I think we can increase the budget more than that. […]
  Let's discuss this in our meeting. Raising this number too high would increase code size due to inlining. We should discuss pro and cons.

- Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.

- At least for now, I think nothing will need the IRGraph after inlining, right? Then delete. […]
  I assigned nil to both graphs. It might be a good idea keep the updated graph, just for debugging purposes.

- Patch Set #3, Line 137: // InlinePackage finds functions that can be inlined and clones them.
  Keep the original comment.

- Done

- Patch Set #3, Line 268:
```
	savefn := ir.CurFunc
	ir.CurFunc = fn
```
  This needs some explanation.

- I updated this code to keep the function as part of the HairyVisitor. We would have liked to use ir.CurFunc to do this.

- Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget

- Can we start with a higher budget, instead of checking here when it goes negative? Maybe increase th […]
  Need to understand what you mean.

- Patch Set #3, Line 348: == true
  "== true" is unnecessary.

- Done

- Patch Set #3, Line 422: ir.CurFunc
  Maybe we want to add the current function in hairyVisitor instead of using the global variable.

- Done
File src/cmd/compile/internal/pgo/graph.go:
- Patch Set #3:
  This seems building a more general-purpose graph than we need. […]
  The `graph.go` was obtained from https://github.com/google/pprof/tree/main/internal/graph/. There are many functionalities we do not need from it or we may extend certain capabilities. What is your recommendation moving forward?
File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #3, Line 20: The IrGraph is updated as we pass through optimization phases, for example, after a function is inlined, we update the graph with new nodes and edges.

- I'm not sure we need to update the IRGraph (see the other comment at inline.go). […]
  Done

- Patch Set #3, Line 47:
```
Dst *IRNode
	DstNode  *ir.Node
```

- What is the difference between Dst and DstNode? […]
  Done

- Patch Set #3, Line 53: NodeMapInfo
  Maybe NodeMapKey to be clear.

- Done

- Patch Set #3, Line 53: pprof
  Let's say "profile" for things that not specific to pprof format.

- Done

- Patch Set #3, Line 55:
```
	FuncName string
	DstName  string
	CallSite int
```
  Maybe have some comment. Looking at this it is unclear what the fields are.

- Done

- Patch Set #3, Line 62:
```
	NWeight      int64
	NTotalWeight int64
```
  Maybe just call them NFlat and NCum.

- Done

- Patch Set #3, Line 69: Line string

- I think we don't want to use the string form of the line number. […]
  Let's discuss this in our meeting.

- Patch Set #3, Line 82: = nil
  "= nil" isn't necessary. Also next one.

- Done

- Patch Set #3, Line 112: pProfGraph := New(p, opt)

- This builds a graph doe the whole binary. […]
  This is interesting and needs modifications to `graph.go`, which I am not sure if we should do. KeptNodes is used for some other purposes. We may have to add functionalities to filter nodes based on current package.

- Patch Set #3, Line 117: return pProfGraph

- It seems we can just assign it to the global variable, without retuning it. […]
  Done

- Patch Set #3, Line 132: log.Println("weighted call graph created successfully!")
  This information doesn't seem very helpful.

- Done

- Patch Set #3, Line 135: return weightedCG
  Same here.

- Done

- Patch Set #3, Line 141: nTotalWeight
  Name it nCumWeight, to be consistent with pprof naming and also clear what it actually means.

- Done

Patch Set #3, Line 149:

else {
			nweight[canonicalName] = n.FlatValue()
			nTotalWeight[canonicalName] = n.CumValue()
		}

- I think we don't need the conditional. […]
  Done

- Patch Set #3, Line 167: nodeinfo.CallSite = -1
  Why?

- Done

- Patch Set #3, Line 239: node1 represents the caller. Callee as f

- Maybe just name them caller, callee, to be clear. Also in the function body. […]
  Done

- Patch Set #3, Line 276: [callee is an interface type here which can have multiple targets].
  Not sure why/how this matters here.

- Done

- Patch Set #3, Line 298: node1
  Same here. Maybe say caller instead of node1.

- Done
File src/internal/profile/graph.go:
- Patch Set #1:
  I think this can live in the cmd/compile/internal/pgo package (unless/until we have a use case outsi […]
  Done
File src/internal/profile/legacy_java_profile.go:
- Patch Set #1:
  I don't think we need to support Java profiles. […]
  Done
File src/pgo/inline/inline_hot.go:

- Patch Set #1:
  This is just an example, right? I don't think we need to add this package to the source tree.

- Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 5, 2022, 5:42:18 PM10/5/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

17 comments:

File src/cmd/compile/internal/inline/inl.go:
- Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.
  I assigned nil to both graphs. […]
  Okay, thanks. As we discussed, only update the graph if a debug flag is set.

- Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget

- Need to understand what you mean.
  As discussed offline, start with a higher budget, by changing inlineMaxBudget at line 263 (as of Patch Set 6) to inlineHotMaxBudget, if the function is a hot callee.
File src/cmd/compile/internal/inline/inl.go:
- Patch Set #6, Line 56:
  Undo adding the blank line.
- Patch Set #6, Line 99:

line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)

							lineno := fmt.Sprintf("%v", line2)

- This looks redundant parsing the string to integer then convert back to string.
- Patch Set #6, Line 101: canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
  Can we make it keyed on a struct with caller, callee and the line number, without making a string?
- Patch Set #6, Line 583: if pgo.WeightedCG == nil {
  I think if we start with inlineHotMaxBudget in the first place then we don't need this change here.
- Patch Set #6, Line 839: if _, ok := pgo.ListOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
  Only do the check if PGO is enabled.
- Patch Set #6, Line 949:

	if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {

		inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
	}

- Only do this update in debug mode.
File src/cmd/compile/internal/pgo/graph.go:
- Patch Set #3:
  The `graph.go` was obtained from https://github.com/google/pprof/tree/main/internal/graph/. […]
  As discussed, we should make this code more suitable to our need, without keeping exact copy of pprof/internal/graph. Let's simplify this code. Thanks.
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #3, Line 69: Line string

- Let's discuss this in our meeting.

- Reading more of the code, I think we really should move away from using ir.Line. It seems in many places in the code we just need the number, but the code starting with ir.Line, which makes a string, then parse back to number, then sometimes convert to string again, which seems more complex than it should be.

- Patch Set #3, Line 143: // Accummulate weights for the same nodes.
  Does the graph have duplicate nodes? Can we build a graph without duplicate in the first place?

- Ack
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #6, Line 36: Recursive bool
  This field doesn't seem to be read anywhere? Maybe remove?
- Patch Set #6, Line 38: HotNode bool
  This field is defined here but only set and used in the inliner. Maybe move it to the inliner (perhaps as a map)?
- Patch Set #6, Line 56: CallSite int
  It is still not quite clear what CallSite is. Looks like a line number?
- Patch Set #6, Line 156: /*
  Should this commented-out code be deleted?
- Patch Set #6, Line 234: `f`
  callee? (Also, remove quotes)
- Patch Set #6, Line 300: line := ir.Line(n)
  That we convert the line numbers to string here then parse it back to numbers in addEdge looks quite convoluted. Can we just use the number (for now, even not the relative line number as we eventually want)?

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 5, 2022, 7:05:14 PM10/5/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

1 comment:

File src/cmd/compile/internal/inline/inl.go:
- Patch Set #6, Line 97: inlineHotThresholdPercent
  Using the same threshold for nodes and edges doesn't seem to work well. For a reasonable large program, the edge percentages are really really small. I think we need a better way to select hot call sites.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 13, 2022, 5:00:12 PM10/13/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

4 comments:

File src/cmd/compile/internal/test/pgo_inl_test.go:
- Patch Set #6, Line 21:

// TestIntendedInlining tests that specific functions are inlined.

// This allows refactoring for code clarity and re-use without fear that

// changes to the compiler will cause silent performance regressions.

- This comment needs update.
- Patch Set #6, Line 27:
  Profiling doesn't work well on some platforms https://cs.opensource.google/go/go/+/master:src/runtime/pprof/pprof_test.go;l=400 . Maybe we want to skip the test there?
  Alternatively, maybe we could use a pre-collected profile (which is pretty small).
- Patch Set #6, Line 90: "-run=nope
  You can pass "-c" so it only builds the test but does not run it. (Maybe also pass "-o" to make sure the output is in the temp directory.) Also "-tags" and "-timeout" seems not necessary.
File src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go:
- Patch Set #6:
  Add copyright header.
  It doesn't seem necessary to have two files. I think we can put everything in inline_hot_test.go ?
  Also, this file seems to contain more things than necessary (e.g. time.Sleep). I think we only need A and BS.NS? Other things (D, N, jn, etc.) probably can be simplified, to make it clear what we're testing.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 13, 2022, 5:45:51 PM10/13/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

1 comment:

File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #6, Line 328: threshold
  For debugging purposes, I think we should probably print the nodes below the threshold as well. This can be helpful e.g. if we want to adjust the threshold.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 13, 2022, 8:23:24 PM10/13/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

Raj Barik uploaded patch set #7 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go

10 files changed, 2,142 insertions(+), 44 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 13, 2022, 8:24:45 PM10/13/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

Raj Barik uploaded patch set #8 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e

 Handled most of the concerns from Cherry


---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go

10 files changed, 2,143 insertions(+), 44 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 13, 2022, 8:30:15 PM10/13/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Michael Pratt, Raj Barik.

Raj Barik uploaded patch set #9 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e

---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go

10 files changed, 2,142 insertions(+), 44 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 14, 2022, 12:03:51 PM10/14/22

to goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Cherry Mui, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui, Michael Pratt.

16 comments:

File src/cmd/compile/internal/inline/inl.go:

- Patch Set #3, Line 121: // InlineEpilogue updates IRGraph after inlining.

- Okay, thanks. As we discussed, only update the graph if a debug flag is set.
  Done

- Patch Set #3, Line 348: inlineMaxBudget-v.budget < inlineHotMaxBudget

- As discussed offline, start with a higher budget, by changing inlineMaxBudget at line 263 (as of Pat […]
  Done
File src/cmd/compile/internal/inline/inl.go:
- Patch Set #6, Line 56:

- Undo adding the blank line.

- Done

- Patch Set #6, Line 97: inlineHotThresholdPercent

- Using the same threshold for nodes and edges doesn't seem to work well. […]
  Separated them out! Take a look.

Patch Set #6, Line 99:

line2, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
							lineno := fmt.Sprintf("%v", line2)

This looks redundant parsing the string to integer then convert back to string.

- Done

- Patch Set #6, Line 101: canonicalName := ir.PkgFuncName(n.AST) + "-" + lineno + "-" + ir.PkgFuncName(e.Dst.AST)
  Can we make it keyed on a struct with caller, callee and the line number, without making a string?

- Done

- Patch Set #6, Line 583: if pgo.WeightedCG == nil {
  I think if we start with inlineHotMaxBudget in the first place then we don't need this change here.

- Done

- Patch Set #6, Line 839: if _, ok := pgo.ListOfHotCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; ok {
  Only do the check if PGO is enabled.

- Done

Patch Set #6, Line 949:

	if _, ok := inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}]; !ok {
		inlinedCallSites[pgo.CallSiteInfo{ir.Line(n), ir.CurFunc}] = struct{}{}
	}

Only do this update in debug mode.

- Done
File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #3, Line 69: Line string

- Reading more of the code, I think we really should move away from using ir.Line. […]
  changed to integer. should discuss more tomorrow.
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #6, Line 36: Recursive bool

- This field doesn't seem to be read anywhere? Maybe remove?

- Done

- Patch Set #6, Line 38: HotNode bool

- This field is defined here but only set and used in the inliner. […]
  Done

- Patch Set #6, Line 56: CallSite int

- It is still not quite clear what CallSite is. […]
  changed to integer universally. We should discuss if this is semantically correct.

- Patch Set #6, Line 156: /*
  Should this commented-out code be deleted?

- Done

- Patch Set #6, Line 234: `f`
  callee? (Also, remove quotes)

- Done

- Patch Set #6, Line 300: line := ir.Line(n)

- That we convert the line numbers to string here then parse it back to numbers in addEdge looks quite […]
  Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Michael Pratt (Gerrit)

unread,

Oct 18, 2022, 1:01:15 PM10/18/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Cherry Mui, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui, Raj Barik.

1 comment:

Patchset:
- Patch Set #10:
  Did you mean to push a new version? The latest push you did today was identical to the patch set 9.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 20, 2022, 2:56:40 PM10/20/22

to Raj Barik, goph...@pubsubhelper.golang.org, Matthew Dempsky, Austin Clements, Michael Pratt, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Raj Barik.

Patch set 10:Run-TryBot +1

8 comments:

Commit Message:
- Patch Set #10, Line 7: Enables
  Minor: lowercase "enable", and without the "s".
- Patch Set #10, Line 8:
  Add "For #55022" to reference the issue.
Patchset:
- Patch Set #10:
  Thanks. Looks mostly okay. We can iterate and make improvements from here.
File src/cmd/compile/internal/inline/inl.go:
- Patch Set #3, Line 74: 160
  Let's discuss this in our meeting. […]
  Ack
File src/cmd/compile/internal/inline/inl.go:
- Patch Set #6, Line 97: inlineHotThresholdPercent

- Separated them out! Take a look.

- Done
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #6, Line 56: CallSite int
  changed to integer universally. We should discuss if this is semantically correct.
  Done
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #10:
  Add a "WORK IN PROGRESS" comment at the top, as we discussed.
File src/cmd/compile/internal/test/pgo_inl_test.go:
- Patch Set #10:
  As discussed offline, we may want to go with a checked-in profile for the test. Maybe remove this test for now and we can add it back later.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 20, 2022, 3:13:00 PM10/20/22

to Raj Barik, goph...@pubsubhelper.golang.org, Gopher Robot, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Raj Barik.

1 comment:

Patchset:
- Patch Set #10:
  29 of 30 TryBots failed. […]
  This needs to be fixed before checking it in. This means, for bootstrapping from Go 1.17, we need to not use strings.Cut in internal/profile, but the old strings.Index.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 20, 2022, 7:15:52 PM10/20/22

to goph...@pubsubhelper.golang.org, Gopher Robot, Cherry Mui, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

5 comments:

Patchset:
- Patch Set #11:
  Some cleanups in the tests...

File src/cmd/compile/internal/inline/inl.go:
- Patch Set #6, Line 97: inlineHotThresholdPercent
  Separated them out! Take a look.
  Done

File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #3, Line 69: Line string
  changed to integer. should discuss more tomorrow.
  Done
File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #6, Line 56: CallSite int
  changed to integer universally. We should discuss if this is semantically correct.
  Done

- Patch Set #6, Line 328: threshold
  For debugging purposes, I think we should probably print the nodes below the threshold as well. […]
  Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 20, 2022, 7:16:27 PM10/20/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

Raj Barik uploaded patch set #12 to this change.

cmd/compile: enable PGO in Go and performs profile-guided inlining



Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 20, 2022, 7:40:41 PM10/20/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

Raj Barik uploaded patch set #13 to this change.

cmd/compile: enable PGO in Go and performs profile-guided inlining (for issue #55022)



Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
10 files changed, 2,142 insertions(+), 44 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 20, 2022, 7:41:41 PM10/20/22

to goph...@pubsubhelper.golang.org, Gopher Robot, Cherry Mui, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

8 comments:

Commit Message:

- Patch Set #10, Line 7: Enables
  Minor: lowercase "enable", and without the "s".

- Done
Patchset:
- Patch Set #10:
  This needs to be fixed before checking it in. This means, for bootstrapping from Go 1. […]
  Should this file [perhaps all files in this directory] be synced to files at https://github.com/google/pprof/blob/main/profile ? They do not use Cut.
File src/cmd/compile/internal/pgo/graph.go:
- Patch Set #3:
  As discussed, we should make this code more suitable to our need, without keeping exact copy of ppro […]
  Done
- Patch Set #3, Line 34: javaRegExp = regexp.MustCompile(`^(?:[a-z]\w*\.)*([A-Z][\w\$]*\.(?:<init>|[a-z][\w\$]*(?:\$\d+)?))(?:(?:\()|$)`)
  I think we don't need the non-Go ones.
  Done
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #3, Line 281:

		nodeinfo.DstName = ""
		nodeinfo.CallSite = -1
		if weights, ok := GlobalNodeMap[nodeinfo]; ok {
			node1.Flat = weights.NWeight
			node1.Cum = weights.NTotalWeight
			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
			g.OutEdges[node1] = append(g.OutEdges[node1], info)
			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
		} else {
			info := &IREdge{Src: node1, Dst: g.IRNodes[name2], DstNode: n, Weight: 0, CallSite: line}
			g.OutEdges[node1] = append(g.OutEdges[node1], info)
			g.InEdges[g.IRNodes[name2]] = append(g.InEdges[g.IRNodes[name2]], info)
		}
	}

This else branch needs some comments.

- Done
File src/cmd/compile/internal/test/pgo_inl_test.go:

Patch Set #6, Line 21:

// TestIntendedInlining tests that specific functions are inlined.
// This allows refactoring for code clarity and re-use without fear that
// changes to the compiler will cause silent performance regressions.

This comment needs update.

- Done
- Patch Set #6, Line 27:
  Profiling doesn't work well on some platforms https://cs.opensource. […]
  Done
File src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go:
- Patch Set #6:
  Add copyright header. […]
  Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 21, 2022, 11:55:57 AM10/21/22

to Raj Barik, goph...@pubsubhelper.golang.org, Gopher Robot, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Raj Barik.

Patch set 13:Run-TryBot +1

1 comment:

Patchset:
- Patch Set #10:
  Should this file [perhaps all files in this directory] be synced to files at https://github. […]
  No, at least not in this CL. Just do the minimal change to fix the build.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Austin Clements (Gerrit)

unread,

Oct 21, 2022, 12:12:22 PM10/21/22

to Raj Barik, goph...@pubsubhelper.golang.org, Gopher Robot, Cherry Mui, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Raj Barik.

3 comments:

Commit Message:
- Patch Set #10, Line 8:

- Add "For #55022" to reference the issue.

- To clarify, the commit message should look like (and Gerrit recognizes #NNNN as an issue link; you don't have to do anything to make that a link):
  cmd/compile: enable PGO in Go and perform profile-guided inlining
  For #55022
File src/cmd/compile/internal/pgo/irgraph.go:
- Patch Set #3, Line 112: pProfGraph := New(p, opt)
  This is interesting and needs modifications to `graph.go`, which I am not sure if we should do. […]
  We can do this in a follow-up CL
File src/cmd/compile/internal/test/pgo_inl_test.go:
- Patch Set #10:
  As discussed offline, we may want to go with a checked-in profile for the test. […]
  I think it would be okay to do this in a follow-up CL.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 6:53:59 PM10/25/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Cherry Mui, Raj Barik.

Raj Barik uploaded patch set #14 to this change.

The following approvals got outdated and were removed: Run-TryBot+1 by Cherry Mui, TryBot-Result-1 by Gopher Robot

cmd/compile: Enables PGO in Go and performs profile-guided inlining



Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go

A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,022 insertions(+), 45 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 7:42:05 PM10/25/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Cherry Mui, Raj Barik.

Raj Barik uploaded patch set #15 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022



Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go

12 files changed, 2,023 insertions(+), 45 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 7:42:24 PM10/25/22

to goph...@pubsubhelper.golang.org, Gopher Robot, Cherry Mui, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

3 comments:

Commit Message:

- Patch Set #10, Line 8:

- To clarify, the commit message should look like (and Gerrit recognizes #NNNN as an issue link; you d […]
  Done
File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #3, Line 112: pProfGraph := New(p, opt)

- We can do this in a follow-up CL
  Done
File src/cmd/compile/internal/test/pgo_inl_test.go:
- Patch Set #10:
  I think it would be okay to do this in a follow-up CL.
  Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 7:44:57 PM10/25/22

to goph...@pubsubhelper.golang.org, Gopher Robot, Cherry Mui, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

2 comments:

File src/cmd/compile/internal/pgo/irgraph.go:

- Patch Set #10:
  Add a "WORK IN PROGRESS" comment at the top, as we discussed.

- Done
File src/cmd/compile/internal/test/pgo_inl_test.go:
- Patch Set #6, Line 90: "-run=nope
  You can pass "-c" so it only builds the test but does not run it. […]
  Ack

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 7:46:22 PM10/25/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

Raj Barik uploaded patch set #17 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go

12 files changed, 2,025 insertions(+), 45 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Raj Barik (Gerrit)

unread,

Oct 25, 2022, 7:51:22 PM10/25/22

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui.

Raj Barik uploaded patch set #18 to this change.

cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e
---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go
12 files changed, 2,025 insertions(+), 45 deletions(-)

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 27, 2022, 12:54:38 PM10/27/22

to Raj Barik, goph...@pubsubhelper.golang.org, Gopher Robot, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Raj Barik.

Patch set 18:Run-TryBot +1

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Cherry Mui (Gerrit)

unread,

Oct 27, 2022, 1:51:19 PM10/27/22

to Raj Barik, goph...@pubsubhelper.golang.org, Gopher Robot, Matthew Dempsky, Austin Clements, Michael Pratt, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Raj Barik.

Patch set 18:Code-Review +2

1 comment:

Patchset:
- Patch Set #18:
  Thanks. LGTM for the first round. There are things we can improve. We can iterate and improve from here.

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Michael Pratt (Gerrit)

unread,

Oct 27, 2022, 2:43:46 PM10/27/22

to Raj Barik, goph...@pubsubhelper.golang.org, Michael Pratt, Cherry Mui, Gopher Robot, Matthew Dempsky, Austin Clements, golang-co...@googlegroups.com

Attention is currently required from: Austin Clements, Cherry Mui, Raj Barik.

Patch set 18:Code-Review +2

1 comment:

Patchset:
- Patch Set #10:

- No, at least not in this CL. Just do the minimal change to fix the build.

- Done

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Michael Pratt (Gerrit)

unread,

Oct 28, 2022, 10:23:32 AM10/28/22

to Raj Barik, Michael Pratt, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Cherry Mui, Gopher Robot, Matthew Dempsky, Austin Clements, golang-co...@googlegroups.com

Michael Pratt submitted this change.

Approvals:
  Michael Pratt: Looks good to me, approved
  Cherry Mui: Looks good to me, approved; Run TryBots
  Gopher Robot: TryBots succeeded

cmd/compile: Enables PGO in Go and performs profile-guided inlining
For #55022

Change-Id: I51f1ba166d5a66dcaf4b280756be4a6bf9545c5e

Reviewed-on: https://go-review.googlesource.com/c/go/+/429863
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Cherry Mui <cher...@google.com>
Reviewed-by: Michael Pratt <mpr...@google.com>
Run-TryBot: Cherry Mui <cher...@google.com>


---
M src/cmd/compile/internal/base/debug.go
M src/cmd/compile/internal/base/flag.go
M src/cmd/compile/internal/gc/main.go
M src/cmd/compile/internal/inline/inl.go
A src/cmd/compile/internal/pgo/graph.go
A src/cmd/compile/internal/pgo/irgraph.go
A src/cmd/compile/internal/test/pgo_inl_test.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
A src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
M src/cmd/dist/buildtool.go
M src/internal/profile/legacy_profile.go

12 files changed, 2,030 insertions(+), 45 deletions(-)

diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index 32a45d7..ba21491 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -16,35 +16,39 @@
 // The -d option takes a comma-separated list of settings.
 // Each setting is name=value; for ints, name is short for name=1.
 type DebugFlags struct {
-	Append               int    `help:"print information about append compilation"`
-	Checkptr             int    `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
-	Closure              int    `help:"print information about closure compilation"`
-	DclStack             int    `help:"run internal dclstack check"`
-	Defer                int    `help:"print information about defer compilation"`
-	DisableNil           int    `help:"disable nil checks"`
-	DumpPtrs             int    `help:"show Node pointers values in dump output"`
-	DwarfInl             int    `help:"print information about DWARF inlined function creation"`
-	Export               int    `help:"print export data"`
-	GCProg               int    `help:"print dump of GC programs"`
-	InlFuncsWithClosures int    `help:"allow functions with closures to be inlined"`
-	Libfuzzer            int    `help:"enable coverage instrumentation for libfuzzer"`
-	LocationLists        int    `help:"print information about DWARF location list creation"`
-	Nil                  int    `help:"print information about nil checks"`
-	NoOpenDefer          int    `help:"disable open-coded defers"`
-	NoRefName            int    `help:"do not include referenced symbol names in object file"`
-	PCTab                string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
-	Panic                int    `help:"show all compiler panics"`
-	Reshape              int    `help:"print information about expression reshaping"`
-	Shapify              int    `help:"print information about shaping recursive types"`
-	Slice                int    `help:"print information about slice compilation"`
-	SoftFloat            int    `help:"force compiler to emit soft-float code"`
-	SyncFrames           int    `help:"how many writer stack frames to include at sync points in unified export data"`
-	TypeAssert           int    `help:"print information about type assertion inlining"`
-	TypecheckInl         int    `help:"eager typechecking of inline function bodies"`
-	Unified              int    `help:"enable unified IR construction"`
-	WB                   int    `help:"print information about write barriers"`
-	ABIWrap              int    `help:"print information about ABI wrapper generation"`
-	MayMoreStack         string `help:"call named function before all stack growth checks"`
+	Append                     int    `help:"print information about append compilation"`
+	Checkptr                   int    `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation"`
+	Closure                    int    `help:"print information about closure compilation"`
+	DclStack                   int    `help:"run internal dclstack check"`
+	Defer                      int    `help:"print information about defer compilation"`
+	DisableNil                 int    `help:"disable nil checks"`
+	DumpPtrs                   int    `help:"show Node pointers values in dump output"`
+	DwarfInl                   int    `help:"print information about DWARF inlined function creation"`
+	Export                     int    `help:"print export data"`
+	GCProg                     int    `help:"print dump of GC programs"`
+	InlFuncsWithClosures       int    `help:"allow functions with closures to be inlined"`
+	Libfuzzer                  int    `help:"enable coverage instrumentation for libfuzzer"`
+	LocationLists              int    `help:"print information about DWARF location list creation"`
+	Nil                        int    `help:"print information about nil checks"`
+	NoOpenDefer                int    `help:"disable open-coded defers"`
+	NoRefName                  int    `help:"do not include referenced symbol names in object file"`
+	PCTab                      string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
+	Panic                      int    `help:"show all compiler panics"`
+	Reshape                    int    `help:"print information about expression reshaping"`
+	Shapify                    int    `help:"print information about shaping recursive types"`
+	Slice                      int    `help:"print information about slice compilation"`
+	SoftFloat                  int    `help:"force compiler to emit soft-float code"`
+	SyncFrames                 int    `help:"how many writer stack frames to include at sync points in unified export data"`
+	TypeAssert                 int    `help:"print information about type assertion inlining"`
+	TypecheckInl               int    `help:"eager typechecking of inline function bodies"`
+	Unified                    int    `help:"enable unified IR construction"`
+	WB                         int    `help:"print information about write barriers"`
+	ABIWrap                    int    `help:"print information about ABI wrapper generation"`
+	MayMoreStack               string `help:"call named function before all stack growth checks"`
+	InlineHotFuncThreshold     string `help:"threshold percentage for determining functions as hot candidates for inlining"`
+	InlineHotCallSiteThreshold string `help:"threshold percentage for determining call sites as hot candidates for inlining"`
+	InlineHotBudget            int    `help:"inline budget for hot functions"`
+	PGOInline                  int    `help:"debug profile-guided inlining"`
 
 	Any bool // set when any of the debug flags have been set
 }
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index 3e9d86c..e6df6b6 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -124,6 +124,7 @@
 	TrimPath           string       "help:\"remove `prefix` from recorded source file paths\""
 	WB                 bool         "help:\"enable write barrier\""                    // TODO: remove
 	AltComparable      bool         "help:\"enable alternative comparable semantics\"" // experiment - remove eventually
+	PgoProfile         string       "help:\"read profile from `file`\""
 
 	// Configuration derived from flags; not a flag itself.
 	Cfg struct {
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 2fbf2f4..5633f1f 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -17,6 +17,7 @@
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/logopt"
 	"cmd/compile/internal/noder"
+	"cmd/compile/internal/pgo"
 	"cmd/compile/internal/pkginit"
 	"cmd/compile/internal/reflectdata"
 	"cmd/compile/internal/ssa"
@@ -249,10 +250,26 @@
 		typecheck.AllImportedBodies()
 	}
 
+	// Read profile file and build profile-graph and weighted-call-graph.
+	base.Timer.Start("fe", "pgoprofile")
+	if base.Flag.PgoProfile != "" {
+		pgo.BuildProfileGraph(base.Flag.PgoProfile)
+		pgo.BuildWeightedCallGraph()
+	}
+
 	// Inlining
 	base.Timer.Start("fe", "inlining")
 	if base.Flag.LowerL != 0 {
+		if pgo.WeightedCG != nil {
+			inline.InlinePrologue()
+		}
 		inline.InlinePackage()
+		if pgo.WeightedCG != nil {
+			inline.InlineEpilogue()
+			// Delete the graphs as no other optimization uses this currently.
+			pgo.WeightedCG = nil
+			pgo.ProfileGraph = nil
+		}
 	}
 	noder.MakeWrappers(typecheck.Target) // must happen after inlining
 
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 5e14a87..4d942d0 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -29,11 +29,13 @@
 import (
 	"fmt"
 	"go/constant"
+	"strconv"
 	"strings"
 
 	"cmd/compile/internal/base"
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/logopt"
+	"cmd/compile/internal/pgo"
 	"cmd/compile/internal/typecheck"
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
@@ -53,6 +55,91 @@
 	inlineBigFunctionMaxCost = 20   // Max cost of inlinee when inlining into a "big" function.
 )
 
+var (
+	// List of all hot ndes.
+	candHotNodeMap = make(map[*pgo.IRNode]struct{})
+
+	// List of all hot call sites.
+	candHotEdgeMap = make(map[pgo.CallSiteInfo]struct{})
+
+	// List of inlined call sites.
+	inlinedCallSites = make(map[pgo.CallSiteInfo]struct{})
+
+	// Threshold in percentage for hot function inlining.
+	inlineHotFuncThresholdPercent = float64(2)
+
+	// Threshold in percentage for hot callsite inlining.
+	inlineHotCallSiteThresholdPercent = float64(0.1)
+
+	// Budget increased due to hotness.
+	inlineHotMaxBudget int32 = 160
+)
+
+// InlinePrologue records the hot callsites from ir-graph.
+func InlinePrologue() {
+	if s, err := strconv.ParseFloat(base.Debug.InlineHotFuncThreshold, 64); err == nil {
+		inlineHotFuncThresholdPercent = s
+		if base.Debug.PGOInline > 0 {
+			fmt.Printf("hot-node-thres=%v\n", inlineHotFuncThresholdPercent)
+		}
+	}
+
+	if s, err := strconv.ParseFloat(base.Debug.InlineHotCallSiteThreshold, 64); err == nil {
+		inlineHotCallSiteThresholdPercent = s
+		if base.Debug.PGOInline > 0 {
+			fmt.Printf("hot-callsite-thres=%v\n", inlineHotCallSiteThresholdPercent)
+		}
+	}
+
+	if base.Debug.InlineHotBudget != 0 {
+		inlineHotMaxBudget = int32(base.Debug.InlineHotBudget)
+	}
+
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+				nodeweight := pgo.WeightInPercentage(n.Flat, pgo.GlobalTotalNodeWeight)
+				if nodeweight > inlineHotFuncThresholdPercent {
+					candHotNodeMap[n] = struct{}{}
+				}
+				for _, e := range pgo.WeightedCG.OutEdges[n] {
+					if e.Weight != 0 {
+						edgeweightpercent := pgo.WeightInPercentage(e.Weight, pgo.GlobalTotalEdgeWeight)
+						if edgeweightpercent > inlineHotCallSiteThresholdPercent {
+							csi := pgo.CallSiteInfo{Line: e.CallSite, Caller: n.AST, Callee: e.Dst.AST}
+							if _, ok := candHotEdgeMap[csi]; !ok {
+								candHotEdgeMap[csi] = struct{}{}
+							}
+						}
+					}
+				}
+			}
+		}
+	})
+	if base.Debug.PGOInline > 0 {
+		fmt.Printf("hot-cg before inline in dot format:")
+		pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
+	}
+}
+
+// InlineEpilogue updates IRGraph after inlining.
+func InlineEpilogue() {
+	if base.Debug.PGOInline > 0 {
+		ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+			for _, f := range list {
+				name := ir.PkgFuncName(f)
+				if n, ok := pgo.WeightedCG.IRNodes[name]; ok {
+					pgo.RedirectEdges(n, inlinedCallSites)
+				}
+			}
+		})
+		// Print the call-graph after inlining. This is a debugging feature.
+		fmt.Printf("hot-cg after inline in dot:")
+		pgo.PrintWeightedCallGraphDOT(inlineHotFuncThresholdPercent, inlineHotCallSiteThresholdPercent)
+	}
+}
+
 // InlinePackage finds functions that can be inlined and clones them before walk expands them.
 func InlinePackage() {
 	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
@@ -81,6 +168,9 @@
 		base.Fatalf("CanInline no nname %+v", fn)
 	}
 
+	// Initialize an empty list of hot callsites for this caller.
+	pgo.ListOfHotCallSites = make(map[pgo.CallSiteInfo]struct{})
+
 	var reason string // reason, if any, that the function was not inlined
 	if base.Flag.LowerM > 1 || logopt.Enabled() {
 		defer func() {
@@ -168,6 +258,19 @@
 		cc = 1 // this appears to yield better performance than 0.
 	}
 
+	// Update the budget for profile-guided inlining.
+	budget := int32(inlineMaxBudget)
+	if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil {
+		if n, ok := pgo.WeightedCG.IRNodes[ir.PkgFuncName(fn)]; ok {
+			if _, ok := candHotNodeMap[n]; ok {
+				budget = int32(inlineHotMaxBudget)
+				if base.Debug.PGOInline > 0 {
+					fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
+				}
+			}
+		}
+	}
+
 	// At this point in the game the function we're looking at may
 	// have "stale" autos, vars that still appear in the Dcl list, but
 	// which no longer have any uses in the function body (due to
@@ -178,7 +281,9 @@
 	// list. See issue 25249 for more context.
 
 	visitor := hairyVisitor{
-		budget:        inlineMaxBudget,
+		curFunc:       fn,
+		budget:        budget,
+		maxBudget:     budget,
 		extraCallCost: cc,
 	}
 	if visitor.tooHairy(fn) {
@@ -187,7 +292,7 @@
 	}
 
 	n.Func.Inl = &ir.Inline{
-		Cost: inlineMaxBudget - visitor.budget,
+		Cost: budget - visitor.budget,
 		Dcl:  pruneUnusedAutos(n.Defn.(*ir.Func).Dcl, &visitor),
 		Body: inlcopylist(fn.Body),
 
@@ -195,12 +300,12 @@
 	}
 
 	if base.Flag.LowerM > 1 {
-		fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, inlineMaxBudget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
+		fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, budget-visitor.budget, fn.Type(), ir.Nodes(n.Func.Inl.Body))
 	} else if base.Flag.LowerM != 0 {
 		fmt.Printf("%v: can inline %v\n", ir.Line(fn), n)
 	}
 	if logopt.Enabled() {
-		logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", inlineMaxBudget-visitor.budget))
+		logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", budget-visitor.budget))
 	}
 }
 
@@ -239,7 +344,10 @@
 // hairyVisitor visits a function body to determine its inlining
 // hairiness and whether or not it can be inlined.
 type hairyVisitor struct {
+	// This is needed to access the current caller in the doNode function.
+	curFunc       *ir.Func
 	budget        int32
+	maxBudget     int32
 	reason        string
 	extraCallCost int32
 	usedLocals    ir.NameSet
@@ -252,7 +360,7 @@
 		return true
 	}
 	if v.budget < 0 {
-		v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", inlineMaxBudget-v.budget, inlineMaxBudget)
+		v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", v.maxBudget-v.budget, v.maxBudget)
 		return true
 	}
 	return false
@@ -330,6 +438,20 @@
 			}
 		}
 
+		// Determine if the callee edge is a for hot callee or not.
+		if base.Flag.PgoProfile != "" && pgo.WeightedCG != nil && v.curFunc != nil {
+			if fn := inlCallee(n.X); fn != nil && typecheck.HaveInlineBody(fn) {
+				ln := pgo.ConvertLine2Int(ir.Line(n))
+				csi := pgo.CallSiteInfo{Line: ln, Caller: v.curFunc, Callee: fn}
+				if _, o := candHotEdgeMap[csi]; o {
+					pgo.ListOfHotCallSites[pgo.CallSiteInfo{Line: ln, Caller: v.curFunc}] = struct{}{}
+					if base.Debug.PGOInline > 0 {
+						fmt.Printf("hot-callsite identified at line=%v for func=%v\n", ir.Line(n), ir.PkgFuncName(v.curFunc))
+					}
+				}
+			}
+		}
+
 		if ir.IsIntrinsicCall(n) {
 			// Treat like any other node.
 			break
@@ -750,13 +872,29 @@
 		return n
 	}
 	if fn.Inl.Cost > maxCost {
-		// The inlined function body is too big. Typically we use this check to restrict
-		// inlining into very big functions.  See issue 26546 and 17566.
-		if logopt.Enabled() {
-			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
-				fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
+		// If the callsite is hot and it is under the inlineHotMaxBudget budget, then try to inline it, or else bail.
+		ln := pgo.ConvertLine2Int(ir.Line(n))
+		csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
+		if _, ok := pgo.ListOfHotCallSites[csi]; ok {
+			if fn.Inl.Cost > inlineHotMaxBudget {
+				if logopt.Enabled() {
+					logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
+						fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), inlineHotMaxBudget))
+				}
+				return n
+			}
+			if base.Debug.PGOInline > 0 {
+				fmt.Printf("hot-budget check allows inlining for callsite at %v\n", ir.Line(n))
+			}
+		} else {
+			// The inlined function body is too big. Typically we use this check to restrict
+			// inlining into very big functions.  See issue 26546 and 17566.
+			if logopt.Enabled() {
+				logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(ir.CurFunc),
+					fmt.Sprintf("cost %d of %s exceeds max large caller cost %d", fn.Inl.Cost, ir.PkgFuncName(fn), maxCost))
+			}
+			return n
 		}
-		return n
 	}
 
 	if fn == ir.CurFunc {
@@ -899,7 +1037,16 @@
 		fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
 	}
 
+	if base.Debug.PGOInline > 0 {
+		ln := pgo.ConvertLine2Int(ir.Line(n))
+		csi := pgo.CallSiteInfo{Line: ln, Caller: ir.CurFunc}
+		if _, ok := inlinedCallSites[csi]; !ok {
+			inlinedCallSites[csi] = struct{}{}
+		}
+	}
+
 	res := InlineCall(n, fn, inlIndex)
+
 	if res == nil {
 		base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
 	}
diff --git a/src/cmd/compile/internal/pgo/graph.go b/src/cmd/compile/internal/pgo/graph.go
new file mode 100644
index 0000000..d7b9432
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/graph.go
@@ -0,0 +1,1033 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package graph collects a set of samples into a directed graph.
+
+// Original file location: https://github.com/google/pprof/tree/main/internal/graph/graph.go
+package pgo
+
+import (
+	"fmt"
+	"internal/profile"
+	"math"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+const maxNodelets = 4 // Number of nodelets for labels (both numeric and non)
+
+// Options encodes the options for constructing a graph
+type Options struct {
+	SampleValue       func(s []int64) int64      // Function to compute the value of a sample
+	SampleMeanDivisor func(s []int64) int64      // Function to compute the divisor for mean graphs, or nil
+	FormatTag         func(int64, string) string // Function to format a sample tag value into a string
+	ObjNames          bool                       // Always preserve obj filename
+	OrigFnNames       bool                       // Preserve original (eg mangled) function names
+
+	CallTree     bool // Build a tree instead of a graph
+	DropNegative bool // Drop nodes with overall negative values
+
+	KeptNodes NodeSet // If non-nil, only use nodes in this set
+}
+
+// Nodes is an ordered collection of graph nodes.
+type Nodes []*Node
+
+// Node is an entry on a profiling report. It represents a unique
+// program location.
+type Node struct {
+	// Info describes the source location associated to this node.
+	Info NodeInfo
+
+	// Function represents the function that this node belongs to. On
+	// graphs with sub-function resolution (eg line number or
+	// addresses), two nodes in a NodeMap that are part of the same
+	// function have the same value of Node.Function. If the Node
+	// represents the whole function, it points back to itself.
+	Function *Node
+
+	// Values associated to this node. Flat is exclusive to this node,
+	// Cum includes all descendents.
+	Flat, FlatDiv, Cum, CumDiv int64
+
+	// In and out Contains the nodes immediately reaching or reached by
+	// this node.
+	In, Out EdgeMap
+
+	// LabelTags provide additional information about subsets of a sample.
+	LabelTags TagMap
+
+	// NumericTags provide additional values for subsets of a sample.
+	// Numeric tags are optionally associated to a label tag. The key
+	// for NumericTags is the name of the LabelTag they are associated
+	// to, or "" for numeric tags not associated to a label tag.
+	NumericTags map[string]TagMap
+}
+
+// Graph summarizes a performance profile into a format that is
+// suitable for visualization.
+type Graph struct {
+	Nodes Nodes
+}
+
+// FlatValue returns the exclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) FlatValue() int64 {
+	if n.FlatDiv == 0 {
+		return n.Flat
+	}
+	return n.Flat / n.FlatDiv
+}
+
+// CumValue returns the inclusive value for this node, computing the
+// mean if a divisor is available.
+func (n *Node) CumValue() int64 {
+	if n.CumDiv == 0 {
+		return n.Cum
+	}
+	return n.Cum / n.CumDiv
+}
+
+// AddToEdge increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdge(to *Node, v int64, residual, inline bool) {
+	n.AddToEdgeDiv(to, 0, v, residual, inline)
+}
+
+// AddToEdgeDiv increases the weight of an edge between two nodes. If
+// there isn't such an edge one is created.
+func (n *Node) AddToEdgeDiv(to *Node, dv, v int64, residual, inline bool) {
+	if n.Out[to] != to.In[n] {
+		panic(fmt.Errorf("asymmetric edges %v %v", *n, *to))
+	}
+
+	if e := n.Out[to]; e != nil {
+		e.WeightDiv += dv
+		e.Weight += v
+		if residual {
+			e.Residual = true
+		}
+		if !inline {
+			e.Inline = false
+		}
+		return
+	}
+
+	info := &Edge{Src: n, Dest: to, WeightDiv: dv, Weight: v, Residual: residual, Inline: inline}
+	n.Out[to] = info
+	to.In[n] = info
+}
+
+// NodeInfo contains the attributes for a node.
+type NodeInfo struct {
+	Name              string
+	OrigName          string
+	Address           uint64
+	File              string
+	StartLine, Lineno int
+	Objfile           string
+}
+
+// PrintableName calls the Node's Formatter function with a single space separator.
+func (i *NodeInfo) PrintableName() string {
+	return strings.Join(i.NameComponents(), " ")
+}
+
+// NameComponents returns the components of the printable name to be used for a node.
+func (i *NodeInfo) NameComponents() []string {
+	var name []string
+	if i.Address != 0 {
+		name = append(name, fmt.Sprintf("%016x", i.Address))
+	}
+	if fun := i.Name; fun != "" {
+		name = append(name, fun)
+	}
+
+	switch {
+	case i.Lineno != 0:
+		// User requested line numbers, provide what we have.
+		name = append(name, fmt.Sprintf("%s:%d", i.File, i.Lineno))
+	case i.File != "":
+		// User requested file name, provide it.
+		name = append(name, i.File)
+	case i.Name != "":
+		// User requested function name. It was already included.
+	case i.Objfile != "":
+		// Only binary name is available
+		name = append(name, "["+filepath.Base(i.Objfile)+"]")
+	default:
+		// Do not leave it empty if there is no information at all.
+		name = append(name, "<unknown>")
+	}
+	return name
+}
+
+// NodeMap maps from a node info struct to a node. It is used to merge
+// report entries with the same info.
+type NodeMap map[NodeInfo]*Node
+
+// NodeSet is a collection of node info structs.
+type NodeSet map[NodeInfo]bool
+
+// NodePtrSet is a collection of nodes. Trimming a graph or tree requires a set
+// of objects which uniquely identify the nodes to keep. In a graph, NodeInfo
+// works as a unique identifier; however, in a tree multiple nodes may share
+// identical NodeInfos. A *Node does uniquely identify a node so we can use that
+// instead. Though a *Node also uniquely identifies a node in a graph,
+// currently, during trimming, graphs are rebuilt from scratch using only the
+// NodeSet, so there would not be the required context of the initial graph to
+// allow for the use of *Node.
+type NodePtrSet map[*Node]bool
+
+// FindOrInsertNode takes the info for a node and either returns a matching node
+// from the node map if one exists, or adds one to the map if one does not.
+// If kept is non-nil, nodes are only added if they can be located on it.
+func (nm NodeMap) FindOrInsertNode(info NodeInfo, kept NodeSet) *Node {
+	if kept != nil {
+		if _, ok := kept[info]; !ok {
+			return nil
+		}
+	}
+
+	if n, ok := nm[info]; ok {
+		return n
+	}
+
+	n := &Node{
+		Info:        info,
+		In:          make(EdgeMap),
+		Out:         make(EdgeMap),
+		LabelTags:   make(TagMap),
+		NumericTags: make(map[string]TagMap),
+	}
+	nm[info] = n
+	if info.Address == 0 && info.Lineno == 0 {
+		// This node represents the whole function, so point Function
+		// back to itself.
+		n.Function = n
+		return n
+	}
+	// Find a node that represents the whole function.
+	info.Address = 0
+	info.Lineno = 0
+	n.Function = nm.FindOrInsertNode(info, nil)
+	return n
+}
+
+// EdgeMap is used to represent the incoming/outgoing edges from a node.
+type EdgeMap map[*Node]*Edge
+
+// Edge contains any attributes to be represented about edges in a graph.
+type Edge struct {
+	Src, Dest *Node
+	// The summary weight of the edge
+	Weight, WeightDiv int64
+
+	// residual edges connect nodes that were connected through a
+	// separate node, which has been removed from the report.
+	Residual bool
+	// An inline edge represents a call that was inlined into the caller.
+	Inline bool
+}
+
+// WeightValue returns the weight value for this edge, normalizing if a
+// divisor is available.
+func (e *Edge) WeightValue() int64 {
+	if e.WeightDiv == 0 {
+		return e.Weight
+	}
+	return e.Weight / e.WeightDiv
+}
+
+// Tag represent sample annotations
+type Tag struct {
+	Name          string
+	Unit          string // Describe the value, "" for non-numeric tags
+	Value         int64
+	Flat, FlatDiv int64
+	Cum, CumDiv   int64
+}
+
+// FlatValue returns the exclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) FlatValue() int64 {
+	if t.FlatDiv == 0 {
+		return t.Flat
+	}
+	return t.Flat / t.FlatDiv
+}
+
+// CumValue returns the inclusive value for this tag, computing the
+// mean if a divisor is available.
+func (t *Tag) CumValue() int64 {
+	if t.CumDiv == 0 {
+		return t.Cum
+	}
+	return t.Cum / t.CumDiv
+}
+
+// TagMap is a collection of tags, classified by their name.
+type TagMap map[string]*Tag
+
+// SortTags sorts a slice of tags based on their weight.
+func SortTags(t []*Tag, flat bool) []*Tag {
+	ts := tags{t, flat}
+	sort.Sort(ts)
+	return ts.t
+}
+
+// New summarizes performance data from a profile into a graph.
+func New(prof *profile.Profile, o *Options) *Graph {
+	if o.CallTree {
+		return newTree(prof, o)
+	}
+	g, _ := newGraph(prof, o)
+	return g
+}
+
+// newGraph computes a graph from a profile. It returns the graph, and
+// a map from the profile location indices to the corresponding graph
+// nodes.
+func newGraph(prof *profile.Profile, o *Options) (*Graph, map[uint64]Nodes) {
+	nodes, locationMap := CreateNodes(prof, o)
+	seenNode := make(map[*Node]bool)
+	seenEdge := make(map[nodePair]bool)
+	for _, sample := range prof.Sample {
+		var w, dw int64
+		w = o.SampleValue(sample.Value)
+		if o.SampleMeanDivisor != nil {
+			dw = o.SampleMeanDivisor(sample.Value)
+		}
+		if dw == 0 && w == 0 {
+			continue
+		}
+		for k := range seenNode {
+			delete(seenNode, k)
+		}
+		for k := range seenEdge {
+			delete(seenEdge, k)
+		}
+		var parent *Node
+		// A residual edge goes over one or more nodes that were not kept.
+		residual := false
+
+		labels := joinLabels(sample)
+		// Group the sample frames, based on a global map.
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			l := sample.Location[i]
+			locNodes := locationMap[l.ID]
+			for ni := len(locNodes) - 1; ni >= 0; ni-- {
+				n := locNodes[ni]
+				if n == nil {
+					residual = true
+					continue
+				}
+				// Add cum weight to all nodes in stack, avoiding double counting.
+				if _, ok := seenNode[n]; !ok {
+					seenNode[n] = true
+					n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+				}
+				// Update edge weights for all edges in stack, avoiding double counting.
+				if _, ok := seenEdge[nodePair{n, parent}]; !ok && parent != nil && n != parent {
+					seenEdge[nodePair{n, parent}] = true
+					parent.AddToEdgeDiv(n, dw, w, residual, ni != len(locNodes)-1)
+				}
+				parent = n
+				residual = false
+			}
+		}
+		if parent != nil && !residual {
+			// Add flat weight to leaf node.
+			parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+		}
+	}
+
+	return selectNodesForGraph(nodes, o.DropNegative), locationMap
+}
+
+func selectNodesForGraph(nodes Nodes, dropNegative bool) *Graph {
+	// Collect nodes into a graph.
+	gNodes := make(Nodes, 0, len(nodes))
+	for _, n := range nodes {
+		if n == nil {
+			continue
+		}
+		if n.Cum == 0 && n.Flat == 0 {
+			continue
+		}
+		if dropNegative && isNegative(n) {
+			continue
+		}
+		gNodes = append(gNodes, n)
+	}
+	return &Graph{gNodes}
+}
+
+type nodePair struct {
+	src, dest *Node
+}
+
+func newTree(prof *profile.Profile, o *Options) (g *Graph) {
+	parentNodeMap := make(map[*Node]NodeMap, len(prof.Sample))
+	for _, sample := range prof.Sample {
+		var w, dw int64
+		w = o.SampleValue(sample.Value)
+		if o.SampleMeanDivisor != nil {
+			dw = o.SampleMeanDivisor(sample.Value)
+		}
+		if dw == 0 && w == 0 {
+			continue
+		}
+		var parent *Node
+		labels := joinLabels(sample)
+		// Group the sample frames, based on a per-node map.
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			l := sample.Location[i]
+			lines := l.Line
+			if len(lines) == 0 {
+				lines = []profile.Line{{}} // Create empty line to include location info.
+			}
+			for lidx := len(lines) - 1; lidx >= 0; lidx-- {
+				nodeMap := parentNodeMap[parent]
+				if nodeMap == nil {
+					nodeMap = make(NodeMap)
+					parentNodeMap[parent] = nodeMap
+				}
+				n := nodeMap.findOrInsertLine(l, lines[lidx], o)
+				if n == nil {
+					continue
+				}
+				n.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, false)
+				if parent != nil {
+					parent.AddToEdgeDiv(n, dw, w, false, lidx != len(lines)-1)
+				}
+				parent = n
+			}
+		}
+		if parent != nil {
+			parent.addSample(dw, w, labels, sample.NumLabel, sample.NumUnit, o.FormatTag, true)
+		}
+	}
+
+	nodes := make(Nodes, len(prof.Location))
+	for _, nm := range parentNodeMap {
+		nodes = append(nodes, nm.nodes()...)
+	}
+	return selectNodesForGraph(nodes, o.DropNegative)
+}
+
+func joinLabels(s *profile.Sample) string {
+	if len(s.Label) == 0 {
+		return ""
+	}
+
+	var labels []string
+	for key, vals := range s.Label {
+		for _, v := range vals {
+			labels = append(labels, key+":"+v)
+		}
+	}
+	sort.Strings(labels)
+	return strings.Join(labels, `\n`)
+}
+
+// isNegative returns true if the node is considered as "negative" for the
+// purposes of drop_negative.
+func isNegative(n *Node) bool {
+	switch {
+	case n.Flat < 0:
+		return true
+	case n.Flat == 0 && n.Cum < 0:
+		return true
+	default:
+		return false
+	}
+}
+
+// CreateNodes creates graph nodes for all locations in a profile. It
+// returns set of all nodes, plus a mapping of each location to the
+// set of corresponding nodes (one per location.Line).
+func CreateNodes(prof *profile.Profile, o *Options) (Nodes, map[uint64]Nodes) {
+	locations := make(map[uint64]Nodes, len(prof.Location))
+	nm := make(NodeMap, len(prof.Location))
+	for _, l := range prof.Location {
+		lines := l.Line
+		if len(lines) == 0 {
+			lines = []profile.Line{{}} // Create empty line to include location info.
+		}
+		nodes := make(Nodes, len(lines))
+		for ln := range lines {
+			nodes[ln] = nm.findOrInsertLine(l, lines[ln], o)
+		}
+		locations[l.ID] = nodes
+	}
+	return nm.nodes(), locations
+}
+
+func (nm NodeMap) nodes() Nodes {
+	nodes := make(Nodes, 0, len(nm))
+	for _, n := range nm {
+		nodes = append(nodes, n)
+	}
+	return nodes
+}
+
+func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node {
+	var objfile string
+	if m := l.Mapping; m != nil && m.File != "" {
+		objfile = m.File
+	}
+
+	if ni := nodeInfo(l, li, objfile, o); ni != nil {
+		return nm.FindOrInsertNode(*ni, o.KeptNodes)
+	}
+	return nil
+}
+
+func nodeInfo(l *profile.Location, line profile.Line, objfile string, o *Options) *NodeInfo {
+	if line.Function == nil {
+		return &NodeInfo{Address: l.Address, Objfile: objfile}
+	}
+	ni := &NodeInfo{
+		Address: l.Address,
+		Lineno:  int(line.Line),
+		Name:    line.Function.Name,
+	}
+	if fname := line.Function.Filename; fname != "" {
+		ni.File = filepath.Clean(fname)
+	}
+	if o.OrigFnNames {
+		ni.OrigName = line.Function.SystemName
+	}
+	if o.ObjNames || (ni.Name == "" && ni.OrigName == "") {
+		ni.Objfile = objfile
+		ni.StartLine = int(line.Function.StartLine)
+	}
+	return ni
+}
+
+type tags struct {
+	t    []*Tag
+	flat bool
+}
+
+func (t tags) Len() int      { return len(t.t) }
+func (t tags) Swap(i, j int) { t.t[i], t.t[j] = t.t[j], t.t[i] }
+func (t tags) Less(i, j int) bool {
+	if !t.flat {
+		if t.t[i].Cum != t.t[j].Cum {
+			return abs64(t.t[i].Cum) > abs64(t.t[j].Cum)
+		}
+	}
+	if t.t[i].Flat != t.t[j].Flat {
+		return abs64(t.t[i].Flat) > abs64(t.t[j].Flat)
+	}
+	return t.t[i].Name < t.t[j].Name
+}
+
+// Sum adds the flat and cum values of a set of nodes.
+func (ns Nodes) Sum() (flat int64, cum int64) {
+	for _, n := range ns {
+		flat += n.Flat
+		cum += n.Cum
+	}
+	return
+}
+
+func (n *Node) addSample(dw, w int64, labels string, numLabel map[string][]int64, numUnit map[string][]string, format func(int64, string) string, flat bool) {
+	// Update sample value
+	if flat {
+		n.FlatDiv += dw
+		n.Flat += w
+	} else {
+		n.CumDiv += dw
+		n.Cum += w
+	}
+
+	// Add string tags
+	if labels != "" {
+		t := n.LabelTags.findOrAddTag(labels, "", 0)
+		if flat {
+			t.FlatDiv += dw
+			t.Flat += w
+		} else {
+			t.CumDiv += dw
+			t.Cum += w
+		}
+	}
+
+	numericTags := n.NumericTags[labels]
+	if numericTags == nil {
+		numericTags = TagMap{}
+		n.NumericTags[labels] = numericTags
+	}
+	// Add numeric tags
+	if format == nil {
+		format = defaultLabelFormat
+	}
+	for k, nvals := range numLabel {
+		units := numUnit[k]
+		for i, v := range nvals {
+			var t *Tag
+			if len(units) > 0 {
+				t = numericTags.findOrAddTag(format(v, units[i]), units[i], v)
+			} else {
+				t = numericTags.findOrAddTag(format(v, k), k, v)
+			}
+			if flat {
+				t.FlatDiv += dw
+				t.Flat += w
+			} else {
+				t.CumDiv += dw
+				t.Cum += w
+			}
+		}
+	}
+}
+
+func defaultLabelFormat(v int64, key string) string {
+	return strconv.FormatInt(v, 10)
+}
+
+func (m TagMap) findOrAddTag(label, unit string, value int64) *Tag {
+	l := m[label]
+	if l == nil {
+		l = &Tag{
+			Name:  label,
+			Unit:  unit,
+			Value: value,
+		}
+		m[label] = l
+	}
+	return l
+}
+
+// String returns a text representation of a graph, for debugging purposes.
+func (g *Graph) String() string {
+	var s []string
+
+	nodeIndex := make(map[*Node]int, len(g.Nodes))
+
+	for i, n := range g.Nodes {
+		nodeIndex[n] = i + 1
+	}
+
+	for i, n := range g.Nodes {
+		name := n.Info.PrintableName()
+		var in, out []int
+
+		for _, from := range n.In {
+			in = append(in, nodeIndex[from.Src])
+		}
+		for _, to := range n.Out {
+			out = append(out, nodeIndex[to.Dest])
+		}
+		s = append(s, fmt.Sprintf("%d: %s[flat=%d cum=%d] %x -> %v ", i+1, name, n.Flat, n.Cum, in, out))
+	}
+	return strings.Join(s, "\n")
+}
+
+// DiscardLowFrequencyNodes returns a set of the nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodes(nodeCutoff int64) NodeSet {
+	return makeNodeSet(g.Nodes, nodeCutoff)
+}
+
+// DiscardLowFrequencyNodePtrs returns a NodePtrSet of nodes at or over a
+// specific cum value cutoff.
+func (g *Graph) DiscardLowFrequencyNodePtrs(nodeCutoff int64) NodePtrSet {
+	cutNodes := getNodesAboveCumCutoff(g.Nodes, nodeCutoff)
+	kept := make(NodePtrSet, len(cutNodes))
+	for _, n := range cutNodes {
+		kept[n] = true
+	}
+	return kept
+}
+
+func makeNodeSet(nodes Nodes, nodeCutoff int64) NodeSet {
+	cutNodes := getNodesAboveCumCutoff(nodes, nodeCutoff)
+	kept := make(NodeSet, len(cutNodes))
+	for _, n := range cutNodes {
+		kept[n.Info] = true
+	}
+	return kept
+}
+
+// getNodesAboveCumCutoff returns all the nodes which have a Cum value greater
+// than or equal to cutoff.
+func getNodesAboveCumCutoff(nodes Nodes, nodeCutoff int64) Nodes {
+	cutoffNodes := make(Nodes, 0, len(nodes))
+	for _, n := range nodes {
+		if abs64(n.Cum) < nodeCutoff {
+			continue
+		}
+		cutoffNodes = append(cutoffNodes, n)
+	}
+	return cutoffNodes
+}
+
+// TrimLowFrequencyTags removes tags that have less than
+// the specified weight.
+func (g *Graph) TrimLowFrequencyTags(tagCutoff int64) {
+	// Remove nodes with value <= total*nodeFraction
+	for _, n := range g.Nodes {
+		n.LabelTags = trimLowFreqTags(n.LabelTags, tagCutoff)
+		for s, nt := range n.NumericTags {
+			n.NumericTags[s] = trimLowFreqTags(nt, tagCutoff)
+		}
+	}
+}
+
+func trimLowFreqTags(tags TagMap, minValue int64) TagMap {
+	kept := TagMap{}
+	for s, t := range tags {
+		if abs64(t.Flat) >= minValue || abs64(t.Cum) >= minValue {
+			kept[s] = t
+		}
+	}
+	return kept
+}
+
+// TrimLowFrequencyEdges removes edges that have less than
+// the specified weight. Returns the number of edges removed
+func (g *Graph) TrimLowFrequencyEdges(edgeCutoff int64) int {
+	var droppedEdges int
+	for _, n := range g.Nodes {
+		for src, e := range n.In {
+			if abs64(e.Weight) < edgeCutoff {
+				delete(n.In, src)
+				delete(src.Out, n)
+				droppedEdges++
+			}
+		}
+	}
+	return droppedEdges
+}
+
+// SortNodes sorts the nodes in a graph based on a specific heuristic.
+func (g *Graph) SortNodes(cum bool, visualMode bool) {
+	// Sort nodes based on requested mode
+	switch {
+	case visualMode:
+		// Specialized sort to produce a more visually-interesting graph
+		g.Nodes.Sort(EntropyOrder)
+	case cum:
+		g.Nodes.Sort(CumNameOrder)
+	default:
+		g.Nodes.Sort(FlatNameOrder)
+	}
+}
+
+// SelectTopNodePtrs returns a set of the top maxNodes *Node in a graph.
+func (g *Graph) SelectTopNodePtrs(maxNodes int, visualMode bool) NodePtrSet {
+	set := make(NodePtrSet)
+	for _, node := range g.selectTopNodes(maxNodes, visualMode) {
+		set[node] = true
+	}
+	return set
+}
+
+// SelectTopNodes returns a set of the top maxNodes nodes in a graph.
+func (g *Graph) SelectTopNodes(maxNodes int, visualMode bool) NodeSet {
+	return makeNodeSet(g.selectTopNodes(maxNodes, visualMode), 0)
+}
+
+// selectTopNodes returns a slice of the top maxNodes nodes in a graph.
+func (g *Graph) selectTopNodes(maxNodes int, visualMode bool) Nodes {
+	if maxNodes > 0 {
+		if visualMode {
+			var count int
+			// If generating a visual graph, count tags as nodes. Update
+			// maxNodes to account for them.
+			for i, n := range g.Nodes {
+				tags := countTags(n)
+				if tags > maxNodelets {
+					tags = maxNodelets
+				}
+				if count += tags + 1; count >= maxNodes {
+					maxNodes = i + 1
+					break
+				}
+			}
+		}
+	}
+	if maxNodes > len(g.Nodes) {
+		maxNodes = len(g.Nodes)
+	}
+	return g.Nodes[:maxNodes]
+}
+
+// countTags counts the tags with flat count. This underestimates the
+// number of tags being displayed, but in practice is close enough.
+func countTags(n *Node) int {
+	count := 0
+	for _, e := range n.LabelTags {
+		if e.Flat != 0 {
+			count++
+		}
+	}
+	for _, t := range n.NumericTags {
+		for _, e := range t {
+			if e.Flat != 0 {
+				count++
+			}
+		}
+	}
+	return count
+}
+
+// nodeSorter is a mechanism used to allow a report to be sorted
+// in different ways.
+type nodeSorter struct {
+	rs   Nodes
+	less func(l, r *Node) bool
+}
+
+func (s nodeSorter) Len() int           { return len(s.rs) }
+func (s nodeSorter) Swap(i, j int)      { s.rs[i], s.rs[j] = s.rs[j], s.rs[i] }
+func (s nodeSorter) Less(i, j int) bool { return s.less(s.rs[i], s.rs[j]) }
+
+// Sort reorders a slice of nodes based on the specified ordering
+// criteria. The result is sorted in decreasing order for (absolute)
+// numeric quantities, alphabetically for text, and increasing for
+// addresses.
+func (ns Nodes) Sort(o NodeOrder) error {
+	var s nodeSorter
+
+	switch o {
+	case FlatNameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+					return iv < jv
+				}
+				if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+					return iv > jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case FlatCumNameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := abs64(l.Cum), abs64(r.Cum); iv != jv {
+					return iv > jv
+				}
+				if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case NameOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.Name, r.Info.Name; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case FileOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.File, r.Info.File; iv != jv {
+					return iv < jv
+				}
+				if iv, jv := l.Info.StartLine, r.Info.StartLine; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case AddressOrder:
+		s = nodeSorter{ns,
+			func(l, r *Node) bool {
+				if iv, jv := l.Info.Address, r.Info.Address; iv != jv {
+					return iv < jv
+				}
+				return compareNodes(l, r)
+			},
+		}
+	case CumNameOrder, EntropyOrder:
+		// Hold scoring for score-based ordering
+		var score map[*Node]int64
+		scoreOrder := func(l, r *Node) bool {
+			if iv, jv := abs64(score[l]), abs64(score[r]); iv != jv {
+				return iv > jv
+			}
+			if iv, jv := l.Info.PrintableName(), r.Info.PrintableName(); iv != jv {
+				return iv < jv
+			}
+			if iv, jv := abs64(l.Flat), abs64(r.Flat); iv != jv {
+				return iv > jv
+			}
+			return compareNodes(l, r)
+		}
+
+		switch o {
+		case CumNameOrder:
+			score = make(map[*Node]int64, len(ns))
+			for _, n := range ns {
+				score[n] = n.Cum
+			}
+			s = nodeSorter{ns, scoreOrder}
+		case EntropyOrder:
+			score = make(map[*Node]int64, len(ns))
+			for _, n := range ns {
+				score[n] = entropyScore(n)
+			}
+			s = nodeSorter{ns, scoreOrder}
+		}
+	default:
+		return fmt.Errorf("report: unrecognized sort ordering: %d", o)
+	}
+	sort.Sort(s)
+	return nil
+}
+
+// compareNodes compares two nodes to provide a deterministic ordering
+// between them. Two nodes cannot have the same Node.Info value.
+func compareNodes(l, r *Node) bool {
+	return fmt.Sprint(l.Info) < fmt.Sprint(r.Info)
+}
+
+// entropyScore computes a score for a node representing how important
+// it is to include this node on a graph visualization. It is used to
+// sort the nodes and select which ones to display if we have more
+// nodes than desired in the graph. This number is computed by looking
+// at the flat and cum weights of the node and the incoming/outgoing
+// edges. The fundamental idea is to penalize nodes that have a simple
+// fallthrough from their incoming to the outgoing edge.
+func entropyScore(n *Node) int64 {
+	score := float64(0)
+
+	if len(n.In) == 0 {
+		score++ // Favor entry nodes
+	} else {
+		score += edgeEntropyScore(n, n.In, 0)
+	}
+
+	if len(n.Out) == 0 {
+		score++ // Favor leaf nodes
+	} else {
+		score += edgeEntropyScore(n, n.Out, n.Flat)
+	}
+
+	return int64(score*float64(n.Cum)) + n.Flat
+}
+
+// edgeEntropyScore computes the entropy value for a set of edges
+// coming in or out of a node. Entropy (as defined in information
+// theory) refers to the amount of information encoded by the set of
+// edges. A set of edges that have a more interesting distribution of
+// samples gets a higher score.
+func edgeEntropyScore(n *Node, edges EdgeMap, self int64) float64 {
+	score := float64(0)
+	total := self
+	for _, e := range edges {
+		if e.Weight > 0 {
+			total += abs64(e.Weight)
+		}
+	}
+	if total != 0 {
+		for _, e := range edges {
+			frac := float64(abs64(e.Weight)) / float64(total)
+			score += -frac * math.Log2(frac)
+		}
+		if self > 0 {
+			frac := float64(abs64(self)) / float64(total)
+			score += -frac * math.Log2(frac)
+		}
+	}
+	return score
+}
+
+// NodeOrder sets the ordering for a Sort operation
+type NodeOrder int
+
+// Sorting options for node sort.
+const (
+	FlatNameOrder NodeOrder = iota
+	FlatCumNameOrder
+	CumNameOrder
+	NameOrder
+	FileOrder
+	AddressOrder
+	EntropyOrder
+)
+
+// Sort returns a slice of the edges in the map, in a consistent
+// order. The sort order is first based on the edge weight
+// (higher-to-lower) and then by the node names to avoid flakiness.
+func (e EdgeMap) Sort() []*Edge {
+	el := make(edgeList, 0, len(e))
+	for _, w := range e {
+		el = append(el, w)
+	}
+
+	sort.Sort(el)
+	return el
+}
+
+// Sum returns the total weight for a set of nodes.
+func (e EdgeMap) Sum() int64 {
+	var ret int64
+	for _, edge := range e {
+		ret += edge.Weight
+	}
+	return ret
+}
+
+type edgeList []*Edge
+
+func (el edgeList) Len() int {
+	return len(el)
+}
+
+func (el edgeList) Less(i, j int) bool {
+	if el[i].Weight != el[j].Weight {
+		return abs64(el[i].Weight) > abs64(el[j].Weight)
+	}
+
+	from1 := el[i].Src.Info.PrintableName()
+	from2 := el[j].Src.Info.PrintableName()
+	if from1 != from2 {
+		return from1 < from2
+	}
+
+	to1 := el[i].Dest.Info.PrintableName()
+	to2 := el[j].Dest.Info.PrintableName()
+
+	return to1 < to2
+}
+
+func (el edgeList) Swap(i, j int) {
+	el[i], el[j] = el[j], el[i]
+}
+
+func abs64(i int64) int64 {
+	if i < 0 {
+		return -i
+	}
+	return i
+}
diff --git a/src/cmd/compile/internal/pgo/irgraph.go b/src/cmd/compile/internal/pgo/irgraph.go
new file mode 100644
index 0000000..9823882
--- /dev/null
+++ b/src/cmd/compile/internal/pgo/irgraph.go
@@ -0,0 +1,480 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WORK IN PROGRESS
+
+package pgo
+
+import (
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/typecheck"
+	"cmd/compile/internal/types"
+	"fmt"
+	"internal/profile"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// IRGraph is the key datastrcture that is built from profile. It is essentially a call graph with nodes pointing to IRs of functions and edges carrying weights and callsite information. The graph is bidirectional that helps in removing nodes efficiently.
+type IRGraph struct {
+	// Nodes of the graph
+	IRNodes  map[string]*IRNode
+	OutEdges IREdgeMap
+	InEdges  IREdgeMap
+}
+
+// IRNode represents a node in the IRGraph.
+type IRNode struct {
+	// Pointer to the IR of the Function represented by this node.
+	AST *ir.Func
+	// Flat weight of the IRNode, obtained from profile.
+	Flat int64
+	// Cumulative weight of the IRNode.
+	Cum int64
+}
+
+// IREdgeMap maps an IRNode to its successors.
+type IREdgeMap map[*IRNode][]*IREdge
+
+// IREdge represents a call edge in the IRGraph with source, destination, weight, callsite, and line number information.
+type IREdge struct {
+	// Source and destination of the edge in IRNode.
+	Src, Dst *IRNode
+	Weight   int64
+	CallSite int
+}
+
+// NodeMapKey represents a hash key to identify unique call-edges in profile and in IR. Used for deduplication of call edges found in profile.
+type NodeMapKey struct {
+	CallerName string
+	CalleeName string
+	CallSite   int
+}
+
+// Weights capture both node weight and edge weight.
+type Weights struct {
+	NFlat   int64
+	NCum    int64
+	EWeight int64
+}
+
+// CallSiteInfo captures call-site information and its caller/callee.
+type CallSiteInfo struct {
+	Line   int
+	Caller *ir.Func
+	Callee *ir.Func
+}
+
+var (
+	// Aggregated NodeWeights and EdgeWeights across profiles. This helps us determine the percentage threshold for hot/cold partitioning.
+	GlobalTotalNodeWeight = int64(0)
+	GlobalTotalEdgeWeight = int64(0)
+
+	// Global node and their aggregated weight information.
+	GlobalNodeMap = make(map[NodeMapKey]*Weights)
+
+	// WeightedCG represents the IRGraph built from profile, which we will update as part of inlining.
+	WeightedCG *IRGraph
+
+	// Original profile-graph.
+	ProfileGraph *Graph
+
+	// Per-caller data structure to track the list of hot call sites. This gets rewritten every caller leaving it to GC for cleanup.
+	ListOfHotCallSites = make(map[CallSiteInfo]struct{})
+)
+
+// BuildProfileGraph generates a profile-graph from the profile.
+func BuildProfileGraph(profileFile string) {
+
+	// if possible, we should cache the profile-graph.
+	if ProfileGraph != nil {
+		return
+	}
+
+	// open the profile file.
+	f, err := os.Open(profileFile)
+	if err != nil {
+		log.Fatal("failed to open file " + profileFile)
+		return
+	}
+	defer f.Close()
+	p, err := profile.Parse(f)
+	if err != nil {
+		log.Fatal("failed to Parse profile file.")
+		return
+	}
+	// Build the options.
+	opt := &Options{
+		CallTree:    false,
+		SampleValue: func(v []int64) int64 { return v[1] },
+	}
+	// Build the graph using profile package.
+	ProfileGraph = New(p, opt)
+
+	// Build various global maps from profile.
+	preprocessProfileGraph()
+
+}
+
+// BuildWeightedCallGraph generates a weighted callgraph from the profile for the current package.
+func BuildWeightedCallGraph() {
+
+	// Bail if there is no profile-graph available.
+	if ProfileGraph == nil {
+		return
+	}
+
+	// Create package-level call graph with weights from profile and IR.
+	WeightedCG = createIRGraph()
+}
+
+// ConvertLine2Int converts ir.Line string to integer.
+func ConvertLine2Int(line string) int {
+	splits := strings.Split(line, ":")
+	cs, _ := strconv.ParseInt(splits[len(splits)-2], 0, 64)
+	return int(cs)
+}
+
+// preprocessProfileGraph builds various maps from the profile-graph. It builds GlobalNodeMap and other information based on the name and callsite to compute node and edge weights which will be used later on to create edges for WeightedCG.
+func preprocessProfileGraph() {
+	nFlat := make(map[string]int64)
+	nCum := make(map[string]int64)
+
+	// Accummulate weights for the same node.
+	for _, n := range ProfileGraph.Nodes {
+		canonicalName := n.Info.Name
+		nFlat[canonicalName] += n.FlatValue()
+		nCum[canonicalName] += n.CumValue()
+	}
+
+	// Process ProfileGraph and build various node and edge maps which will be consumed by AST walk.
+	for _, n := range ProfileGraph.Nodes {
+		GlobalTotalNodeWeight += n.FlatValue()
+		canonicalName := n.Info.Name
+		// Create the key to the NodeMapKey.
+		nodeinfo := NodeMapKey{
+			CallerName: canonicalName,
+			CallSite:   n.Info.Lineno,
+		}
+
+		for _, e := range n.Out {
+			GlobalTotalEdgeWeight += e.WeightValue()
+			nodeinfo.CalleeName = e.Dest.Info.Name
+			if w, ok := GlobalNodeMap[nodeinfo]; ok {
+				w.EWeight += e.WeightValue()
+			} else {
+				weights := new(Weights)
+				weights.NFlat = nFlat[canonicalName]
+				weights.NCum = nCum[canonicalName]
+				weights.EWeight = e.WeightValue()
+				GlobalNodeMap[nodeinfo] = weights
+			}
+		}
+	}
+}
+
+// createIRGraph builds the IRGraph by visting all the ir.Func in decl list of a package.
+func createIRGraph() *IRGraph {
+	var g IRGraph
+	// Bottomup walk over the function to create IRGraph.
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, n := range list {
+			g.Visit(n, recursive)
+		}
+	})
+	return &g
+}
+
+// Visit traverses the body of each ir.Func and use GlobalNodeMap to determine if we need to add an edge from ir.Func and any node in the ir.Func body.
+func (g *IRGraph) Visit(fn *ir.Func, recursive bool) {
+	if g.IRNodes == nil {
+		g.IRNodes = make(map[string]*IRNode)
+	}
+	if g.OutEdges == nil {
+		g.OutEdges = make(map[*IRNode][]*IREdge)
+	}
+	if g.InEdges == nil {
+		g.InEdges = make(map[*IRNode][]*IREdge)
+	}
+	name := ir.PkgFuncName(fn)
+	node := new(IRNode)
+	node.AST = fn
+	if g.IRNodes[name] == nil {
+		g.IRNodes[name] = node
+	}
+	// Create the key for the NodeMapKey.
+	nodeinfo := NodeMapKey{
+		CallerName: name,
+		CalleeName: "",
+		CallSite:   -1,
+	}
+	// If the node exists, then update its node weight.
+	if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+		g.IRNodes[name].Flat = weights.NFlat
+		g.IRNodes[name].Cum = weights.NCum
+	}
+
+	// Recursively walk over the body of the function to create IRGraph edges.
+	g.createIRGraphEdge(fn, g.IRNodes[name], name)
+}
+
+// addEdge adds an edge between caller and new node that points to `callee` based on the profile-graph and GlobalNodeMap.
+func (g *IRGraph) addEdge(caller *IRNode, callee *ir.Func, n *ir.Node, callername string, line int) {
+
+	// Create an IRNode for the callee.
+	calleenode := new(IRNode)
+	calleenode.AST = callee
+	calleename := ir.PkgFuncName(callee)
+
+	// Create key for NodeMapKey.
+	nodeinfo := NodeMapKey{
+		CallerName: callername,
+		CalleeName: calleename,
+		CallSite:   line,
+	}
+
+	// Create the callee node with node weight.
+	if g.IRNodes[calleename] == nil {
+		g.IRNodes[calleename] = calleenode
+		nodeinfo2 := NodeMapKey{
+			CallerName: calleename,
+			CalleeName: "",
+			CallSite:   -1,
+		}
+		if weights, ok := GlobalNodeMap[nodeinfo2]; ok {
+			g.IRNodes[calleename].Flat = weights.NFlat
+			g.IRNodes[calleename].Cum = weights.NCum
+		}
+	}
+
+	if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+		caller.Flat = weights.NFlat
+		caller.Cum = weights.NCum
+
+		// Add edge in the IRGraph from caller to callee.
+		info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: weights.EWeight, CallSite: line}
+		g.OutEdges[caller] = append(g.OutEdges[caller], info)
+		g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+	} else {
+		nodeinfo.CalleeName = ""
+		nodeinfo.CallSite = -1
+		if weights, ok := GlobalNodeMap[nodeinfo]; ok {
+			caller.Flat = weights.NFlat
+			caller.Cum = weights.NCum
+			info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: 0, CallSite: line}
+			g.OutEdges[caller] = append(g.OutEdges[caller], info)
+			g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+		} else {
+			info := &IREdge{Src: caller, Dst: g.IRNodes[calleename], Weight: 0, CallSite: line}
+			g.OutEdges[caller] = append(g.OutEdges[caller], info)
+			g.InEdges[g.IRNodes[calleename]] = append(g.InEdges[g.IRNodes[calleename]], info)
+		}
+	}
+}
+
+// createIRGraphEdge traverses the nodes in the body of ir.Func and add edges between callernode which points to the ir.Func and the nodes in the body.
+func (g *IRGraph) createIRGraphEdge(fn *ir.Func, callernode *IRNode, name string) {
+	var doNode func(ir.Node) bool
+	doNode = func(n ir.Node) bool {
+		switch n.Op() {
+		default:
+			ir.DoChildren(n, doNode)
+		case ir.OCALLFUNC:
+			call := n.(*ir.CallExpr)
+			line := ConvertLine2Int(ir.Line(n))
+			// Find the callee function from the call site and add the edge.
+			f := inlCallee(call.X)
+			if f != nil {
+				g.addEdge(callernode, f, &n, name, line)
+			}
+		case ir.OCALLMETH:
+			call := n.(*ir.CallExpr)
+			// Find the callee method from the call site and add the edge.
+			fn2 := ir.MethodExprName(call.X).Func
+			line := ConvertLine2Int(ir.Line(n))
+			g.addEdge(callernode, fn2, &n, name, line)
+		}
+		return false
+	}
+	doNode(fn)
+}
+
+// WeightInPercentage converts profile weights to a percentage.
+func WeightInPercentage(value int64, total int64) float64 {
+	var ratio float64
+	if total != 0 {
+		ratio = (float64(value) / float64(total)) * 100
+	}
+	return ratio
+}
+
+// PrintWeightedCallGraphDOT prints IRGraph in DOT format.
+func PrintWeightedCallGraphDOT(nodeThreshold float64, edgeThreshold float64) {
+	fmt.Printf("\ndigraph G {\n")
+	fmt.Printf("forcelabels=true;\n")
+
+	// List of functions in this package.
+	funcs := make(map[string]struct{})
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			funcs[name] = struct{}{}
+		}
+	})
+
+	// Determine nodes of DOT.
+	nodes := make(map[string]*ir.Func)
+	for name, _ := range funcs {
+		if n, ok := WeightedCG.IRNodes[name]; ok {
+			for _, e := range WeightedCG.OutEdges[n] {
+				if _, ok := nodes[ir.PkgFuncName(e.Src.AST)]; !ok {
+					nodes[ir.PkgFuncName(e.Src.AST)] = e.Src.AST
+				}
+				if _, ok := nodes[ir.PkgFuncName(e.Dst.AST)]; !ok {
+					nodes[ir.PkgFuncName(e.Dst.AST)] = e.Dst.AST
+				}
+			}
+			if _, ok := nodes[ir.PkgFuncName(n.AST)]; !ok {
+				nodes[ir.PkgFuncName(n.AST)] = n.AST
+			}
+		}
+	}
+
+	// Print nodes.
+	for name, ast := range nodes {
+		if n, ok := WeightedCG.IRNodes[name]; ok {
+			nodeweight := WeightInPercentage(n.Flat, GlobalTotalNodeWeight)
+			color := "black"
+			if nodeweight > nodeThreshold {
+				color = "red"
+			}
+			if ast.Inl != nil {
+				fmt.Printf("\"%v\" [color=%v,label=\"%v,freq=%.2f,inl_cost=%d\"];\n", ir.PkgFuncName(ast), color, ir.PkgFuncName(ast), nodeweight, ast.Inl.Cost)
+			} else {
+				fmt.Printf("\"%v\" [color=%v, label=\"%v,freq=%.2f\"];\n", ir.PkgFuncName(ast), color, ir.PkgFuncName(ast), nodeweight)
+			}
+		}
+	}
+	// Print edges.
+	ir.VisitFuncsBottomUp(typecheck.Target.Decls, func(list []*ir.Func, recursive bool) {
+		for _, f := range list {
+			name := ir.PkgFuncName(f)
+			if n, ok := WeightedCG.IRNodes[name]; ok {
+				for _, e := range WeightedCG.OutEdges[n] {
+					edgepercent := WeightInPercentage(e.Weight, GlobalTotalEdgeWeight)
+					if edgepercent > edgeThreshold {
+						fmt.Printf("edge [color=red, style=solid];\n")
+					} else {
+						fmt.Printf("edge [color=black, style=solid];\n")
+					}
+
+					fmt.Printf("\"%v\" -> \"%v\" [label=\"%.2f\"];\n", ir.PkgFuncName(n.AST), ir.PkgFuncName(e.Dst.AST), edgepercent)
+				}
+			}
+		}
+	})
+	fmt.Printf("}\n")
+}
+
+// redirectEdges deletes the cur node out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdges(g *IRGraph, parent *IRNode, cur *IRNode) {
+	for _, outEdge := range g.OutEdges[cur] {
+		outEdge.Src = parent
+		g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+	}
+	delete(g.OutEdges, cur)
+}
+
+// RedirectEdges deletes and redirects out-edges from node cur based on inlining information via inlinedCallSites.
+func RedirectEdges(cur *IRNode, inlinedCallSites map[CallSiteInfo]struct{}) {
+	g := WeightedCG
+	for i, outEdge := range g.OutEdges[cur] {
+		if _, found := inlinedCallSites[CallSiteInfo{Line: outEdge.CallSite, Caller: cur.AST}]; !found {
+			for _, InEdge := range g.InEdges[cur] {
+				if _, ok := inlinedCallSites[CallSiteInfo{Line: InEdge.CallSite, Caller: InEdge.Src.AST}]; ok {
+					weight := calculateweight(g, InEdge.Src, cur)
+					redirectEdge(g, InEdge.Src, cur, outEdge, weight, i)
+				}
+			}
+		} else {
+			remove(g, cur, i, outEdge.Dst.AST.Nname)
+		}
+	}
+	removeall(g, cur)
+}
+
+// calculateweight calculates the weight of the new redirected edge.
+func calculateweight(g *IRGraph, parent *IRNode, cur *IRNode) int64 {
+	sum := int64(0)
+	pw := int64(0)
+	for _, InEdge := range g.InEdges[cur] {
+		sum = sum + InEdge.Weight
+		if InEdge.Src == parent {
+			pw = InEdge.Weight
+		}
+	}
+	weight := int64(0)
+	if sum != 0 {
+		weight = pw / sum
+	} else {
+		weight = pw
+	}
+	return weight
+}
+
+// redirectEdge deletes the cur-node's out-edges and redirect them so now these edges are the parent node out-edges.
+func redirectEdge(g *IRGraph, parent *IRNode, cur *IRNode, outEdge *IREdge, weight int64, idx int) {
+	outEdge.Src = parent
+	outEdge.Weight = weight * outEdge.Weight
+	g.OutEdges[parent] = append(g.OutEdges[parent], outEdge)
+	remove(g, cur, idx, outEdge.Dst.AST.Nname)
+}
+
+// remove deletes the cur-node's out-edges at index idx.
+func remove(g *IRGraph, cur *IRNode, idx int, name *ir.Name) {
+	if len(g.OutEdges[cur]) >= 2 {
+		g.OutEdges[cur][idx] = &IREdge{CallSite: -1}
+	} else {
+		delete(g.OutEdges, cur)
+	}
+}
+
+// removeall deletes all cur-node's out-edges that marked to be removed .
+func removeall(g *IRGraph, cur *IRNode) {
+	for i := len(g.OutEdges[cur]) - 1; i >= 0; i-- {
+		if g.OutEdges[cur][i].CallSite == -1 {
+			g.OutEdges[cur][i] = g.OutEdges[cur][len(g.OutEdges[cur])-1]
+			g.OutEdges[cur] = g.OutEdges[cur][:len(g.OutEdges[cur])-1]
+		}
+	}
+}
+
+// inlCallee is same as the implementation for inl.go with one change. The change is that we do not invoke CanInline on a closure.
+func inlCallee(fn ir.Node) *ir.Func {
+	fn = ir.StaticValue(fn)
+	switch fn.Op() {
+	case ir.OMETHEXPR:
+		fn := fn.(*ir.SelectorExpr)
+		n := ir.MethodExprName(fn)
+		// Check that receiver type matches fn.X.
+		// TODO(mdempsky): Handle implicit dereference
+		// of pointer receiver argument?
+		if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) {
+			return nil
+		}
+		return n.Func
+	case ir.ONAME:
+		fn := fn.(*ir.Name)
+		if fn.Class == ir.PFUNC {
+			return fn.Func
+		}
+	case ir.OCLOSURE:
+		fn := fn.(*ir.ClosureExpr)
+		c := fn.Func
+		return c
+	}
+	return nil
+}
diff --git a/src/cmd/compile/internal/test/pgo_inl_test.go b/src/cmd/compile/internal/test/pgo_inl_test.go
new file mode 100644
index 0000000..eeeeae9
--- /dev/null
+++ b/src/cmd/compile/internal/test/pgo_inl_test.go
@@ -0,0 +1,148 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+	"bufio"
+	"fmt"
+	"internal/testenv"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"regexp"
+	"strings"
+	"testing"
+)
+
+// TestPGOIntendedInlining tests that specific functions are inlined.
+func TestPGOIntendedInlining(t *testing.T) {
+	testenv.MustHaveGoRun(t)
+	t.Parallel()
+
+	// Make a temporary directory to work in.
+	tmpdir, err := ioutil.TempDir("", "TestCode")
+	if err != nil {
+		t.Fatalf("Failed to create temporary directory: %v", err)
+	}
+	defer os.RemoveAll(tmpdir)
+
+	want := map[string][]string{
+		"cmd/compile/internal/test/testdata/pgo/inline": {
+			"(*BS).NS",
+		},
+	}
+
+	// The functions which are not expected to be inlined are as follows.
+	wantNot := map[string][]string{
+		"cmd/compile/internal/test/testdata/pgo/inline": {
+			// The calling edge main->A is hot and the cost of A is large than
+			// inlineHotCalleeMaxBudget.
+			"A",
+			// The calling edge BenchmarkA" -> benchmarkB is cold
+			// and the cost of A is large than inlineMaxBudget.
+			"benchmarkB",
+		},
+	}
+
+	must := map[string]bool{
+		"(*BS).NS": true,
+	}
+
+	notInlinedReason := make(map[string]string)
+	pkgs := make([]string, 0, len(want))
+	for pname, fnames := range want {
+		pkgs = append(pkgs, pname)
+		for _, fname := range fnames {
+			fullName := pname + "." + fname
+			if _, ok := notInlinedReason[fullName]; ok {
+				t.Errorf("duplicate func: %s", fullName)
+			}
+			notInlinedReason[fullName] = "unknown reason"
+		}
+	}
+
+	// If the compiler emit "cannot inline for function A", the entry A
+	// in expectedNotInlinedList will be removed.
+	expectedNotInlinedList := make(map[string]struct{})
+	for pname, fnames := range wantNot {
+		for _, fname := range fnames {
+			fullName := pname + "." + fname
+			expectedNotInlinedList[fullName] = struct{}{}
+		}
+	}
+
+	// go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline
+	curdir, err1 := os.Getwd()
+	if err1 != nil {
+		t.Fatal(err1)
+	}
+	gcflag_option := "-gcflags=-m -m -pgoprofile %s/testdata/pgo/inline/inline_hot.pprof"
+	gcflag := fmt.Sprintf(gcflag_option, curdir)
+	args := append([]string{"test", "-run=nope", gcflag}, pkgs...)
+	cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), args...))
+
+	pr, pw := io.Pipe()
+	cmd.Stdout = pw
+	cmd.Stderr = pw
+	cmdErr := make(chan error, 1)
+	go func() {
+		cmdErr <- cmd.Run()
+		pw.Close()
+	}()
+	scanner := bufio.NewScanner(pr)
+	curPkg := ""
+	canInline := regexp.MustCompile(`: can inline ([^ ]*)`)
+	haveInlined := regexp.MustCompile(`: inlining call to ([^ ]*)`)
+	cannotInline := regexp.MustCompile(`: cannot inline ([^ ]*): (.*)`)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.HasPrefix(line, "# ") {
+			curPkg = line[2:]
+			splits := strings.Split(curPkg, " ")
+			curPkg = splits[0]
+			continue
+		}
+		if m := haveInlined.FindStringSubmatch(line); m != nil {
+			fname := m[1]
+			delete(notInlinedReason, curPkg+"."+fname)
+			continue
+		}
+		if m := canInline.FindStringSubmatch(line); m != nil {
+			fname := m[1]
+			fullname := curPkg + "." + fname
+			// If function must be inlined somewhere, being inlinable is not enough
+			if _, ok := must[fullname]; !ok {
+				delete(notInlinedReason, fullname)
+				continue
+			}
+		}
+		if m := cannotInline.FindStringSubmatch(line); m != nil {
+			fname, reason := m[1], m[2]
+			fullName := curPkg + "." + fname
+			if _, ok := notInlinedReason[fullName]; ok {
+				// cmd/compile gave us a reason why
+				notInlinedReason[fullName] = reason
+			}
+			delete(expectedNotInlinedList, fullName)
+			continue
+		}
+	}
+	if err := <-cmdErr; err != nil {
+		t.Fatal(err)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatal(err)
+	}
+	for fullName, reason := range notInlinedReason {
+		t.Errorf("%s was not inlined: %s", fullName, reason)
+	}
+
+	// If the list expectedNotInlinedList is not empty, it indicates
+	// the functions in the expectedNotInlinedList are marked with caninline.
+	for fullName, _ := range expectedNotInlinedList {
+		t.Errorf("%s was expected not inlined", fullName)
+	}
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
new file mode 100644
index 0000000..c1d2a53
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.go
@@ -0,0 +1,86 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated, then a new inline_hot.pprof file should be generated via "go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline".
+package main
+
+import (
+	"time"
+)
+
+type BS struct {
+	length uint
+	s      []uint64
+}
+
+const wSize = uint(64)
+const lWSize = uint(6)
+
+func D(i uint) int {
+	return int((i + (wSize - 1)) >> lWSize)
+}
+
+func N(length uint) (bs *BS) {
+	bs = &BS{
+		length,
+		make([]uint64, D(length)),
+	}
+
+	return bs
+}
+
+func (b *BS) S(i uint) *BS {
+	b.s[i>>lWSize] |= 1 << (i & (wSize - 1))
+	return b
+}
+
+var jn = [...]byte{
+	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+func T(v uint64) uint {
+	return uint(jn[((v&-v)*0x03f79d71b4ca8b09)>>58])
+}
+
+func (b *BS) NS(i uint) (uint, bool) {
+	x := int(i >> lWSize)
+	if x >= len(b.s) {
+		return 0, false
+	}
+	w := b.s[x]
+	w = w >> (i & (wSize - 1))
+	if w != 0 {
+		return i + T(w), true
+	}
+	x = x + 1
+	for x < len(b.s) {
+		if b.s[x] != 0 {
+			return uint(x)*wSize + T(b.s[x]), true
+		}
+		x = x + 1
+
+	}
+	return 0, false
+}
+
+func A() {
+	s := N(100000)
+	for i := 0; i < 1000; i += 30 {
+		s.S(uint(i))
+	}
+	for j := 0; j < 1000; j++ {
+		c := uint(0)
+		for i, e := s.NS(0); e; i, e = s.NS(i + 1) {
+			c++
+		}
+	}
+}
+
+func main() {
+	time.Sleep(time.Second)
+	A()
+}
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
new file mode 100644
index 0000000..45ccb61
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot.pprof
Binary files differ
diff --git a/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
new file mode 100644
index 0000000..024d340
--- /dev/null
+++ b/src/cmd/compile/internal/test/testdata/pgo/inline/inline_hot_test.go
@@ -0,0 +1,47 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// WARNING: Please avoid updating this file. If this file needs to be updated, then a new inline_hot.pprof file should be generated via "go test -bench=. -cpuprofile testdata/pgo/inline/inline_hot.pprof cmd/compile/internal/test/testdata/pgo/inline".
+package main
+
+import "testing"
+
+func BenchmarkA(b *testing.B) {
+	benchmarkB(b)
+}
+func benchmarkB(b *testing.B) {
+
+	for i := 0; true; {
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+		A()
+		i = i + 1
+		if i >= b.N {
+			break
+		}
+	}
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 2a4490b..394a416 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -66,6 +66,7 @@
 	"internal/goroot",
 	"internal/goversion",
 	"internal/pkgbits",
+	"internal/profile",
 	"internal/race",
 	"internal/saferio",
 	"internal/platform",
diff --git a/src/internal/profile/legacy_profile.go b/src/internal/profile/legacy_profile.go
index 0ac350a..b102c95 100644
--- a/src/internal/profile/legacy_profile.go
+++ b/src/internal/profile/legacy_profile.go
@@ -722,7 +722,7 @@
 	var l string
 	var err error
 	// Parse text of the form "attribute = value" before the samples.
-	const delimiter = "="
+	const delimiter = '='
 	for {
 		l, err = r.ReadString('\n')
 		if err != nil {
@@ -746,10 +746,13 @@
 			break
 		}
 
-		key, val, ok := strings.Cut(l, delimiter)
-		if !ok {
+		index := strings.IndexByte(l, delimiter)
+		if index < 0 {
 			break
 		}
+		key := l[:index]
+		val := l[index+1:]
+
 		key, val = strings.TrimSpace(key), strings.TrimSpace(val)
 		var err error
 		switch key {
@@ -1023,7 +1026,7 @@
 
 	var attrs []string
 	var r *strings.Replacer
-	const delimiter = "="
+	const delimiter = '='
 	for {
 		l, err := b.ReadString('\n')
 		if err != nil {
@@ -1046,7 +1049,10 @@
 			if err == errUnrecognized {
 				// Recognize assignments of the form: attr=value, and replace
 				// $attr with value on subsequent mappings.
-				if attr, value, ok := strings.Cut(l, delimiter); ok {
+				idx := strings.IndexByte(l, delimiter)
+				if idx >= 0 {
+					attr := l[:idx]
+					value := l[idx+1:]
 					attrs = append(attrs, "$"+strings.TrimSpace(attr), strings.TrimSpace(value))
 					r = strings.NewReplacer(attrs...)
 				}

To view, visit change 429863. To unsubscribe, or for help writing mail filters, visit settings.

Reply all

Reply to author

Forward

0 new messages

Search

Clear search

Close search

Google apps

Main menu