[go] runtime: Profile goroutines holding contended mutexes.

Peter Weinberger (Gerrit)

unread,

Sep 22, 2016, 12:14:59 PM9/22/16

to Ian Lance Taylor, golang-co...@googlegroups.com

Peter Weinberger uploaded a change:
https://go-review.googlesource.com/29650

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack
traces of goroutines holding contended mutexes. From runtime/pprof
pprot.Lookup("mutex").WriteTo(w, 1) write the accumulated
stack traces to w (in essentially the same format that blocking
profiling uses).

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a
---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go
M src/testing/testing.go
14 files changed, 374 insertions(+), 126 deletions(-)

diff --git a/src/cmd/go/test.go b/src/cmd/go/test.go
index 138d46c..7a670fd 100644
--- a/src/cmd/go/test.go
+++ b/src/cmd/go/test.go
@@ -237,6 +237,15 @@
To profile all memory allocations, use -test.memprofilerate=1
and pass --alloc_space flag to the pprof tool.

+ -mutexprotfile mutex.out
+ Write a mutex contention profile to the specified file
+ when all tests are complete.
+ Writes test binary as -c would.
+
+ -mutexprofilefraction n
+ Sample 1 in n stack traces of goroutines holding a
+ contended mutex.
+
-outputdir directory
Place output files from profiling in the specified directory,
by default the directory in which "go test" is running.
diff --git a/src/cmd/go/testflag.go b/src/cmd/go/testflag.go
index a65ed1f..cf4d2b4 100644
--- a/src/cmd/go/testflag.go
+++ b/src/cmd/go/testflag.go
@@ -50,6 +50,8 @@
{name: "memprofilerate", passToTest: true},
{name: "blockprofile", passToTest: true},
{name: "blockprofilerate", passToTest: true},
+ {name: "mutexprofile", passToTest: true},
+ {name: "mutexprofilefraction", passToTest: true},
{name: "outputdir", passToTest: true},
{name: "parallel", passToTest: true},
{name: "run", passToTest: true},
@@ -152,7 +154,7 @@
case "blockprofile", "cpuprofile", "memprofile":
testProfile = true
testNeedBinary = true
- case "trace":
+ case "mutexprofile", "trace":
testProfile = true
case "coverpkg":
testCover = true
diff --git a/src/cmd/internal/pprof/profile/legacy_profile.go
b/src/cmd/internal/pprof/profile/legacy_profile.go
index 8ccfe45..7cb8645 100644
--- a/src/cmd/internal/pprof/profile/legacy_profile.go
+++ b/src/cmd/internal/pprof/profile/legacy_profile.go
@@ -12,6 +12,7 @@
"bytes"
"fmt"
"io"
+ "log"
"math"
"regexp"
"strconv"
@@ -26,6 +27,7 @@
heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+)
*] @([ x0-9a-f]*)`)

contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
+ contentionAddrRE =
regexp.MustCompile(`^#\W+(\w+)\W+([^+]+)[+](\w+)\W+([^:]+):(.+)`)

hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

@@ -679,130 +681,151 @@
return int64(float64(count) * scale), int64(float64(size) * scale)
}

-// parseContention parses a contentionz profile and returns a newly
-// populated Profile.
+// parseContention parses a debug mutex or contention profile. There may
or may
+// not be a sampling period line, and there may or may not be detailed
stack traces
+// The order of the test corresponds to what synchonization_profiling.cc
writes.
+// It returns a new Profile.
func parseContention(b []byte) (p *Profile, err error) {
- r := bytes.NewBuffer(b)
- l, err := r.ReadString('\n')
- if err != nil {
- return nil, errUnrecognized
- }
-
- if !strings.HasPrefix(l, "--- contention") {
- return nil, errUnrecognized
- }
-
p = &Profile{
PeriodType: &ValueType{Type: "contentions", Unit: "count"},
- Period: 1,
SampleType: []*ValueType{
{Type: "contentions", Unit: "count"},
{Type: "delay", Unit: "nanoseconds"},
},
}
-
+ lines := strings.Split(string(b), "\n") // memory expensive
+ if len(lines) == 0 {
+ return nil, errUnrecognized
+ }
+ var nxtline int
+ l := lines[nxtline]
+ if len(lines) < 3 || (l != "--- mutex:" && l != "--- contention:") {
+ return nil, errUnrecognized
+ }
+ // attribute = values lines, without assumptions on the ordering
+ nxtline++
var cpuHz int64
- // Parse text of the form "attribute = value" before the samples.
- const delimiter = "="
- for {
- l, err = r.ReadString('\n')
+ for ; strings.Index(lines[nxtline], "=") != -1; nxtline++ {
+ flds := strings.Split(lines[nxtline], "=")
+ if len(flds) != 2 {
+ return nil, errUnrecognized
+ }
+ key, val := strings.TrimSpace(flds[0]), strings.TrimSpace(flds[1])
+ value, err := strconv.ParseInt(val, 0, 64)
if err != nil {
- if err != io.EOF {
- return nil, err
- }
-
- if l == "" {
- break
- }
+ return nil, errUnrecognized
}
-
- if l = strings.TrimSpace(l); l == "" {
- continue
- }
-
- if strings.HasPrefix(l, "---") {
- break
- }
-
- attr := strings.SplitN(l, delimiter, 2)
- if len(attr) != 2 {
- break
- }
- key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
- var err error
switch key {
case "cycles/second":
- if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
- return nil, errUnrecognized
- }
+ cpuHz = value
case "sampling period":
- if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
- return nil, errUnrecognized
- }
+ p.Period = value
case "ms since reset":
- ms, err := strconv.ParseInt(val, 0, 64)
- if err != nil {
- return nil, errUnrecognized
- }
- p.DurationNanos = ms * 1000 * 1000
- case "format":
- // CPP contentionz profiles don't have format.
- return nil, errUnrecognized
- case "resolution":
- // CPP contentionz profiles don't have resolution.
- return nil, errUnrecognized
+ p.DurationNanos = value * 1000 * 1000
case "discarded samples":
- default:
- return nil, errUnrecognized
}
}
-
- locs := make(map[uint64]*Location)
- for {
- if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
- break
- }
- value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
- if err != nil {
- return nil, err
- }
- var sloc []*Location
- for _, addr := range addrs {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call.
- addr--
- loc := locs[addr]
- if locs[addr] == nil {
- loc = &Location{
- Address: addr,
- }
- p.Location = append(p.Location, loc)
- locs[addr] = loc
+ if p.Period == 0 {
+ p.Period = 1
+ }
+ infos := map[uint64]addrInfo{}
+ stacks := []stackRec{}
+ // now there are blank lines, lines starting # and count lines
+ for i := nxtline; i < len(lines); i++ {
+ l := lines[i]
+ if l == "" || l == "#\t0x0" { // the latter case from go1.7
+ continue
+ } else if l[0] == '#' {
+ m := contentionAddrRE.FindStringSubmatch(l)
+ if len(m) != 6 {
+ log.Fatalf("found %d, %v, %q", len(m), m, l)
}
- sloc = append(sloc, loc)
- }
- p.Sample = append(p.Sample, &Sample{
- Value: value,
- Location: sloc,
- })
-
- if l, err = r.ReadString('\n'); err != nil {
- if err != io.EOF {
+ adr, err := strconv.ParseUint(m[1], 0, 64)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if _, ok := infos[adr]; ok {
+ continue // already seen it
+ }
+ ln, err := strconv.ParseInt(m[5], 0, 64)
+ if err != nil {
+ log.Fatal(err)
+ }
+ infos[adr] = addrInfo{m[1], m[2], m[3], m[4], ln}
+ } else {
+ value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
+ if err != nil {
+ // log something here?
return nil, err
}
- if l == "" {
- break
+ stacks = append(stacks, stackRec{value[0], value[1], addrs})
+ }
+ }
+ // we might not have had any # lines, just stacks, so fake the infos
+ if len(infos) == 0 {
+ for _, s := range stacks {
+ for _, adr := range s.stack {
+ adr-- // Address points to 1 after call
+ // (for # lines, the behavior changes at 1.7, so we did nothing)
+ infos[adr] = addrInfo{addr: fmt.Sprintf("0x%x", adr)}
}
}
}
+ // we now have everything. Fill in the Profile.
+ // We won't have any Mapping or Label.
+ // Function: self-contained. Line: Function
+ // Location: Line (maybe). Sample: Location

- if err = parseAdditionalSections(l, r, p); err != nil {
- return nil, err
+ // Does it matter if several Functions correspond to one real function?
+ // Do Function.IDs have to have any properties? Location.IDs?
+ // (other than uniqueness)
+ locmap := map[uint64]*Location{}
+ for k, v := range infos {
+ f := Function{ID: k, Name: v.name, Filename: v.file, StartLine: v.line}
+ line := Line{Function: &f, Line: v.line}
+ loc := Location{ID: k, Address: k, Line: []Line{line}}
+ locmap[k] = &loc
+ p.Location = append(p.Location, &loc)
+ p.Function = append(p.Function, &f)
}
-
+ for _, st := range stacks {
+ s := Sample{Value: []int64{st.value, st.count}}
+ for _, i := range st.stack {
+ // sometimes the addresses match exactly, sometimes off by one
+ // This should be figured out!
+ if v, ok := locmap[i]; ok { // before go1.7
+ s.Location = append(s.Location, v)
+ } else if v, ok := locmap[i-1]; ok { // go 1.7
+ s.Location = append(s.Location, v)
+ }
+ }
+ p.Sample = append(p.Sample, &s)
+ }
return p, nil
}

+type stackRec struct {
+ value, count int64
+ stack []uint64
+}
+
+func (s stackRec) String() string {
+ ans := []string{fmt.Sprintf("%d,%d", s.value, s.count), "["}
+ for _, x := range s.stack {
+ ans = append(ans, fmt.Sprintf("0x%x", x))
+ }
+ ans = append(ans, "]")
+ return strings.Join(ans, " ")
+}
+
+type addrInfo struct {
+ addr string // 0x4b8254
+ name string // sync_test.BenchmarkMutexSpin.func1
+ offset string // 0xdb
+ file string
+ line int64 // 191
+}
+
// parseContentionSample parses a single row from a contention profile
// into a new Sample.
func parseContentionSample(line string, period, cpuHz int64) (value
[]int64, addrs []uint64, err error) {
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index ce7ac63..8a1fbef 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -927,7 +927,7 @@
// another thread.
useStartSema := mode == gcBackgroundMode
if useStartSema {
- semacquire(&work.startSema, false)
+ semacquire(&work.startSema, 0)
// Re-check transition condition under transition lock.
if !gcShouldStart(forceTrigger) {
semrelease(&work.startSema)
@@ -948,7 +948,7 @@
}

// Ok, we're doing it! Stop everybody else
- semacquire(&worldsema, false)
+ semacquire(&worldsema, 0)

if trace.enabled {
traceGCStart()
@@ -1062,7 +1062,7 @@
// by mark termination.
func gcMarkDone() {
top:
- semacquire(&work.markDoneSema, false)
+ semacquire(&work.markDoneSema, 0)

// Re-check transition condition under transition lock.
if !(gcphase == _GCmark && work.nwait == work.nproc
&& !gcMarkWorkAvailable(nil)) {
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 37b5e1b..d09b154 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -22,6 +22,7 @@
// profile types
memProfile bucketType = 1 + iota
blockProfile
+ mutexProfile

// size of bucket hash table
buckHashSize = 179999
@@ -43,7 +44,7 @@
type bucket struct {
next *bucket
allnext *bucket
- typ bucketType // memBucket or blockBucket
+ typ bucketType // memBucket or blockBucket (includes mutexProfile)
hash uintptr
size uintptr
nstk uintptr
@@ -83,7 +84,7 @@
}

// A blockRecord is the bucket data for a bucket of type blockProfile,
-// part of the blocking profile.
+// part of the blocking profile. (also used for mutex profiles)
type blockRecord struct {
count int64
cycles int64
@@ -92,6 +93,7 @@
var (
mbuckets *bucket // memory profile buckets
bbuckets *bucket // blocking profile buckets
+ xbuckets *bucket // mutex profile buckets
buckhash *[179999]*bucket
bucketmem uintptr
)
@@ -104,7 +106,7 @@
throw("invalid profile bucket type")
case memProfile:
size += unsafe.Sizeof(memRecord{})
- case blockProfile:
+ case blockProfile, mutexProfile:
size += unsafe.Sizeof(blockRecord{})
}

@@ -132,7 +134,7 @@

// bp returns the blockRecord associated with the blockProfile bucket b.
func (b *bucket) bp() *blockRecord {
- if b.typ != blockProfile {
+ if b.typ != blockProfile && b.typ != mutexProfile {
throw("bad use of bucket.bp")
}
data := add(unsafe.Pointer(b),
unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
@@ -184,6 +186,9 @@
if typ == memProfile {
b.allnext = mbuckets
mbuckets = b
+ } else if typ == mutexProfile {
+ b.allnext = xbuckets
+ xbuckets = b
} else {
b.allnext = bbuckets
bbuckets = b
@@ -288,10 +293,20 @@
if cycles <= 0 {
cycles = 1
}
+ if blocksampled(cycles) {
+ saveevent(cycles, skip+1, blockProfile, &blockprofilerate)
+ }
+}
+
+func blocksampled(cycles int64) bool {
rate := int64(atomic.Load64(&blockprofilerate))
if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
- return
+ return false
}
+ return true
+}
+
+func saveevent(cycles int64, skip int, which bucketType, ratep *uint64) {
gp := getg()
var nstk int
var stk [maxStack]uintptr
@@ -301,10 +316,38 @@
nstk = gcallers(gp.m.curg, skip, stk[:])
}
lock(&proflock)
- b := stkbucket(blockProfile, 0, stk[:nstk], true)
+ b := stkbucket(which, 0, stk[:nstk], true)
b.bp().count++
b.bp().cycles += cycles
unlock(&proflock)
+}
+
+var mutexprofilerate uint64 // fraction sampled
+
+// SetMutexProfileFraction controls the fraction of mutex contention events
+// that are reported in the mutex profile. On average 1/rate events are
+// reported. The previous rate is returned.
+//
+// To turn off profiling entirely, pass rate 0.
+// To just read the current rate, pass rate -1.
+func SetMutexProfileFraction(rate int) int {
+ if rate < 0 {
+ return int(mutexprofilerate)
+ }
+ old := mutexprofilerate
+ atomic.Store64(&mutexprofilerate, uint64(rate))
+ return int(old)
+}
+
+//go:linkname mutexevent sync.event
+func mutexevent(cycles int64, skip int) {
+ rate := int64(atomic.Load64(&mutexprofilerate))
+ if rate > 0 && int64(fastrand())%rate == 0 {
+ if cycles <= 0 {
+ cycles = 1
+ }
+ saveevent(cycles, skip+1, mutexProfile, &mutexprofilerate)
+ }
}

// Go interface to profile data.
@@ -491,6 +534,35 @@
return
}

+// MutexProfile returns n, the number of records in the current mutex
profile.
+// If len(p) >= n, MutexPrifle copies the profile into p and return s,
true.
+// Otherwise, MutexProfile does not change p, and returns n, false.
+//
+// Most clients should use the runtime/pprof package
+// instead of calling MutexProfile directly.
+func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
+ lock(&proflock)
+ for b := xbuckets; b != nil; b = b.allnext {
+ n++
+ }
+ if n <= len(p) {
+ ok = true
+ for b := xbuckets; b != nil; b = b.allnext {
+ bp := b.bp()
+ r := &p[0]
+ r.Count = int64(bp.count)
+ r.Cycles = bp.cycles
+ i := copy(r.Stack0[:], b.stk())
+ for ; i < len(r.Stack0); i++ {
+ r.Stack0[i] = 0
+ }
+ p = p[1:]
+ }
+ }
+ unlock(&proflock)
+ return
+}
+
// ThreadCreateProfile returns n, the number of records in the thread
creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and
returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n,
false.
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 1fc9568..3c71fdc 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -152,6 +152,12 @@
write: writeBlock,
}

+var mutexProfile = &Profile{
+ name: "mutex",
+ count: countMutex,
+ write: writeMutex,
+}
+
func lockProfiles() {
profiles.mu.Lock()
if profiles.m == nil {
@@ -161,6 +167,7 @@
"threadcreate": threadcreateProfile,
"heap": heapProfile,
"block": blockProfile,
+ "mutex": mutexProfile,
}
}
}
@@ -747,6 +754,12 @@
return n
}

+// countMutex returns the number of records in the mutex profile.
+func countMutex() int {
+ n, _ := runtime.MutexProfile(nil)
+ return n
+}
+
// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
var p []runtime.BlockProfileRecord
@@ -790,4 +803,49 @@
return b.Flush()
}

+// writeMutex writes the current mutex profile to w.
+func writeMutex(w io.Writer, debug int) error {
+ // PJW: too much common code with writeBlock. FIX!
+ var p []runtime.BlockProfileRecord
+ n, ok := runtime.MutexProfile(nil)
+ for {
+ p = make([]runtime.BlockProfileRecord, n+50)
+ n, ok = runtime.MutexProfile(p)
+ if ok {
+ p = p[:n]
+ break
+ }
+ }
+
+ sort.Sort(byCycles(p))
+
+ b := bufio.NewWriter(w)
+ var tw *tabwriter.Writer
+ w = b
+ if debug > 0 {
+ tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+ }
+
+ fmt.Fprintf(w, "--- mutex:\n")
+ fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
+ fmt.Fprintf(w, "sampling period = %d\n",
runtime.SetMutexProfileFraction(-1))
+ for i := range p {
+ r := &p[i]
+ fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
+ for _, pc := range r.Stack() {
+ fmt.Fprintf(w, " %#x", pc)
+ }
+ fmt.Fprint(w, "\n")
+ if debug > 0 {
+ printStackRecord(w, r.Stack(), true)
+ }
+ }
+
+ if tw != nil {
+ tw.Flush()
+ }
+ return b.Flush()
+}
+
func runtime_cyclesPerSecond() int64
diff --git a/src/runtime/pprof/pprof_test.go
b/src/runtime/pprof/pprof_test.go
index a093015..4f2a490 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -592,6 +592,40 @@
mu.Unlock()
}

+func TestMutexProfile(t *testing.T) {
+ old := runtime.SetMutexProfileFraction(1)
+ defer runtime.SetMutexProfileFraction(old)
+ if old != 0 {
+ t.Fatalf("need MutexProfileRate 0, got %d", old)
+ }
+
+ blockMutex()
+
+ var w bytes.Buffer
+ Lookup("mutex").WriteTo(&w, 1)
+ prof := w.String()
+
+ if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
+ t.Errorf("Bad profile header:\n%v", prof)
+ }
+ prof = strings.Trim(prof, "\n")
+ lines := strings.Split(prof, "\n")
+ if len(lines) != 6 {
+ t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
+ }
+ if len(lines) < 6 {
+ return
+ }
+ r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
+ if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[3], r2)
+ }
+ r3 := "^#.*runtime/pprof_test.blockMutex.*$"
+ if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
+ t.Errorf("%q didn't match %q", lines[5], r3)
+ }
+}
+
func func1(c chan int) { <-c }
func func2(c chan int) { <-c }
func func3(c chan int) { <-c }
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 4dd2bd2..92f5a3a 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -903,7 +903,7 @@
// in panic or being exited, this may not reliably stop all
// goroutines.
func stopTheWorld(reason string) {
- semacquire(&worldsema, false)
+ semacquire(&worldsema, 0)
getg().m.preemptoff = reason
systemstack(stopTheWorldWithSema)
}
@@ -926,7 +926,7 @@
// preemption first and then should stopTheWorldWithSema on the system
// stack:
//
-// semacquire(&worldsema, false)
+// semacquire(&worldsema, 0)
// m.preemptoff = "reason"
// systemstack(stopTheWorldWithSema)
//
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2e88674..683156d 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -256,6 +256,7 @@
// The following fields are never accessed concurrently.
// waitlink is only accessed by g.

+ acquiretime int64
releasetime int64
ticket uint32
waitlink *sudog // g.waiting list
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 45fbbca..cbb3e35 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -44,17 +44,22 @@

//go:linkname sync_runtime_Semacquire sync.runtime_Semacquire
func sync_runtime_Semacquire(addr *uint32) {
- semacquire(addr, true)
+ semacquire(addr, semaBlockProfile)
}

//go:linkname net_runtime_Semacquire net.runtime_Semacquire
func net_runtime_Semacquire(addr *uint32) {
- semacquire(addr, true)
+ semacquire(addr, semaBlockProfile)
}

//go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
func sync_runtime_Semrelease(addr *uint32) {
semrelease(addr)
+}
+
+//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
+func sync_runtime_SemacquireMutex(addr *uint32) {
+ semacquire(addr, semaBlockProfile|semaMutexProfile)
}

//go:linkname net_runtime_Semrelease net.runtime_Semrelease
@@ -69,8 +74,13 @@
goready(s.g, traceskip)
}

+const (
+ semaBlockProfile = 1 << iota
+ semaMutexProfile
+)
+
// Called from runtime.
-func semacquire(addr *uint32, profile bool) {
+func semacquire(addr *uint32, profile int) {
gp := getg()
if gp != gp.m.curg {
throw("semacquire not on the G stack")
@@ -91,9 +101,16 @@
root := semroot(addr)
t0 := int64(0)
s.releasetime = 0
- if profile && blockprofilerate > 0 {
+ s.acquiretime = 0
+ if profile&semaBlockProfile != 0 && blockprofilerate > 0 {
t0 = cputicks()
s.releasetime = -1
+ }
+ if profile&semaMutexProfile != 0 && mutexprofilerate > 0 {
+ if t0 == 0 {
+ t0 = cputicks()
+ }
+ s.acquiretime = t0
}
for {
lock(&root.lock)
@@ -148,6 +165,15 @@
}
unlock(&root.lock)
if s != nil {
+ if s.acquiretime != 0 {
+ t0 := cputicks()
+ for x := root.head; x != nil; x = x.next {
+ if x.elem == unsafe.Pointer(addr) {
+ x.acquiretime = t0
+ }
+ }
+ mutexevent(t0-s.acquiretime, 3)
+ }
readyWithTime(s, 5)
}
}
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 0b3741f..684b37d 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -288,7 +288,7 @@

// The world is started but we've set trace.shutdown, so new tracing
can't start.
// Wait for the trace reader to flush pending buffers and stop.
- semacquire(&trace.shutdownSema, false)
+ semacquire(&trace.shutdownSema, 0)
if raceenabled {
raceacquire(unsafe.Pointer(&trace.shutdownSema))
}
diff --git a/src/sync/mutex.go b/src/sync/mutex.go
index 9089279..2693578 100644
--- a/src/sync/mutex.go
+++ b/src/sync/mutex.go
@@ -82,7 +82,7 @@
if old&mutexLocked == 0 {
break
}
- runtime_Semacquire(&m.sema)
+ runtime_SemacquireMutex(&m.sema)
awoke = true
iter = 0
}
@@ -127,3 +127,5 @@
old = m.state
}
}
+
+func runtime_SemacquireMutex(*uint32)
diff --git a/src/sync/mutex_test.go b/src/sync/mutex_test.go
index 91a4855..e8bbd38 100644
--- a/src/sync/mutex_test.go
+++ b/src/sync/mutex_test.go
@@ -61,6 +61,10 @@
}

func TestMutex(t *testing.T) {
+ if n := runtime.SetMutexProfileFraction(1); n != 0 {
+ t.Logf("got mutexrate %d expected 0", n)
+ }
+ defer runtime.SetMutexProfileFraction(0)
m := new(Mutex)
c := make(chan bool)
for i := 0; i < 10; i++ {
diff --git a/src/testing/testing.go b/src/testing/testing.go
index 4a4cbcc..08bd877 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -233,19 +233,21 @@
outputDir = flag.String("test.outputdir", "", "directory in which to
write profiles")

// Report as tests are run; default is silent for success.
- chatty = flag.Bool("test.v", false, "verbose: print additional
output")
- count = flag.Uint("test.count", 1, "run tests and benchmarks
`n` times")
- coverProfile = flag.String("test.coverprofile", "", "write a coverage
profile to the named file after execution")
- match = flag.String("test.run", "", "regular expression to
select tests and examples to run")
- memProfile = flag.String("test.memprofile", "", "write a memory
profile to the named file after execution")
- memProfileRate = flag.Int("test.memprofilerate", 0, "if >=0, sets
runtime.MemProfileRate")
- cpuProfile = flag.String("test.cpuprofile", "", "write a cpu
profile to the named file during execution")
- blockProfile = flag.String("test.blockprofile", "", "write a
goroutine blocking profile to the named file after execution")
- blockProfileRate = flag.Int("test.blockprofilerate", 1, "if >= 0, calls
runtime.SetBlockProfileRate()")
- traceFile = flag.String("test.trace", "", "write an execution
trace to the named file after execution")
- timeout = flag.Duration("test.timeout", 0, "if positive, sets an
aggregate time limit for all tests")
- cpuListStr = flag.String("test.cpu", "", "comma-separated list of
number of CPUs to use for each test")
- parallel = flag.Int("test.parallel",
runtime.GOMAXPROCS(0), "maximum test parallelism")
+ chatty = flag.Bool("test.v", false, "verbose: print
additional output")
+ count = flag.Uint("test.count", 1, "run tests and
benchmarks `n` times")
+ coverProfile = flag.String("test.coverprofile", "", "write a
coverage profile to the named file after execution")
+ match = flag.String("test.run", "", "regular expression to
select tests and examples to run")
+ memProfile = flag.String("test.memprofile", "", "write a memory
profile to the named file after execution")
+ memProfileRate = flag.Int("test.memprofilerate", 0, "if >=0, sets
runtime.MemProfileRate")
+ cpuProfile = flag.String("test.cpuprofile", "", "write a cpu
profile to the named file during execution")
+ blockProfile = flag.String("test.blockprofile", "", "write a
goroutine blocking profile to the named file after execution")
+ blockProfileRate = flag.Int("test.blockprofilerate", 1, "if >= 0,
calls runtime.SetBlockProfileRate()")
+ mutexProfile = flag.String("test.mutexprofile", "", "write a
mutex contention profile to the named file after execution")
+ mutexProfileFraction = flag.Int("test.mutexprofilefraction", 1, "if >= 0,
calls runtime.SetMutexProfileFraction()")
+ traceFile = flag.String("test.trace", "", "write an execution
trace to the named file after execution")
+ timeout = flag.Duration("test.timeout", 0, "if positive,
sets an aggregate time limit for all tests")
+ cpuListStr = flag.String("test.cpu", "", "comma-separated list
of number of CPUs to use for each test")
+ parallel = flag.Int("test.parallel",
runtime.GOMAXPROCS(0), "maximum test parallelism")

haveExamples bool // are there examples?

@@ -840,6 +842,9 @@
if *blockProfile != "" && *blockProfileRate >= 0 {
runtime.SetBlockProfileRate(*blockProfileRate)
}
+ if *mutexProfile != "" && *mutexProfileFraction > 0 {
+ runtime.SetMutexProfileFraction(*mutexProfileFraction)
+ }
if *coverProfile != "" && cover.Mode == "" {
fmt.Fprintf(os.Stderr, "testing: cannot use -test.coverprofile because
test binary was not built with coverage enabled\n")
os.Exit(2)
@@ -879,6 +884,18 @@
}
f.Close()
}
+ if *mutexProfile != "" && *mutexProfileFraction >= 0 {
+ f, err := os.Create(toOutputDir(*mutexProfile))
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "testing: %s\n", err)
+ os.Exit(2)
+ }
+ if err = pprof.Lookup("mutex").WriteTo(f, 1); err != nil {
+ fmt.Fprintf(os.Stderr, "testing: can't write %s: %s\n", *blockProfile,
err)
+ os.Exit(2)
+ }
+ f.Close()
+ }
if cover.Mode != "" {
coverReport()
}

--
https://go-review.googlesource.com/29650

Ralph Corderoy (Gerrit)

unread,

Sep 22, 2016, 8:11:31 PM9/22/16

to Peter Weinberger, golang-co...@googlegroups.com

Ralph Corderoy has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 1:

(12 comments)

https://go-review.googlesource.com/#/c/29650/1/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

PS1, Line 30: [+]
This could be \+
Probably then more obvious to the reader a literal plus is intended.

PS1, Line 685: traces
Missing full stop.

PS1, Line 737: continue
: } else if l[0] == '#' {
else not required after previous block ends with continue.

PS1, Line 770: 0x%x
0x%x can be %#x. (Saves a byte and those iterations over non-format
characters! :-)

PS1, Line 815: 0x%x
%#x

https://go-review.googlesource.com/#/c/29650/1/src/runtime/mprof.go
File src/runtime/mprof.go:

PS1, Line 86: // A blockRecord is the bucket data for a bucket of type
blockProfile,
: // part of the blocking profile. (also used for mutex
profiles)
Existing part of comment is properly punctuated sentence, so the new bit
should be too? Capital A, etc.

PS1, Line 303: if rate <= 0 || (rate > cycles && int64(fastrand())%rate >
cycles) {
: return false
: }
: return true
Seems it would still be clear if

return rate > 0 && (rate <= cycles || int64(fastrand())%rate <= cycles

PS1, Line 538: return s, true
returns n, true.

https://go-review.googlesource.com/#/c/29650/1/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

PS1, Line 809: var p []runtime.BlockProfileRecord
: n, ok := runtime.MutexProfile(nil)
: for {
: p = make([]runtime.BlockProfileRecord, n+50)
: n, ok = runtime.MutexProfile(p)
: if ok {
: p = p[:n]
: break
: }
: }
The logic looks like it could be simpler. Something like

var (
ok bool
p []runtime.BlockProfileRecord
n int
)
for !ok {

p = make([]runtime.BlockProfileRecord, n+50)

n, ok := runtime.MutexProfile(p)
}
p = p[:n]

https://go-review.googlesource.com/#/c/29650/1/src/runtime/pprof/pprof_test.go
File src/runtime/pprof/pprof_test.go:

PS1, Line 614: t.Errorf("expected 6 lines, got %d %q\n%s", len(lines),
prof, prof)
Is it intended to print prof twice here?

PS1, Line 619: r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
0-9 can be \d: "^\d+ 1 @ 0x[\da-f x]+$"
Is the " x" in the character class to try and match multiple words of
0x...? That might be more clear if it was spelt out as "( 0x[\da-f]+)+".

PS1, Line 623: .*$
No need to have .*$ as it will always succeed?

--
https://go-review.googlesource.com/29650
Gerrit-HasComments: Yes

Peter Weinberger (Gerrit)

unread,

Sep 23, 2016, 9:27:57 AM9/23/16

to golang-co...@googlegroups.com

Peter Weinberger uploaded a new patch set:

https://go-review.googlesource.com/29650

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack
traces of goroutines holding contended mutexes. From runtime/pprof
pprot.Lookup("mutex").WriteTo(w, 1) write the accumulated
stack traces to w (in essentially the same format that blocking
profiling uses).

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a
---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go
M src/testing/testing.go

14 files changed, 376 insertions(+), 125 deletions(-)

--
https://go-review.googlesource.com/29650

Peter Weinberger (Gerrit)

unread,

Sep 23, 2016, 9:28:28 AM9/23/16

to golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 1:

(11 comments)

https://go-review.googlesource.com/#/c/29650/1/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

PS1, Line 30: [+]
> This could be \+

Done

PS1, Line 737: continue
: } else if l[0] == '#' {
> else not required after previous block ends with continue.

Done. (but it saved one line and made the 3 cases parallel.)

PS1, Line 770: 0x%x
> 0x%x can be %#x. (Saves a byte and those iterations over non-format
> charac

In truth, %#x is better, so a good habit to get into. In this case it
doesn't matter, but for negative numbers it does.

PS1, Line 815: 0x%x
> %#x

Done

https://go-review.googlesource.com/#/c/29650/1/src/runtime/mprof.go
File src/runtime/mprof.go:

PS1, Line 86: // A blockRecord is the bucket data for a bucket of type
blockProfile,
: // part of the blocking profile. (also used for mutex
profiles)

> Existing part of comment is properly punctuated sentence, so the new bit
> sh

Rewritten

PS1, Line 303: if rate <= 0 || (rate > cycles && int64(fastrand())%rate >
cycles) {

: return false
: }
: return true
> Seems it would still be clear if

Possibly so, but I'd rather not touch the existing expression, as I'm not
sure the tests would find a typo.

PS1, Line 538: return s, true
> returns n, true.

Done

https://go-review.googlesource.com/#/c/29650/1/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

PS1, Line 809: var p []runtime.BlockProfileRecord
: n, ok := runtime.MutexProfile(nil)
: for {
: p = make([]runtime.BlockProfileRecord, n+50)
: n, ok = runtime.MutexProfile(p)
: if ok {
: p = p[:n]
: break
: }
: }
> The logic looks like it could be simpler. Something like

Yes, and I'll change it if you insist, but this code is exactly the same as
the existing code in writeBlock, above it.

https://go-review.googlesource.com/#/c/29650/1/src/runtime/pprof/pprof_test.go
File src/runtime/pprof/pprof_test.go:

PS1, Line 614: t.Errorf("expected 6 lines, got %d %q\n%s", len(lines),
prof, prof)

> Is it intended to print prof twice here?

Yes. Once quoted to pick up unexpected white space, and once as it would
appear to human beings.

PS1, Line 619: r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
> 0-9 can be \d: "^\d+ 1 @ 0x[\da-f x]+$"

Yes, for instance ' @ 0x48288d 0x47cd28 0x458931'
I put in an explanatory comment. There's no point in capturing the result,
so the proper regexp is something like
`^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
which seems a little obscure.

PS1, Line 623: .*$
> No need to have .*$ as it will always succeed?

True. It's a personal idiosyncrasy to like to match the ^ at the beginning
with a $ at the end. In that way r3 is parallel to r2. (But I don't feel
strongly about it.)

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-HasComments: Yes

Austin Clements (Gerrit)

unread,

Sep 23, 2016, 4:13:13 PM9/23/16

to Peter Weinberger, Austin Clements, golang-co...@googlegroups.com

Austin Clements has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 2:

(21 comments)

https://go-review.googlesource.com/#/c/29650/2/src/cmd/go/test.go
File src/cmd/go/test.go:

Line 240: -mutexprotfile mutex.out
To match the other documentation, use a tab to indent this line and a tab
and four spaces for the further indented lines.

https://go-review.googlesource.com/#/c/29650/2/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

Line 686: // The order of the test corresponds to what
synchonization_profiling.cc writes.
What is "the test"?

Line 696: lines := strings.Split(string(b), "\n") // memory expensive
I'm not sure about the mutex profile, but I've seen other profiles grow to
many 100s of megabytes. It's rather unfortunate that we're more than
doubling the memory footprint of processing the profile here. Is there a
reason this can't work directly from the bytes rather than converting the
whole thing into a string?

Line 708: for ; strings.Index(lines[nxtline], "=") != -1; nxtline++ {
This will panic if all of the lines contain ='s. The strings.Split also
does more work than necessary because you've already had to Index for
the "=", it has to build up a slice, and because the check for the number
of fields is redundant with the check for a parse error if you only allow
two fields.

Consider

for ; nxtline < len(lines); nxtline++ {
line := lines[nxtline]
sep := strings.Index(line, "=")
if sep < 0 {
break
}
key, val := strings.TrimSpace(line[:sep]), strings.TrimSpace(line[sep+1:])
...
}

Line 725: case "discarded samples":
The previous parser allowed "discarded samples" to have a non-int value.

Line 726: }
The previous parser rejected unknown fields. Is it okay that this parser
accepts them?

Line 733: // now there are blank lines, lines starting # and count lines
Typical style is to capitalize comments.

Line 734: for i := nxtline; i < len(lines); i++ {
for _, l := range lines[nxtline:] {
...
}

Line 742: log.Fatalf("found %d, %v, %q", len(m), m, l)
These are the only uses of log.Fatal in here. Rather than aborting the
program on a malformed profile, these should just return the error. For
this one, I would just use fmt.Errorf (which it looks like we use in a few
other places already). Also, consider making this error more readable.
Since the length of m is either going to be 6 or m will be nil, it could
just be fmt.Errorf("malformed sample: %v", l)

PS2, Line 744: adr
"addr" for consistency

PS2, Line 768: adr
addr

Line 780: // Does it matter if several Functions correspond to one real
function?
Mark these comments with "TODO: " (or figure out their answers :)

PS2, Line 783: locmap
"locations" perhaps?

Line 799: } else if v, ok := locmap[i-1]; ok { // go 1.7
This is very suspicious and worth debugging. Since x86 has single byte
instructions, getting this wrong could actually land on the wrong
instruction. I would guess that this has to do with whether the stack
points at the call instruction or the next instruction.

Line 814: ans := []string{fmt.Sprintf("%d,%d", s.value, s.count), "["}
Consider

var buf bytes.Buffer
fmt.Fprintf(&buf, "%d,%d [", s.value, s.count))

for _, x := range s.stack {

fmt.Fprintf(&buf, "%#x ", x)
}
fmt.Fprintf(&buf, "]")
return buf.String()

Line 823: addr string // 0x4b8254
Why are addr and offset strings?

https://go-review.googlesource.com/#/c/29650/2/src/runtime/mprof.go
File src/runtime/mprof.go:

Line 309: func saveevent(cycles int64, skip int, which bucketType, ratep
*uint64) {
This name is too generic for the runtime namespace. saveblockevent?

https://go-review.googlesource.com/#/c/29650/2/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

Line 832: fmt.Fprintf(w, "sampling period = %d\n",
runtime.SetMutexProfileFraction(-1))
Either spaces or no spaces around the "=" for consistency.

https://go-review.googlesource.com/#/c/29650/2/src/runtime/runtime2.go
File src/runtime/runtime2.go:

Line 259: acquiretime int64
I'm not sure it's true that acquiretime is never accessed concurrently (see
below)

https://go-review.googlesource.com/#/c/29650/2/src/runtime/sema.go
File src/runtime/sema.go:

Line 77: const (
Introduce a type for these. E.g.,

type semaProfileFlags int

const (
semaBlockProfile semaProfileFlags = 1 << iota
semaMutexProfile
)

Line 170: for x := root.head; x != nil; x = x.next {
Given that the root is no longer locked at this point, I wouldn't think
that it's safe to walk to list (or poke at the acquiretime of the other
sudogs).

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>

Peter Weinberger (Gerrit)

unread,

Oct 5, 2016, 7:54:38 AM10/5/16

to Austin Clements, golang-co...@googlegroups.com

Reviewers: Austin Clements

Peter Weinberger uploaded a new patch set:

https://go-review.googlesource.com/29650

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack
traces of goroutines holding contended mutexes. From runtime/pprof
pprot.Lookup("mutex").WriteTo(w, 1) write the accumulated
stack traces to w (in essentially the same format that blocking
profiling uses).

cmd/pprof now converts both the new stack traces and the legacy
C++-compatible stack traces to Profiles.

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a
---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go
M src/testing/testing.go

14 files changed, 441 insertions(+), 39 deletions(-)

Peter Weinberger (Gerrit)

unread,

Oct 12, 2016, 11:56:59 AM10/12/16

to Austin Clements, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(20 comments)

It's better. I fixed the bugs (I think) and rewrote the legacy_profile code
to maintain backward compatibility and to use a bytes.Buffer.

https://go-review.googlesource.com/#/c/29650/2/src/cmd/go/test.go
File src/cmd/go/test.go:

Line 240: -mutexprotfile mutex.out
> To match the other documentation, use a tab to indent this line and a tab
> a

Done. (Thank you emacs...)

https://go-review.googlesource.com/#/c/29650/2/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

Line 686: // "--- mutex:" or "--- contention:" for profiles generated by
the Go runtime.

> What is "the test"?

changed comment to be maybe clearer.

Line 696: return parseCppContention(r)

> I'm not sure about the mutex profile, but I've seen other profiles grow to

Of course not. Let me look at fixing it.

Line 708: p := &Profile{

> This will panic if all of the lines contain ='s. The strings.Split also
> doe

i rewrote all of thsi stuff.

Line 725: if err != io.EOF {

> The previous parser allowed "discarded samples" to have a non-int value.

I believe "discarded samples" is a remnant. It's not set anywhere in the
code.

But that raises a bigger question, which I had ignored. Is this code
expected to parse only Go profiles, or are there other languages which
produce "--- contention:" profiles? I wrote it just to match Go's output,
but that might be wrong. (codesearch only finds the string in Go code.)

Line 726: return nil, err

> The previous parser rejected unknown fields. Is it okay that this parser
> ac

see above comment.

Line 733:

> Typical style is to capitalize comments.

Done

Line 734: if l = strings.TrimSpace(l); l == "" {

> for _, l := range lines[nxtline:] {

changed

Line 742: attr := strings.SplitN(l, delimiter, 2)

> These are the only uses of log.Fatal in here. Rather than aborting the
> prog

Done

PS2, Line 744: bre
> "addr" for consistency
Done

PS2, Line 768: nil
> addr
Done

Line 780: value, addrs, err := parseContentionSample(l, p.Period, cpuHz)

> Mark these comments with "TODO: " (or figure out their answers :)

Done

PS2, Line 783: }
> "locations" perhaps?
Done

Line 799: p.Sample = append(p.Sample, &Sample{

> This is very suspicious and worth debugging. Since x86 has single byte
> inst

Yup. In 1.8 it is seeing
15891737393 446 @ 0x47237d 0x401884 0x4538d1
# 0x47237c sync.(*Mutex).Unlock+0x7c go/src/sync/mutex.go:124
# 0x401883 main.slow+0x53 rscmutex.go:73
(The addresses in the first line are off by 1 from the addresses in the
next two.

These things are written by writeBlock() and writeMutex() in
runtime/pprof.go

Line 823: // was 1, the Profile is self-contained; the executable is not
needed.

> Why are addr and offset strings?

They are filled in by a regex match.

https://go-review.googlesource.com/#/c/29650/2/src/runtime/mprof.go
File src/runtime/mprof.go:

Line 309: func saveblockevent(cycles int64, skip int, which bucketType,
ratep *uint64) {

> This name is too generic for the runtime namespace. saveblockevent?

ok.

https://go-review.googlesource.com/#/c/29650/2/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

Line 832: fmt.Fprintf(w, "sampling period=%d\n",

runtime.SetMutexProfileFraction(-1))
> Either spaces or no spaces around the "=" for consistency.

Done

https://go-review.googlesource.com/#/c/29650/2/src/runtime/runtime2.go
File src/runtime/runtime2.go:

Line 259: acquiretime int64
> I'm not sure it's true that acquiretime is never accessed concurrently
> (see

fixed (see below)

https://go-review.googlesource.com/#/c/29650/2/src/runtime/sema.go
File src/runtime/sema.go:

Line 77: type semaProfileFlags int

> Introduce a type for these. E.g.,

Done

Line 170: t0 := cputicks()

> Given that the root is no longer locked at this point, I wouldn't think
> tha

oops. fixed.

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>

Gerrit-HasComments: Yes

Russ Cox (Gerrit)

unread,

Oct 12, 2016, 12:30:11 PM10/12/16

to Peter Weinberger, Raul Silvera, Austin Clements, golang-co...@googlegroups.com, Russ Cox

Russ Cox has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

This looks good to me - leaving for Austin - but we should make sure to get
the pprof changes upstreamed into the main pprof too.

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>

Gerrit-Reviewer: Raul Silvera <rsil...@google.com>
Gerrit-HasComments: No

Ralph Corderoy (Gerrit)

unread,

Oct 13, 2016, 12:24:55 PM10/13/16

to Peter Weinberger, Raul Silvera, Austin Clements, golang-co...@googlegroups.com

Ralph Corderoy has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(1 comment)

https://go-review.googlesource.com/#/c/29650/1/src/runtime/mprof.go
File src/runtime/mprof.go:

PS1, Line 538: return n, true
> Done
It's gone from "return s, true" to "return n, true", but still lacks
the "s" on the end of "return".

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Raul Silvera <rsil...@google.com>

Gerrit-HasComments: Yes

Raul Silvera (Gerrit)

unread,

Oct 14, 2016, 12:14:11 PM10/14/16

to Peter Weinberger, Austin Clements, golang-co...@googlegroups.com

Raul Silvera has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(4 comments)

I would strongly encourage you to generate any new profiles directly on
profile.proto format instead of adding new legacy formats.

Legacy formats are problematic because they're hard to extend and to parse.
I would prefer not to pick up any new legacy formats on the upstream pprof.

https://go-review.googlesource.com/#/c/29650/3/src/cmd/go/test.go
File src/cmd/go/test.go:

PS3, Line 240: mutexprotfile
mutexprofile

https://go-review.googlesource.com/#/c/29650/3/src/runtime/mprof.go
File src/runtime/mprof.go:

PS3, Line 345: int64(fastrand())%rate == 0
I realize this is copied from blockevent, but this is really expensive, as
fastrand and the module are called even if sampling is off.

If this is an issue, an alternative may be to compute the next sampling
point after each sample is taken: that's the approach used by the heap
profiler.

PS3, Line 538: MutexPrifle
MutexProfile

PS3, Line 538: return
returns

Peter Weinberger (Gerrit)

unread,

Oct 17, 2016, 7:15:45 AM10/17/16

to Raul Silvera, Austin Clements, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(3 comments)

The new code emits a profile in exactly the same format already used by
block profiling. Converting runtime to use profile.proto seems to me to be
a much bigger change. (I could back out the additional parsing in
legacy_profile.go if that were desired. In that case the situation would be
unchanged from the present except that legacy_profile.go would have to
recognize "mutex" in addition to "contention".)

https://go-review.googlesource.com/#/c/29650/3/src/runtime/mprof.go
File src/runtime/mprof.go:

PS3, Line 345: int64(fastrand())%rate == 0
> I realize this is copied from blockevent, but this is really expensive, as

If sampling is off, then rate is 0 and fastrand() isn't called, nor is
saveblockevent).

PS3, Line 538: MutexPrifle
> MutexProfile

Done

PS3, Line 538: return
> returns

Done

Raul Silvera (Gerrit)

unread,

Oct 17, 2016, 1:23:53 PM10/17/16

to Peter Weinberger, Austin Clements, golang-co...@googlegroups.com

Raul Silvera has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(3 comments)

I would prefer to back off the changes to pprof/legacy_profile.go. I'd
prefer to only recognize the "mutex" sentinel, and please upstream it to
github.com/google/pprof.

If we do want to recognize debug=1 profiles we should do it on its own
change, and comprehensively (eg including heap profiles). Change 30556 is
attempting to add native generation of profile.proto, which would obsolete
such a change, though.

https://go-review.googlesource.com/#/c/29650/3/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

PS3, Line 822: arguwmnt
I don't think it is a good idea to document the arguments to WriteTo here.
If the interface to WriteTo changes, will anyone remember to update this?

PS3, Line 824: // [should all the errors be the uninformative
errUnrecognized,
: // or can we count on being the last legacy case in
profile.go?
: // (or should the logic there be changed?)]
errUnrecognized should be used when the input isn't recognized as a legacy
profile, so that we continue trying other formats. If you're sure that the
input is a contention/mutex profile but it cannot be successfully parsed,
return a different kind of error.

https://go-review.googlesource.com/#/c/29650/3/src/runtime/mprof.go
File src/runtime/mprof.go:

PS3, Line 345: int64(fastrand())%rate == 0

> If sampling is off, then rate is 0 and fastrand() isn't called, nor is
> save

Sorry, I meant "if a sample isn't taken", not if sampling is off.
Anyway, only a suggestion in case overhead is an issue.

Austin Clements (Gerrit)

unread,

Oct 22, 2016, 2:23:18 PM10/22/16

to Peter Weinberger, Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Austin Clements has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(6 comments)

https://go-review.googlesource.com/#/c/29650/3/src/runtime/mprof.go
File src/runtime/mprof.go:

Line 329: // reported. The previous rate is returned.
So, I have a question that I perhaps should have asked a while ago. Do we
want to sample 1/rate contention *events*, or 1/rate *time* spent
contended? Is it okay that I can have a long contention event that doesn't
get sampled? The CPU profiling rate is obviously time-based, and the heap
profiling rate is by bytes allocated, not by allocations.

Line 345: if rate > 0 && int64(fastrand())%rate == 0 {

> Sorry, I meant "if a sample isn't taken", not if sampling is off.

Given that this isn't on by default like the heap profiler, I'm not too
worried about the overhead here, at least for version 1. (OTOH, maybe we're
planning to turn it on inside Google by default?)

https://go-review.googlesource.com/#/c/29650/3/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

Line 102: //
Add "mutex" profile here.

Line 808: // PJW: too much common code with writeBlock. FIX!
// TODO(pjw): ...

https://go-review.googlesource.com/#/c/29650/3/src/runtime/sema.go
File src/runtime/sema.go:

Line 178: readyWithTime(s, 5)
readyWithTime can take a non-trivial amount of time, since it blocks if
we're scanning the goroutine's stack. So it would be good to unlock before
the readyWithTime (but after the mutexevent handling above).

https://go-review.googlesource.com/#/c/29650/3/src/sync/mutex.go
File src/sync/mutex.go:

Line 131: func runtime_SemacquireMutex(*uint32)
The rest of these are defined in sync/runtime.go. I would put this with its
friends.

Peter Weinberger (Gerrit)

unread,

Oct 24, 2016, 10:12:39 AM10/24/16

to Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(8 comments)

Raul, I removed the debug parsing from legacy_profile.go.

https://go-review.googlesource.com/#/c/29650/3/src/cmd/internal/pprof/profile/legacy_profile.go
File src/cmd/internal/pprof/profile/legacy_profile.go:

PS3, Line 822: arguwmnt
> I don't think it is a good idea to document the arguments to WriteTo here.

Fixed by removing this routine and rewriting the comments above
parseContention() and parseCppContention().

PS3, Line 824: // [should all the errors be the uninformative
errUnrecognized,
: // or can we count on being the last legacy case in
profile.go?
: // (or should the logic there be changed?)]
> errUnrecognized should be used when the input isn't recognized as a legacy

Whole routine is gone.

https://go-review.googlesource.com/#/c/29650/3/src/runtime/mprof.go
File src/runtime/mprof.go:

Line 329: // reported. The previous rate is returned.
> So, I have a question that I perhaps should have asked a while ago. Do we
> w

1. I used n==1 in most of my tests without much impact, but haven't
rechecked all of that. Also, my mental image is that a single
long-contention event would be unusual.
2. Nevertheless I modified the godoc to say that the exact sampling method
for n>1 could change.

Line 345: if rate > 0 && int64(fastrand())%rate == 0 {

> Given that this isn't on by default like the heap profiler, I'm not too
> wor

I added a TODO to do some careful measurements. (The google3 approach
hasn't been decided yet.)

https://go-review.googlesource.com/#/c/29650/3/src/runtime/pprof/pprof.go
File src/runtime/pprof/pprof.go:

Line 102: //
> Add "mutex" profile here.
> Add "mutex" profile here.

Line 808: // PJW: too much common code with writeBlock. FIX!
> // TODO(pjw): ...
Done

https://go-review.googlesource.com/#/c/29650/3/src/runtime/sema.go
File src/runtime/sema.go:

Line 178: readyWithTime(s, 5)
> readyWithTime can take a non-trivial amount of time, since it blocks if
> we'

Done

https://go-review.googlesource.com/#/c/29650/3/src/sync/mutex.go
File src/sync/mutex.go:

Line 131: func runtime_SemacquireMutex(*uint32)
> The rest of these are defined in sync/runtime.go. I would put this with
> its

Done

Peter Weinberger (Gerrit)

unread,

Oct 24, 2016, 10:21:27 AM10/24/16

to Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

(and removed some now-unused code from legacy_profile.go and fixed a
formatting error in pprof.go)

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Raul Silvera <rsil...@google.com>

Gerrit-HasComments: No

Raul Silvera (Gerrit)

unread,

Oct 24, 2016, 4:20:14 PM10/24/16

to Peter Weinberger, Austin Clements, golang-co...@googlegroups.com

Raul Silvera has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 3:

SGTM, please upload a refreshed patch at your convenience.

Peter Weinberger (Gerrit)

unread,

Oct 25, 2016, 12:23:05 PM10/25/16

to Raul Silvera, Austin Clements, golang-co...@googlegroups.com

Reviewers: Raul Silvera, Austin Clements

Peter Weinberger uploaded a new patch set:

https://go-review.googlesource.com/29650

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack

traces of goroutines holding contended mutexes if n > 0. From runtime/pprof,
pprot.Lookup("mutex").WriteTo writes the accumulated

stack traces to w (in essentially the same format that blocking
profiling uses).

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a
---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go

M src/sync/runtime.go
M src/testing/testing.go
15 files changed, 289 insertions(+), 39 deletions(-)

Peter Weinberger (Gerrit)

unread,

Oct 25, 2016, 12:24:47 PM10/25/16

to Raul Silvera, Austin Clements, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 4:

please take another look

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Raul Silvera <rsil...@google.com>

Gerrit-HasComments: No

Austin Clements (Gerrit)

unread,

Oct 26, 2016, 3:02:35 PM10/26/16

to Peter Weinberger, Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Austin Clements has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 4: Code-Review+2

(4 comments)

Raul, did you want to make another pass over this?

https://go-review.googlesource.com/#/c/29650/4/src/runtime/mprof.go
File src/runtime/mprof.go:

PS4, Line 350: usiing
"using"

Line 354: cycles = 1
Add a comment about why you bound cycles at 1. I can understand a negative
cycles would be problematic (though I don't know how that could happen),
but why is 0 cycles a problem?

https://go-review.googlesource.com/#/c/29650/4/src/runtime/pprof/pprof_test.go
File src/runtime/pprof/pprof_test.go:

Line 620: // a more precise regexp would be `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`
If there's a more precise regexp, why not use it?

https://go-review.googlesource.com/#/c/29650/4/src/testing/testing.go
File src/testing/testing.go:

Line 879: if *mutexProfile != "" && *mutexProfileFraction > 0 {
This condition is inconsistent. The flags doc string above says ">= 0",
this says "> 0", and the condition to write out the mutex profile below
says ">= 0". I think for consistency with the block profile, we should
change this one to ">= 0".

It's a little weird that setting either the block or mutex profile rate to
0 writes out an empty profile, but maybe the thought is that it's better to
write out the file the user asked for, even if it's empty, than to not
write out the file at all (and, e.g., leave behind a stale file from an
earlier run).

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Raul Silvera <rsil...@google.com>

Gerrit-HasComments: Yes

Peter Weinberger (Gerrit)

unread,

Oct 26, 2016, 3:38:24 PM10/26/16

to Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Reviewers: Austin Clements, Raul Silvera

Peter Weinberger uploaded a new patch set:

https://go-review.googlesource.com/29650

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack

traces of goroutines holding contended mutexes if n > 0. From runtime/pprof,

pprot.Lookup("mutex").WriteTo writes the accumulated

stack traces to w (in essentially the same format that blocking
profiling uses).

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a
---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go

M src/sync/runtime.go
M src/testing/testing.go
15 files changed, 286 insertions(+), 39 deletions(-)

Peter Weinberger (Gerrit)

unread,

Oct 26, 2016, 3:39:45 PM10/26/16

to Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Peter Weinberger has posted comments on this change.

runtime: Profile goroutines holding contended mutexes.

Patch Set 5:

(4 comments)

Responded to Austin.

https://go-review.googlesource.com/#/c/29650/4/src/runtime/mprof.go
File src/runtime/mprof.go:

PS4, Line 350: using
> "using"
Done

Line 354: }

> Add a comment about why you bound cycles at 1. I can understand a negative

I guess it's a leftover from blockevent. Took it out.

https://go-review.googlesource.com/#/c/29650/4/src/runtime/pprof/pprof_test.go
File src/runtime/pprof/pprof_test.go:

Line 620: r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+`

> If there's a more precise regexp, why not use it?

Because I thought the simpler one was enough. But I changed it.

https://go-review.googlesource.com/#/c/29650/4/src/testing/testing.go
File src/testing/testing.go:

Line 879: if *mutexProfile != "" && *mutexProfileFraction >= 0 {

> This condition is inconsistent. The flags doc string above says ">= 0",
> thi

Either I don't remember my reasoning, or it's a typo. I made it the same as
blockProfile.

--
https://go-review.googlesource.com/29650
Gerrit-Reviewer: Austin Clements <aus...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Raul Silvera <rsil...@google.com>

Gerrit-HasComments: Yes

Peter Weinberger (Gerrit)

unread,

Oct 28, 2016, 7:47:22 AM10/28/16

to golang-...@googlegroups.com, Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Peter Weinberger has submitted this change and it was merged.

runtime: Profile goroutines holding contended mutexes.

runtime.SetMutexProfileFraction(n int) will capture 1/n-th of stack

traces of goroutines holding contended mutexes if n > 0. From runtime/pprof,

pprot.Lookup("mutex").WriteTo writes the accumulated

stack traces to w (in essentially the same format that blocking
profiling uses).

Change-Id: Ie0b54fa4226853d99aa42c14cb529ae586a8335a

Reviewed-on: https://go-review.googlesource.com/29650
Reviewed-by: Austin Clements <aus...@google.com>

---
M src/cmd/go/test.go
M src/cmd/go/testflag.go
M src/cmd/internal/pprof/profile/legacy_profile.go
M src/runtime/mgc.go
M src/runtime/mprof.go
M src/runtime/pprof/pprof.go
M src/runtime/pprof/pprof_test.go
M src/runtime/proc.go
M src/runtime/runtime2.go
M src/runtime/sema.go
M src/runtime/trace.go
M src/sync/mutex.go
M src/sync/mutex_test.go

M src/sync/runtime.go
M src/testing/testing.go
15 files changed, 286 insertions(+), 39 deletions(-)

Approvals:
Austin Clements: Looks good to me, approved

Brad Fitzpatrick (Gerrit)

unread,

Oct 28, 2016, 2:33:22 PM10/28/16

to Peter Weinberger, Austin Clements, Raul Silvera, golang-co...@googlegroups.com, Brad Fitzpatrick

Brad Fitzpatrick posted comments on runtime: Profile goroutines holding contended mutexes..

View Change

Patch Set 6:

This appears to break the build and was submitted with using the trybots.

To view, visit this change. To unsubscribe, visit settings.

Brad Fitzpatrick (Gerrit)

unread,

Oct 28, 2016, 2:42:16 PM10/28/16

to Peter Weinberger, Austin Clements, Raul Silvera, golang-co...@googlegroups.com, Brad Fitzpatrick

Brad Fitzpatrick posted comments on runtime: Profile goroutines holding contended mutexes..

View Change

Patch Set 6:

(1 comment)

File src/runtime/pprof/pprof.go:
- Patch Set #6, Line 805: b := bufio.NewWriter(w)
  Nevermind, I figured out what happened. A later commit removed the bufio import.

To view, visit this change. To unsubscribe, visit settings.

Dmitri Shuralyov (Gerrit)

unread,

Dec 28, 2016, 2:13:18 PM12/28/16

to Peter Weinberger, Austin Clements, Raul Silvera, golang-co...@googlegroups.com

Dmitri Shuralyov posted comments on this change.

View Change

Patch Set 6:

(1 comment)

File src/cmd/go/test.go:
- Patch Set #6, Line 247:
  This space at the beginning of the line shouldn't be here. It causes https://golang.org/issues/18448.

To view, visit this change. To unsubscribe, visit settings.

Reply all

Reply to author

Forward