cmd/cover: exclude commented-out code from coverage instrumentation
Add logic to exclude purely commented lines from coverage instrumentation.
When instrumenting Go code for coverage, the cover tool now identifies
and excludes lines that contain only comments from coverage blocks.
This prevents commented-out code from being reported as "uncovered"
in coverage reports, which can be misleading.
The implementation adds a splitBlockByComments function that parses
source code character by character to identify segments containing
executable code versus segments containing only comments. The
addCounters function now uses this to create coverage counters only
for segments that contain actual executable code.
The parser correctly handles:
- Single-line comments (//)
- Multi-line comments (/* */)
- String literals containing comment-like sequences
- Raw string literals with fake comments
- Mixed lines with both code and comments
This improves the accuracy of coverage reports by ensuring that
commented-out code, TODOs, and documentation comments don't inflate
the count of uncovered lines.
Fixes #22545
diff --git a/src/cmd/cover/cover.go b/src/cmd/cover/cover.go
index 9207fa0..9e5afd8 100644
--- a/src/cmd/cover/cover.go
+++ b/src/cmd/cover/cover.go
@@ -266,6 +266,197 @@
pkg *Package
}
+// BlockSegment represents a segment of a basic block that can be either
+// executable code or commented-out code.
+type BlockSegment struct {
+ start token.Pos
+ end token.Pos
+ hasCode bool // true if this segment contains executable code
+}
+
+// blockSplitter holds the state for splitting a block by comments.
+// It tracks cursor position, line state, and section boundaries
+// while scanning through source code character by character.
+type blockSplitter struct {
+ // Source information
+ file *token.File
+ startOffset int
+
+ // Accumulated results
+ segments []BlockSegment
+
+ // Cursor position
+ i int
+ indexStartCurrentLine int
+ currentOffset int
+ currentSegmentStart token.Pos
+
+ // Section state (persists across lines)
+ inCommentedSection bool
+
+ // Line state (reset each line)
+ lineHasCode bool
+ lineHasComment bool
+
+ // Cursor state (tracks what we're inside)
+ inSingleLineComment bool
+ inMultiLineComment bool
+ inString bool
+ inRawString bool
+}
+
+// processLine handles end-of-line processing: computes state transitions
+// and decides whether to create a new segment based on code/comment boundaries.
+func (bs *blockSplitter) processLine() {
+ lineStart := bs.currentOffset
+ lineEnd := bs.currentOffset + (bs.i - bs.indexStartCurrentLine)
+
+ if bs.inCommentedSection && bs.lineHasCode {
+ // End of commented section, start of code section
+ segmentEnd := bs.file.Pos(lineStart)
+ bs.segments = append(bs.segments, BlockSegment{
+ start: bs.currentSegmentStart,
+ end: segmentEnd,
+ hasCode: false,
+ })
+ bs.currentSegmentStart = bs.file.Pos(lineStart)
+ bs.inCommentedSection = false
+ } else if !bs.inCommentedSection && !bs.lineHasCode && bs.lineHasComment {
+ // End of code section, start of commented section
+ segmentEnd := bs.file.Pos(lineStart)
+ if bs.currentSegmentStart < segmentEnd {
+ bs.segments = append(bs.segments, BlockSegment{
+ start: bs.currentSegmentStart,
+ end: segmentEnd,
+ hasCode: true,
+ })
+ }
+ bs.currentSegmentStart = bs.file.Pos(lineStart)
+ bs.inCommentedSection = true
+ }
+
+ bs.currentOffset = lineEnd
+}
+
+// resetLineState resets line-specific state for a new line.
+func (bs *blockSplitter) resetLineState() {
+ bs.indexStartCurrentLine = bs.i
+ bs.lineHasComment = bs.inMultiLineComment
+ bs.lineHasCode = false
+ bs.inSingleLineComment = false
+}
+
+// inStringOrComment returns true if currently inside a string or comment.
+func (bs *blockSplitter) inStringOrComment() bool {
+ return bs.inString || bs.inRawString || bs.inSingleLineComment || bs.inMultiLineComment
+}
+
+// splitBlockByComments analyzes a block range and splits it into segments,
+// separating executable code from any commented lines.
+// To do this, it reads the original source code character by character.
+func (f *File) splitBlockByComments(start, end token.Pos) []BlockSegment {
+ startOffset := f.offset(start)
+ endOffset := f.offset(end)
+
+ if startOffset >= endOffset || endOffset > len(f.content) {
+ return []BlockSegment{{start: start, end: end, hasCode: true}}
+ }
+
+ originalSourceCode := f.content[startOffset:endOffset]
+
+ bs := &blockSplitter{
+ file: f.fset.File(start),
+ startOffset: startOffset,
+ currentOffset: startOffset,
+ currentSegmentStart: start,
+ }
+
+ for bs.i < len(originalSourceCode) {
+ char := originalSourceCode[bs.i]
+
+ if char == '\\' && bs.inString && bs.i+1 < len(originalSourceCode) {
+ bs.lineHasCode = true
+ bs.i += 2 // Skip escaped character
+ continue
+ }
+
+ if char == '"' && !bs.inRawString && !bs.inSingleLineComment && !bs.inMultiLineComment {
+ bs.lineHasCode = true
+ bs.inString = !bs.inString
+ bs.i++
+ continue
+ }
+
+ if char == '`' && !bs.inString && !bs.inSingleLineComment && !bs.inMultiLineComment {
+ bs.lineHasCode = true
+ bs.inRawString = !bs.inRawString
+ bs.i++
+ continue
+ }
+
+ if char == '\n' {
+ bs.i++
+ bs.processLine()
+ bs.resetLineState()
+ continue
+ }
+
+ if bs.i+1 < len(originalSourceCode) {
+ nextChar := originalSourceCode[bs.i+1]
+ if char == '/' && nextChar == '/' && !bs.inString && !bs.inRawString && !bs.inMultiLineComment {
+ bs.inSingleLineComment = true
+ bs.lineHasComment = true
+ bs.i += 2
+ continue
+ }
+
+ if char == '/' && nextChar == '*' && !bs.inString && !bs.inRawString && !bs.inSingleLineComment {
+ bs.inMultiLineComment = true
+ bs.lineHasComment = true
+ bs.i += 2
+ continue
+ }
+
+ if char == '*' && nextChar == '/' && bs.inMultiLineComment {
+ bs.inMultiLineComment = false
+ bs.lineHasComment = true
+ bs.i += 2
+ continue
+ }
+ }
+
+ // If we matched nothing else and the char is not whitespace, we are in normal code.
+ if !bs.lineHasCode && !isWhitespace(char) && !bs.inSingleLineComment && !bs.inMultiLineComment {
+ bs.lineHasCode = true
+ }
+
+ bs.i++
+ }
+
+ // Process the last line if it doesn't end with a newline
+ bs.processLine()
+
+ // Add the final segment
+ if bs.currentSegmentStart < end {
+ bs.segments = append(bs.segments, BlockSegment{
+ start: bs.currentSegmentStart,
+ end: end,
+ hasCode: !bs.inCommentedSection,
+ })
+ }
+
+ // If no segments were created, return the original block as a code segment
+ if len(bs.segments) == 0 {
+ return []BlockSegment{{start: start, end: end, hasCode: true}}
+ }
+
+ return bs.segments
+}
+
+func isWhitespace(b byte) bool {
+ return b == ' ' || b == '\t' || b == '\n' || b == '\r'
+}
+
// findText finds text in the original source, starting at pos.
// It correctly skips over comments and assumes it need not
// handle quoted strings.
@@ -755,7 +946,14 @@
// Special case: make sure we add a counter to an empty block. Can't do this below
// or we will add a counter to an empty statement list after, say, a return statement.
if len(list) == 0 {
- f.edit.Insert(f.offset(insertPos), f.newCounter(insertPos, blockEnd, 0)+";")
+ // Split the empty block by comments and only add counter if there's executable content
+ segments := f.splitBlockByComments(insertPos, blockEnd)
+ for _, segment := range segments {
+ if segment.hasCode {
+ f.edit.Insert(f.offset(segment.start), f.newCounter(segment.start, segment.end, 0)+";")
+ break // Only insert one counter per basic block
+ }
+ }
return
}
// Make a copy of the list, as we may mutate it and should leave the
@@ -805,7 +1003,20 @@
end = blockEnd
}
if pos != end { // Can have no source to cover if e.g. blocks abut.
- f.edit.Insert(f.offset(insertPos), f.newCounter(pos, end, last)+";")
+ // Split the block by comments and create counters only for executable segments
+ segments := f.splitBlockByComments(pos, end)
+ for i, segment := range segments {
+ if segment.hasCode {
+ // Only create counter for executable code segments
+ // Insert counter at the beginning of the executable segment
+ insertOffset := f.offset(segment.start)
+ // For the first segment, use the original insertPos if it's before the segment
+ if i == 0 {
+ insertOffset = f.offset(insertPos)
+ }
+ f.edit.Insert(insertOffset, f.newCounter(segment.start, segment.end, last)+";")
+ }
+ }
}
list = list[last:]
if len(list) == 0 {
diff --git a/src/cmd/cover/cover_test.go b/src/cmd/cover/cover_test.go
index 431c056..d345179 100644
--- a/src/cmd/cover/cover_test.go
+++ b/src/cmd/cover/cover_test.go
@@ -19,6 +19,7 @@
"os/exec"
"path/filepath"
"regexp"
+ "strconv"
"strings"
"sync"
"testing"
@@ -638,3 +639,275 @@
t.Errorf("unexpected success; want failure due to newline in file path")
}
}
+
+// TestCommentedOutCodeExclusion tests that all commented lines are excluded
+// from coverage requirements using the simplified block splitting approach.
+func TestCommentedOutCodeExclusion(t *testing.T) {
+ //testenv.MustHaveGoBuild(t)
+
+ // Create a test file with various types of comments mixed with real code
+ testSrc := `package main
+
+ import "fmt"
+
+ func main() {
+ fmt.Println("Start - should be covered")
+
+ // This is a regular comment - should be excluded from coverage
+ x := 42
+
+ // Any comment line is excluded, regardless of content
+ // fmt.Println("commented code")
+ // TODO: implement this later
+ // BUG: fix this issue
+
+ y := 0
+ fmt.Println("After comments - should be covered")
+
+ /* some comment */
+
+ fmt.Println("It's a trap /*")
+ z := 0
+
+ if x > 0 {
+ y = x * 2
+ } else {
+ y = x - 2
+ }
+
+ z = 5
+
+ /* Multiline comments
+ with here
+ and here */
+
+ z1 := 0
+
+ z1 = 1 /* Multiline comments
+ where there is code at the beginning
+ and the end */ z1 = 2
+
+ z1 = 3; /* // */ z1 = 4
+
+ z1 = 5 /* //
+ //
+ // */ z1 = 6
+
+ /*
+ */ z1 = 7 /*
+ */
+
+ z1 = 8/*
+ before */ /* after
+ */z1 = 9
+
+ /* before */ z1 = 10
+ /* before */ z1 = 10 /* after */
+ z1 = 10 /* after */
+
+ fmt.Printf("Result: %d\n", z)
+ fmt.Printf("Result: %d\n", z1)
+
+ s := ` + "`This is a multi-line raw string" + `
+ // fake comment on line 2
+ /* and fake comment on line 3 */
+ and other` + "`" + `
+
+ s = ` + "`another multiline string" + `
+ ` + "`" + ` // another trap
+
+ fmt.Printf("%s", s)
+
+ // More comments to exclude
+ // for i := 0; i < 10; i++ {
+ // fmt.Printf("Loop %d", i)
+ // }
+
+ fmt.Printf("Result: %d\n", y)
+ // end comment
+ }
+
+ func empty() {
+
+ }
+
+ func singleBlock() {
+ fmt.Printf("ResultSomething")
+ }
+
+ func justComment() {
+ // comment
+ }
+
+ func justMultilineComment() {
+ /* comment
+ again
+ until here */
+ }`
+
+ tmpdir := t.TempDir()
+ srcPath := filepath.Join(tmpdir, "test.go")
+ if err := os.WriteFile(srcPath, []byte(testSrc), 0666); err != nil {
+ t.Fatalf("writing test file: %v", err)
+ }
+
+ // Test that our cover tool processes the file without errors
+ cmd := testenv.Command(t, testcover(t), "-mode=set", srcPath)
+ out, err := cmd.Output()
+ if err != nil {
+ t.Fatalf("cover failed: %v\nOutput: %s", err, out)
+ }
+
+ // The output should contain the real code but preserve commented-out code as comments; we still need to get rid of the first line
+ outStr := string(out)
+ outStr = strings.Join(strings.SplitN(outStr, "\n", 2)[1:], "\n")
+
+ segments := extractCounterPositionFromCode(outStr)
+
+ if len(segments) != 18 {
+ t.Fatalf("expected 18 code segments, got %d", len(segments))
+ }
+
+ // Assert all segments contents, we expect to find all lines of code and no comments alone on a line
+ assertSegmentContent(t, segments[0], []string{`fmt.Println("Start - should be covered")`, `{`})
+ assertSegmentContent(t, segments[1], []string{`x := 42`})
+ assertSegmentContent(t, segments[2], []string{`y := 0`, `fmt.Println("After comments - should be covered")`})
+ assertSegmentContent(t, segments[3], []string{`fmt.Println("It's a trap /*")`, `z := 0`, `if x > 0 {`})
+ assertSegmentContent(t, segments[4], []string{`z = 5`})
+ assertSegmentContent(t, segments[5], []string{`z1 := 0`, `z1 = 1 /* Multiline comments`})
+ assertSegmentContent(t, segments[6], []string{`and the end */ z1 = 2`, `z1 = 3; /* // */ z1 = 4`, `z1 = 5 /* //`})
+ assertSegmentContent(t, segments[7], []string{`// */ z1 = 6`})
+ assertSegmentContent(t, segments[8], []string{`*/ z1 = 7 /*`})
+ assertSegmentContent(t, segments[9], []string{`z1 = 8/*`})
+ assertSegmentContent(t, segments[10], []string{`*/z1 = 9`, `/* before */ z1 = 10`, `/* before */ z1 = 10 /* after */`, `z1 = 10 /* after */`,
+ `fmt.Printf("Result: %d\n", z)`, `fmt.Printf("Result: %d\n", z1)`, `s := ` + "`" + `This is a multi-line raw string`, `// fake comment on line 2`,
+ `/* and fake comment on line 3 */`, `and other` + "`", `s = ` + "`" + `another multiline string`, "` // another trap", `fmt.Printf("%s", s)`})
+ assertSegmentContent(t, segments[11], []string{`fmt.Printf("Result: %d\n", y)`})
+ // Segment 12 is the if block in main()
+ assertSegmentContent(t, segments[12], []string{`{`, `y = x * 2`, `}`})
+ // and segment 13 is the else block, extra curly brackets introduced by cover tool
+ assertSegmentContent(t, segments[13], []string{`{`, `{`, `y = x - 2`, `}`, `}`})
+ // function empty()
+ assertSegmentContent(t, segments[14], []string{})
+ // function singleBlock()
+ assertSegmentContent(t, segments[15], []string{`{`, `fmt.Printf("ResultSomething")`})
+ // function justComment()
+ assertSegmentContent(t, segments[16], []string{})
+ // function justMultilineComment()
+ assertSegmentContent(t, segments[17], []string{})
+}
+
+func assertSegmentContent(t *testing.T, segment CounterSegment, expecteds []string) {
+ code := segment.Code
+
+ // removing counter
+ expectedCounter := fmt.Sprintf("GoCover.Count[%d] = 1;", segment.CounterIndex)
+ if strings.Contains(code, expectedCounter) {
+ code = strings.Replace(code, expectedCounter, "", 1)
+ } else {
+ t.Fatalf("expected code segment %d to contain the counter code '%q', but segment is: %q", segment.CounterIndex, expectedCounter, code)
+ }
+
+ // removing expected strings
+ for _, expected := range expecteds {
+ if strings.Contains(code, expected) {
+ code = strings.Replace(code, expected, "", 1)
+ } else {
+ t.Fatalf("expected code segment %d to contain '%q', but segment is: %q", segment.CounterIndex, expected, code)
+ }
+ }
+
+ // checking if only left is whitespace
+ code = strings.TrimSpace(code)
+ if code != "" {
+ t.Fatalf("expected code segment %d to contain only expected strings, leftover: %q", segment.CounterIndex, code)
+ }
+}
+
+type CounterSegment struct {
+ StartLine int
+ EndLine int
+ StartColumn int
+ EndColumn int
+ CounterIndex int
+ Code string
+}
+
+func extractCounterPositionFromCode(code string) []CounterSegment {
+ re := regexp.MustCompile(`(?P<startLine>\d+), (?P<endLine>\d+), (?P<packedColumnDetail>0x[0-9a-f]+), // \[(?P<counterIndex>\d+)\]`)
+ // find all occurrences and extract captured parenthesis into a struct
+ matches := re.FindAllStringSubmatch(code, -1)
+ var matchResults []CounterSegment
+ for _, m := range matches {
+ packed := m[re.SubexpIndex("packedColumnDetail")]
+ var packedVal int
+ fmt.Sscanf(packed, "0x%x", &packedVal)
+ startCol := packedVal & 0xFFFF
+ endCol := (packedVal >> 16) & 0xFFFF
+ startLine, _ := strconv.Atoi(m[re.SubexpIndex("startLine")])
+ endLine, _ := strconv.Atoi(m[re.SubexpIndex("endLine")])
+
+ // fix: if start line and end line are equal, increase end column by the number of chars in 'GoCover.Count[X] = 1;'
+ if startLine == endLine {
+ endCol += len("GoCover.Count[" + m[re.SubexpIndex("counterIndex")] + "] = 1;")
+ }
+
+ counterIndex, _ := strconv.Atoi(m[re.SubexpIndex("counterIndex")])
+ codeSegment := extractCodeFromSegmentDetail(startLine, endLine, startCol, endCol, code)
+
+ matchResults = append(matchResults, CounterSegment{
+ StartLine: startLine,
+ EndLine: endLine,
+ StartColumn: startCol,
+ EndColumn: endCol,
+ CounterIndex: counterIndex,
+ Code: codeSegment,
+ })
+ }
+ return matchResults
+}
+
+func extractCodeFromSegmentDetail(startLine int, endLine int, startCol int, endCol int, code string) string {
+ lines := strings.Split(code, "\n")
+ var codeSegment string
+ if startLine == endLine && startLine > 0 && startLine <= len(lines) {
+ // Single line segment
+ line := lines[startLine-1]
+ if startCol <= 0 {
+ startCol = 1
+ }
+ if endCol > len(line) {
+ endCol = len(line)
+ }
+
+ if startCol <= endCol {
+ // display details of next operation
+ codeSegment = line[startCol-1 : endCol]
+ }
+ } else if startLine > 0 && endLine <= len(lines) && startLine <= endLine {
+ // Multi-line segment
+ var segmentLines []string
+ for i := startLine; i <= endLine; i++ {
+ if i > len(lines) {
+ break
+ }
+ line := lines[i-1]
+ if i == startLine {
+ // First line: from startCol to end
+ if startCol > 0 && startCol <= len(line) {
+ segmentLines = append(segmentLines, line[startCol-1:])
+ }
+ } else if i == endLine {
+ // Last line: from beginning to endCol
+ if endCol <= len(line) {
+ segmentLines = append(segmentLines, line[:endCol])
+ }
+ } else {
+ // Middle lines: entire line
+ segmentLines = append(segmentLines, line)
+ }
+ }
+ codeSegment = strings.Join(segmentLines, "\n")
+ }
+ return codeSegment
+}
Congratulations on opening your first change. Thank you for your contribution!
Next steps:
A maintainer will review your change and provide feedback. See
https://go.dev/doc/contribute#review for more info and tips to get your
patch through code review.
Most changes in the Go project go through a few rounds of revision. This can be
surprising to people new to the project. The careful, iterative review process
is our way of helping mentor contributors and ensuring that their contributions
have a lasting impact.
During May-July and Nov-Jan the Go project is in a code freeze, during which
little code gets reviewed or merged. If a reviewer responds with a comment like
R=go1.11 or adds a tag like "wait-release", it means that this CL will be
reviewed as part of the next development cycle. See https://go.dev/s/release
for more details.
> for bs.i < len(originalSourceCode) {
>
> This loop seems to reinvent the logic of go/scanner, which produces a stream of tokens and comments (with positions) from the content of a Go file. Is there a reason we can't use it directly?

Good call; I actually was looking for such a thing but couldn't find it initially. I adapted the code and logic; it is much simpler now, thanks.
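For reference, the scanner-based approach might look roughly like this minimal sketch (the source fragment and names are illustrative, not the CL's code):

```go
// Sketch: tokenize a source fragment with go/scanner and record which
// lines contain non-comment tokens.
package main

import (
	"fmt"
	"go/scanner"
	"go/token"
)

func main() {
	src := []byte("x := 1 // trailing comment\n// comment-only line\ny := 2\n")

	fset := token.NewFileSet()
	file := fset.AddFile("fragment.go", fset.Base(), len(src))

	var s scanner.Scanner
	s.Init(file, src, nil, scanner.ScanComments)

	lineHasCode := map[int]bool{}
	for {
		pos, tok, _ := s.Scan()
		if tok == token.EOF {
			break
		}
		if tok != token.COMMENT {
			lineHasCode[file.Line(pos)] = true
		}
	}
	fmt.Println(lineHasCode) // map[1:true 3:true]
}
```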
> if startOffset >= endOffset || endOffset > len(f.content) {
>     return []BlockSegment{{start: start, end: end, hasCode: true}}
> }
>
> I think this condition should be impossible: the two Pos values should be valid for file f, so their offsets will be in range [0, len). Am I missing something?
>
> (This does assume that start <= end. We should document that precondition on splitBlockByComments and audit the callers.)

You're right, this was purely defensive programming. I audited all callers, removed the defensive check, and documented the precondition instead.
> fset := token.NewFileSet()
>
> When there are two Files (and FileSets) in use, we must be extra explicit to avoid confusion about which is which. Since fset is only needed on the following line, let's inline it away, and let's use the names origFile and scanFile:
>
>     scanFile := token.NewFileSet().AddFile(...)
>
> Then let's comment each Pos with the File it belongs to, e.g.
>
>     currentSegmentStart := start // (in origFile)

Good point. Renamed `file` to `scanFile` to distinguish it from `origFile`, and inlined the `FileSet` creation since it's only used once. Also added a clarifying comment explaining why we need a separate `FileSet`, and added comments on each `Pos` as suggested.
> endLine := file.Line(pos + token.Pos(len(lit)) - 1)
>
> The Line method is relatively expensive since it must binary search the line-offset table. Since the only token that spans lines is a string literal, consider setting endLine directly to startLine for all other tokens.
>
> BTW, there's a proposal to add a method to compute `pos + token.Pos(len(lit))` without the bugs in certain edge cases (e.g. multiline string literals containing carriage returns that are normalized away). Let's leave a comment here:
>
>     // TODO(adonovan): simplify when https://go.dev/issue/74958 is resolved.
>
> Nit: I don't think the `- 1` term is needed. The correct position is immediately after the token (which will be on the correct line), not one character before.

Optimized to compute `endLine` only for `STRING` tokens, the only token type that can span multiple lines. Removed the unnecessary and incorrect `- 1`, since `Line(pos + len(lit))` already returns the correct line. Added the `TODO` as suggested.
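The adapted loop presumably takes roughly this shape (a sketch of the described change, not the actual CL code; scanFile, pos, tok, and lit come from the enclosing scan loop):

```go
// Only string literals can span multiple lines, so skip the extra
// line lookup for every other token.
startLine := scanFile.Line(pos)
endLine := startLine
if tok == token.STRING {
	// Line of the position immediately after the token.
	// TODO(adonovan): simplify when https://go.dev/issue/74958 is resolved.
	endLine = scanFile.Line(pos + token.Pos(len(lit)))
}
```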
> if endLine < startLine {
>     endLine = startLine
> }
>
> I don't think this condition can be true, unless a file uses //line directives, which I suspect will make a mess of this whole algorithm. You'll notice that the File.Position(pos) operator comes in two forms, PositionFor(pos, true) and PositionFor(pos, false), that either use or ignore the //line directives. I think you may want to use only the false form throughout, for consistency; you'll need to test on some code that uses //line directives.
>
> Beware that some wrappers such as File.Line(pos) only come in the 'true' form, so you'll need to unwrap them and make them use 'false'.

Ah yes, thanks for reminding me about `//line` directives; I had some fun with them in the past in our analyzer.

From my searches, such a directive is not commonly used in "normal" code: my understanding is that it is mostly for generated code that needs to map errors back to the original source file, not something you would use in typical Go code. That led me to think that for coverage computation we never want to use adjusted line numbers (except when reporting issues/messages).

I had to fix some existing methods in `cover.go` (`newCounter` and `addVariables`) and took the opportunity to align some others (`offset`) that were also using adjusted positions. I kept the `adjusted=true` cases (lines 549, 608) as they are about user-facing filename messages.

I added a test with a line directive; it now computes segments properly. Thanks.
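To make the adjusted/unadjusted distinction concrete, here is a small self-contained demo (file names and contents are illustrative):

```go
package main

import (
	"fmt"
	"go/parser"
	"go/token"
)

func main() {
	// The //line directive makes the next line report as gen.go:100.
	src := "package p\n\n//line gen.go:100\nvar X = 1\n"

	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "real.go", src, 0)
	if err != nil {
		panic(err)
	}

	pos := f.Decls[0].Pos() // position of "var"
	tf := fset.File(pos)
	fmt.Println(tf.PositionFor(pos, true))  // gen.go:100:1 (adjusted by //line)
	fmt.Println(tf.PositionFor(pos, false)) // real.go:4:1  (actual location)
}
```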
> if tok == token.COMMENT {
>     // Mark all lines spanned by this comment
>     for line := startLine; line <= endLine; line++ {
>         lineHasComment[line] = true
>     }
> } else {
>
> Why is it necessary to consider comments at all? Is it not the case that every line containing a non-comment token is a "code" line, and lines containing only spaces can be ignored? Then you don't need to distinguish horizontal and vertical space from comments.
>
> Also, is it necessary to record the non-code segments? It looks like neither caller actually uses them. Then you can simplify BlockSegment to just `type Range struct { pos, end token.Pos }` and call this method codeRanges.

That's a good call, I think: we should not even bother with comments and only consider lines with code. This will directly split our segments on empty lines and comment-only lines. In my opinion this gives a better result, since we end up with only blocks of code; of course it produces far more blocks, which is not an issue IMO.

I went this way and applied your suggestion to simplify the structure and the code as well.
Hold+1
Thanks. The production code looks sound; I have noted a few opportunities for improvement below. I think we need a better strategy for testing that exercises the production parser of coverage files.
BTW, the go1.26 freeze began last week, so we won't be able to merge this CL until February (hence the Hold).
> // It uses go/scanner to tokenize the source and identify which lines
> // contain executable code.

This information belongs inside the function, since it describes the implementation, not the contract.
> startOffset := f.offset(start)

Nit: `var ( ... )` will make them line up neatly.
> // We use a separate FileSet because we're scanning a slice of the original source,

File. (The choice of FileSet matters less.)
> s.Init(scanFile, src, nil, scanner.ScanComments)

Clear this flag and you won't need the COMMENT check below.
> lineHasCode := make(map[int]bool)

This map will be densely (>50%) populated, and will cost around 72 bits per entry, so that's something like 36 bits per file line even without the hash table overheads. A better data structure would be a slice of bools of length n (exactly 8 bits per file line) or a bit vector (1 bit per file line):

```
const k = unsafe.Sizeof(uint(0)) * 8          // limb size, in bits
code := make([]uint, 1+totalLines/k)          // bit set of code lines
code[line/k] |= 1 << (line % k)               // set bit 'line'
if code[line/k]&(1<<(line%k)) != 0 { ... }    // get bit 'line'
```
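(A self-contained version of that sketch, using math/bits.UintSize for the limb width; the names are illustrative:)

```go
package main

import (
	"fmt"
	"math/bits"
)

type lineSet []uint // bit i is set if line i contains code

func newLineSet(totalLines int) lineSet {
	return make(lineSet, 1+totalLines/bits.UintSize)
}

func (s lineSet) set(line int) {
	s[line/bits.UintSize] |= 1 << (line % bits.UintSize)
}

func (s lineSet) has(line int) bool {
	return s[line/bits.UintSize]&(1<<(line%bits.UintSize)) != 0
}

func main() {
	code := newLineSet(100)
	code.set(3)
	code.set(42)
	fmt.Println(code.has(3), code.has(4), code.has(42)) // true false true
}
```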
> if hasCode && !inCode {

Factor:

```
if hasCode != inCode {
	// start or end of a range
	lineStartPos := origFile.Pos(startOffset + scanFile.Offset(scanFile.LineStart(line)))
	if hasCode { // start
		codeStart = lineStartPos
	} else {
		ranges = append(ranges, Range{pos: codeStart, end: lineStartPos})
	}
	inCode = hasCode
}
```
> if len(ranges) > 0 {

This condition is never false. Consider documenting this postcondition of codeRanges and omitting the check.
> for i, r := range ranges {

`range f.CodeRanges(...)`
> return f.fset.PositionFor(pos, false).Offset

Consider adding a new *File method for this recurring expression.
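Presumably something like the following (the method name is a guess, not the CL's):

```go
// unadjustedOffset returns the file offset of pos, computed while
// ignoring //line directives. Hypothetical helper for the recurring
// f.fset.PositionFor(pos, false).Offset expression.
func (f *File) unadjustedOffset(pos token.Pos) int {
	return f.fset.PositionFor(pos, false).Offset
}
```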
> tmpdir := t.TempDir()
> srcPath := filepath.Join(tmpdir, "test.go")
> if err := os.WriteFile(srcPath, []byte(testSrc), 0666); err != nil {
>     t.Fatalf("writing test file: %v", err)
> }

Why not move the big constant into a testdata/ranges/ranges_test.go file and run cover directly on that?
> outStr = strings.Join(strings.SplitN(outStr, "\n", 2)[1:], "\n")

`_, outStr, _ := strings.Cut(string(out), "\n")`
> // Assert all segments contents, we expect to find all lines of code and no comments alone on a line
> assertSegmentContent(t, segments[0], []string{`{`, `fmt.Println("Start - should be covered")`})
> assertSegmentContent(t, segments[1], []string{`x := 42`})
> assertSegmentContent(t, segments[2], []string{`y := 0`, `fmt.Println("After comments - should be covered")`})
> assertSegmentContent(t, segments[3], []string{`fmt.Println("It's a trap /*")`, `z := 0`})
> assertSegmentContent(t, segments[4], []string{`if x > 0 {`})
> assertSegmentContent(t, segments[5], []string{`z = 5`})
> assertSegmentContent(t, segments[6], []string{`z1 := 0`})
> assertSegmentContent(t, segments[7], []string{`z1 = 1 /* Multiline comments`})
> assertSegmentContent(t, segments[8], []string{`and the end */ z1 = 2`})
> assertSegmentContent(t, segments[9], []string{`z1 = 3; /* // */ z1 = 4`})
> assertSegmentContent(t, segments[10], []string{`z1 = 5 /* //`})
> assertSegmentContent(t, segments[11], []string{`// */ z1 = 6`})
> assertSegmentContent(t, segments[12], []string{`*/ z1 = 7 /*`})
> assertSegmentContent(t, segments[13], []string{`z1 = 8/*`})
> assertSegmentContent(t, segments[14], []string{`*/z1 = 9`})
> assertSegmentContent(t, segments[15], []string{`/* before */ z1 = 10`, `/* before */ z1 = 10 /* after */`, `z1 = 10 /* after */`})
> assertSegmentContent(t, segments[16], []string{`fmt.Printf("Result: %d\n", z)`, `fmt.Printf("Result: %d\n", z1)`})
> assertSegmentContent(t, segments[17], []string{`s := ` + "`" + `This is a multi-line raw string`, `// fake comment on line 2`, `/* and fake comment on line 3 */`,
>     `and other` + "`"})
> assertSegmentContent(t, segments[18], []string{`s = ` + "`" + `another multiline string`, "` // another trap"})
> assertSegmentContent(t, segments[19], []string{`fmt.Printf("%s", s)`})
> assertSegmentContent(t, segments[20], []string{`fmt.Printf("Result: %d\n", y)`})
> assertSegmentContent(t, segments[21], []string{`{`, `y = x * 2`, `}`})
> assertSegmentContent(t, segments[22], []string{`{`, `{`, `y = x - 2`, `}`, `}`})
> assertSegmentContent(t, segments[23], []string{`}`})
> assertSegmentContent(t, segments[24], []string{`{`, `fmt.Printf("ResultSomething")`})
> assertSegmentContent(t, segments[25], []string{`}`})
> assertSegmentContent(t, segments[26], []string{`}`})

This is quite unwieldy: a single change to one line would require the user to at least renumber the entire table. A better approach would be to weave the assertions into the original file. For example, if you insert some non-Go bracket characters into the file like this:
```
«package main»
// hello
«func main() {
fmt.Println("hello")
}»
```
then you can derive both the true input (by removing the `«...»` bytes) and the test assertions (by remembering their indices) from the same file.
Then it's easy for the reader to see how the result relates to the input, and to make changes in a local and intelligible way.
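A sketch of how such markers could be processed (parseMarkers and span are illustrative names; nested markers are not handled):

```go
package main

import (
	"fmt"
	"strings"
)

// span is a [start, end) byte range into the stripped source.
type span struct{ start, end int }

// parseMarkers strips the « and » runes from annotated and records the
// byte offsets of each «...» section in the resulting source.
func parseMarkers(annotated string) (src string, spans []span) {
	var b strings.Builder
	var open int
	for _, r := range annotated {
		switch r {
		case '«':
			open = b.Len()
		case '»':
			spans = append(spans, span{open, b.Len()})
		default:
			b.WriteRune(r)
		}
	}
	return b.String(), spans
}

func main() {
	src, spans := parseMarkers("«x := 1»\n// comment\n«y := 2»\n")
	fmt.Printf("%q %v\n", src, spans) // "x := 1\n// comment\ny := 2\n" [{0 6} {18 24}]
}
```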
> func extractCounterPositionFromCode(code string) []CounterSegment {

This parser is a lot of complex code. If the tests pass, it tells us only that this parser is working, but perhaps it behaves differently from the parser actually used to render the page that the user sees, so the test would be inconsistent with reality. Really we need to factor out the parser used by that tool so that it too is exercised by these tests. Do you know where it is?
Thanks for your help!

Indeed, I saw that the project is in code freeze, no worries. I may not have enough time to keep working on this for now, and I'm unsure whether someone else can take a look, so expect this topic to be paused (I hope to resume in January).
This change seems like a good idea overall to me, happy to see it go in. Alan is a much better reviewer than I am for this part of `cmd/cover`, thanks.
I would recommend that you run as many of the Go tests in long mode as you can; a number of the coverage regression tests are time-consuming enough that they don't run in short mode (e.g. regular all.bash).
I pulled your patch and tried some hand testing; it looks like `go test internal/coverage/cfile` is failing. Here is a smaller reproducer:
```
$ go build -cover internal/strconv
# internal/strconv
/w/xgo/src/internal/strconv/math.go:141:29: syntax error: unexpected =, expected type
/w/xgo/src/internal/strconv/math.go:144:29: syntax error: unexpected =, expected type
/w/xgo/src/internal/strconv/math.go:147:29: syntax error: unexpected =, expected type
...
```
Better to track down these sorts of glitches ahead of time as opposed to only learning about them once the CL is submitted and the longer tests are running on the builders.
Hi @rudy.re...@sonarsource.com, FWIW, I ran a couple of the longtest trybots earlier today on this CL, which did indeed fail.
You can see the exact failures if you click on the results above, so that could be worth looking at when you return to this in the new year.