[tools] internal/diff/lcs: introduce line diffs

10 views
Skip to first unread message

Peter Weinberger (Gerrit)

unread,
Nov 9, 2025, 8:53:13 PMNov 9
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Peter Weinberger has uploaded the change for review

Commit message

internal/diff/lcs: introduce line diffs

WIP, not ready for review.
Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995

Change diff

diff --git a/internal/diff/lcs/old.go b/internal/diff/lcs/old.go
index 7b7c5cc..e941b86 100644
--- a/internal/diff/lcs/old.go
+++ b/internal/diff/lcs/old.go
@@ -27,9 +27,22 @@
// DiffRunes returns the differences between two rune sequences.
func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }

+// DiffLines returns the differencesw between two string sequences.
+func DiffLines(a, b []string) []Diff { return diff(linesSeqs{a, b}) }
+
+// A limit on how deeply the LCS algorithm should search. The value is just a guess.
+var maxDiffs = 100
+
+// set maxDiffs and return the old value.
+// (This may be a bad idea, as there could be concurrent calls to diff
+// using different values. The window is old=MaxDiffs(new); Diff(...); MaxDiffs(old))
+func MaxDiffs(n int) int {
+ old := maxDiffs
+ maxDiffs = n
+ return old
+}
+
func diff(seqs sequences) []Diff {
- // A limit on how deeply the LCS algorithm should search. The value is just a guess.
- const maxDiffs = 100
diff, _ := compute(seqs, twosided, maxDiffs/2)
return diff
}
diff --git a/internal/diff/lcs/old_test.go b/internal/diff/lcs/old_test.go
index 7381954..4c4ef31 100644
--- a/internal/diff/lcs/old_test.go
+++ b/internal/diff/lcs/old_test.go
@@ -256,4 +256,11 @@
compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes))
}
})
+ srcLines := strings.Split(src, "\n")
+ dstLines := strings.Split(dst, "\n")
+ b.Run("lines", func(b *testing.B) {
+ for b.Loop() {
+ compute(linesSeqs{srcLines, dstLines}, twosided, len(srcLines)+len(dstLines))
+ }
+ })
}
diff --git a/internal/diff/lcs/sequence.go b/internal/diff/lcs/sequence.go
index 811bb216..bb5b7c5 100644
--- a/internal/diff/lcs/sequence.go
+++ b/internal/diff/lcs/sequence.go
@@ -29,20 +29,30 @@

func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
- return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+ return commonPrefixLen(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
- return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+ return commonSuffixLen(s.a[ai:aj:aj], s.b[bi:bj:bj])
}

type runesSeqs struct{ a, b []rune }

func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
- return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+ return commonPrefixLen(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
- return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
+ return commonSuffixLen(s.a[ai:aj:aj], s.b[bi:bj:bj])
+}
+
+type linesSeqs struct{ a, b []string }
+
+func (s linesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
+func (s linesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
+ return commonPrefixLen(s.a[ai:aj], s.b[bi:bj])
+}
+func (s linesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
+ return commonSuffixLen(s.a[ai:aj], s.b[bi:bj])
}

// TODO(adonovan): optimize these functions using ideas from:
@@ -52,8 +62,8 @@
// TODO(adonovan): factor using generics when available,
// but measure performance impact.

-// commonPrefixLen* returns the length of the common prefix of a[ai:aj] and b[bi:bj].
-func commonPrefixLenBytes(a, b []byte) int {
+// commonPrefixLen returns the length of the common prefix of a[ai:aj] and b[bi:bj].
+func commonPrefixLen[T comparable](a, b []T) int {
n := min(len(a), len(b))
i := 0
for i < n && a[i] == b[i] {
@@ -61,14 +71,7 @@
}
return i
}
-func commonPrefixLenRunes(a, b []rune) int {
- n := min(len(a), len(b))
- i := 0
- for i < n && a[i] == b[i] {
- i++
- }
- return i
-}
+
func commonPrefixLenString(a, b string) int {
n := min(len(a), len(b))
i := 0
@@ -78,8 +81,8 @@
return i
}

-// commonSuffixLen* returns the length of the common suffix of a[ai:aj] and b[bi:bj].
-func commonSuffixLenBytes(a, b []byte) int {
+// commonSuffixLen returns the length of the common suffix of a[ai:aj] and b[bi:bj].
+func commonSuffixLen[T comparable](a, b []T) int {
n := min(len(a), len(b))
i := 0
for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
@@ -87,14 +90,7 @@
}
return i
}
-func commonSuffixLenRunes(a, b []rune) int {
- n := min(len(a), len(b))
- i := 0
- for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
- i++
- }
- return i
-}
+
func commonSuffixLenString(a, b string) int {
n := min(len(a), len(b))
i := 0

Change information

Files:
  • M internal/diff/lcs/old.go
  • M internal/diff/lcs/old_test.go
  • M internal/diff/lcs/sequence.go
Change size: M
Delta: 3 files changed, 42 insertions(+), 26 deletions(-)
Open in Gerrit

Related details

Attention set is empty
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newchange
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 1
Gerrit-Owner: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
unsatisfied_requirement
satisfied_requirement
open
diffy

Peter Weinberger (Gerrit)

unread,
Nov 11, 2025, 2:34:54 PMNov 11
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Peter Weinberger uploaded new patchset

Peter Weinberger uploaded patch set #2 to this change.
Following approvals got outdated and were removed:
  • TryBots-Pass: LUCI-TryBot-Result-1 by Go LUCI
Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 2
Gerrit-Owner: Peter Weinberger <p...@google.com>
Gerrit-Reviewer: Peter Weinberger <p...@google.com>
Gerrit-Attention: Peter Weinberger <p...@google.com>
unsatisfied_requirement
satisfied_requirement
open
diffy

Peter Weinberger (Gerrit)

unread,
Nov 12, 2025, 10:05:38 AMNov 12
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Peter Weinberger uploaded new patchset

Peter Weinberger uploaded patch set #3 to this change.
Following approvals got outdated and were removed:
  • TryBots-Pass: LUCI-TryBot-Result-1 by Go LUCI
Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 3
unsatisfied_requirement
satisfied_requirement
open
diffy

Peter Weinberger (Gerrit)

unread,
Nov 12, 2025, 2:31:40 PMNov 12
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Peter Weinberger uploaded new patchset

Peter Weinberger uploaded patch set #4 to this change.
Following approvals got outdated and were removed:
  • TryBots-Pass: LUCI-TryBot-Result+1 by Go LUCI
Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 4
unsatisfied_requirement
satisfied_requirement
open
diffy

Peter Weinberger (Gerrit)

unread,
Dec 7, 2025, 11:25:15 AMDec 7
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Peter Weinberger uploaded new patchset

Peter Weinberger uploaded patch set #7 to this change.
Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 7
unsatisfied_requirement
satisfied_requirement
open
diffy

Peter Weinberger (Gerrit)

unread,
Dec 9, 2025, 8:54:39 AM (13 days ago) Dec 9
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Peter Weinberger uploaded new patchset

Peter Weinberger uploaded patch set #8 to this change.
Following approvals got outdated and were removed:
  • TryBots-Pass: LUCI-TryBot-Result+1 by Go LUCI
Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
  • requirement is not satisfiedCode-Review
  • requirement satisfiedNo-Unresolved-Comments
  • requirement is not satisfiedReview-Enforcement
  • requirement is not satisfiedTryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newpatchset
Gerrit-Project: tools
Gerrit-Branch: master
Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
Gerrit-Change-Number: 719160
Gerrit-PatchSet: 8
unsatisfied_requirement
satisfied_requirement
open
diffy

Alan Donovan (Gerrit)

unread,
Dec 9, 2025, 4:18:08 PM (12 days ago) Dec 9
to Peter Weinberger, goph...@pubsubhelper.golang.org, Go LUCI, golang-co...@googlegroups.com
Attention needed from Peter Weinberger

Alan Donovan added 10 comments

Patchset-level comments
File-level comment, Patchset 8 (Latest):
Alan Donovan . resolved

Nice.

File internal/diff/lcs/common_test.go
Line 66, Patchset 8 (Latest): return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
Alan Donovan . unresolved

commonPrefixLen[string]

File internal/diff/lcs/old.go
Line 26, Patchset 8 (Latest):// DiffLines returns the differencesw between two string sequences.
Alan Donovan . unresolved

delete

File internal/diff/lcs/sequence.go

// TODO(adonovan): factor using generics when available,
// but measure performance impact.
Alan Donovan . unresolved

delete

File internal/diff/myers/diff.go
Line 58, Patchset 8 (Latest): if len(newedits) != len(edits) {
Alan Donovan . unresolved

I assume this is a cross check. Do we expect the algorithms to always produce the same number of edits, but not identical edits? Or is this a quick sanity check to avoid allocation in the common case when they are equal?

Line 59, Patchset 8 (Latest): var _ diff.Edit
Alan Donovan . unresolved

delete

Line 69, Patchset 8 (Latest): log.Printf("%d != %d %v, %v", len(newedits), len(edits), newlocs, locs)
return edits // PJW: work in progress, do not submit
Alan Donovan . unresolved

This looks unfinished.

File internal/diff/ndiff.go
Line 15, Patchset 8 (Latest):// Lines computes lines differenes between two strings.
Alan Donovan . unresolved

differences


Better:

Lines computes differences between two strings. All edits are at line boundaries.

File internal/diff/unified.go
Line 177, Patchset 8 (Latest): offsets := []int{0}
Alan Donovan . unresolved

Use bytes.Count to scan the file and preallocate the correct array size?

Line 179, Patchset 8 (Latest): for i := 0; i < len(text); i++ {
if text[i] == '\n' { // keep the newlines
lines = append(lines, text[offsets[len(offsets)-1]:i+1])
offsets = append(offsets, i+1)
}
}
Alan Donovan . unresolved

Use bytes.Index as it uses vector instructions to compare 32 bytes at a time.

Open in Gerrit

Related details

Attention is currently required from:
  • Peter Weinberger
Submit Requirements:
    • requirement is not satisfiedCode-Review
    • requirement is not satisfiedNo-Unresolved-Comments
    • requirement is not satisfiedReview-Enforcement
    • requirement satisfiedTryBots-Pass
    Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
    Gerrit-MessageType: comment
    Gerrit-Project: tools
    Gerrit-Branch: master
    Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
    Gerrit-Change-Number: 719160
    Gerrit-PatchSet: 8
    Gerrit-Owner: Peter Weinberger <p...@google.com>
    Gerrit-Reviewer: Alan Donovan <adon...@google.com>
    Gerrit-Reviewer: Peter Weinberger <p...@google.com>
    Gerrit-Attention: Peter Weinberger <p...@google.com>
    Gerrit-Comment-Date: Tue, 09 Dec 2025 21:18:05 +0000
    Gerrit-HasComments: Yes
    Gerrit-Has-Labels: No
    unsatisfied_requirement
    satisfied_requirement
    open
    diffy

    Peter Weinberger (Gerrit)

    unread,
    Dec 11, 2025, 11:04:08 AM (11 days ago) Dec 11
    to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com
    Attention needed from Peter Weinberger

    Peter Weinberger uploaded new patchset

    Peter Weinberger uploaded patch set #9 to this change.
    Following approvals got outdated and were removed:
    • TryBots-Pass: LUCI-TryBot-Result+1 by Go LUCI

    Related details

    Attention is currently required from:
    • Peter Weinberger
    Submit Requirements:
      • requirement is not satisfiedCode-Review
      • requirement is not satisfiedNo-Unresolved-Comments
      • requirement is not satisfiedReview-Enforcement
      • requirement is not satisfiedTryBots-Pass
      Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
      Gerrit-MessageType: newpatchset
      Gerrit-Project: tools
      Gerrit-Branch: master
      Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
      Gerrit-Change-Number: 719160
      Gerrit-PatchSet: 9
      unsatisfied_requirement
      open
      diffy

      Peter Weinberger (Gerrit)

      unread,
      Dec 20, 2025, 3:49:44 PM (yesterday) Dec 20
      to goph...@pubsubhelper.golang.org, Go LUCI, Alan Donovan, golang-co...@googlegroups.com
      Attention needed from Alan Donovan

      Peter Weinberger voted and added 10 comments

      Votes added by Peter Weinberger

      Commit-Queue+1

      10 comments

      Patchset-level comments
      File-level comment, Patchset 8:
      Peter Weinberger . resolved

      s

      File internal/diff/lcs/common_test.go
      Line 66, Patchset 8: return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
      Alan Donovan . resolved

      commonPrefixLen[string]

      Peter Weinberger

      That doesn't work. CommonPrefixLen takes []T, which does not match string.

      File internal/diff/lcs/old.go
      Line 26, Patchset 8:// DiffLines returns the differencesw between two string sequences.
      Alan Donovan . resolved

      delete

      Peter Weinberger

      Done

      File internal/diff/lcs/sequence.go

      // TODO(adonovan): factor using generics when available,
      // but measure performance impact.
      Alan Donovan . resolved

      delete

      Peter Weinberger

      Done

      File internal/diff/myers/diff.go
      Line 58, Patchset 8: if len(newedits) != len(edits) {
      Alan Donovan . resolved

      I assume this is a cross check. Do we expect the algorithms to always produce the same number of edits, but not identical edits? Or is this a quick sanity check to avoid allocation in the common case when they are equal?

      Peter Weinberger

      it's now gone.

      This whole routine would go away once the code review for my other CL (https://go-review.git.corp.google.com/c/tools/+/724660) is finished.

      Line 59, Patchset 8: var _ diff.Edit
      Alan Donovan . resolved

      delete

      Peter Weinberger

      Done

      Line 69, Patchset 8: log.Printf("%d != %d %v, %v", len(newedits), len(edits), newlocs, locs)

      return edits // PJW: work in progress, do not submit
      Alan Donovan . resolved

      This looks unfinished.

      Peter Weinberger

      Done

      File internal/diff/ndiff.go
      Line 15, Patchset 8:// Lines computes lines differenes between two strings.
      Alan Donovan . resolved

      differences


      Better:

      Lines computes differences between two strings. All edits are at line boundaries.

      Peter Weinberger

      Done

      File internal/diff/unified.go
      Line 177, Patchset 8: offsets := []int{0}
      Alan Donovan . resolved

      Use bytes.Count to scan the file and preallocate the correct array size?

      Peter Weinberger

      I don't think it is worth it. The resulting logic would have to check for 0 and the savings is at best a tiny fraction of all the other processing.

      Line 179, Patchset 8: for i := 0; i < len(text); i++ {

      if text[i] == '\n' { // keep the newlines
      lines = append(lines, text[offsets[len(offsets)-1]:i+1])
      offsets = append(offsets, i+1)
      }
      }
      Alan Donovan . resolved

      Use bytes.Index as it uses vector instructions to compare 32 bytes at a time.

      Peter Weinberger

      I couldn't see a simple way of writing that code so that it works if the last line does not end with \n, as in some of the tests.

      Open in Gerrit

      Related details

      Attention is currently required from:
      • Alan Donovan
      Submit Requirements:
        • requirement is not satisfiedCode-Review
        • requirement satisfiedNo-Unresolved-Comments
        • requirement is not satisfiedReview-Enforcement
        • requirement is not satisfiedTryBots-Pass
        Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
        Gerrit-MessageType: comment
        Gerrit-Project: tools
        Gerrit-Branch: master
        Gerrit-Change-Id: Ia0f48d5fec2ebb7b314b71e3d58f5bab26ef8995
        Gerrit-Change-Number: 719160
        Gerrit-PatchSet: 10
        Gerrit-Owner: Peter Weinberger <p...@google.com>
        Gerrit-Reviewer: Alan Donovan <adon...@google.com>
        Gerrit-Reviewer: Peter Weinberger <p...@google.com>
        Gerrit-Attention: Alan Donovan <adon...@google.com>
        Gerrit-Comment-Date: Sat, 20 Dec 2025 20:49:41 +0000
        Gerrit-HasComments: Yes
        Gerrit-Has-Labels: Yes
        Comment-In-Reply-To: Alan Donovan <adon...@google.com>
        unsatisfied_requirement
        satisfied_requirement
        open
        diffy
        Reply all
        Reply to author
        Forward
        0 new messages