yunhao zhang has uploaded this change for review.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Benchmark:
name old time/op new time/op delta
SearchWrappers 56.5ns ± 1% 55.1ns ± 0% -2.58% (p=0.000 n=10+10)
SortString1K 58.5µs ± 0% 64.1µs ± 0% +9.54% (p=0.000 n=9+10)
SortString1K_Slice 55.5µs ± 1% 57.6µs ± 1% +3.75% (p=0.000 n=10+9)
StableString1K 81.1µs ± 1% 82.8µs ± 1% +2.02% (p=0.000 n=9+10)
SortInt1K 24.9µs ± 1% 26.0µs ± 1% +4.58% (p=0.000 n=10+9)
StableInt1K 39.4µs ± 1% 39.7µs ± 1% +0.91% (p=0.001 n=7+8)
StableInt1K_Slice 36.3µs ± 1% 37.3µs ± 0% +2.72% (p=0.000 n=8+9)
SortInt64K 3.55ms ± 0% 3.41ms ± 0% -4.13% (p=0.000 n=10+10)
SortInt64K_Slice 3.40ms ± 0% 3.19ms ± 0% -6.35% (p=0.000 n=10+9)
StableInt64K 3.53ms ± 1% 3.53ms ± 1% ~ (p=0.739 n=10+10)
Sort1e2 24.2µs ± 0% 24.8µs ± 0% +2.59% (p=0.000 n=10+10)
Stable1e2 42.7µs ± 0% 42.6µs ± 0% -0.34% (p=0.018 n=10+10)
Sort1e4 5.12ms ± 0% 5.08ms ± 0% -0.81% (p=0.000 n=10+9)
Stable1e4 13.5ms ± 0% 13.5ms ± 1% ~ (p=0.447 n=9+10)
Sort1e6 784ms ± 0% 768ms ± 0% -1.99% (p=0.000 n=10+10)
Stable1e6 2.67s ± 1% 2.66s ± 0% ~ (p=0.315 n=10+9)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
M src/sort/zfuncversion.go
M src/sort/genzfunc.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/example_multi_test.go
5 files changed, 539 insertions(+), 244 deletions(-)
diff --git a/src/sort/example_multi_test.go b/src/sort/example_multi_test.go
index de6ec14..93f2d3e 100644
--- a/src/sort/example_multi_test.go
+++ b/src/sort/example_multi_test.go
@@ -126,7 +126,7 @@
// By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}]
- // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {r Go 100} {gri Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
+ // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
// By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
}
diff --git a/src/sort/genzfunc.go b/src/sort/genzfunc.go
index ed04e33..0f504005 100644
--- a/src/sort/genzfunc.go
+++ b/src/sort/genzfunc.go
@@ -117,7 +117,7 @@
return
}
// skip casts
- if ident.Name == "int" || ident.Name == "uint" {
+ if ident.Name == "int" || ident.Name == "uint" || ident.Name == "nextPowerOfTwo" {
return
}
if len(ce.Args) < 1 {
diff --git a/src/sort/slice.go b/src/sort/slice.go
index ba5c2e2..1f46111 100644
--- a/src/sort/slice.go
+++ b/src/sort/slice.go
@@ -4,6 +4,11 @@
package sort
+import (
+ "math/bits"
+ "strconv"
+)
+
// Slice sorts the slice x given the provided less function.
// It panics if x is not a slice.
//
@@ -17,7 +22,8 @@
rv := reflectValueOf(x)
swap := reflectSwapper(x)
length := rv.Len()
- quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
+ limit := strconv.IntSize - bits.LeadingZeros(uint(length))
+ recurse_func(lessSwap{less, swap}, 0, length, 0, false, limit)
}
// SliceStable sorts the slice x using the provided less
diff --git a/src/sort/sort.go b/src/sort/sort.go
index 7493107..e7ffa38 100644
--- a/src/sort/sort.go
+++ b/src/sort/sort.go
@@ -7,6 +7,11 @@
// Package sort provides primitives for sorting slices and user-defined collections.
package sort
+import (
+ "math/bits"
+ "strconv"
+)
+
// An implementation of Interface can be sorted by the routines in this package.
// The methods refer to elements of the underlying collection by integer index.
type Interface interface {
@@ -80,165 +85,287 @@
}
}
-// Quicksort, loosely following Bentley and McIlroy,
-// ``Engineering a Sort Function,'' SP&E November 1993.
-
-// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
-func medianOfThree(data Interface, m1, m0, m2 int) {
- // sort 3 elements
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- // data[m0] <= data[m1]
- if data.Less(m2, m1) {
- data.Swap(m2, m1)
- // data[m0] <= data[m2] && data[m1] < data[m2]
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- }
- // now data[m0] <= data[m1] <= data[m2]
+// Sort sorts data.
+// The sort is not guaranteed to be stable.
+func Sort(data Interface) {
+ n := data.Len()
+ limit := strconv.IntSize - bits.LeadingZeros(uint(n))
+ recurse(data, 0, n, 0, false, limit)
}
-func swapRange(data Interface, a, b, n int) {
- for i := 0; i < n; i++ {
- data.Swap(a+i, b+i)
- }
-}
+// recurse sorts `data` recursively.
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+func recurse(data Interface, a, b, pred int, predExist bool, limit int) {
+ const maxInsertion = 12
-func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
- m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
- if hi-lo > 40 {
- // Tukey's ``Ninther,'' median of three medians of three.
- s := (hi - lo) / 8
- medianOfThree(data, lo, lo+s, lo+2*s)
- medianOfThree(data, m, m-s, m+s)
- medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
- }
- medianOfThree(data, lo, m, hi-1)
+ var (
+ wasBalanced = true // whether the last partitioning was reasonably balanced
+ wasPartitioned = true // whether the slice was already partitioned
+ )
- // Invariants are:
- // data[lo] = pivot (set up by ChoosePivot)
- // data[lo < i < a] < pivot
- // data[a <= i < b] <= pivot
- // data[b <= i < c] unexamined
- // data[c <= i < hi-1] > pivot
- // data[hi-1] >= pivot
- pivot := lo
- a, c := lo+1, hi-1
-
- for ; a < c && data.Less(a, pivot); a++ {
- }
- b := a
for {
- for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
- }
- for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
- }
- if b >= c {
- break
- }
- // data[b] > pivot; data[c-1] <= pivot
- data.Swap(b, c-1)
- b++
- c--
- }
- // If hi-c<3 then there are duplicates (by property of median of nine).
- // Let's be a bit more conservative, and set border to 5.
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- // Lets test some points for equality to pivot
- dups := 0
- if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
- data.Swap(c, hi-1)
- c++
- dups++
- }
- if !data.Less(b-1, pivot) { // data[b-1] = pivot
- b--
- dups++
- }
- // m-lo = (hi-lo)/2 > 6
- // b-lo > (hi-lo)*3/4-1 > 8
- // ==> m < b ==> data[m] <= pivot
- if !data.Less(m, pivot) { // data[m] = pivot
- data.Swap(m, b-1)
- b--
- dups++
- }
- // if at least 2 points are equal to pivot, assume skewed distribution
- protect = dups > 1
- }
- if protect {
- // Protect against a lot of duplicates
- // Add invariant:
- // data[a <= i < b] unexamined
- // data[b <= i < c] = pivot
- for {
- for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
- }
- for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
- }
- if a >= b {
- break
- }
- // data[a] == pivot; data[b-1] < pivot
- data.Swap(a, b-1)
- a++
- b--
- }
- }
- // Swap pivot into middle
- data.Swap(pivot, b-1)
- return b - 1, c
-}
+ length := b - a
-func quickSort(data Interface, a, b, maxDepth int) {
- for b-a > 12 { // Use ShellSort for slices <= 12 elements
- if maxDepth == 0 {
+ if length <= maxInsertion {
+ insertionSort(data, a, b)
+ return
+ }
+
+ // Fall back to heapsort if too many bad choices were made.
+ if limit == 0 {
heapSort(data, a, b)
return
}
- maxDepth--
- mlo, mhi := doPivot(data, a, b)
- // Avoiding recursion on the larger subproblem guarantees
- // a stack depth of at most lg(b-a).
- if mlo-a < b-mhi {
- quickSort(data, a, mlo, maxDepth)
- a = mhi // i.e., quickSort(data, mhi, b)
- } else {
- quickSort(data, mhi, b, maxDepth)
- b = mlo // i.e., quickSort(data, a, mlo)
+
+ // If the last partitioning was imbalanced, we need to breaking patterns.
+ if !wasBalanced {
+ breakPatterns(data, a, b)
+ limit--
}
- }
- if b-a > 1 {
- // Do ShellSort pass with gap 6
- // It could be written in this simplified form cause b-a <= 12
- for i := a + 6; i < b; i++ {
- if data.Less(i, i-6) {
- data.Swap(i, i-6)
+
+ pivotidx, likelySorted := choosePivot(data, a, b)
+
+ // The slice is likely already sorted.
+ if wasBalanced && wasPartitioned && likelySorted {
+ if partialInsertionSort(data, a, b) {
+ return
}
}
- insertionSort(data, a, b)
+
+ // Probably the slice contains many duplicate elements, partition the slice into
+ // elements equal to and elements greater than the pivot.
+ if predExist && !data.Less(pred, pivotidx) {
+ mid := partitionEqual(data, a, b, pivotidx)
+ a = mid
+ continue
+ }
+
+ mid, wasP := partition(data, a, b, pivotidx)
+ wasPartitioned = wasP
+
+ leftLen, rightLen := mid-a, b-mid
+ balanceThreshold := length / 8
+ if leftLen > rightLen {
+ wasBalanced = rightLen >= balanceThreshold
+ recurse(data, a, mid, pred, predExist, limit)
+ a = mid + 1
+ pred = mid
+ predExist = true
+ } else {
+ wasBalanced = leftLen >= balanceThreshold
+ recurse(data, mid+1, b, mid, true, limit)
+ b = mid
+ }
}
}
-// Sort sorts data in ascending order as determined by the Less method.
-// It makes one call to data.Len to determine n and O(n*log(n)) calls to
-// data.Less and data.Swap. The sort is not guaranteed to be stable.
-func Sort(data Interface) {
- n := data.Len()
- quickSort(data, 0, n, maxDepth(n))
+func partition(data Interface, a, b, pivotidx int) (newpivotidx int, wasPartitioned bool) {
+ data.Swap(a, pivotidx)
+ i, j := a+1, b-1
+
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ data.Swap(j, a)
+ return j, true
+ }
+ data.Swap(i, j)
+ i++
+ j--
+
+ for {
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ data.Swap(j, a)
+ return j, false
}
-// maxDepth returns a threshold at which quicksort should switch
-// to heapsort. It returns 2*ceil(lg(n+1)).
-func maxDepth(n int) int {
- var depth int
- for i := n; i > 0; i >>= 1 {
- depth++
+// partitionEqual partitions `data` into elements equal to `data[pivotidx]` followed by elements greater than `data[pivotidx]`.
+// It assumed that `data` does not contain elements smaller than the `data[pivotidx]`.
+func partitionEqual(data Interface, a, b, pivotidx int) int {
+ data.Swap(a, pivotidx)
+
+ L := a + 1
+ R := b
+ for {
+ for L < R && !data.Less(a, L) {
+ L++
+ }
+ for L < R && data.Less(a, R-1) {
+ R--
+ }
+ if L >= R {
+ break
+ }
+ R--
+ data.Swap(L, R)
+ L++
}
- return depth * 2
+ return L
+}
+
+// partialInsertionSort partially sorts a slice, returns `true` if the slice is sorted at the end.
+func partialInsertionSort(data Interface, a, b int) bool {
+ const (
+ maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
+ shortestShifting = 50 // don't shift any elements on short arrays
+ )
+ i := a + 1
+ for j := 0; j < maxSteps; j++ {
+ for i < b && !data.Less(i, i-1) {
+ i++
+ }
+
+ if i == b {
+ return true
+ }
+
+ if b-a < shortestShifting {
+ return false
+ }
+
+ data.Swap(i-1, i)
+
+ // Shift the smaller one to the left.
+ if i-a >= 2 {
+ for j := i - 1; j >= 1; j-- {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ // Shift the greater one to the right.
+ if b-i >= 2 {
+ for j := 1; j < b; j++ {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ }
+ return false
+}
+
+// breakPatterns scatters some elements around in an attempt to break some patterns
+// that might cause imbalanced partitions in quicksort.
+func breakPatterns(data Interface, a, b int) {
+ length := b - a
+ if length >= 8 {
+ // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
+ random := uint(length)
+ random ^= random << 13
+ random ^= random >> 17
+ random ^= random << 5
+
+ modulus := nextPowerOfTwo(length)
+ pos := a + length/8
+
+ for i := 0; i < 3; i++ {
+ other := int(random & (modulus - 1))
+ if other >= length {
+ other -= length
+ }
+ data.Swap(pos-1+i, a+other)
+ }
+ }
+}
+
+// choosePivot chooses a pivot in `data`.
+//
+// `data` might be reordered in this function.
+//
+// [0,8): choose a static pivot.
+// [8,ShortestNinther): use the simple median-of-three method.
+// [ShortestNinther,∞): use the Tukey’s ninther method.
+func choosePivot(data Interface, x, y int) (pivotidx int, likelySorted bool) {
+ const (
+ shortestNinther = 50
+ maxSwaps = 4 * 3
+ )
+
+ l := y - x
+
+ var (
+ swaps int
+ a = x + l/4*1
+ b = x + l/4*2
+ c = x + l/4*3
+ )
+
+ if l >= 8 {
+ if l >= shortestNinther {
+ // Tukey’s ninther method.
+ sortAdjacent(data, &a, &swaps)
+ sortAdjacent(data, &b, &swaps)
+ sortAdjacent(data, &c, &swaps)
+ }
+ // Find the median among `a`, `b`, `c` and stores it into `b`.
+ sort3(data, &a, &b, &c, &swaps)
+ }
+
+ if swaps < maxSwaps {
+ return b, (swaps == 0)
+ } else {
+ // The maximum number of swaps was performed.
+ // Reversing will probably help.
+ reverseRange(data, x, y)
+ return 2*x + (l - 1 - b), true
+ }
+}
+
+// sort2 swaps `a, b` so that `data[a] <= data[b]`.
+func sort2(data Interface, a, b, swaps *int) {
+ if data.Less(*b, *a) {
+ *a, *b = *b, *a
+ *swaps++
+ }
+}
+
+// sort3 swaps `a, b, c` so that `data[a] <= data[b] <= data[c]`.
+func sort3(data Interface, a, b, c, swaps *int) {
+ sort2(data, a, b, swaps)
+ sort2(data, b, c, swaps)
+ sort2(data, a, b, swaps)
+}
+
+// sortAdjacent finds the median of `data[a - 1], data[a], data[a + 1]` and stores the index into `a`.
+func sortAdjacent(data Interface, a, swaps *int) {
+ t1 := *a - 1
+ t2 := *a + 1
+ sort3(data, &t1, a, &t2, swaps)
+}
+
+func reverseRange(data Interface, a, b int) {
+ i := a
+ j := b - a - 1
+ for i < j {
+ data.Swap(i, j)
+ i++
+ j--
+ }
+}
+
+func nextPowerOfTwo(length int) uint {
+ shift := uint(strconv.IntSize - bits.LeadingZeros(uint(length)))
+ return uint(1 << shift)
}
// lessSwap is a pair of Less and Swap function for use with the
@@ -525,6 +652,12 @@
swapRange(data, m-i, m, i)
}
+func swapRange(data Interface, a, b, n int) {
+ for i := 0; i < n; i++ {
+ data.Swap(a+i, b+i)
+ }
+}
+
/*
Complexity of Stable Sorting
diff --git a/src/sort/zfuncversion.go b/src/sort/zfuncversion.go
index 30067cb..8be59df 100644
--- a/src/sort/zfuncversion.go
+++ b/src/sort/zfuncversion.go
@@ -48,114 +48,230 @@
}
}
-// Auto-generated variant of sort.go:medianOfThree
-func medianOfThree_func(data lessSwap, m1, m0, m2 int) {
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- if data.Less(m2, m1) {
- data.Swap(m2, m1)
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- }
-}
-
-// Auto-generated variant of sort.go:swapRange
-func swapRange_func(data lessSwap, a, b, n int) {
- for i := 0; i < n; i++ {
- data.Swap(a+i, b+i)
- }
-}
-
-// Auto-generated variant of sort.go:doPivot
-func doPivot_func(data lessSwap, lo, hi int) (midlo, midhi int) {
- m := int(uint(lo+hi) >> 1)
- if hi-lo > 40 {
- s := (hi - lo) / 8
- medianOfThree_func(data, lo, lo+s, lo+2*s)
- medianOfThree_func(data, m, m-s, m+s)
- medianOfThree_func(data, hi-1, hi-1-s, hi-1-2*s)
- }
- medianOfThree_func(data, lo, m, hi-1)
- pivot := lo
- a, c := lo+1, hi-1
- for ; a < c && data.Less(a, pivot); a++ {
- }
- b := a
+// Auto-generated variant of sort.go:recurse
+func recurse_func(data lessSwap, a, b, pred int, predExist bool, limit int) {
+ const maxInsertion = 12
+ var (
+ wasBalanced = true
+ wasPartitioned = true
+ )
for {
- for ; b < c && !data.Less(pivot, b); b++ {
+ length := b - a
+ if length <= maxInsertion {
+ insertionSort_func(data, a, b)
+ return
}
- for ; b < c && data.Less(pivot, c-1); c-- {
- }
- if b >= c {
- break
- }
- data.Swap(b, c-1)
- b++
- c--
- }
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- dups := 0
- if !data.Less(pivot, hi-1) {
- data.Swap(c, hi-1)
- c++
- dups++
- }
- if !data.Less(b-1, pivot) {
- b--
- dups++
- }
- if !data.Less(m, pivot) {
- data.Swap(m, b-1)
- b--
- dups++
- }
- protect = dups > 1
- }
- if protect {
- for {
- for ; a < b && !data.Less(b-1, pivot); b-- {
- }
- for ; a < b && data.Less(a, pivot); a++ {
- }
- if a >= b {
- break
- }
- data.Swap(a, b-1)
- a++
- b--
- }
- }
- data.Swap(pivot, b-1)
- return b - 1, c
-}
-
-// Auto-generated variant of sort.go:quickSort
-func quickSort_func(data lessSwap, a, b, maxDepth int) {
- for b-a > 12 {
- if maxDepth == 0 {
+ if limit == 0 {
heapSort_func(data, a, b)
return
}
- maxDepth--
- mlo, mhi := doPivot_func(data, a, b)
- if mlo-a < b-mhi {
- quickSort_func(data, a, mlo, maxDepth)
- a = mhi
- } else {
- quickSort_func(data, mhi, b, maxDepth)
- b = mlo
+ if !wasBalanced {
+ breakPatterns_func(data, a, b)
+ limit--
}
- }
- if b-a > 1 {
- for i := a + 6; i < b; i++ {
- if data.Less(i, i-6) {
- data.Swap(i, i-6)
+ pivotidx, likelySorted := choosePivot_func(data, a, b)
+ if wasBalanced && wasPartitioned && likelySorted {
+ if partialInsertionSort_func(data, a, b) {
+ return
}
}
- insertionSort_func(data, a, b)
+ if predExist && !data.Less(pred, pivotidx) {
+ mid := partitionEqual_func(data, a, b, pivotidx)
+ a = mid
+ continue
+ }
+ mid, wasP := partition_func(data, a, b, pivotidx)
+ wasPartitioned = wasP
+ leftLen, rightLen := mid-a, b-mid
+ balanceThreshold := length / 8
+ if leftLen > rightLen {
+ wasBalanced = rightLen >= balanceThreshold
+ recurse_func(data, a, mid, pred, predExist, limit)
+ a = mid + 1
+ pred = mid
+ predExist = true
+ } else {
+ wasBalanced = leftLen >= balanceThreshold
+ recurse_func(data, mid+1, b, mid, true, limit)
+ b = mid
+ }
+ }
+}
+
+// Auto-generated variant of sort.go:partition
+func partition_func(data lessSwap, a, b, pivotidx int) (newpivotidx int, wasPartitioned bool) {
+ data.Swap(a, pivotidx)
+ i, j := a+1, b-1
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ data.Swap(j, a)
+ return j, true
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ for {
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ data.Swap(j, a)
+ return j, false
+}
+
+// Auto-generated variant of sort.go:partitionEqual
+func partitionEqual_func(data lessSwap, a, b, pivotidx int) int {
+ data.Swap(a, pivotidx)
+ L := a + 1
+ R := b
+ for {
+ for L < R && !data.Less(a, L) {
+ L++
+ }
+ for L < R && data.Less(a, R-1) {
+ R--
+ }
+ if L >= R {
+ break
+ }
+ R--
+ data.Swap(L, R)
+ L++
+ }
+ return L
+}
+
+// Auto-generated variant of sort.go:partialInsertionSort
+func partialInsertionSort_func(data lessSwap, a, b int) bool {
+ const (
+ maxSteps = 5
+ shortestShifting = 50
+ )
+ i := a + 1
+ for j := 0; j < maxSteps; j++ {
+ for i < b && !data.Less(i, i-1) {
+ i++
+ }
+ if i == b {
+ return true
+ }
+ if b-a < shortestShifting {
+ return false
+ }
+ data.Swap(i-1, i)
+ if i-a >= 2 {
+ for j := i - 1; j >= 1; j-- {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ if b-i >= 2 {
+ for j := 1; j < b; j++ {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ }
+ return false
+}
+
+// Auto-generated variant of sort.go:breakPatterns
+func breakPatterns_func(data lessSwap, a, b int) {
+ length := b - a
+ if length >= 8 {
+ random := uint(length)
+ random ^= random << 13
+ random ^= random >> 17
+ random ^= random << 5
+ modulus := nextPowerOfTwo(length)
+ pos := a + length/8
+ for i := 0; i < 3; i++ {
+ other := int(random & (modulus - 1))
+ if other >= length {
+ other -= length
+ }
+ data.Swap(pos-1+i, a+other)
+ }
+ }
+}
+
+// Auto-generated variant of sort.go:choosePivot
+func choosePivot_func(data lessSwap, x, y int) (pivotidx int, likelySorted bool) {
+ const (
+ shortestNinther = 50
+ maxSwaps = 4 * 3
+ )
+ l := y - x
+ var (
+ swaps int
+ a = x + l/4*1
+ b = x + l/4*2
+ c = x + l/4*3
+ )
+ if l >= 8 {
+ if l >= shortestNinther {
+ sortAdjacent_func(data, &a, &swaps)
+ sortAdjacent_func(data, &b, &swaps)
+ sortAdjacent_func(data, &c, &swaps)
+ }
+ sort3_func(data, &a, &b, &c, &swaps)
+ }
+ if swaps < maxSwaps {
+ return b, (swaps == 0)
+ } else {
+ reverseRange_func(data, x, y)
+ return 2*x + (l - 1 - b), true
+ }
+}
+
+// Auto-generated variant of sort.go:sort2
+func sort2_func(data lessSwap, a, b, swaps *int) {
+ if data.Less(*b, *a) {
+ *a, *b = *b, *a
+ *swaps++
+ }
+}
+
+// Auto-generated variant of sort.go:sort3
+func sort3_func(data lessSwap, a, b, c, swaps *int) {
+ sort2_func(data, a, b, swaps)
+ sort2_func(data, b, c, swaps)
+ sort2_func(data, a, b, swaps)
+}
+
+// Auto-generated variant of sort.go:sortAdjacent
+func sortAdjacent_func(data lessSwap, a, swaps *int) {
+ t1 := *a - 1
+ t2 := *a + 1
+ sort3_func(data, &t1, a, &t2, swaps)
+}
+
+// Auto-generated variant of sort.go:reverseRange
+func reverseRange_func(data lessSwap, a, b int) {
+ i := a
+ j := b - a - 1
+ for i < j {
+ data.Swap(i, j)
+ i++
+ j--
}
}
@@ -263,3 +379,10 @@
}
swapRange_func(data, m-i, m, i)
}
+
+// Auto-generated variant of sort.go:swapRange
+func swapRange_func(data lessSwap, a, b, n int) {
+ for i := 0; i < n; i++ {
+ data.Swap(a+i, b+i)
+ }
+}
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
yunhao zhang uploaded patch set #2 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
5 files changed, 541 insertions(+), 244 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #3 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
Benchmark:
name old time/op new time/op delta
SearchWrappers 56.5ns ± 1% 55.1ns ± 0% -2.58% (p=0.000 n=10+10)
SortString1K 58.5µs ± 0% 64.1µs ± 0% +9.54% (p=0.000 n=9+10)
SortString1K_Slice 55.5µs ± 1% 57.6µs ± 1% +3.75% (p=0.000 n=10+9)
StableString1K 81.1µs ± 1% 82.8µs ± 1% +2.02% (p=0.000 n=9+10)
SortInt1K 24.9µs ± 1% 26.0µs ± 1% +4.58% (p=0.000 n=10+9)
SortInt1K_Sorted 16.5µs ± 0% 2.0µs ± 0% -87.85% (p=0.000 n=9+9)
SortInt1K_Reversed 17.9µs ± 0% 3.0µs ± 0% -83.51% (p=0.000 n=8+8)
SortInt1K_Mod8 14.3µs ± 2% 10.6µs ± 0% -25.99% (p=0.000 n=9+10)
StableInt1K 39.4µs ± 1% 39.7µs ± 1% +0.91% (p=0.001 n=7+8)
StableInt1K_Slice 36.3µs ± 1% 37.3µs ± 0% +2.72% (p=0.000 n=8+9)
SortInt64K 3.55ms ± 0% 3.41ms ± 0% -4.13% (p=0.000 n=10+10)
SortInt64K_Slice 3.40ms ± 0% 3.19ms ± 0% -6.35% (p=0.000 n=10+9)
StableInt64K 3.53ms ± 1% 3.53ms ± 1% ~ (p=0.739 n=10+10)
Sort1e2 24.2µs ± 0% 24.8µs ± 0% +2.59% (p=0.000 n=10+10)
Stable1e2 42.7µs ± 0% 42.6µs ± 0% -0.34% (p=0.018 n=10+10)
Sort1e4 5.12ms ± 0% 5.08ms ± 0% -0.81% (p=0.000 n=10+9)
Stable1e4 13.5ms ± 0% 13.5ms ± 1% ~ (p=0.447 n=9+10)
Sort1e6 784ms ± 0% 768ms ± 0% -1.99% (p=0.000 n=10+10)
Stable1e6 2.67s ± 1% 2.66s ± 0% ~ (p=0.315 n=10+9)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
M src/sort/zfuncversion.go
M src/sort/genzfunc.go
M src/sort/sort_test.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/example_multi_test.go
6 files changed, 583 insertions(+), 244 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #4 to this change.
A src/sort/bits.go
M src/sort/sort_test.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/example_multi_test.go
7 files changed, 647 insertions(+), 243 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #5 to this change.
7 files changed, 642 insertions(+), 243 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #6 to this change.
A src/sort/bits.go
M src/sort/example_multi_test.go
M src/sort/genzfunc.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zfuncversion.go
7 files changed, 642 insertions(+), 243 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
1 comment:
Patchset:
Kindly ping :)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
1 comment:
Patchset:
Kindly ping :)
Thanks for sending the patch. We are in the 1.18 release cycle right now. We will review this for the 1.19 release after 1.18 is out. Thanks.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
yunhao zhang uploaded patch set #7 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
name old time/op new time/op delta
SearchWrappers-16 89.2ns ± 2% 89.0ns ± 1% ~ (p=0.971 n=10+10)
SortString1K-16 103µs ± 5% 114µs ± 4% +10.07% (p=0.000 n=9+10)
SortString1K_Slice-16 101µs ± 3% 101µs ± 4% ~ (p=0.971 n=10+10)
StableString1K-16 130µs ± 3% 161µs ± 4% +23.90% (p=0.000 n=10+10)
SortInt1K-16 51.8µs ± 4% 48.1µs ± 3% -7.15% (p=0.000 n=10+10)
SortInt1K_Sorted-16 31.4µs ± 3% 4.0µs ± 3% -87.35% (p=0.000 n=10+10)
SortInt1K_Reversed-16 33.5µs ± 2% 5.7µs ± 2% -82.99% (p=0.000 n=10+10)
SortInt1K_Mod8-16 26.4µs ± 1% 21.5µs ± 2% -18.79% (p=0.000 n=10+10)
StableInt1K-16 59.4µs ± 1% 60.3µs ± 1% +1.47% (p=0.000 n=9+9)
StableInt1K_Slice-16 57.7µs ± 3% 61.8µs ± 2% +7.03% (p=0.000 n=10+9)
SortInt64K-16 5.56ms ± 3% 5.19ms ± 1% -6.60% (p=0.000 n=10+9)
SortInt64K_Slice-16 5.30ms ± 3% 5.15ms ± 1% -2.87% (p=0.000 n=10+9)
StableInt64K-16 5.63ms ± 3% 5.61ms ± 2% ~ (p=0.720 n=10+9)
Sort1e2-16 33.3µs ± 2% 33.4µs ± 2% ~ (p=0.393 n=10+10)
Stable1e2-16 66.9µs ± 1% 66.9µs ± 1% ~ (p=0.842 n=10+9)
Sort1e4-16 6.55ms ± 0% 6.43ms ± 1% -1.78% (p=0.000 n=10+10)
Stable1e4-16 21.1ms ± 1% 20.9ms ± 1% -0.56% (p=0.022 n=9+10)
Sort1e6-16 993ms ± 1% 962ms ± 0% -3.07% (p=0.000 n=10+9)
Stable1e6-16 4.16s ± 0% 4.13s ± 0% -0.62% (p=0.000 n=10+9)
zyh@ubuntu-server:~/GitHub/test$ benchstat base-no-generics-1.txt pdqsort-no-generics-1.txt
name old time/op new time/op delta
SearchWrappers 88.3ns ± 2% 88.4ns ± 1% ~ (p=0.328 n=9+9)
SortString1K 100µs ± 2% 110µs ± 2% +10.85% (p=0.000 n=10+10)
SortString1K_Slice 97.4µs ± 0% 97.5µs ± 0% ~ (p=0.159 n=8+9)
StableString1K 126µs ± 1% 156µs ± 4% +23.36% (p=0.000 n=9+10)
SortInt1K 45.9µs ± 1% 42.9µs ± 2% -6.65% (p=0.000 n=10+9)
SortInt1K_Sorted 26.8µs ± 0% 2.9µs ± 0% -89.15% (p=0.000 n=10+9)
SortInt1K_Reversed 28.6µs ± 0% 4.2µs ± 1% -85.14% (p=0.000 n=8+10)
SortInt1K_Mod8 21.9µs ± 1% 17.1µs ± 2% -21.98% (p=0.000 n=10+10)
StableInt1K 58.8µs ± 1% 59.2µs ± 0% +0.69% (p=0.000 n=10+9)
StableInt1K_Slice 56.0µs ± 1% 60.8µs ± 1% +8.50% (p=0.000 n=9+9)
SortInt64K 5.02ms ± 0% 4.72ms ± 0% -5.91% (p=0.000 n=9+9)
SortInt64K_Slice 4.78ms ± 0% 4.58ms ± 0% -4.10% (p=0.000 n=9+8)
StableInt64K 5.13ms ± 2% 5.14ms ± 0% ~ (p=0.173 n=10+8)
Sort1e2 31.4µs ± 1% 31.8µs ± 1% +1.45% (p=0.000 n=10+9)
Stable1e2 64.1µs ± 2% 64.0µs ± 0% ~ (p=1.000 n=10+9)
Sort1e4 6.51ms ± 0% 6.41ms ± 0% -1.53% (p=0.000 n=9+10)
Stable1e4 20.9ms ± 1% 20.9ms ± 0% ~ (p=0.965 n=10+8)
Sort1e6 991ms ± 0% 966ms ± 1% -2.57% (p=0.000 n=9+10)
Stable1e6 4.16s ± 1% 4.14s ± 1% -0.41% (p=0.008 n=10+9)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
A src/sort/bits.go
M src/sort/example_multi_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
8 files changed, 978 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
yunhao zhang uploaded patch set #8 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
name old time/op new time/op delta
8 files changed, 957 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
Patch set 8:Run-TryBot +1
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
4 comments:
File src/sort/bits.go:
Patch Set #8, Line 7: // A copy of bits.LeadingZeros to avoid a dependency on the bits package.
It's fine for sort to depend on math/bits.
File src/sort/gen_sort_variants.go:
Patch Set #8, Line 243: const MaxInsertion = 12
Make this maxInsertion so that it doesn't appear to be exported.
Patch Set #8, Line 369: MaxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
maxSteps, shortestShifting
Patch Set #8, Line 442: ShortestNinther = 50
shortestNinher
maxSwaps
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
yunhao zhang uploaded patch set #9 to this change.
M src/sort/example_multi_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
7 files changed, 896 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
5 comments:
Patchset:
Thanks for your review!
File src/sort/bits.go:
Patch Set #8, Line 7: // A copy of bits.LeadingZeros to avoid a dependency on the bits package.
It's fine for sort to depend on math/bits.
Done
File src/sort/gen_sort_variants.go:
Patch Set #8, Line 243: const MaxInsertion = 12
Make this maxInsertion so that it doesn't appear to be exported.
Done
Patch Set #8, Line 369: MaxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
maxSteps, shortestShifting
Done
Patch Set #8, Line 442: ShortestNinther = 50
shortestNinher […]
Done
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
1 comment:
Patchset:
General sorting of strings seems to be become slower (by 20+%), at the benefit of sorting almost-sorted slices becoming much faster. Is this correct?
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #10 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
name old time/op new time/op delta
SearchWrappers-16 89.6ns ± 2% 92.0ns ± 1% +2.68% (p=0.000 n=10+10)
SortString1K-16 103µs ± 3% 105µs ± 1% ~ (p=0.089 n=10+10)
SortString1K_Slice-16 101µs ± 3% 102µs ± 2% +1.44% (p=0.028 n=10+9)
StableString1K-16 131µs ± 6% 135µs ± 2% +2.63% (p=0.015 n=10+10)
SortInt1K-16 50.8µs ± 2% 48.2µs ± 2% -5.07% (p=0.000 n=10+10)
SortInt1K_Sorted-16 31.0µs ± 1% 3.7µs ± 2% -88.20% (p=0.000 n=10+10)
SortInt1K_Reversed-16 32.7µs ± 2% 5.3µs ± 2% -83.75% (p=0.000 n=10+10)
SortInt1K_Mod8-16 25.7µs ± 2% 22.0µs ± 3% -14.38% (p=0.000 n=9+10)
StableInt1K-16 62.4µs ± 8% 61.5µs ± 8% ~ (p=0.247 n=10+10)
StableInt1K_Slice-16 60.8µs ± 7% 60.8µs ± 8% ~ (p=1.000 n=10+10)
SortInt64K-16 5.44ms ± 2% 5.17ms ± 2% -5.11% (p=0.000 n=10+10)
SortInt64K_Slice-16 5.21ms ± 3% 5.03ms ± 1% -3.51% (p=0.000 n=10+10)
StableInt64K-16 5.57ms ± 2% 5.60ms ± 2% ~ (p=0.247 n=10+10)
Sort1e2-16 32.9µs ± 2% 33.4µs ± 2% +1.51% (p=0.029 n=10+10)
Stable1e2-16 66.5µs ± 3% 64.9µs ± 1% -2.34% (p=0.001 n=10+9)
Sort1e4-16 6.53ms ± 1% 6.44ms ± 1% -1.40% (p=0.000 n=10+10)
Stable1e4-16 20.9ms ± 0% 20.5ms ± 0% -1.91% (p=0.000 n=9+9)
Sort1e6-16 987ms ± 0% 963ms ± 0% -2.45% (p=0.000 n=10+8)
Stable1e6-16 4.13s ± 0% 4.06s ± 0% -1.67% (p=0.000 n=9+10)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
M src/sort/example_multi_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
7 files changed, 896 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
1 comment:
Patchset:
General sorting of strings seems to be become slower (by 20+%), at the benefit of sorting almost-sor […]
The previous benchmark result is inaccurate, they‘re actually from different Go versions(we can see the StableSort is 20% slower than before, but we haven't changed any of its code). After synchronizing the latest code, the benchmark results are updated.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Russ Cox.
1 comment:
Patchset:
The previous benchmark result is inaccurate, they‘re actually from different Go versions(we can see […]
Thanks.
gen_sort_variants is also used to generate the sort code in https://pkg.go.dev/golang.org/x/exp/slices
Could you please plug the generator output there (zsortordered.go and zsortanyfunc.go) and run that repo's benchmarks?
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Keith Randall.
17 comments:
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 242: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pred int, predExist bool, limit int {{.ExtraParam}}) {
It would be nice to describe what each of the parameters mean here:
// pdqsort sorts data[a:b].
// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
Aren't pred,predExist just another way of saying a-1,a>0 ?
Patch Set #10, Line 281: "pred" "pivotidx"
pred and pivotidx are both indexes into the sorted array. It seems weird that one contains "idx" and one doesn't. Maybe "idx" isn't needed? (e.g. "a" and "b" are also indexes.)
Patch Set #10, Line 306: pivotidx
just "newpivot"
Patch Set #10, Line 306: newpivotidx
this could just be "pivot", I think.
Patch Set #10, Line 306: func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivotidx int {{.ExtraParam}}) (newpivotidx int, wasPartitioned bool) {
Comment as to what this does.
// partition does one quicksort partition.
// Let p = data[pivot]
// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot
// On return, data[newpivot] = p
Then describe the wasPartitioned boolean. In fact, I'm not sure what this means exactly. Is it just that did not need to move a (non-pivot) element? I think the name "wasPartitioned" is confusing. You mean here that "it was already partitioned on input, so we didn't have to do anything" but I initially thought it was "some work had to be done to partition it", which is the exact opposite. Maybe "alreadyPartitioned"? (that's the name pdqsort uses)
Patch Set #10, Line 308: i, j := a+1, b-1
// i and j are inclusive of the elements remaining to be partitioned.
Patch Set #10, Line 347: L := a + 1
This is using a different indexing scheme than partition does (R is exclusive, where j in partition was inclusive). Is there a reason for that?
If not, it would be nice to use the same scheme and the same variable names (i,j, both inclusive).
Patch Set #10, Line 412: func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
I worry that there really isn't much test coverage of the fallback routines used in this code. Have you run a coverage tool on this? Could we design a testing framework to somehow force the fallback paths to be taken more often?
Patch Set #10, Line 433: data.
data[a:b]
Patch Set #10, Line 440: lo, hi
These should probably be "a, b" as they match other uses of those names.
Then these should be i, j, k which are indexes of individual elements.
Patch Set #10, Line 458: sortAdjacent{{.FuncSuffix}}(data, &a, &swaps {{.ExtraArg}})
This is a different scheme than pdqsort uses. You're doing [[n/4-1,n/4,n/4+1], [n/2-1,n/2,n/2+1], [3*n/4-1, 3*n/4, 3*n/4+1]]. They use
[[0,n/2,n-1], [1,n/2-1,n-2], [2, n/2+1,n-3]]. I'm not sure it's a big deal one way or the other, but interested to know why we went this way and not just match pdqsort.
Patch Set #10, Line 469: // The maximum number of swaps was performed.
This reversing optimization was not part of the pdq paper. Is there a reference for this and why it helps?
Patch Set #10, Line 476: // sort2 swaps a, b so that data[a] <= data[b].
I think these 3 following functions would read better if they took ints and returned ints instead of taking *int and updating in place. Updating indexes in place is confusing, and not needed as Go has multi-returns.
So you would use them like this:
a, b = sort2(data, a, b, swaps)
a = sortAdjacent(data, a, &swaps)
and similarly for the others.
Patch Set #10, Line 500: j := b - a - 1
This math doesn't make sense to me. j is not an index, but a length (difference of two indexes). Comparing i and j in the line below is meaningless.
Should this just be j=b-1?
If this is a bug like I think it is, how did a test not catch it?
File src/sort/slice.go:
Patch Set #10, Line 22: usize - bits.LeadingZeros(uint(length))
I think this is just bits.Len(uint(length))?
File src/sort/sort.go:
Patch Set #10, Line 49: limit := usize - bits.LeadingZeros(uint(n))
Use bits.Len in these places also. Then I don't think you need usize.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Ian Lance Taylor, Keith Randall.
yunhao zhang uploaded patch set #11 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
Fixes #50154
name old time/op new time/op delta
SearchWrappers-16 72.8ns ± 3% 75.1ns ± 2% +3.25% (p=0.000 n=20+10)
SortString1K-16 85.2µs ± 3% 86.2µs ± 5% ~ (p=0.247 n=19+10)
SortString1K_Slice-16 84.6µs ± 4% 86.1µs ± 4% ~ (p=0.120 n=20+10)
StableString1K-16 112µs ± 5% 112µs ± 5% ~ (p=0.604 n=19+10)
SortInt1K-16 44.8µs ± 3% 43.2µs ± 2% -3.68% (p=0.000 n=20+10)
SortInt1K_Sorted-16 28.2µs ± 3% 3.3µs ± 3% -88.16% (p=0.000 n=19+10)
SortInt1K_Reversed-16 29.4µs ± 3% 4.8µs ± 2% -83.59% (p=0.000 n=20+10)
SortInt1K_Mod8-16 25.1µs ± 2% 20.0µs ± 2% -20.35% (p=0.000 n=18+10)
StableInt1K-16 51.3µs ± 3% 50.9µs ± 2% ~ (p=0.562 n=20+9)
StableInt1K_Slice-16 49.5µs ± 2% 50.7µs ± 4% +2.55% (p=0.009 n=19+10)
SortInt64K-16 4.73ms ± 3% 4.49ms ± 4% -5.08% (p=0.000 n=20+10)
SortInt64K_Slice-16 4.51ms ± 3% 4.35ms ± 1% -3.42% (p=0.000 n=20+8)
StableInt64K-16 4.85ms ± 2% 4.82ms ± 2% ~ (p=0.267 n=20+10)
Sort1e2-16 27.9µs ± 1% 28.1µs ± 2% ~ (p=0.198 n=20+10)
Stable1e2-16 56.6µs ± 2% 55.0µs ± 2% -2.88% (p=0.000 n=20+10)
Sort1e4-16 5.51ms ± 1% 5.36ms ± 1% -2.58% (p=0.000 n=19+9)
Stable1e4-16 17.8ms ± 1% 17.3ms ± 1% -2.40% (p=0.000 n=20+10)
Sort1e6-16 833ms ± 1% 807ms ± 1% -3.02% (p=0.000 n=20+10)
Stable1e6-16 3.49s ± 2% 3.44s ± 1% -1.41% (p=0.001 n=20+10)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
M src/sort/example_multi_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
7 files changed, 909 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
18 comments:
Patchset:
Thanks for your review Keith!
This CL is inspired by both C++ implementation and Rust implementation.
Because most of Rust's changes bring performance optimization(based on the benchmark: https://github.com/zhangyunhao116/pdqsort/blob/master/benchmark_test.go), this CL may contain some ideas from Rust's implementation.
The benchmark results is updated.
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 242: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pred int, predExist bool, limit int {{.ExtraParam}}) {
It would be nice to describe what each of the parameters mean here: […]
Updated some comments.
The pred would be the smallest element in data[a:b] if existed, and it only appears in right array, because the elements equal to the pivot also only appears in right array(and only partitionEqual will use these variables). All the left arrays do not have pred.
Patch Set #10, Line 281: "pred" "pivotidx"
pred and pivotidx are both indexes into the sorted array. […]
Done
Patch Set #10, Line 306: func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivotidx int {{.ExtraParam}}) (newpivotidx int, wasPartitioned bool) {
Comment as to what this does. […]
Done
Patch Set #10, Line 306: pivotidx
just "newpivot"
Done
Patch Set #10, Line 306: newpivotidx
this could just be "pivot", I think.
Done
Patch Set #10, Line 308: i, j := a+1, b-1
// i and j are inclusive of the elements remaining to be partitioned.
Done
Patch Set #10, Line 347: L := a + 1
This is using a different indexing scheme than partition does (R is exclusive, where j in partition […]
Done
Patch Set #10, Line 412: func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
I worry that there really isn't much test coverage of the fallback routines used in this code. […]
This function is covered, there is a fuzz test https://github.com/zhangyunhao116/stdpdqsort/blob/70c38de4aed0de2a3eabe78074452d15892b9cf2/std_test.go#L55 for previous change(base on sort.Interface{}). Except for the heapSort, but heapSort is not changed in this CL.
Maybe we could also add a fuzz test in src/sort?
Patch Set #10, Line 433: data.
data[a:b]
Done
Patch Set #10, Line 440: lo, hi
These should probably be "a, b" as they match other uses of those names.
Done
Then these should be i, j, k which are indexes of individual elements.
Done
Patch Set #10, Line 458: sortAdjacent{{.FuncSuffix}}(data, &a, &swaps {{.ExtraArg}})
This is a different scheme than pdqsort uses. […]
It was from Rust's implementation: https://github.com/rust-lang/rust/blob/415c9f95884caebd0be9b837ef0885d1ffde1b9b/library/core/src/slice/sort.rs#L657
Because most of Rust's changes bring performance optimization, this approach is also adopted here. But I have not yet performed the benchmark for this change, I can benchmark it if needed :)
Patch Set #10, Line 469: // The maximum number of swaps was performed.
This reversing optimization was not part of the pdq paper. […]
It was from Rust's implementation: https://github.com/rust-lang/rust/blob/415c9f95884caebd0be9b837ef0885d1ffde1b9b/library/core/src/slice/sort.rs#L707
From the benchmark result, this change can bring 1% ~ 5% performance improvement compared to the original implementation in some cases. So it is added to Go implementation.
Patch Set #10, Line 476: // sort2 swaps a, b so that data[a] <= data[b].
I think these 3 following functions would read better if they took ints and returned ints instead of […]
Done
Patch Set #10, Line 500: j := b - a - 1
This math doesn't make sense to me. j is not an index, but a length (difference of two indexes). […]
Yes, it is a bug here. This implementation is initially modified from a generic version of pdqsort(https://github.com/zhangyunhao116/pdqsort), I didn't notice this problem when modifying the code.
It has a fuzz test for pdqsort, locate in https://github.com/zhangyunhao116/stdpdqsort/blob/70c38de4aed0de2a3eabe78074452d15892b9cf2/std_test.go#L55, but no unit test for this function.
In this case, there are only two possibilities:
Fixed.
File src/sort/slice.go:
Patch Set #10, Line 22: usize - bits.LeadingZeros(uint(length))
I think this is just bits. […]
Done
File src/sort/sort.go:
Patch Set #10, Line 49: limit := usize - bits.LeadingZeros(uint(n))
Use bits.Len in these places also. Then I don't think you need usize.
Done
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #12 to this change.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #13 to this change.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
1 comment:
Patchset:
Thanks. […]
```
name old time/op new time/op delta
SortInts-16 10.9ms ± 2% 10.9ms ± 3% ~ (p=0.529 n=10+10)
SlicesSortInts-16 6.22ms ± 3% 6.31ms ± 2% ~ (p=0.089 n=10+10)
SortStrings-16 22.6ms ± 2% 22.5ms ± 1% ~ (p=0.829 n=10+8)
SlicesSortStrings-16 19.2ms ± 2% 19.4ms ± 1% ~ (p=0.105 n=10+10)
SortStructs-16 15.6ms ± 3% 15.6ms ± 2% ~ (p=1.000 n=10+10)
SortFuncStructs-16 13.3ms ± 2% 12.6ms ± 2% -5.92% (p=0.000 n=10+10)
```
With some modifications:
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
20 comments:
Patchset:
``` […]
Modifying maxInsertion is fine, but let's do that in a different CL.
(We'll need benchmarks for just that, and I have a suspicion it might depend a lot on the cpu involved.)
Patchset:
Thanks for your review Keith! […]
Could you add those references to the CL just below the pdqsort.pdf link?
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 242: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pred int, predExist bool, limit int {{.ExtraParam}}) {
Updated some comments. […]
All the recursive calls (and tail-recursive loops) to pdqsort either set predExist to true, or to the predExist value from the caller. Only the external call from Slice and friends sets predExist to false.
So predExist can only be false if we're on a chain from the root where we pass the input predExist on every recursion. And those are exactly the paths that leave a unchanged. (All paths that set predExist==true also increment a by at least 1).
What am I missing? Or did I flub that analysis somehow?
And also just looking at the places where a and predidx are assigned, whenever predExist==true, predidx == a-1?
Patch Set #10, Line 412: func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
This function is covered, there is a fuzz test https://github. […]
A fuzz test in src/sort is a good idea. We need coverage of this function somehow, either with that, an input that is known to trigger this case, or with a dedicated unit test.
Patch Set #10, Line 458: sortAdjacent{{.FuncSuffix}}(data, &a, &swaps {{.ExtraArg}})
It was from Rust's implementation: https://github. […]
No need to benchmark, just wondering where the pattern used came from. Just mention that here in a comment.
Patch Set #10, Line 469: // The maximum number of swaps was performed.
It was from Rust's implementation: https://github. […]
Same here, just comment that this idea came from Rust's implementation.
Patch Set #10, Line 500: j := b - a - 1
Yes, it is a bug here. […]
Ah, I see, reversing is just an optimization - the algorithm still works correctly even if reverseRange doesn't actually reverse correctly (as long as it doesn't walk out of range or clobber entries or anything).
It would be nice to have a unit test for this function - there's really no other way to tell that it is correct.
File src/sort/gen_sort_variants.go:
Patch Set #13, Line 242: // predidx would be the smallest element in data[a:b] if existed(predExist is true).
"if predExists is true, data[predidx] is a known lower bound on all entries in data[a:b]." Do I have that correct?
(Any presumably, predidx < a?)
Which makes me thing "pred" is not really a good name. Maybe "min" and "minKnown"?
Patch Set #13, Line 272: pivotidx
pivotidx -> pivot
Patch Set #13, Line 284: pivotidx
Maybe use min instead of pivotidx here? (See comment in partitionEqual)
It seems like we do the recursive call on the larger subsequence and then do the tail call on the smaller subsequence. That feels backwards to me, as it leads to larger stack depths than doing it the other way around. Can we reverse that, or is there an important reason it is in this order?
Patch Set #13, Line 348: pivotidx
pivotidx -> pivot
data[a:b]
Patch Set #13, Line 350: func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivotidx int {{.ExtraParam}}) int {
Describe the return value
i+1 ?
Patch Set #13, Line 428: for _, idx := range idxs {
Since elements in idxs are all adjacent, can't you just do
for idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++ {
}
Then you don't need idxs at all.
Also, length/4*2 seems kind of pointless. Why not just length/2? I'm not sure why odd/even matters here.
Presumably the point of this is to swap in some random data that a subsequent pivoting step will then pick up. So I guess the indexes chosen here might need to correspond to the indexes used by choosePivot below? But I'm not entirely sure.
I feel like it might be better, or at least cleaner to do (in pdqsort):
if wasBalanced {
pivot, likelySorted = choosePivot(...)
} else {
limit--
pivot, likelySorted = choosePivotRandom(...)
}Where choosePivotRandom is like choosePivot but uses 9 random locations.
But maybe the swapping of random elements helps multiple subsequent pivot choices? Something to think about (but probably not for this CL).
Patch Set #13, Line 445: pivotidx
pivotidx -> pivot
Patch Set #13, Line 476: reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
It feels odd to me that choosePivot is actually doing this reversing itself. Proposal: choosePivot actually returns a tri-state as its second return value.
type sortedHint int
const (
increasingHint sortedHint = iota
decreasingHint
unknownHint
)
Then choosePivot does
switch swaps {
case 0: return j, increasingHint
case maxSwaps: return j, decreasingHint
default: return j, unknownHint
}Then pdqsort does
pivot, hint := choosePivot
if hint == decreasingHint {
reverseRange(data, a, b)
// The chosen pivot was pivot-a elements after the start of the array.
// After reversing it is pivot-a elements before the end of the array.
pivot = (b-1)-(pivot-a)
hint = increasingHint
}
.. then use hint == increasingHint instead of likelySorted.
I'm not sure it would materially make a difference, but I think it makes the code cleaner as choosePivot is then only choosing the pivot and giving a hint, not also doing actual work of moving elements around.
Patch Set #13, Line 477: 2*a + (l - 1 - j)
This formula is kind of scary, not sure how it was intended to be understood. Definitely needs a comment. I put a formula I think is a bit clearer in the above comment.
This is another chunk of code that, if it was wrong but never generated an out-of-bounds result, we'd never know. We'd just pick less-than-optimal pivots and never be the wiser.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #14 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
This CL is inspired by both C++ implementation and Rust implementation.
- C++ implementation: https://github.com/orlp/pdqsort
- Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
Fixes #50154
name old time/op new time/op delta
7 files changed, 936 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
20 comments:
Patchset:
Modifying maxInsertion is fine, but let's do that in a different CL. […]
ACK. Let's do this in next CL.
Patchset:
The unit test is cover 99.1% of codes in sort/zsortinterface.go (except for the heapSort) now. Accept the proposal in the comment 😊
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 242: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pred int, predExist bool, limit int {{.ExtraParam}}) {
All the recursive calls (and tail-recursive loops) to pdqsort either set predExist to true, or to th […]
You're right! I am wondering maybe we could use only the "minKnown"(the new name for "predExist"), remove the min(predidx) since it is a-1? but it may panic if a=0 and minKnown==true.
Patch Set #10, Line 412: func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
A fuzz test in src/sort is a good idea. […]
I tried to add a fuzz test for this case, but seems it is not work well(hard to trigger this case). Add a unit test with special input for it(TestBreakPatterns). Done.
Patch Set #10, Line 458: sortAdjacent{{.FuncSuffix}}(data, &a, &swaps {{.ExtraArg}})
No need to benchmark, just wondering where the pattern used came from. […]
Done.
Patch Set #10, Line 469: // The maximum number of swaps was performed.
Same here, just comment that this idea came from Rust's implementation.
Done
Patch Set #10, Line 500: j := b - a - 1
Ah, I see, reversing is just an optimization - the algorithm still works correctly even if reverseRa […]
Tried to add a test for this function in sort/sort_test.go, but got
# sort_test [sort.test]
/usr/local/go/src/sort/sort_test.go:127:2: undefined: reverseRange
/usr/local/go/src/sort/sort_test.go:136:2: undefined: reverseRange
Looks like it is a dependency issue(notice that sort_test.go import . "sort"). Not sure what going on here, but this function is covered by other tests now(from coverage.html).
File src/sort/gen_sort_variants.go:
Patch Set #13, Line 242: // predidx would be the smallest element in data[a:b] if existed(predExist is true).
"if predExists is true, data[predidx] is a known lower bound on all entries in data[a:b]." Do I have that correct?
(Any presumably, predidx < a?)
You are correct! Done.
Patch Set #13, Line 272: pivotidx
pivotidx -> pivot
Done
Patch Set #13, Line 284: pivotidx
Maybe use min instead of pivotidx here? (See comment in partitionEqual)
Done
It seems like we do the recursive call on the larger subsequence and then do the tail call on the sm […]
Done. No important reason, recursive call on the smaller subsequence will be faster.
Patch Set #13, Line 348: pivotidx
pivotidx -> pivot
Done
data[a:b]
Done
Patch Set #13, Line 350: func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivotidx int {{.ExtraParam}}) int {
Describe the return value
Done
i+1 ?
Done
Patch Set #13, Line 428: for _, idx := range idxs {
Since elements in idxs are all adjacent, can't you just do […]
Done.
Because length/4*2 is the second candidate of choosPivot, so it kept the same format. We can use length/2 here but looks like the compiler will optimize this case.
"idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++" is used in a previous implementation, but found that use idxs will be faster(~3% in some cases), changed to the previous one.
But maybe the swapping of random elements helps multiple subsequent pivot choices?
Yes, I also think so, and if choosePivotRandom uses 9 locations, it is an expensive function(especially for small slice), we need a benchmark for this. (I can do the benchmark in the next CL)
Patch Set #13, Line 445: pivotidx
pivotidx -> pivot
Done
Patch Set #13, Line 476: reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
It feels odd to me that choosePivot is actually doing this reversing itself. […]
Agree! It's more cleaner. Done.
A tiny modification: move the "unknownHint" to the first, because it is also the default path in the "choosePivot".
Patch Set #13, Line 477: 2*a + (l - 1 - j)
This formula is kind of scary, not sure how it was intended to be understood. […]
Done. Use "(b-1)-(pivot-a)" now.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
2 comments:
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 500: j := b - a - 1
Tried to add a test for this function in sort/sort_test.go, but got […]
I think you'd need to add something to export_test.go so that package sort_test can access the unexported reverseRange (which is in package sort).
(The other option would be to add a new test file which is in package sort, not package sort_test. But the export route is probably better as that mechanism is already present.)
File src/sort/gen_sort_variants.go:
Patch Set #14, Line 244: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, min int, minKnown bool, limit int {{.ExtraParam}}) {
I would just say here:
if a>0, then data[a-1] is a known lower bound on all the elements in data[a:b].
Then you can get rid of both min and minKnown.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #15 to this change.
M src/sort/export_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
8 files changed, 950 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
5 comments:
Patchset:
Thanks, a lot of great ideas!
File src/sort/gen_sort_variants.go:
Patch Set #10, Line 242: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pred int, predExist bool, limit int {{.ExtraParam}}) {
You're right! I am wondering maybe we could use only the "minKnown"(the new name for "predExist"), r […]
Done
Patch Set #10, Line 500: j := b - a - 1
I think you'd need to add something to export_test. […]
Done
File src/sort/gen_sort_variants.go:
> But maybe the swapping of random elements helps multiple subsequent pivot choices? […]
Ack
File src/sort/gen_sort_variants.go:
Patch Set #14, Line 244: func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, min int, minKnown bool, limit int {{.ExtraParam}}) {
I would just say here: […]
Done
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #16 to this change.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
1 comment:
Patchset:
Please feel free to submit any feedback, thanks!
Some works need to be solved in other CL:
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
Patch set 16:Run-TryBot +1Code-Review +2
5 comments:
Patchset:
Could you add those references to the CL just below the pdqsort. […]
Still need link references in the code comment.
Patchset:
All in all, looks pretty good now. Just a few minor things, mostly naming/docs.
File src/sort/gen_sort_variants.go:
Patch Set #16, Line 480: func sort2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) {
Maybe we should just be extra clear in the discriptions of these 3 functions that no data is moved - We're just returning indexes.
"sort" is a bit of a misnomer, as the actual sort functions above do move items. Maybe "order"?
// order2 returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
Patch Set #16, Line 488: // sort3 swaps a, b, c so that data[a] <= data[b] <= data[c], and returns b.
Call this one "median" or "median3"
// median returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
Patch Set #16, Line 496: stores the index into a
medianAdjacent?
// medianAdjancent returns x where x is the median of data[x-1],data[x],data[x+1] where a-1 <= x <= a+1.
(maybe this function isn't really needed? You could just call median directly and use "i-1,i,i+1" instead of "i" at the callsite.)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #17 to this change.
8 files changed, 956 insertions(+), 407 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
5 comments:
Patchset:
Still need link references in the code comment.
Done
Patchset:
All done, thanks!
File src/sort/gen_sort_variants.go:
Patch Set #16, Line 480: func sort2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) {
Maybe we should just be extra clear in the discriptions of these 3 functions that no data is moved - […]
Done
Patch Set #16, Line 488: // sort3 swaps a, b, c so that data[a] <= data[b] <= data[c], and returns b.
Call this one "median" or "median3" […]
Done
Patch Set #16, Line 496: stores the index into a
medianAdjacent? […]
Done. Using this method will be a little faster, maybe related to compiler optimization?
Benchmark in generics version, new use use "i-1,i,i+1" at the callsite.
name old time/op new time/op delta
Random/pdqsort_1024-16 40.5µs ± 2% 40.4µs ± 1% ~ (p=0.888 n=8+9)
Sorted/pdqsort_1024-16 884ns ± 1% 1180ns ± 1% +33.46% (p=0.000 n=10+10)
NearlySorted/pdqsort_1024-16 16.0µs ± 3% 15.6µs ± 2% -2.98% (p=0.001 n=10+10)
Reversed/pdqsort_1024-16 1.24µs ± 1% 1.53µs ± 1% +23.02% (p=0.000 n=9+10)
NearlyReversed/pdqsort_1024-16 19.0µs ± 2% 18.6µs ± 3% -1.77% (p=0.035 n=10+10)
Mod8/pdqsort_1024-16 6.12µs ± 3% 5.61µs ± 2% -8.27% (p=0.000 n=10+10)
AllEqual/pdqsort_1024-16 899ns ± 1% 1192ns ± 1% +32.56% (p=0.000 n=9+10)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
yunhao zhang uploaded patch set #18 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
This CL is inspired by both C++ implementation and Rust implementation.
- C++ implementation: https://github.com/orlp/pdqsort
- Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
Fixes #50154
name old time/op new time/op delta
8 files changed, 971 insertions(+), 410 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
1 comment:
Patchset:
Patchset18 fixes a bug for exp/slices and adds function-suffix for each function's comment.
For the float64 slice, we can not say that `!(a<b)` is always equal to `(a>=b)`.
Bad case:
var x float64 = -100
println(!(math.NaN() < x)) // true
println(math.NaN() >= x) // false
All tests passed in both exp/slices and src/sort.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall.
Patch set 18:Run-TryBot +1
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall.
1 comment:
Patchset:
Can you rebase this CL to HEAD? Thanks.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
1 comment:
Patchset:
Can you rebase this CL to HEAD? Thanks.
Hi, looks like this CL is up to date with master already(from the web UI)?
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor.
Patch set 18:Code-Review +1Trust +1
1 comment:
Patchset:
Hi, looks like this CL is up to date with master already(from the web UI)?
Please try again, Yunhao.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
1 comment:
Patchset:
Please try again, Yunhao.
In that case I'm not sure why we are seeing trybot failures on 386 systems.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
Patch set 19:Run-TryBot +1Code-Review +2
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
1 comment:
Patchset:
Hm, that failure does look kinda real.
How about adding test code to the end of pdqsort (or a wrapper for it), which just double-checks that !Less(i, i-1) for the array and panics if it is bad? Then we can run trybots and see if that check fails anywhere.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
yunhao zhang uploaded patch set #20 to this change.
8 files changed, 1,004 insertions(+), 414 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
1 comment:
Patchset:
Hm, that failure does look kinda real. […]
Good idea! Done, and I found that `TestNonDeterministicComparison` will makes the temp test fail, so it's disabled for now. Please run trybots again, thanks!
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
Patch set 20:Run-TryBot +1Code-Review +2
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
yunhao zhang uploaded patch set #21 to this change.
A src/sort/bits.go
M src/sort/example_multi_test.go
M src/sort/export_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
9 files changed, 1,064 insertions(+), 414 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
1 comment:
Patchset:
Good idea! Done, and I found that `TestNonDeterministicComparison` will makes the temp test fail, so […]
For this case, I think it may be a dependency error (but not sure why it only happens on 386 systems).
Because we got `TryBots+1` at the Patchset 8, the biggest difference between the two versions is that the previous one avoids a dependency on the `math/bits` package.
Patchset 21 adds a copy of `bits.Len`. It would be nice if we could run the try bot again.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Emmanuel Odeke.
Patch set 21:Run-TryBot +1
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
Patch set 21:Run-TryBot +1
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
yunhao zhang uploaded patch set #23 to this change.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
1 comment:
Patchset:
I have tried to use this test wrapper in exp/slices, all the tests passed. Not sure why this test failed, and I'm trying to install a Linux-386 system using QEMU to debug this error, but still a work in progress.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
1 comment:
Patchset:
A quick note, please don't close the attached issue before this is also ported to exp/slices
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
Patch set 23:Code-Review +2
1 comment:
Patchset:
I have tried to use this test wrapper in exp/slices, all the tests passed. […]
I can reproduce the problem on linux/amd64 with
GOARCH=386 GOHOSTARCH=386 ./make.bash
Not sure why yet. Something definitely fishy. If I just add, at the start of pdqsort, this code:
if true { heapSort{{.FuncSuffix}}(data, a, b); return }
I get an error at the same place (building toolchain3) but with a very different symptom. Which argues that there's a bug somewhere else that this CL is somehow tripping over. No idea yet what that might be:
$ GOARCH=386 GOHOSTARCH=386 ./make.bash
Building Go cmd/dist using /usr/local/google/home/khr/go1.17. (go1.17 linux/amd64)
Building Go toolchain1 using /usr/local/google/home/khr/go1.17.
Building Go bootstrap cmd/go (go_bootstrap) using Go toolchain1.
Building Go toolchain2 using go_bootstrap and Go toolchain1.
Building Go toolchain3 using go_bootstrap and Go toolchain2.
go tool compile: signal: trace/breakpoint trap
minpc= 0x8049000 min= 0x8049000 maxpc= 0x88a3bdb max= 0x8049000
fatal error: minpc or maxpc invalid
runtime: panic before malloc heap initialized
runtime stack:
fatal: morestack on g0
go tool dist: FAILED: /usr/local/google/home/khr/sandbox/tmp8/pkg/tool/linux_386/go_bootstrap install -a -i cmd/asm cmd/cgo cmd/compile cmd/link: exit status 1
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Emmanuel Odeke.
1 comment:
Patchset:
I can reproduce the problem on linux/amd64 with […]
I suspect a bug in heapSort. If I apply the patch below to tip, without this CL at all, then I get the same error I list above. My guess is the pdqsort CL is failing only when it decides to fall back to heapsort.
diff --git a/src/sort/slice.go b/src/sort/slice.go
index ba5c2e2f3d..8e35d5517f 100644
--- a/src/sort/slice.go
+++ b/src/sort/slice.go
@@ -17,7 +17,8 @@ func Slice(x any, less func(i, j int) bool) {
rv := reflectValueOf(x)
swap := reflectSwapper(x)
length := rv.Len()
- quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
+ //quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
+ heapSort_func(lessSwap{less, swap}, 0, length)
}
// SliceStable sorts the slice x using the provided less
diff --git a/src/sort/sort.go b/src/sort/sort.go
index aed0eaba30..43cf65b3a1 100644
--- a/src/sort/sort.go
+++ b/src/sort/sort.go
@@ -39,7 +39,8 @@ type Interface interface {
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func Sort(data Interface) {
n := data.Len()
- quickSort(data, 0, n, maxDepth(n))
+ //quickSort(data, 0, n, maxDepth(n))
+ heapSort(data, 0, n)
}
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox, Emmanuel Odeke.
1 comment:
Patchset:
Does this implementation fundamentally require making random decisions that produce unstable sort ou […]
FWIW, the rust stdlib implementation of pdqsort is deterministically "random":
https://doc.rust-lang.org/std/primitive.slice.html#current-implementation
I think the original C++ implementation is also, but less sure of that.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox, Emmanuel Odeke.
1 comment:
Patchset:
I suspect a bug in heapSort. […]
Replacing heapSort with insertionSort fixes the bug.
Doing a heapSort followed by an insertionSort doesn't fix the bug.
I don't see anything obviously wrong with heapSort. Adding various assertions to the output of it doesn't reveal anything broken.
Current theory - something in the toolchain requires sort stability but doesn't request it. We happened to get lucky with quicksort but not so much with heapSort or pdqsort.
(Using heapSort fails with amd64, no need for arch flags.)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox, Emmanuel Odeke.
1 comment:
Patchset:
This fixes the new problem: […]
Yep, I just found it as well. Something about 0-size symbols?
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox, Emmanuel Odeke.
yunhao zhang uploaded patch set #24 to this change.
sort: use pdqsort
- Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
- In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at https://arxiv.org/pdf/2106.05123.pdf
This CL is inspired by both C++ implementation and Rust implementation.
- C++ implementation: https://github.com/orlp/pdqsort
- Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
For #50154
name old time/op new time/op delta
SearchWrappers-16 72.8ns ± 3% 75.1ns ± 2% +3.25% (p=0.000 n=20+10)
SortString1K-16 85.2µs ± 3% 86.2µs ± 5% ~ (p=0.247 n=19+10)
SortString1K_Slice-16 84.6µs ± 4% 86.1µs ± 4% ~ (p=0.120 n=20+10)
StableString1K-16 112µs ± 5% 112µs ± 5% ~ (p=0.604 n=19+10)
SortInt1K-16 44.8µs ± 3% 43.2µs ± 2% -3.68% (p=0.000 n=20+10)
SortInt1K_Sorted-16 28.2µs ± 3% 3.3µs ± 3% -88.16% (p=0.000 n=19+10)
SortInt1K_Reversed-16 29.4µs ± 3% 4.8µs ± 2% -83.59% (p=0.000 n=20+10)
SortInt1K_Mod8-16 25.1µs ± 2% 20.0µs ± 2% -20.35% (p=0.000 n=18+10)
StableInt1K-16 51.3µs ± 3% 50.9µs ± 2% ~ (p=0.562 n=20+9)
StableInt1K_Slice-16 49.5µs ± 2% 50.7µs ± 4% +2.55% (p=0.009 n=19+10)
SortInt64K-16 4.73ms ± 3% 4.49ms ± 4% -5.08% (p=0.000 n=20+10)
SortInt64K_Slice-16 4.51ms ± 3% 4.35ms ± 1% -3.42% (p=0.000 n=20+8)
StableInt64K-16 4.85ms ± 2% 4.82ms ± 2% ~ (p=0.267 n=20+10)
Sort1e2-16 27.9µs ± 1% 28.1µs ± 2% ~ (p=0.198 n=20+10)
Stable1e2-16 56.6µs ± 2% 55.0µs ± 2% -2.88% (p=0.000 n=20+10)
Sort1e4-16 5.51ms ± 1% 5.36ms ± 1% -2.58% (p=0.000 n=19+9)
Stable1e4-16 17.8ms ± 1% 17.3ms ± 1% -2.40% (p=0.000 n=20+10)
Sort1e6-16 833ms ± 1% 807ms ± 1% -3.02% (p=0.000 n=20+10)
Stable1e6-16 3.49s ± 2% 3.44s ± 1% -1.41% (p=0.001 n=20+10)
Change-Id: Iecded047d237b9330b5a4101001a5fdc2f50646a
---
M src/sort/example_multi_test.go
M src/sort/export_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
8 files changed, 970 insertions(+), 410 deletions(-)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Russ Cox, Emmanuel Odeke.
4 comments:
Commit Message:
Patch Set #23, Line 18: Fixes #50154
Please change "Fixes" to "For" since x/exp still needs updating.
Done
Patchset:
A quick note, please don't close the attached issue before this is also ported to exp/slices
Done. Thanks for the reminder.
FWIW, the rust stdlib implementation of pdqsort is deterministically "random": […]
It's a deterministic permutation. The PRNG is seeded only by the length of the input slice, AFAIK, both Rust and C++ implementations use the same strategy.
> That will mean some people can make inputs that "defeat" the randomness, but then the code just falls back to heapsort, right?
Yes, that's right!
Ref from the pdqsort paper
> For all random shuffling a seed was deterministically chosen for each size and input distribution, so all algorithms received the exact same input for the same experiment.
Patchset:
Thanks all, great catch! The new patch removed all test codes, waiting for some fixes from Keith to be merged.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor, Russ Cox.
Patch set 24:Run-TryBot +1Code-Review +1
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor, Russ Cox.
Patch set 24:Code-Review +2
1 comment:
Patchset:
It's a deterministic permutation. […]
I think the trybot failure is due to the fact that the xorshift RNG generates different results depending on whether the code is compiled as 32-bit or 64-bit. Something in the bootstrap process is comparing the output of a 32-bit-compiled toolchain and a 64-bit-compiled toolchain.
Just make that RNG the same for both (probably making it 64-bit is easiest), and it should solve this problem.
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor, Russ Cox.
yunhao zhang uploaded patch set #25 to this change.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Ian Lance Taylor, Russ Cox.
1 comment:
Patchset:
I think the trybot failure is due to the fact that the xorshift RNG generates different results depe […]
Done. XORSHIFT uses uint64 now.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Russ Cox.
Patch set 25:Run-TryBot +1
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Keith Randall, Russ Cox.
Patch set 25:Run-TryBot +1Auto-Submit +1Code-Review +2
Attention is currently required from: Eli Bendersky, thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
Patch set 25:Code-Review +1
Attention is currently required from: thepudds, Eli Bendersky, yunhao zhang, Brad Fitzpatrick, Robert Griesemer, Russ Cox.
Patch set 25:Code-Review +1
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Gopher Robot submitted this change.
Reviewed-on: https://go-review.googlesource.com/c/go/+/371574
Reviewed-by: Emmanuel Odeke <emma...@orijtech.com>
Run-TryBot: Ian Lance Taylor <ia...@golang.org>
Reviewed-by: Keith Randall <k...@golang.org>
Run-TryBot: Keith Randall <k...@golang.org>
Auto-Submit: Keith Randall <k...@golang.org>
Reviewed-by: Keith Randall <k...@google.com>
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Eli Bendersky <eli...@google.com>
---
M src/sort/example_multi_test.go
M src/sort/export_test.go
M src/sort/gen_sort_variants.go
M src/sort/slice.go
M src/sort/sort.go
M src/sort/sort_test.go
M src/sort/zsortfunc.go
M src/sort/zsortinterface.go
8 files changed, 979 insertions(+), 410 deletions(-)
diff --git a/src/sort/example_multi_test.go b/src/sort/example_multi_test.go
index de6ec14..93f2d3e 100644
--- a/src/sort/example_multi_test.go
+++ b/src/sort/example_multi_test.go
@@ -126,7 +126,7 @@
// By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
// By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}]
- // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {r Go 100} {gri Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
+ // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
// By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
}
diff --git a/src/sort/export_test.go b/src/sort/export_test.go
index b6e30ce..2a3c21f 100644
--- a/src/sort/export_test.go
+++ b/src/sort/export_test.go
@@ -7,3 +7,7 @@
func Heapsort(data Interface) {
heapSort(data, 0, data.Len())
}
+
+func ReverseRange(data Interface, a, b int) {
+ reverseRange(data, a, b)
+}
diff --git a/src/sort/gen_sort_variants.go b/src/sort/gen_sort_variants.go
index 0ff1869..2b671ce 100644
--- a/src/sort/gen_sort_variants.go
+++ b/src/sort/gen_sort_variants.go
@@ -84,6 +84,9 @@
ExtraArg: "",
DataType: "[]E",
Funcs: template.FuncMap{
+ "GreaterOrEqual": func(name, i, j string) string {
+ return fmt.Sprintf("(%s[%s] >= %s[%s])", name, i, name, j)
+ },
"Less": func(name, i, j string) string {
return fmt.Sprintf("(%s[%s] < %s[%s])", name, i, name, j)
},
@@ -103,6 +106,9 @@
ExtraArg: ", less",
DataType: "[]E",
Funcs: template.FuncMap{
+ "GreaterOrEqual": func(name, i, j string) string {
+ return fmt.Sprintf("!less(%s[%s], %s[%s])", name, i, name, j)
+ },
"Less": func(name, i, j string) string {
return fmt.Sprintf("less(%s[%s], %s[%s])", name, i, name, j)
},
@@ -123,6 +129,9 @@
ExtraArg: "",
DataType: "Interface",
Funcs: template.FuncMap{
+ "GreaterOrEqual": func(name, i, j string) string {
+ return fmt.Sprintf("!%s.Less(%s, %s)", name, i, j)
+ },
"Less": func(name, i, j string) string {
return fmt.Sprintf("%s.Less(%s, %s)", name, i, j)
},
@@ -143,6 +152,9 @@
ExtraArg: "",
DataType: "lessSwap",
Funcs: template.FuncMap{
+ "GreaterOrEqual": func(name, i, j string) string {
+ return fmt.Sprintf("!%s.Less(%s, %s)", name, i, j)
+ },
"Less": func(name, i, j string) string {
return fmt.Sprintf("%s.Less(%s, %s)", name, i, j)
},
@@ -210,7 +222,7 @@
if child+1 < hi && {{Less "data" "first+child" "first+child+1"}} {
child++
}
- if !{{Less "data" "first+root" "first+child"}} {
+ if {{GreaterOrEqual "data" "first+root" "first+child"}} {
return
}
{{Swap "data" "first+root" "first+child"}}
@@ -235,24 +247,278 @@
}
}
-// Quicksort, loosely following Bentley and McIlroy,
-// "Engineering a Sort Function" SP&E November 1993.
+// pdqsort{{.FuncSuffix}} sorts data[a:b].
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+// C++ implementation: https://github.com/orlp/pdqsort
+// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
+// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
+func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, limit int {{.ExtraParam}}) {
+ const maxInsertion = 12
-// medianOfThree{{.FuncSuffix}} moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
-func medianOfThree{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, m1, m0, m2 int {{.ExtraParam}}) {
- // sort 3 elements
- if {{Less "data" "m1" "m0"}} {
- {{Swap "data" "m1" "m0"}}
- }
- // data[m0] <= data[m1]
- if {{Less "data" "m2" "m1"}} {
- {{Swap "data" "m2" "m1"}}
- // data[m0] <= data[m2] && data[m1] < data[m2]
- if {{Less "data" "m1" "m0"}} {
- {{Swap "data" "m1" "m0"}}
+ var (
+ wasBalanced = true // whether the last partitioning was reasonably balanced
+ wasPartitioned = true // whether the slice was already partitioned
+ )
+
+ for {
+ length := b - a
+
+ if length <= maxInsertion {
+ insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
+ return
+ }
+
+ // Fall back to heapsort if too many bad choices were made.
+ if limit == 0 {
+ heapSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
+ return
+ }
+
+ // If the last partitioning was imbalanced, we need to breaking patterns.
+ if !wasBalanced {
+ breakPatterns{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
+ limit--
+ }
+
+ pivot, hint := choosePivot{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
+ if hint == decreasingHint {
+ reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
+ // The chosen pivot was pivot-a elements after the start of the array.
+ // After reversing it is pivot-a elements before the end of the array.
+ // The idea came from Rust's implementation.
+ pivot = (b - 1) - (pivot - a)
+ hint = increasingHint
+ }
+
+ // The slice is likely already sorted.
+ if wasBalanced && wasPartitioned && hint == increasingHint {
+ if partialInsertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) {
+ return
+ }
+ }
+
+ // Probably the slice contains many duplicate elements, partition the slice into
+ // elements equal to and elements greater than the pivot.
+ if a > 0 && {{GreaterOrEqual "data" "a-1" "pivot"}} {
+ mid := partitionEqual{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}})
+ a = mid
+ continue
+ }
+
+ mid, alreadyPartitioned := partition{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}})
+ wasPartitioned = alreadyPartitioned
+
+ leftLen, rightLen := mid-a, b-mid
+ balanceThreshold := length / 8
+ if leftLen < rightLen {
+ wasBalanced = leftLen >= balanceThreshold
+ pdqsort{{.FuncSuffix}}(data, a, mid, limit {{.ExtraArg}})
+ a = mid + 1
+ } else {
+ wasBalanced = rightLen >= balanceThreshold
+ pdqsort{{.FuncSuffix}}(data, mid+1, b, limit {{.ExtraArg}})
+ b = mid
}
}
- // now data[m0] <= data[m1] <= data[m2]
+}
+
+// partition{{.FuncSuffix}} does one quicksort partition.
+// Let p = data[pivot]
+// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
+// On return, data[newpivot] = p
+func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int, alreadyPartitioned bool) {
+ {{Swap "data" "a" "pivot"}}
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for i <= j && {{Less "data" "i" "a"}} {
+ i++
+ }
+ for i <= j && {{GreaterOrEqual "data" "j" "a"}} {
+ j--
+ }
+ if i > j {
+ {{Swap "data" "j" "a"}}
+ return j, true
+ }
+ {{Swap "data" "i" "j"}}
+ i++
+ j--
+
+ for {
+ for i <= j && {{Less "data" "i" "a"}} {
+ i++
+ }
+ for i <= j && {{GreaterOrEqual "data" "j" "a"}} {
+ j--
+ }
+ if i > j {
+ break
+ }
+ {{Swap "data" "i" "j"}}
+ i++
+ j--
+ }
+ {{Swap "data" "j" "a"}}
+ return j, false
+}
+
+// partitionEqual{{.FuncSuffix}} partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
+// It assumed that data[a:b] does not contain elements smaller than the data[pivot].
+func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int) {
+ {{Swap "data" "a" "pivot"}}
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for {
+ for i <= j && {{GreaterOrEqual "data" "a" "i"}} {
+ i++
+ }
+ for i <= j && {{Less "data" "a" "j"}} {
+ j--
+ }
+ if i > j {
+ break
+ }
+ {{Swap "data" "i" "j"}}
+ i++
+ j--
+ }
+ return i
+}
+
+// partialInsertionSort{{.FuncSuffix}} partially sorts a slice, returns true if the slice is sorted at the end.
+func partialInsertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) bool {
+ const (
+ maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
+ shortestShifting = 50 // don't shift any elements on short arrays
+ )
+ i := a + 1
+ for j := 0; j < maxSteps; j++ {
+ for i < b && {{GreaterOrEqual "data" "i" "i-1"}} {
+ i++
+ }
+
+ if i == b {
+ return true
+ }
+
+ if b-a < shortestShifting {
+ return false
+ }
+
+ {{Swap "data" "i" "i-1"}}
+
+ // Shift the smaller one to the left.
+ if i-a >= 2 {
+ for j := i - 1; j >= 1; j-- {
+ if {{GreaterOrEqual "data" "j" "j-1"}} {
+ break
+ }
+ {{Swap "data" "j" "j-1"}}
+ }
+ }
+ // Shift the greater one to the right.
+ if b-i >= 2 {
+ for j := i + 1; j < b; j++ {
+ if {{GreaterOrEqual "data" "j" "j-1"}} {
+ break
+ }
+ {{Swap "data" "j" "j-1"}}
+ }
+ }
+ }
+ return false
+}
+
+// breakPatterns{{.FuncSuffix}} scatters some elements around in an attempt to break some patterns
+// that might cause imbalanced partitions in quicksort.
+func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
+ length := b - a
+ if length >= 8 {
+ random := xorshift(length)
+ modulus := nextPowerOfTwo(length)
+
+ for idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++ {
+ other := int(uint(random.Next()) & (modulus - 1))
+ if other >= length {
+ other -= length
+ }
+ {{Swap "data" "idx" "a+other"}}
+ }
+ }
+}
+
+// choosePivot{{.FuncSuffix}} chooses a pivot in data[a:b].
+//
+// [0,8): chooses a static pivot.
+// [8,shortestNinther): uses the simple median-of-three method.
+// [shortestNinther,∞): uses the Tukey ninther method.
+func choosePivot{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) (pivot int, hint sortedHint) {
+ const (
+ shortestNinther = 50
+ maxSwaps = 4 * 3
+ )
+
+ l := b - a
+
+ var (
+ swaps int
+ i = a + l/4*1
+ j = a + l/4*2
+ k = a + l/4*3
+ )
+
+ if l >= 8 {
+ if l >= shortestNinther {
+ // Tukey ninther method, the idea came from Rust's implementation.
+ i = medianAdjacent{{.FuncSuffix}}(data, i, &swaps {{.ExtraArg}})
+ j = medianAdjacent{{.FuncSuffix}}(data, j, &swaps {{.ExtraArg}})
+ k = medianAdjacent{{.FuncSuffix}}(data, k, &swaps {{.ExtraArg}})
+ }
+ // Find the median among i, j, k and stores it into j.
+ j = median{{.FuncSuffix}}(data, i, j, k, &swaps {{.ExtraArg}})
+ }
+
+ switch swaps {
+ case 0:
+ return j, increasingHint
+ case maxSwaps:
+ return j, decreasingHint
+ default:
+ return j, unknownHint
+ }
+}
+
+// order2{{.FuncSuffix}} returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
+func order2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) {
+ if {{Less "data" "b" "a"}} {
+ *swaps++
+ return b, a
+ }
+ return a, b
+}
+
+// median{{.FuncSuffix}} returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
+func median{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, c int, swaps *int {{.ExtraParam}}) int {
+ a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}})
+ b, c = order2{{.FuncSuffix}}(data, b, c, swaps {{.ExtraArg}})
+ a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}})
+ return b
+}
+
+// medianAdjacent{{.FuncSuffix}} finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
+func medianAdjacent{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a int, swaps *int {{.ExtraParam}}) int {
+ return median{{.FuncSuffix}}(data, a-1, a, a+1, swaps {{.ExtraArg}})
+}
+
+func reverseRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
+ i := a
+ j := b - 1
+ for i < j {
+ {{Swap "data" "i" "j"}}
+ i++
+ j--
+ }
}
func swapRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, n int {{.ExtraParam}}) {
@@ -261,123 +527,6 @@
}
}
-func doPivot{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, lo, hi int {{.ExtraParam}}) (midlo, midhi int) {
- m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
- if hi-lo > 40 {
- // Tukey's "Ninther" median of three medians of three.
- s := (hi - lo) / 8
- medianOfThree{{.FuncSuffix}}(data, lo, lo+s, lo+2*s {{.ExtraArg}})
- medianOfThree{{.FuncSuffix}}(data, m, m-s, m+s {{.ExtraArg}})
- medianOfThree{{.FuncSuffix}}(data, hi-1, hi-1-s, hi-1-2*s {{.ExtraArg}})
- }
- medianOfThree{{.FuncSuffix}}(data, lo, m, hi-1 {{.ExtraArg}})
-
- // Invariants are:
- // data[lo] = pivot (set up by ChoosePivot)
- // data[lo < i < a] < pivot
- // data[a <= i < b] <= pivot
- // data[b <= i < c] unexamined
- // data[c <= i < hi-1] > pivot
- // data[hi-1] >= pivot
- pivot := lo
- a, c := lo+1, hi-1
-
- for ; a < c && {{Less "data" "a" "pivot"}}; a++ {
- }
- b := a
- for {
- for ; b < c && !{{Less "data" "pivot" "b"}}; b++ { // data[b] <= pivot
- }
- for ; b < c && {{Less "data" "pivot" "c-1"}}; c-- { // data[c-1] > pivot
- }
- if b >= c {
- break
- }
- // data[b] > pivot; data[c-1] <= pivot
- {{Swap "data" "b" "c-1"}}
- b++
- c--
- }
- // If hi-c<3 then there are duplicates (by property of median of nine).
- // Let's be a bit more conservative, and set border to 5.
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- // Lets test some points for equality to pivot
- dups := 0
- if !{{Less "data" "pivot" "hi-1"}} { // data[hi-1] = pivot
- {{Swap "data" "c" "hi-1"}}
- c++
- dups++
- }
- if !{{Less "data" "b-1" "pivot"}} { // data[b-1] = pivot
- b--
- dups++
- }
- // m-lo = (hi-lo)/2 > 6
- // b-lo > (hi-lo)*3/4-1 > 8
- // ==> m < b ==> data[m] <= pivot
- if !{{Less "data" "m" "pivot"}} { // data[m] = pivot
- {{Swap "data" "m" "b-1"}}
- b--
- dups++
- }
- // if at least 2 points are equal to pivot, assume skewed distribution
- protect = dups > 1
- }
- if protect {
- // Protect against a lot of duplicates
- // Add invariant:
- // data[a <= i < b] unexamined
- // data[b <= i < c] = pivot
- for {
- for ; a < b && !{{Less "data" "b-1" "pivot"}}; b-- { // data[b] == pivot
- }
- for ; a < b && {{Less "data" "a" "pivot"}}; a++ { // data[a] < pivot
- }
- if a >= b {
- break
- }
- // data[a] == pivot; data[b-1] < pivot
- {{Swap "data" "a" "b-1"}}
- a++
- b--
- }
- }
- // Swap pivot into middle
- {{Swap "data" "pivot" "b-1"}}
- return b - 1, c
-}
-
-func quickSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, maxDepth int {{.ExtraParam}}) {
- for b-a > 12 { // Use ShellSort for slices <= 12 elements
- if maxDepth == 0 {
- heapSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
- return
- }
- maxDepth--
- mlo, mhi := doPivot{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
- // Avoiding recursion on the larger subproblem guarantees
- // a stack depth of at most lg(b-a).
- if mlo-a < b-mhi {
- quickSort{{.FuncSuffix}}(data, a, mlo, maxDepth {{.ExtraArg}})
- a = mhi // i.e., quickSort{{.FuncSuffix}}(data, mhi, b)
- } else {
- quickSort{{.FuncSuffix}}(data, mhi, b, maxDepth {{.ExtraArg}})
- b = mlo // i.e., quickSort{{.FuncSuffix}}(data, a, mlo)
- }
- }
- if b-a > 1 {
- // Do ShellSort pass with gap 6
- // It could be written in this simplified form cause b-a <= 12
- for i := a + 6; i < b; i++ {
- if {{Less "data" "i" "i-6"}} {
- {{Swap "data" "i" "i-6"}}
- }
- }
- insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
- }
-}
-
func stable{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, n int {{.ExtraParam}}) {
blockSize := 20 // must be > 0
a, b := 0, blockSize
@@ -457,7 +606,7 @@
j := m
for i < j {
h := int(uint(i+j) >> 1)
- if !{{Less "data" "m" "h"}} {
+ if {{GreaterOrEqual "data" "m" "h"}} {
i = h + 1
} else {
j = h
@@ -484,7 +633,7 @@
for start < r {
c := int(uint(start+r) >> 1)
- if !{{Less "data" "p-c" "c"}} {
+ if {{GreaterOrEqual "data" "p-c" "c"}} {
start = c + 1
} else {
r = c
diff --git a/src/sort/slice.go b/src/sort/slice.go
index ba5c2e2..443182b 100644
--- a/src/sort/slice.go
+++ b/src/sort/slice.go
@@ -4,6 +4,8 @@
package sort
+import "math/bits"
+
// Slice sorts the slice x given the provided less function.
// It panics if x is not a slice.
//
@@ -17,7 +19,8 @@
rv := reflectValueOf(x)
swap := reflectSwapper(x)
length := rv.Len()
- quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
+ limit := bits.Len(uint(length))
+ pdqsort_func(lessSwap{less, swap}, 0, length, limit)
}
// SliceStable sorts the slice x using the provided less
diff --git a/src/sort/sort.go b/src/sort/sort.go
index aed0eab..68e2f0d 100644
--- a/src/sort/sort.go
+++ b/src/sort/sort.go
@@ -7,6 +7,8 @@
// Package sort provides primitives for sorting slices and user-defined collections.
package sort
+import "math/bits"
+
// An implementation of Interface can be sorted by the routines in this package.
// The methods refer to elements of the underlying collection by integer index.
type Interface interface {
@@ -39,17 +41,34 @@
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func Sort(data Interface) {
n := data.Len()
- quickSort(data, 0, n, maxDepth(n))
+ if n <= 1 {
+ return
+ }
+ limit := bits.Len(uint(n))
+ pdqsort(data, 0, n, limit)
}
-// maxDepth returns a threshold at which quicksort should switch
-// to heapsort. It returns 2*ceil(lg(n+1)).
-func maxDepth(n int) int {
- var depth int
- for i := n; i > 0; i >>= 1 {
- depth++
- }
- return depth * 2
+type sortedHint int // hint for pdqsort when choosing the pivot
+
+const (
+ unknownHint sortedHint = iota
+ increasingHint
+ decreasingHint
+)
+
+// xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
+type xorshift uint64
+
+func (r *xorshift) Next() uint64 {
+ *r ^= *r << 13
+ *r ^= *r >> 17
+ *r ^= *r << 5
+ return uint64(*r)
+}
+
+func nextPowerOfTwo(length int) uint {
+ shift := uint(bits.Len(uint(length)))
+ return uint(1 << shift)
}
// lessSwap is a pair of Less and Swap function for use with the
diff --git a/src/sort/sort_test.go b/src/sort/sort_test.go
index bfff352..862bba2 100644
--- a/src/sort/sort_test.go
+++ b/src/sort/sort_test.go
@@ -122,6 +122,37 @@
}
}
+func TestBreakPatterns(t *testing.T) {
+ // Special slice used to trigger breakPatterns.
+ data := make([]int, 30)
+ for i := range data {
+ data[i] = 10
+ }
+ data[(len(data)/4)*1] = 0
+ data[(len(data)/4)*2] = 1
+ data[(len(data)/4)*3] = 2
+ Sort(IntSlice(data))
+}
+
+func TestReverseRange(t *testing.T) {
+ data := []int{1, 2, 3, 4, 5, 6, 7}
+ ReverseRange(IntSlice(data), 0, len(data))
+ for i := len(data) - 1; i > 0; i-- {
+ if data[i] > data[i-1] {
+ t.Fatalf("reverseRange didn't work")
+ }
+ }
+
+ data1 := []int{1, 2, 3, 4, 5, 6, 7}
+ data2 := []int{1, 2, 5, 4, 3, 6, 7}
+ ReverseRange(IntSlice(data1), 2, 5)
+ for i, v := range data1 {
+ if v != data2[i] {
+ t.Fatalf("reverseRange didn't work")
+ }
+ }
+}
+
type nonDeterministicTestingData struct {
r *rand.Rand
}
@@ -220,6 +251,45 @@
}
}
+func BenchmarkSortInt1K_Sorted(b *testing.B) {
+ b.StopTimer()
+ for i := 0; i < b.N; i++ {
+ data := make([]int, 1<<10)
+ for i := 0; i < len(data); i++ {
+ data[i] = i
+ }
+ b.StartTimer()
+ Ints(data)
+ b.StopTimer()
+ }
+}
+
+func BenchmarkSortInt1K_Reversed(b *testing.B) {
+ b.StopTimer()
+ for i := 0; i < b.N; i++ {
+ data := make([]int, 1<<10)
+ for i := 0; i < len(data); i++ {
+ data[i] = len(data) - i
+ }
+ b.StartTimer()
+ Ints(data)
+ b.StopTimer()
+ }
+}
+
+func BenchmarkSortInt1K_Mod8(b *testing.B) {
+ b.StopTimer()
+ for i := 0; i < b.N; i++ {
+ data := make([]int, 1<<10)
+ for i := 0; i < len(data); i++ {
+ data[i] = i % 8
+ }
+ b.StartTimer()
+ Ints(data)
+ b.StopTimer()
+ }
+}
+
func BenchmarkStableInt1K(b *testing.B) {
b.StopTimer()
unsorted := make([]int, 1<<10)
diff --git a/src/sort/zsortfunc.go b/src/sort/zsortfunc.go
index 80c8a77..49b6169 100644
--- a/src/sort/zsortfunc.go
+++ b/src/sort/zsortfunc.go
@@ -52,24 +52,278 @@
}
}
-// Quicksort, loosely following Bentley and McIlroy,
-// "Engineering a Sort Function" SP&E November 1993.
+// pdqsort_func sorts data[a:b].
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+// C++ implementation: https://github.com/orlp/pdqsort
+// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
+// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
+func pdqsort_func(data lessSwap, a, b, limit int) {
+ const maxInsertion = 12
-// medianOfThree_func moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
-func medianOfThree_func(data lessSwap, m1, m0, m2 int) {
- // sort 3 elements
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- // data[m0] <= data[m1]
- if data.Less(m2, m1) {
- data.Swap(m2, m1)
- // data[m0] <= data[m2] && data[m1] < data[m2]
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
+ var (
+ wasBalanced = true // whether the last partitioning was reasonably balanced
+ wasPartitioned = true // whether the slice was already partitioned
+ )
+
+ for {
+ length := b - a
+
+ if length <= maxInsertion {
+ insertionSort_func(data, a, b)
+ return
+ }
+
+ // Fall back to heapsort if too many bad choices were made.
+ if limit == 0 {
+ heapSort_func(data, a, b)
+ return
+ }
+
+ // If the last partitioning was imbalanced, we need to breaking patterns.
+ if !wasBalanced {
+ breakPatterns_func(data, a, b)
+ limit--
+ }
+
+ pivot, hint := choosePivot_func(data, a, b)
+ if hint == decreasingHint {
+ reverseRange_func(data, a, b)
+ // The chosen pivot was pivot-a elements after the start of the array.
+ // After reversing it is pivot-a elements before the end of the array.
+ // The idea came from Rust's implementation.
+ pivot = (b - 1) - (pivot - a)
+ hint = increasingHint
+ }
+
+ // The slice is likely already sorted.
+ if wasBalanced && wasPartitioned && hint == increasingHint {
+ if partialInsertionSort_func(data, a, b) {
+ return
+ }
+ }
+
+ // Probably the slice contains many duplicate elements, partition the slice into
+ // elements equal to and elements greater than the pivot.
+ if a > 0 && !data.Less(a-1, pivot) {
+ mid := partitionEqual_func(data, a, b, pivot)
+ a = mid
+ continue
+ }
+
+ mid, alreadyPartitioned := partition_func(data, a, b, pivot)
+ wasPartitioned = alreadyPartitioned
+
+ leftLen, rightLen := mid-a, b-mid
+ balanceThreshold := length / 8
+ if leftLen < rightLen {
+ wasBalanced = leftLen >= balanceThreshold
+ pdqsort_func(data, a, mid, limit)
+ a = mid + 1
+ } else {
+ wasBalanced = rightLen >= balanceThreshold
+ pdqsort_func(data, mid+1, b, limit)
+ b = mid
}
}
- // now data[m0] <= data[m1] <= data[m2]
+}
+
+// partition_func does one quicksort partition.
+// Let p = data[pivot]
+// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
+// On return, data[newpivot] = p
+func partition_func(data lessSwap, a, b, pivot int) (newpivot int, alreadyPartitioned bool) {
+ data.Swap(a, pivot)
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ data.Swap(j, a)
+ return j, true
+ }
+ data.Swap(i, j)
+ i++
+ j--
+
+ for {
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ data.Swap(j, a)
+ return j, false
+}
+
+// partitionEqual_func partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
+// It assumed that data[a:b] does not contain elements smaller than the data[pivot].
+func partitionEqual_func(data lessSwap, a, b, pivot int) (newpivot int) {
+ data.Swap(a, pivot)
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for {
+ for i <= j && !data.Less(a, i) {
+ i++
+ }
+ for i <= j && data.Less(a, j) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ return i
+}
+
+// partialInsertionSort_func partially sorts a slice, returns true if the slice is sorted at the end.
+func partialInsertionSort_func(data lessSwap, a, b int) bool {
+ const (
+ maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
+ shortestShifting = 50 // don't shift any elements on short arrays
+ )
+ i := a + 1
+ for j := 0; j < maxSteps; j++ {
+ for i < b && !data.Less(i, i-1) {
+ i++
+ }
+
+ if i == b {
+ return true
+ }
+
+ if b-a < shortestShifting {
+ return false
+ }
+
+ data.Swap(i, i-1)
+
+ // Shift the smaller one to the left.
+ if i-a >= 2 {
+ for j := i - 1; j >= 1; j-- {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ // Shift the greater one to the right.
+ if b-i >= 2 {
+ for j := i + 1; j < b; j++ {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ }
+ return false
+}
+
+// breakPatterns_func scatters some elements around in an attempt to break some patterns
+// that might cause imbalanced partitions in quicksort.
+func breakPatterns_func(data lessSwap, a, b int) {
+ length := b - a
+ if length >= 8 {
+ random := xorshift(length)
+ modulus := nextPowerOfTwo(length)
+
+ for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
+ other := int(uint(random.Next()) & (modulus - 1))
+ if other >= length {
+ other -= length
+ }
+ data.Swap(idx, a+other)
+ }
+ }
+}
+
+// choosePivot_func chooses a pivot in data[a:b].
+//
+// [0,8): chooses a static pivot.
+// [8,shortestNinther): uses the simple median-of-three method.
+// [shortestNinther,∞): uses the Tukey ninther method.
+func choosePivot_func(data lessSwap, a, b int) (pivot int, hint sortedHint) {
+ const (
+ shortestNinther = 50
+ maxSwaps = 4 * 3
+ )
+
+ l := b - a
+
+ var (
+ swaps int
+ i = a + l/4*1
+ j = a + l/4*2
+ k = a + l/4*3
+ )
+
+ if l >= 8 {
+ if l >= shortestNinther {
+ // Tukey ninther method, the idea came from Rust's implementation.
+ i = medianAdjacent_func(data, i, &swaps)
+ j = medianAdjacent_func(data, j, &swaps)
+ k = medianAdjacent_func(data, k, &swaps)
+ }
+ // Find the median among i, j, k and stores it into j.
+ j = median_func(data, i, j, k, &swaps)
+ }
+
+ switch swaps {
+ case 0:
+ return j, increasingHint
+ case maxSwaps:
+ return j, decreasingHint
+ default:
+ return j, unknownHint
+ }
+}
+
+// order2_func returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
+func order2_func(data lessSwap, a, b int, swaps *int) (int, int) {
+ if data.Less(b, a) {
+ *swaps++
+ return b, a
+ }
+ return a, b
+}
+
+// median_func returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
+func median_func(data lessSwap, a, b, c int, swaps *int) int {
+ a, b = order2_func(data, a, b, swaps)
+ b, c = order2_func(data, b, c, swaps)
+ a, b = order2_func(data, a, b, swaps)
+ return b
+}
+
+// medianAdjacent_func finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
+func medianAdjacent_func(data lessSwap, a int, swaps *int) int {
+ return median_func(data, a-1, a, a+1, swaps)
+}
+
+func reverseRange_func(data lessSwap, a, b int) {
+ i := a
+ j := b - 1
+ for i < j {
+ data.Swap(i, j)
+ i++
+ j--
+ }
}
func swapRange_func(data lessSwap, a, b, n int) {
@@ -78,123 +332,6 @@
}
}
-func doPivot_func(data lessSwap, lo, hi int) (midlo, midhi int) {
- m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
- if hi-lo > 40 {
- // Tukey's "Ninther" median of three medians of three.
- s := (hi - lo) / 8
- medianOfThree_func(data, lo, lo+s, lo+2*s)
- medianOfThree_func(data, m, m-s, m+s)
- medianOfThree_func(data, hi-1, hi-1-s, hi-1-2*s)
- }
- medianOfThree_func(data, lo, m, hi-1)
-
- // Invariants are:
- // data[lo] = pivot (set up by ChoosePivot)
- // data[lo < i < a] < pivot
- // data[a <= i < b] <= pivot
- // data[b <= i < c] unexamined
- // data[c <= i < hi-1] > pivot
- // data[hi-1] >= pivot
- pivot := lo
- a, c := lo+1, hi-1
-
- for ; a < c && data.Less(a, pivot); a++ {
- }
- b := a
- for {
- for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
- }
- for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
- }
- if b >= c {
- break
- }
- // data[b] > pivot; data[c-1] <= pivot
- data.Swap(b, c-1)
- b++
- c--
- }
- // If hi-c<3 then there are duplicates (by property of median of nine).
- // Let's be a bit more conservative, and set border to 5.
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- // Lets test some points for equality to pivot
- dups := 0
- if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
- data.Swap(c, hi-1)
- c++
- dups++
- }
- if !data.Less(b-1, pivot) { // data[b-1] = pivot
- b--
- dups++
- }
- // m-lo = (hi-lo)/2 > 6
- // b-lo > (hi-lo)*3/4-1 > 8
- // ==> m < b ==> data[m] <= pivot
- if !data.Less(m, pivot) { // data[m] = pivot
- data.Swap(m, b-1)
- b--
- dups++
- }
- // if at least 2 points are equal to pivot, assume skewed distribution
- protect = dups > 1
- }
- if protect {
- // Protect against a lot of duplicates
- // Add invariant:
- // data[a <= i < b] unexamined
- // data[b <= i < c] = pivot
- for {
- for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
- }
- for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
- }
- if a >= b {
- break
- }
- // data[a] == pivot; data[b-1] < pivot
- data.Swap(a, b-1)
- a++
- b--
- }
- }
- // Swap pivot into middle
- data.Swap(pivot, b-1)
- return b - 1, c
-}
-
-func quickSort_func(data lessSwap, a, b, maxDepth int) {
- for b-a > 12 { // Use ShellSort for slices <= 12 elements
- if maxDepth == 0 {
- heapSort_func(data, a, b)
- return
- }
- maxDepth--
- mlo, mhi := doPivot_func(data, a, b)
- // Avoiding recursion on the larger subproblem guarantees
- // a stack depth of at most lg(b-a).
- if mlo-a < b-mhi {
- quickSort_func(data, a, mlo, maxDepth)
- a = mhi // i.e., quickSort_func(data, mhi, b)
- } else {
- quickSort_func(data, mhi, b, maxDepth)
- b = mlo // i.e., quickSort_func(data, a, mlo)
- }
- }
- if b-a > 1 {
- // Do ShellSort pass with gap 6
- // It could be written in this simplified form cause b-a <= 12
- for i := a + 6; i < b; i++ {
- if data.Less(i, i-6) {
- data.Swap(i, i-6)
- }
- }
- insertionSort_func(data, a, b)
- }
-}
-
func stable_func(data lessSwap, n int) {
blockSize := 20 // must be > 0
a, b := 0, blockSize
diff --git a/src/sort/zsortinterface.go b/src/sort/zsortinterface.go
index e0d7093..51fa503 100644
--- a/src/sort/zsortinterface.go
+++ b/src/sort/zsortinterface.go
@@ -52,24 +52,278 @@
}
}
-// Quicksort, loosely following Bentley and McIlroy,
-// "Engineering a Sort Function" SP&E November 1993.
+// pdqsort sorts data[a:b].
+// The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
+// pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
+// C++ implementation: https://github.com/orlp/pdqsort
+// Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
+// limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
+func pdqsort(data Interface, a, b, limit int) {
+ const maxInsertion = 12
-// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
-func medianOfThree(data Interface, m1, m0, m2 int) {
- // sort 3 elements
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- // data[m0] <= data[m1]
- if data.Less(m2, m1) {
- data.Swap(m2, m1)
- // data[m0] <= data[m2] && data[m1] < data[m2]
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
+ var (
+ wasBalanced = true // whether the last partitioning was reasonably balanced
+ wasPartitioned = true // whether the slice was already partitioned
+ )
+
+ for {
+ length := b - a
+
+ if length <= maxInsertion {
+ insertionSort(data, a, b)
+ return
+ }
+
+ // Fall back to heapsort if too many bad choices were made.
+ if limit == 0 {
+ heapSort(data, a, b)
+ return
+ }
+
+ // If the last partitioning was imbalanced, we need to breaking patterns.
+ if !wasBalanced {
+ breakPatterns(data, a, b)
+ limit--
+ }
+
+ pivot, hint := choosePivot(data, a, b)
+ if hint == decreasingHint {
+ reverseRange(data, a, b)
+ // The chosen pivot was pivot-a elements after the start of the array.
+ // After reversing it is pivot-a elements before the end of the array.
+ // The idea came from Rust's implementation.
+ pivot = (b - 1) - (pivot - a)
+ hint = increasingHint
+ }
+
+ // The slice is likely already sorted.
+ if wasBalanced && wasPartitioned && hint == increasingHint {
+ if partialInsertionSort(data, a, b) {
+ return
+ }
+ }
+
+ // Probably the slice contains many duplicate elements, partition the slice into
+ // elements equal to and elements greater than the pivot.
+ if a > 0 && !data.Less(a-1, pivot) {
+ mid := partitionEqual(data, a, b, pivot)
+ a = mid
+ continue
+ }
+
+ mid, alreadyPartitioned := partition(data, a, b, pivot)
+ wasPartitioned = alreadyPartitioned
+
+ leftLen, rightLen := mid-a, b-mid
+ balanceThreshold := length / 8
+ if leftLen < rightLen {
+ wasBalanced = leftLen >= balanceThreshold
+ pdqsort(data, a, mid, limit)
+ a = mid + 1
+ } else {
+ wasBalanced = rightLen >= balanceThreshold
+ pdqsort(data, mid+1, b, limit)
+ b = mid
}
}
- // now data[m0] <= data[m1] <= data[m2]
+}
+
+// partition does one quicksort partition.
+// Let p = data[pivot]
+// Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
+// On return, data[newpivot] = p
+func partition(data Interface, a, b, pivot int) (newpivot int, alreadyPartitioned bool) {
+ data.Swap(a, pivot)
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ data.Swap(j, a)
+ return j, true
+ }
+ data.Swap(i, j)
+ i++
+ j--
+
+ for {
+ for i <= j && data.Less(i, a) {
+ i++
+ }
+ for i <= j && !data.Less(j, a) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ data.Swap(j, a)
+ return j, false
+}
+
+// partitionEqual partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
+// It assumed that data[a:b] does not contain elements smaller than the data[pivot].
+func partitionEqual(data Interface, a, b, pivot int) (newpivot int) {
+ data.Swap(a, pivot)
+ i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
+
+ for {
+ for i <= j && !data.Less(a, i) {
+ i++
+ }
+ for i <= j && data.Less(a, j) {
+ j--
+ }
+ if i > j {
+ break
+ }
+ data.Swap(i, j)
+ i++
+ j--
+ }
+ return i
+}
+
+// partialInsertionSort partially sorts a slice, returns true if the slice is sorted at the end.
+func partialInsertionSort(data Interface, a, b int) bool {
+ const (
+ maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
+ shortestShifting = 50 // don't shift any elements on short arrays
+ )
+ i := a + 1
+ for j := 0; j < maxSteps; j++ {
+ for i < b && !data.Less(i, i-1) {
+ i++
+ }
+
+ if i == b {
+ return true
+ }
+
+ if b-a < shortestShifting {
+ return false
+ }
+
+ data.Swap(i, i-1)
+
+ // Shift the smaller one to the left.
+ if i-a >= 2 {
+ for j := i - 1; j >= 1; j-- {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ // Shift the greater one to the right.
+ if b-i >= 2 {
+ for j := i + 1; j < b; j++ {
+ if !data.Less(j, j-1) {
+ break
+ }
+ data.Swap(j, j-1)
+ }
+ }
+ }
+ return false
+}
+
+// breakPatterns scatters some elements around in an attempt to break some patterns
+// that might cause imbalanced partitions in quicksort.
+func breakPatterns(data Interface, a, b int) {
+ length := b - a
+ if length >= 8 {
+ random := xorshift(length)
+ modulus := nextPowerOfTwo(length)
+
+ for idx := a + (length/4)*2 - 1; idx <= a+(length/4)*2+1; idx++ {
+ other := int(uint(random.Next()) & (modulus - 1))
+ if other >= length {
+ other -= length
+ }
+ data.Swap(idx, a+other)
+ }
+ }
+}
+
+// choosePivot chooses a pivot in data[a:b].
+//
+// [0,8): chooses a static pivot.
+// [8,shortestNinther): uses the simple median-of-three method.
+// [shortestNinther,∞): uses the Tukey ninther method.
+func choosePivot(data Interface, a, b int) (pivot int, hint sortedHint) {
+ const (
+ shortestNinther = 50
+ maxSwaps = 4 * 3
+ )
+
+ l := b - a
+
+ var (
+ swaps int
+ i = a + l/4*1
+ j = a + l/4*2
+ k = a + l/4*3
+ )
+
+ if l >= 8 {
+ if l >= shortestNinther {
+ // Tukey ninther method, the idea came from Rust's implementation.
+ i = medianAdjacent(data, i, &swaps)
+ j = medianAdjacent(data, j, &swaps)
+ k = medianAdjacent(data, k, &swaps)
+ }
+ // Find the median among i, j, k and stores it into j.
+ j = median(data, i, j, k, &swaps)
+ }
+
+ switch swaps {
+ case 0:
+ return j, increasingHint
+ case maxSwaps:
+ return j, decreasingHint
+ default:
+ return j, unknownHint
+ }
+}
+
+// order2 returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
+func order2(data Interface, a, b int, swaps *int) (int, int) {
+ if data.Less(b, a) {
+ *swaps++
+ return b, a
+ }
+ return a, b
+}
+
+// median returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
+func median(data Interface, a, b, c int, swaps *int) int {
+ a, b = order2(data, a, b, swaps)
+ b, c = order2(data, b, c, swaps)
+ a, b = order2(data, a, b, swaps)
+ return b
+}
+
+// medianAdjacent finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
+func medianAdjacent(data Interface, a int, swaps *int) int {
+ return median(data, a-1, a, a+1, swaps)
+}
+
+func reverseRange(data Interface, a, b int) {
+ i := a
+ j := b - 1
+ for i < j {
+ data.Swap(i, j)
+ i++
+ j--
+ }
}
func swapRange(data Interface, a, b, n int) {
@@ -78,123 +332,6 @@
}
}
-func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
- m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
- if hi-lo > 40 {
- // Tukey's "Ninther" median of three medians of three.
- s := (hi - lo) / 8
- medianOfThree(data, lo, lo+s, lo+2*s)
- medianOfThree(data, m, m-s, m+s)
- medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
- }
- medianOfThree(data, lo, m, hi-1)
-
- // Invariants are:
- // data[lo] = pivot (set up by ChoosePivot)
- // data[lo < i < a] < pivot
- // data[a <= i < b] <= pivot
- // data[b <= i < c] unexamined
- // data[c <= i < hi-1] > pivot
- // data[hi-1] >= pivot
- pivot := lo
- a, c := lo+1, hi-1
-
- for ; a < c && data.Less(a, pivot); a++ {
- }
- b := a
- for {
- for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
- }
- for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
- }
- if b >= c {
- break
- }
- // data[b] > pivot; data[c-1] <= pivot
- data.Swap(b, c-1)
- b++
- c--
- }
- // If hi-c<3 then there are duplicates (by property of median of nine).
- // Let's be a bit more conservative, and set border to 5.
- protect := hi-c < 5
- if !protect && hi-c < (hi-lo)/4 {
- // Lets test some points for equality to pivot
- dups := 0
- if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
- data.Swap(c, hi-1)
- c++
- dups++
- }
- if !data.Less(b-1, pivot) { // data[b-1] = pivot
- b--
- dups++
- }
- // m-lo = (hi-lo)/2 > 6
- // b-lo > (hi-lo)*3/4-1 > 8
- // ==> m < b ==> data[m] <= pivot
- if !data.Less(m, pivot) { // data[m] = pivot
- data.Swap(m, b-1)
- b--
- dups++
- }
- // if at least 2 points are equal to pivot, assume skewed distribution
- protect = dups > 1
- }
- if protect {
- // Protect against a lot of duplicates
- // Add invariant:
- // data[a <= i < b] unexamined
- // data[b <= i < c] = pivot
- for {
- for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
- }
- for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
- }
- if a >= b {
- break
- }
- // data[a] == pivot; data[b-1] < pivot
- data.Swap(a, b-1)
- a++
- b--
- }
- }
- // Swap pivot into middle
- data.Swap(pivot, b-1)
- return b - 1, c
-}
-
-func quickSort(data Interface, a, b, maxDepth int) {
- for b-a > 12 { // Use ShellSort for slices <= 12 elements
- if maxDepth == 0 {
- heapSort(data, a, b)
- return
- }
- maxDepth--
- mlo, mhi := doPivot(data, a, b)
- // Avoiding recursion on the larger subproblem guarantees
- // a stack depth of at most lg(b-a).
- if mlo-a < b-mhi {
- quickSort(data, a, mlo, maxDepth)
- a = mhi // i.e., quickSort(data, mhi, b)
- } else {
- quickSort(data, mhi, b, maxDepth)
- b = mlo // i.e., quickSort(data, a, mlo)
- }
- }
- if b-a > 1 {
- // Do ShellSort pass with gap 6
- // It could be written in this simplified form cause b-a <= 12
- for i := a + 6; i < b; i++ {
- if data.Less(i, i-6) {
- data.Swap(i, i-6)
- }
- }
- insertionSort(data, a, b)
- }
-}
-
func stable(data Interface, n int) {
blockSize := 20 // must be > 0
a, b := 0, blockSize
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
1 comment:
Patchset:
RELNOTE=yes
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang.
1 comment:
Patchset:
Go's previous implementation of quickSort used ShellSort on short ranges, but pdqsort only uses plai […]
Based on the benchmark in generic version pdqsort(https://github.com/zhangyunhao116/pdqsort, with more benchmark cases), the performance of InsertionSort and ShellSort is almost the same, InsertionSort even has better performance in some cases.
Both C++ and Rust implementations use InsertionSort instead of ShellSort, the performance improvement of ShellSort is more likely to be theoretical than practical.
Perf diff(new use ShellSort):
name old time/op new time/op delta
Random/pdqsort_1024-16 55.2µs ± 4% 56.7µs ± 2% +2.61% (p=0.001 n=10+10)
Sorted/pdqsort_1024-16 5.37µs ± 1% 5.35µs ± 1% ~ (p=0.056 n=10+9)
NearlySorted/pdqsort_1024-16 20.0µs ± 2% 20.4µs ± 1% +2.03% (p=0.000 n=10+10)
Reversed/pdqsort_1024-16 5.40µs ± 1% 5.38µs ± 1% ~ (p=0.197 n=10+10)
NearlyReversed/pdqsort_1024-16 19.3µs ±11% 21.6µs ± 4% +12.10% (p=0.000 n=10+10)
Mod8/pdqsort_1024-16 5.64µs ± 2% 5.58µs ± 0% -0.98% (p=0.023 n=10+9)
AllEqual/pdqsort_1024-16 5.41µs ± 1% 5.38µs ± 1% ~ (p=0.085 n=10+10)
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
1 comment:
Patchset:
Go's previous implementation of quickSort used ShellSort on short ranges, but pdqsort only uses plain insertion sort.
Isn't ShellSort better?
https://cs.opensource.google/go/go/+/refs/tags/go1.18.1:src/sort/sort.go;l=215
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: yunhao zhang.
1 comment:
Patchset:
https://github.com/scandum/crumsort
seems to claim better performance.
To view, visit change 371574. To unsubscribe, or for help writing mail filters, visit settings.