Alberto Donizetti posted comments on this change.
Patch Set 1:
Since you wrote a benchmark, I suggest you add the benchmark results to the commit message. You can use benchcmp or benchstat to get a nicely formatted performance report.
To view, visit this change. To unsubscribe, visit settings.
John Doe posted comments on this change.
Patch Set 1:
(1 comment)
File src/image/jpeg/writer.go:
Isn't performing type switching inside two loops a bit inefficient? https://play.golang.org/p/6LXF82e6U4
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded this change for review.
jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encode directly read Y, Cb, and Cr pixel values. Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 52 insertions(+), 4 deletions(-)
diff --git a/src/image/jpeg/writer.go b/src/image/jpeg/writer.go
index 91bbde3..7014959 100644
--- a/src/image/jpeg/writer.go
+++ b/src/image/jpeg/writer.go
@@ -416,6 +416,30 @@
}
}
+// yCbCrToYCbCr is a specialized version of toYCbCr for image.YCbCr images.
+func yCbCrToYCbCr(m *image.YCbCr, p image.Point, yBlock, cbBlock, crBlock *block) {
+ b := m.Bounds()
+ xmax := b.Max.X - 1
+ ymax := b.Max.Y - 1
+ for j := 0; j < 8; j++ {
+ sy := p.Y + j
+ if sy > ymax {
+ sy = ymax
+ }
+ for i := 0; i < 8; i++ {
+ sx := p.X + i
+ if sx > xmax {
+ sx = xmax
+ }
+ yi := m.YOffset(sx, sy)
+ ci := m.COffset(sx, sy)
+ yBlock[8*j+i] = int32(m.Y[yi])
+ cbBlock[8*j+i] = int32(m.Cb[ci])
+ crBlock[8*j+i] = int32(m.Cr[ci])
+ }
+ }
+}
+
// rgbaToYCbCr is a specialized version of toYCbCr for image.RGBA images.
func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block) {
b := m.Bounds()
@@ -509,16 +533,18 @@
}
}
default:
- rgba, _ := m.(*image.RGBA)
for y := bounds.Min.Y; y < bounds.Max.Y; y += 16 {
for x := bounds.Min.X; x < bounds.Max.X; x += 16 {
for i := 0; i < 4; i++ {
xOff := (i & 1) * 8
yOff := (i & 2) * 4
p := image.Pt(x+xOff, y+yOff)
- if rgba != nil {
- rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
- } else {
+ switch im := m.(type) {
+ case *image.RGBA:
+ rgbaToYCbCr(im, p, &b, &cb[i], &cr[i])
+ case *image.YCbCr:
+ yCbCrToYCbCr(im, p, &b, &cb[i], &cr[i])
+ default:
toYCbCr(m, p, &b, &cb[i], &cr[i])
}
prevDCY = e.writeBlock(&b, 0, prevDCY)
diff --git a/src/image/jpeg/writer_test.go b/src/image/jpeg/writer_test.go
index 3df3cfc..db56ac8 100644
--- a/src/image/jpeg/writer_test.go
+++ b/src/image/jpeg/writer_test.go
@@ -230,3 +230,25 @@
Encode(ioutil.Discard, img, options)
}
}
+
+func BenchmarkEncodeYCbCr(b *testing.B) {
+ b.StopTimer()
+ img := image.NewYCbCr(image.Rect(0, 0, 640, 480), image.YCbCrSubsampleRatio420)
+ bo := img.Bounds()
+ rnd := rand.New(rand.NewSource(123))
+ for y := bo.Min.Y; y < bo.Max.Y; y++ {
+ for x := bo.Min.X; x < bo.Max.X; x++ {
+ cy := img.YOffset(x, y)
+ ci := img.COffset(x, y)
+ img.Y[cy] = uint8(rnd.Intn(256))
+ img.Cb[ci] = uint8(rnd.Intn(256))
+ img.Cr[ci] = uint8(rnd.Intn(256))
+ }
+ }
+ b.SetBytes(640 * 480 * 4)
+ b.StartTimer()
+ options := &Options{Quality: 90}
+ for i := 0; i < b.N; i++ {
+ Encode(ioutil.Discard, img, options)
+ }
+}
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #2 to this change.
jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encode directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43858245 24515125 -44.10% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 28.02 50.12 1.79x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 59 insertions(+), 4 deletions(-)
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Isn't performing type switching inside two loops a bit inefficient?
Done
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Patch Set 2:
Not sure how this can be tested... I wrote this one, but it fails given that the ycbcr->rgba->ycbcr conversion happening while saving the rgba version is lossy.
func TestEncodeYCbCr(t *testing.T) {
img := image.NewYCbCr(image.Rect(0, 0, 640, 480), image.YCbCrSubsampleRatio420)
bo := img.Bounds()
rnd := rand.New(rand.NewSource(123))
for y := bo.Min.Y; y < bo.Max.Y; y++ {
for x := bo.Min.X; x < bo.Max.X; x++ {
cy := img.YOffset(x, y)
ci := img.COffset(x, y)
img.Y[cy] = uint8(rnd.Intn(256))
img.Cb[ci] = uint8(rnd.Intn(256))
img.Cr[ci] = uint8(rnd.Intn(256))
}
}
options := &Options{Quality: 90}
var wycbcr bytes.Buffer
Encode(&wycbcr, img, options)imgt := image.NewRGBA(img.Rect) draw.Draw(imgt, img.Rect, img, image.Pt(0, 0), draw.Src) var wrgba bytes.Buffer Encode(&wrgba, imgt, options)
imgycbcr, _, _ := image.Decode(&wycbcr)
imgrgba, _, _ := image.Decode(&wrgba)
i1 := imgrgba.(*image.YCbCr)
i2 := imgycbcr.(*image.YCbCr)
if !reflect.DeepEqual(i1.Y, i2.Y) ||
!reflect.DeepEqual(i1.Cb, i2.Cb) ||
!reflect.DeepEqual(i1.Cr, i2.Cr) {
t.Errorf("rgba and ycbcr jpeg compressions differ")
}
}To view, visit this change. To unsubscribe, visit settings.
Brad Fitzpatrick posted comments on this change.
Patch Set 2:
(1 comment)
File src/image/jpeg/writer.go:
Patch Set #2, Line 550: case 1:
Is this faster than the idiomatic way of writing it?
That is, can you ditch the "it" variable and just do the type switch here?
switch m := m.(type) {
case *image.RGBA:
...?
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #3 to this change.
jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encode directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43858245 24201148 -44.82% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 28.02 38.08 1.36x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 50 insertions(+), 1 deletion(-)
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Patch Set 3:
(1 comment)
John Doe pointed out that the type switch inside the nested for loops was inefficient. The syntax I came up with in patchset 2 to address that sucked, hopefully the one in patchset 3 is better. Will happily update again if there's a more idiomatic way of handling this (please send a pointer to an example in that case).
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #4 to this change.
jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encode directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43990846 24201148 -44.99% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 20.95 38.08 1.82x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 97 insertions(+), 1 deletion(-)
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Patch Set 4:
Uploaded patch set 4.
Test added, benchmark score corrected
To view, visit this change. To unsubscribe, visit settings.
Brad Fitzpatrick posted comments on this change.
Patch Set 4:
Okay, I filed https://github.com/golang/go/issues/18492 for the missing compiler optimization then.
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #5 to this change.
jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encoder directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43990846 24201148 -44.99% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 20.95 38.08 1.82x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 85 insertions(+), 1 deletion(-)
To view, visit this change. To unsubscribe, visit settings.
Russ Cox posted comments on this change.
Patch Set 5:
(1 comment)
R=go1.9
File src/image/jpeg/writer.go:
Patch Set #5, Line 536: rgba, _ := m.(*image.RGBA) //perf:avoid type assertion in loop
Can you delete this comment please? There are reasonable non-performance reasons to write the code this way.
To view, visit this change. To unsubscribe, visit settings.
Patch Set #5, Line 7: jpeg: improve performance when encoding *image.YCbCr
should be "image/jpeg: ..."
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #6 to this change.
image/jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encoder directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43990846 24201148 -44.99% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 20.95 38.08 1.82x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 84 insertions(+), 0 deletions(-)
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Patch Set 6:
(2 comments)
Patch Set #5, Line 7: image/jpeg: improve performance when encoding *image.YCbCr
should be "image/jpeg: ..."
Done
File src/image/jpeg/writer.go:
Patch Set #5, Line 536: rgba, _ := m.(*image.RGBA)
Can you delete this comment please?
Done
To view, visit this change. To unsubscribe, visit settings.
Nigel Tao posted comments on this change.
Patch Set 6:
(8 comments)
Looks good overall, just some style nits.
File src/image/jpeg/writer.go:
Patch Set #6, Line 420: func yCbCrToYCbCr(m *image.YCbCr, p image.Point, yBlock, cbBlock, crBlock *block) {
Move this down to after the rgbaToYCbCr function.
File src/image/jpeg/writer_test.go:
Patch Set #6, Line 211: func BenchmarkEncode(b *testing.B) {
Rename this to BenchmarkEncodeRGBA.
Patch Set #6, Line 234: func TestEncodeYCbCr(t *testing.T) {
Move this up above BenchmarkEncode, so that the two Benchmark functions are next to each other.
Patch Set #6, Line 235: img := image.NewRGBA(image.Rect(0, 0, 640, 480))
It might be easier if you declare bo := image.Rect(0, 0, 640, 480) above this line, and re-use it here and below.
Patch Set #6, Line 237: //must use 444 here so we don't loose precision in YCbCr compared to RGBA
s/loose/lose/
Also, comments in this package are usually complete sentences, so start with a capital letter (and a space after the slash), and end with a full stop.
// Must use 44 here etc RGBA.
Ditto below.
Patch Set #6, Line 238: imgycbcr
It might be more consistent if you rename img and imgycbcr to imgRGBA and imgYCbCr. Below, the bytes.Buffer variables could be bufRGBA and bufYCbCr.
Add a comma and a new line before the }.
Patch Set #6, Line 262: Encode(&rgba, img, options)
s/options/nil/ is simpler. Ditto below.
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort uploaded patch set #7 to this change.
image/jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encoder directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43990846 24201148 -44.99% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 20.95 38.08 1.82x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 84 insertions(+), 1 deletion(-)
To view, visit this change. To unsubscribe, visit settings.
thomas bonfort posted comments on this change.
Thanks for the review, all comments addressed in patchset 7. I have left the option to set quality to 90 in the benchmark to be consistent with the previous rgba benchmark.
Patch Set #6, Line 420: func yCbCrToYCbCr(m *image.YCbCr, p image.Point, yBlock, cbBlock, crBlock *block) {
Move this down to after the rgbaToYCbCr function.
Patch Set #6, Line 211: func BenchmarkEncode(b *testing.B) {
Rename this to BenchmarkEncodeRGBA.
Done
Patch Set #6, Line 234: func TestEncodeYCbCr(t *testing.T) {
Move this up above BenchmarkEncode, so that the two Benchmark functions are
Done
Patch Set #6, Line 235: img := image.NewRGBA(image.Rect(0, 0, 640, 480))
It might be easier if you declare
Done
Patch Set #6, Line 237: //must use 444 here so we don't loose precision in YCbCr compared to RGBA
s/loose/lose/
Done
Patch Set #6, Line 238: imgycbcr
It might be more consistent if you rename img and imgycbcr to imgRGBA and i
Done
Add a comma and a new line before the }.
Done
Patch Set #6, Line 262: Encode(&rgba, img, options)
s/options/nil/ is simpler. Ditto below.
Done
To view, visit this change. To unsubscribe, visit settings.
Nigel Tao posted comments on this change.
Patch Set 7: Code-Review+2
Looks good, thanks.
I'll submit this after Go 1.8 is released.
Brad Fitzpatrick posted comments on this change.
Patch set 8:Run-TryBot +1
To view, visit change 34773. To unsubscribe, visit settings.
Gobot Gobot posted comments on this change.
Patch set 8:
TryBots beginning. Status page: http://farmer.golang.org/try?commit=056cd965
Build is still in progress... This change failed on darwin-amd64-10_11: See https://storage.googleapis.com/go-build-log/056cd965/darwin-amd64-10_11_9fa06a0c.log
Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.
Gobot Gobot posted comments on this change.
Patch set 9:TryBot-Result +1
TryBots are happy.
Brad Fitzpatrick merged this change.
image/jpeg: improve performance when encoding *image.YCbCr The existing implementation falls back to using image.At() for each pixel when encoding an *image.YCbCr which is inefficient and causes many memory allocations. This change makes the jpeg encoder directly read Y, Cb, and Cr pixel values. benchmark old ns/op new ns/op delta BenchmarkEncodeYCbCr-4 43990846 24201148 -44.99% benchmark old MB/s new MB/s speedup BenchmarkEncodeYCbCr-4 20.95 38.08 1.82x Fixes #18487 Change-Id: Iaf2ebc646997e3e1fffa5335f1b0d642e15bd453 Reviewed-on: https://go-review.googlesource.com/34773 Run-TryBot: Brad Fitzpatrick <brad...@golang.org> TryBot-Result: Gobot Gobot <go...@golang.org> Reviewed-by: Nigel Tao <nige...@golang.org> --- M src/image/jpeg/writer.go M src/image/jpeg/writer_test.go 2 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/src/image/jpeg/writer.go b/src/image/jpeg/writer.go
index 91bbde3..ce7728b 100644
--- a/src/image/jpeg/writer.go
+++ b/src/image/jpeg/writer.go
@@ -441,6 +441,30 @@
}
}
+// yCbCrToYCbCr is a specialized version of toYCbCr for image.YCbCr images.
+func yCbCrToYCbCr(m *image.YCbCr, p image.Point, yBlock, cbBlock, crBlock *block) {
+ b := m.Bounds()
+ xmax := b.Max.X - 1
+ ymax := b.Max.Y - 1
+ for j := 0; j < 8; j++ {
+ sy := p.Y + j
+ if sy > ymax {
+ sy = ymax
+ }
+ for i := 0; i < 8; i++ {
+ sx := p.X + i
+ if sx > xmax {
+ sx = xmax
+ }
+ yi := m.YOffset(sx, sy)
+ ci := m.COffset(sx, sy)
+ yBlock[8*j+i] = int32(m.Y[yi])
+ cbBlock[8*j+i] = int32(m.Cb[ci])
+ crBlock[8*j+i] = int32(m.Cr[ci])
+ }
+ }
+}
+
// scale scales the 16x16 region represented by the 4 src blocks to the 8x8
// dst block.
func scale(dst *block, src *[4]block) {
@@ -510,6 +534,7 @@
}
default:
rgba, _ := m.(*image.RGBA)
+ ycbcr, _ := m.(*image.YCbCr)
for y := bounds.Min.Y; y < bounds.Max.Y; y += 16 {
for x := bounds.Min.X; x < bounds.Max.X; x += 16 {
for i := 0; i < 4; i++ {
@@ -518,6 +543,8 @@
p := image.Pt(x+xOff, y+yOff)
if rgba != nil {
rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
+ } else if ycbcr != nil {
+ yCbCrToYCbCr(ycbcr, p, &b, &cb[i], &cr[i])
} else {
toYCbCr(m, p, &b, &cb[i], &cr[i])
}
diff --git a/src/image/jpeg/writer_test.go b/src/image/jpeg/writer_test.go
index 3df3cfc..a6c0561 100644
--- a/src/image/jpeg/writer_test.go
+++ b/src/image/jpeg/writer_test.go
@@ -208,7 +208,41 @@
return sum / n
}
-func BenchmarkEncode(b *testing.B) {
+func TestEncodeYCbCr(t *testing.T) {
+ bo := image.Rect(0, 0, 640, 480)
+ imgRGBA := image.NewRGBA(bo)
+ // Must use 444 subsampling to avoid lossy RGBA to YCbCr conversion.
+ imgYCbCr := image.NewYCbCr(bo, image.YCbCrSubsampleRatio444)
+ rnd := rand.New(rand.NewSource(123))
+ // Create identical rgba and ycbcr images.
+ for y := bo.Min.Y; y < bo.Max.Y; y++ {
+ for x := bo.Min.X; x < bo.Max.X; x++ {
+ col := color.RGBA{
+ uint8(rnd.Intn(256)),
+ uint8(rnd.Intn(256)),
+ uint8(rnd.Intn(256)),
+ 255,
+ }
+ imgRGBA.SetRGBA(x, y, col)
+ yo := imgYCbCr.YOffset(x, y)
+ co := imgYCbCr.COffset(x, y)
+ cy, ccr, ccb := color.RGBToYCbCr(col.R, col.G, col.B)
+ imgYCbCr.Y[yo] = cy
+ imgYCbCr.Cb[co] = ccr
+ imgYCbCr.Cr[co] = ccb
+ }
+ }
+
+ // Now check that both images are identical after an encode.
+ var bufRGBA, bufYCbCr bytes.Buffer
+ Encode(&bufRGBA, imgRGBA, nil)
+ Encode(&bufYCbCr, imgYCbCr, nil)
+ if !bytes.Equal(bufRGBA.Bytes(), bufYCbCr.Bytes()) {
+ t.Errorf("RGBA and YCbCr encoded bytes differ")
+ }
+}
+
+func BenchmarkEncodeRGBA(b *testing.B) {
b.StopTimer()
img := image.NewRGBA(image.Rect(0, 0, 640, 480))
bo := img.Bounds()
@@ -230,3 +264,25 @@
Encode(ioutil.Discard, img, options)
}
}
+
+func BenchmarkEncodeYCbCr(b *testing.B) {
+ b.StopTimer()
+ img := image.NewYCbCr(image.Rect(0, 0, 640, 480), image.YCbCrSubsampleRatio420)
+ bo := img.Bounds()
+ rnd := rand.New(rand.NewSource(123))
+ for y := bo.Min.Y; y < bo.Max.Y; y++ {
+ for x := bo.Min.X; x < bo.Max.X; x++ {
+ cy := img.YOffset(x, y)
+ ci := img.COffset(x, y)
+ img.Y[cy] = uint8(rnd.Intn(256))
+ img.Cb[ci] = uint8(rnd.Intn(256))
+ img.Cr[ci] = uint8(rnd.Intn(256))
+ }
+ }
+ b.SetBytes(640 * 480 * 3)
+ b.StartTimer()
+ options := &Options{Quality: 90}
+ for i := 0; i < b.N; i++ {
+ Encode(ioutil.Discard, img, options)
+ }
+}
To view, visit change 34773. To unsubscribe, visit settings.
TryBots beginning. Status page: http://farmer.golang.org/try?commit=7f3c9b09
Brad Fitzpatrick posted comments on this change.
Patch set 9:Run-TryBot +1