image/jpeg: implement a more forgiving restart marker parser
inspired by Lua libjpeg to fix bad RST marker issues
Improves handling of restart (RST) markers in JPEG data. Particularly addressing issues
around spurious data before a marker, but also improving robust handling of unexpected
markers, following the philosophy used in Lua libjpeg. Also adds support for padding
bytes before a marker.
This is mostly a revamp of changes Ibbbdb7f4c2217c46dc2789d5799fd0226babdc08 and Ia9cb7bdefbeea44531f34650d49a173b7aeb3355.
Fixes #40130
diff --git a/src/image/jpeg/reader_test.go b/src/image/jpeg/reader_test.go
index cdac2dd..a675f75 100644
--- a/src/image/jpeg/reader_test.go
+++ b/src/image/jpeg/reader_test.go
@@ -504,6 +504,21 @@
}
}
+func TestBadRestartMarker(t *testing.T) {
+ // Image sourced from: https://github.com/photoprism/photoprism/issues/1673
+ // Referenced here: https://go-review.googlesource.com/c/go/+/260837
+
+ f, err := os.Open("../testdata/jpeg-bad-rst-marker-001.jpeg")
+ if err != nil {
+ t.Fatalf("Open: %v", err)
+ }
+
+ defer f.Close()
+ if _, err = Decode(f); err != nil {
+ t.Fatalf("Decode: %v", err)
+ }
+}
+
func benchmarkDecode(b *testing.B, filename string) {
data, err := os.ReadFile(filename)
if err != nil {
diff --git a/src/image/jpeg/scan.go b/src/image/jpeg/scan.go
index 94f3d3a..31a63b0 100644
--- a/src/image/jpeg/scan.go
+++ b/src/image/jpeg/scan.go
@@ -305,37 +305,80 @@
} // for i
mcu++
if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy {
- // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,
- // but this one assumes well-formed input, and hence the restart marker follows immediately.
- if err := d.readFull(d.tmp[:2]); err != nil {
- return err
- }
+ var err error
- // Section F.1.2.3 says that "Byte alignment of markers is
- // achieved by padding incomplete bytes with 1-bits. If padding
- // with 1-bits creates a X’FF’ value, a zero byte is stuffed
- // before adding the marker."
- //
- // Seeing "\xff\x00" here is not spec compliant, as we are not
- // expecting an *incomplete* byte (that needed padding). Still,
- // some real world encoders (see golang.org/issue/28717) insert
- // it, so we accept it and re-try the 2 byte read.
- //
- // libjpeg issues a warning (but not an error) for this:
- // https://github.com/LuaDist/libjpeg/blob/6c0fcb8ddee365e7abc4d332662b06900612e923/jdmarker.c#L1041-L1046
- if d.tmp[0] == 0xff && d.tmp[1] == 0x00 {
- if err := d.readFull(d.tmp[:2]); err != nil {
+ // Find the next restart marker. It should be the first byte,
+ // but that's not always the case. This loop scans through the
+ // bytes until the right marker is found, or at least an acceptable
+ // marker is found (leaving the rest up to the entropy decoder)
+ // Based on the logic here: https://github.com/LuaDist/libjpeg/blob/6c0fcb8ddee365e7abc4d332662b06900612e923/jdmarker.c#L1293-L1340
+ for {
+ // Read the first byte to decide what to do next.
+ d.tmp[0], err = d.readByte()
+ if err != nil {
return err
}
+
+ // Discard non 0xFF (if we're at a marker there shouldn't be any).
+ for d.tmp[0] != 0xff {
+ d.tmp[0], err = d.readByte()
+ if err != nil {
+ return err
+ }
+ }
+
+ // Discard duplicate 0xFF's (padding bytes).
+ for d.tmp[0] == 0xff {
+ d.tmp[0], err = d.readByte()
+ if err != nil {
+ return err
+ }
+ }
+
+ // Check for a stuffed zero and start over if we find one.
+ // Section F.1.2.3 says that "Byte alignment of markers is
+ // achieved by padding incomplete bytes with 1-bits. If padding
+ // with 1-bits creates a X’FF’ value, a zero byte is stuffed
+ // before adding the marker."
+ //
+ // Seeing "\xff\x00" here is not spec compliant, as we are not
+ // expecting an *incomplete* byte (that needed padding). Still,
+ // some real world encoders (see golang.org/issue/28717) insert
+ // it, so we accept it and re-try the 2 byte read.
+ //
+ // libjpeg issues a warning (but not an error) for this:
+ // https://github.com/LuaDist/libjpeg/blob/6c0fcb8ddee365e7abc4d332662b06900612e923/jdmarker.c#L1041-L1046
+ if d.tmp[0] == 0x00 {
+ continue
+ }
+
+ // If it's the correct restart marker, increment and break
+ // out to reset Huffman.
+ if d.tmp[0] == expectedRST {
+ expectedRST++
+ if expectedRST == rst7Marker+1 {
+ expectedRST = rst0Marker
+ }
+ break
+ }
+
+ // If the restart marker is within 2 of what is expected,
+ // keep scanning until another marker is found.
+ if d.abs(int(d.tmp[0])-int(expectedRST)) < 3 {
+ continue
+ }
+
+ // If the marker is valid, but not a restart marker, switch
+ // back to the entropy decoder and let it handle the marker.
+ if (d.tmp[0] >= sof0Marker && d.tmp[0] < rst0Marker) || (d.tmp[0] > rst7Marker) {
+ break
+ }
+
+ // If this code is reached, the marker is invalid, most
+ // likely < 0xC0/sof0Marker random data. Continue the process
+ // and strip data to search for something valid.
}
- if d.tmp[0] != 0xff || d.tmp[1] != expectedRST {
- return FormatError("bad RST marker")
- }
- expectedRST++
- if expectedRST == rst7Marker+1 {
- expectedRST = rst0Marker
- }
// Reset the Huffman decoder.
d.bits = bits{}
// Reset the DC components, as per section F.2.1.3.1.
@@ -521,3 +564,13 @@
}
return nil
}
+
+// Abs returns the absolute value of a given int.
+// This is a quick function to save a bunch of additional includes to cast
+// bytes to float64 for math.Abs.
+func (d *decoder) abs(x int) int {
+ if x < 0 {
+ return -x
+ }
+ return x
+}
diff --git a/src/image/testdata/jpeg-bad-rst-marker-001.jpeg b/src/image/testdata/jpeg-bad-rst-marker-001.jpeg
new file mode 100644
index 0000000..a0305fb
--- /dev/null
+++ b/src/image/testdata/jpeg-bad-rst-marker-001.jpeg
Binary files differ
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I know the test image is very big, but I currently have no idea how to generate a test file that has the same behavior.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I agree that the test image is too large for the repo. We need a different approach.
This is mostly a revamp of changes Ibbbdb7f4c2217c46dc2789d5799fd0226babdc08 and Ia9cb7bdefbeea44531f34650d49a173b7aeb3355.Write this as
This is mostly a revamp of CL 382754 and CL 260837.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Jeroen BobbeldijkI agree that the test image is too large for the repo. We need a different approach.
I sadly don't know enough about JPEG to purposely make a working image that has the same issue.
This is mostly a revamp of changes Ibbbdb7f4c2217c46dc2789d5799fd0226babdc08 and Ia9cb7bdefbeea44531f34650d49a173b7aeb3355.Write this as
This is mostly a revamp of CL 382754 and CL 260837.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Jeroen BobbeldijkI agree that the test image is too large for the repo. We need a different approach.
I sadly don't know enough about JPEG to purposely make a working image that has the same issue.
Hey all, sorry for chiming in randomly, but could you try the fix with this image? I also get the bad RST marker error with it. It is much smaller: https://drive.google.com/file/d/1jTEjZ6a8BQOWiHckU5Hopf-nQcPswREJ/view?usp=sharing
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Jeroen BobbeldijkI agree that the test image is too large for the repo. We need a different approach.
Sean YuI sadly don't know enough about JPEG to purposely make a working image that has the same issue.
Hey all, sorry for chiming in randomly, but could you try the fix with this image? I also get the bad RST marker error with it. It is much smaller: https://drive.google.com/file/d/1jTEjZ6a8BQOWiHckU5Hopf-nQcPswREJ/view?usp=sharing
Managed to get an even smaller one https://drive.google.com/file/d/1ACOGO6EhEqXH5dgWdiIM6OuARaprnDhs/view?usp=sharing
640x480
63kb
if d.abs(int(d.tmp[0])-int(expectedRST)) < 3 {As I said previously:
Abs isn't the right calculation. For example, RST0 and RST7 marker values are 0xD0 and 0xD7 and are "within 2" of each other, conceptually, but the absolute value of the difference is 7, which fails this "less than 3" test.
https://go-review.googlesource.com/c/go/+/382754/1/src/image/jpeg/scan.go#379
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |