diff --git a/src/index/suffixarray/suffixarray.go b/src/index/suffixarray/suffixarray.go
index 2b96c8b..5541f09 100644
--- a/src/index/suffixarray/suffixarray.go
+++ b/src/index/suffixarray/suffixarray.go
@@ -117,7 +117,7 @@
return
}
-var errTooBig = errors.New("suffixarray: data too large")
+var errCorrupted = errors.New("suffixarray: data corrupted")
// readSlice reads data[:n] from r and returns n.
// It uses buf to buffer the read.
@@ -130,7 +130,7 @@
}
if int64(int(size64)) != size64 || int(size64) < 0 {
// We never write chunks this big anyway.
- return 0, errTooBig
+ return 0, errCorrupted
}
size := int(size64)
@@ -140,8 +140,14 @@
}
// decode as many elements as present in buf
+ len := data.len()
for p := binary.MaxVarintLen64; p < size; n++ {
x, w := binary.Uvarint(buf[p:])
+ // prevent index-out-of-bounds error if data is corrupted
+ // (was go.dev/issue/53352)
+ if n >= len {
+ return n, errCorrupted
+ }
data.set(n, int64(x))
p += w
}
@@ -162,7 +168,7 @@
return err
}
if int64(int(n64)) != n64 || int(n64) < 0 {
- return errTooBig
+ return errCorrupted
}
n := int(n64)
diff --git a/src/index/suffixarray/suffixarray_test.go b/src/index/suffixarray/suffixarray_test.go
index da092a7..68e4258 100644
--- a/src/index/suffixarray/suffixarray_test.go
+++ b/src/index/suffixarray/suffixarray_test.go
@@ -614,3 +614,52 @@
})
}
}
+
+func TestIssue53352(t *testing.T) {
+ data := []byte("x")
+ index := New(data)
+ var buf bytes.Buffer
+ if err := index.Write(&buf); err != nil {
+ t.Fatal(err)
+ }
+
+ // The encoding written to buf above is laid out as follows:
+ //
+ // [ data length n | data bytes | index slice buffer size | indices ] ... (next slice, if any)
+ // \__ 10 bytes __/\_ n bytes _/\_______ 10 bytes _______/\_ uvarints /
+ //
+ // n and the index slice buffer size s are encoded as varints, always using 10 bytes, so they can be patched.
+ // For small values x >= 0 the varint encoded value is 2*x.
+
+ // For the above data we have n == len("x") == 1, so the first encoded byte must be 2*n.
+ n := len(data)
+ encoding := buf.Bytes()
+ if got := int(encoding[0]); got != 2*n {
+ t.Fatalf("got n = %d; want %d", got, 2*n)
+ }
+
+ // For the above data, the index slice buffer contains a single index entry (0 for "x")
+ // plus the size of the index buffer (10 bytes), so s == 10 + 1 == 11. s is encoded
+ // immediately following the data bytes, i.e. starting at offset 10+n.
+ s := 10 + 1
+ if got := int(encoding[10+n]); got != 2*s {
+ t.Fatalf("got s = %d; want %d", got, 2*s)
+ }
+
+ // Sanity check: reading back the unmodified encoding should work without errors.
+ if err := index.Read(bytes.NewBuffer(encoding)); err != nil {
+ t.Fatal(err)
+ }
+
+ // Appending an extra index corrupts the format:
+ // there is now one more index than there are data bytes.
+ s++ // increase slice buffer size
+ encoding = append(encoding, 0) // add one more index
+ encoding[10+n] = byte(2 * s) // patch the encoded index slice buffer size (varint of s is 2*s)
+
+ // Reading back the corrupted encoding should report errCorrupted.
+ // Before fixing go.dev/issue/53352, this resulted in an index-out-of-range panic.
+ if err := index.Read(bytes.NewBuffer(encoding)); err == nil || err.Error() != errCorrupted.Error() {
+ t.Fatalf("got %q; want %q", err, errCorrupted)
+ }
+}