[go] encoding: require unique alphabet for base32 and base64

460 views

Skip to first unread message

Gopher Robot (Gerrit)

unread,

Aug 17, 2023, 10:08:09 PM8/17/23

to Joseph Tsai, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Dmitri Shuralyov, Dmitri Shuralyov, Ian Lance Taylor, golang-co...@googlegroups.com

Gopher Robot submitted this change.

View Change



1 is the latest approved patch-set.
The change was submitted with unreviewed changes in the following files:

```
The name of the file: src/encoding/base32/base32.go
Insertions: 2, Deletions: 2.

@@ -7,6 +7,7 @@
 
 import (
 	"io"
+	"slices"
 	"strconv"
 )
 
@@ -77,7 +78,7 @@
 		case e.decodeMap[encoder[i]] != invalidIndex:
 			panic("encoding alphabet includes duplicate symbols")
 		}
-		e.decodeMap[encoder[i]] = byte(i)
+		e.decodeMap[encoder[i]] = uint8(i)
 	}
 	return e
 }
@@ -92,14 +93,14 @@
 
 // WithPadding creates a new encoding identical to enc except
 // with a specified padding character, or NoPadding to disable padding.
-// The padding character must not be '\r' or '\n', must not
-// be contained in the encoding's alphabet and must be a rune equal or
-// below '\xff'.
+// The padding character must not be '\r' or '\n',
+// must not be contained in the encoding's alphabet,
+// must not be negative, and must be a rune equal or below '\xff'.
 // Padding characters above '\x7f' are encoded as their exact byte value
 // rather than using the UTF-8 representation of the codepoint.
 func (enc Encoding) WithPadding(padding rune) *Encoding {
 	switch {
-	case padding == '\r' || padding == '\n' || padding > 0xff:
+	case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
 		panic("invalid padding")
 	case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
 		panic("padding contained in alphabet")
@@ -186,6 +187,15 @@
 	}
 }
 
+// AppendEncode appends the base32 encoded src to dst
+// and returns the extended buffer.
+func (enc *Encoding) AppendEncode(dst, src []byte) []byte {
+	n := enc.EncodedLen(len(src))
+	dst = slices.Grow(dst, n)
+	enc.Encode(dst[len(dst):][:n], src)
+	return dst[:len(dst)+n]
+}
+
 // EncodeToString returns the base32 encoding of src.
 func (enc *Encoding) EncodeToString(src []byte) string {
 	buf := make([]byte, enc.EncodedLen(len(src)))
@@ -388,6 +398,16 @@
 	return
 }
 
+// AppendDecode appends the base32 decoded src to dst
+// and returns the extended buffer.
+// If the input is malformed, it returns the partially decoded src and an error.
+func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) {
+	n := enc.DecodedLen(len(src))
+	dst = slices.Grow(dst, n)
+	n, err := enc.Decode(dst[len(dst):][:n], src)
+	return dst[:len(dst)+n], err
+}
+
 // DecodeString returns the bytes represented by the base32 string s.
 func (enc *Encoding) DecodeString(s string) ([]byte, error) {
 	buf := []byte(s)
```
```
The name of the file: src/encoding/base64/base64.go
Insertions: 2, Deletions: 2.

@@ -8,6 +8,7 @@
 import (
 	"encoding/binary"
 	"io"
+	"slices"
 	"strconv"
 )
 
@@ -80,21 +81,21 @@
 		case e.decodeMap[encoder[i]] != invalidIndex:
 			panic("encoding alphabet includes duplicate symbols")
 		}
-		e.decodeMap[encoder[i]] = byte(i)
+		e.decodeMap[encoder[i]] = uint8(i)
 	}
 	return e
 }
 
 // WithPadding creates a new encoding identical to enc except
 // with a specified padding character, or NoPadding to disable padding.
-// The padding character must not be '\r' or '\n', must not
-// be contained in the encoding's alphabet and must be a rune equal or
-// below '\xff'.
+// The padding character must not be '\r' or '\n',
+// must not be contained in the encoding's alphabet,
+// must not be negative, and must be a rune equal or below '\xff'.
 // Padding characters above '\x7f' are encoded as their exact byte value
 // rather than using the UTF-8 representation of the codepoint.
 func (enc Encoding) WithPadding(padding rune) *Encoding {
 	switch {
-	case padding == '\r' || padding == '\n' || padding > 0xff:
+	case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
 		panic("invalid padding")
 	case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
 		panic("padding contained in alphabet")
@@ -193,6 +194,15 @@
 	}
 }
 
+// AppendEncode appends the base64 encoded src to dst
+// and returns the extended buffer.
+func (enc *Encoding) AppendEncode(dst, src []byte) []byte {
+	n := enc.EncodedLen(len(src))
+	dst = slices.Grow(dst, n)
+	enc.Encode(dst[len(dst):][:n], src)
+	return dst[:len(dst)+n]
+}
+
 // EncodeToString returns the base64 encoding of src.
 func (enc *Encoding) EncodeToString(src []byte) string {
 	buf := make([]byte, enc.EncodedLen(len(src)))
@@ -397,6 +407,16 @@
 	return si, dlen - 1, err
 }
 
+// AppendDecode appends the base64 decoded src to dst
+// and returns the extended buffer.
+// If the input is malformed, it returns the partially decoded src and an error.
+func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) {
+	n := enc.DecodedLen(len(src))
+	dst = slices.Grow(dst, n)
+	n, err := enc.Decode(dst[len(dst):][:n], src)
+	return dst[:len(dst)+n], err
+}
+
 // DecodeString returns the bytes represented by the base64 string s.
 func (enc *Encoding) DecodeString(s string) ([]byte, error) {
 	dbuf := make([]byte, enc.DecodedLen(len(s)))
```

Approvals:
  Dmitri Shuralyov: Looks good to me, but someone else must approve
  Ian Lance Taylor: Looks good to me, approved
  Joseph Tsai: Run TryBots; Automatically submit change
  Gopher Robot: TryBots succeeded

encoding: require unique alphabet for base32 and base64

In order for decoding to faithfully reproduce the encoded input,
the symbols must be unique (i.e., provide a bijective mapping).
Thus, reject duplicate symbols in NewEncoding.

As a minor optimization, modify WithPadding to use the decodeMap
to quickly check whether the padding character is used in O(1)
instead of O(32) or O(64).

Change-Id: I5631f6ff9335c35d59d020dc0e307e3520786fbc
Reviewed-on: https://go-review.googlesource.com/c/go/+/520335
Reviewed-by: Dmitri Shuralyov <dmit...@google.com>
Auto-Submit: Joseph Tsai <joe...@digital-static.net>
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Ian Lance Taylor <ia...@google.com>
Run-TryBot: Joseph Tsai <joe...@digital-static.net>
---
M src/encoding/base32/base32.go
M src/encoding/base64/base64.go
2 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go
index 6e23607..de95df0 100644
--- a/src/encoding/base32/base32.go
+++ b/src/encoding/base32/base32.go
@@ -20,8 +20,8 @@
 // introduced for SASL GSSAPI and standardized in RFC 4648.
 // The alternate "base32hex" encoding is used in DNSSEC.
 type Encoding struct {
-	encode    [32]byte
-	decodeMap [256]byte
+	encode    [32]byte   // mapping of symbol index to symbol byte value
+	decodeMap [256]uint8 // mapping of symbol byte value to symbol index
 	padChar   rune
 }
 
@@ -45,14 +45,19 @@
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+	invalidIndex = '\xff'
 )
 
 const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
 const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
 
-// NewEncoding returns a new Encoding defined by the given alphabet,
-// which must be a 32-byte string. The alphabet is treated as sequence
-// of byte values without any special treatment for multi-byte UTF-8.
+// NewEncoding returns a new padded Encoding defined by the given alphabet,
+// which must be a 32-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
+// without any special treatment for multi-byte UTF-8.
+// The resulting Encoding uses the default padding character ('='),
+// which may be changed or disabled via WithPadding.
 func NewEncoding(encoder string) *Encoding {
 	if len(encoder) != 32 {
 		panic("encoding alphabet is not 32-bytes long")
@@ -64,7 +69,16 @@
 	copy(e.decodeMap[:], decodeMapInitialize)
 
 	for i := 0; i < len(encoder); i++ {
-		e.decodeMap[encoder[i]] = byte(i)
+		// Note: While we document that the alphabet cannot contain
+		// the padding character, we do not enforce it since we do not know
+		// if the caller intends to switch the padding from StdPadding later.
+		switch {
+		case encoder[i] == '\n' || encoder[i] == '\r':
+			panic("encoding alphabet contains newline character")
+		case e.decodeMap[encoder[i]] != invalidIndex:
+			panic("encoding alphabet includes duplicate symbols")
+		}
+		e.decodeMap[encoder[i]] = uint8(i)
 	}
 	return e
 }
@@ -85,16 +99,12 @@
 // Padding characters above '\x7f' are encoded as their exact byte value
 // rather than using the UTF-8 representation of the codepoint.
 func (enc Encoding) WithPadding(padding rune) *Encoding {
-	if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+	switch {
+	case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
 		panic("invalid padding")
+	case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+		panic("padding contained in alphabet")
 	}
-
-	for i := 0; i < len(enc.encode); i++ {
-		if rune(enc.encode[i]) == padding {
-			panic("padding contained in alphabet")
-		}
-	}
-
 	enc.padChar = padding
 	return &enc
 }
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go
index 28ed7a0..802ef14 100644
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -22,8 +22,8 @@
 // (RFC 1421).  RFC 4648 also defines an alternate encoding, which is
 // the standard encoding with - and _ substituted for + and /.
 type Encoding struct {
-	encode    [64]byte
-	decodeMap [256]byte
+	encode    [64]byte   // mapping of symbol index to symbol byte value
+	decodeMap [256]uint8 // mapping of symbol byte value to symbol index
 	padChar   rune
 	strict    bool
 }
@@ -48,14 +48,16 @@
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
 		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+	invalidIndex = '\xff'
 )
 
 const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
 
 // NewEncoding returns a new padded Encoding defined by the given alphabet,
-// which must be a 64-byte string that does not contain the padding character
-// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
+// which must be a 64-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
 // without any special treatment for multi-byte UTF-8.
 // The resulting Encoding uses the default padding character ('='),
 // which may be changed or disabled via WithPadding.
@@ -63,11 +65,6 @@
 	if len(encoder) != 64 {
 		panic("encoding alphabet is not 64-bytes long")
 	}
-	for i := 0; i < len(encoder); i++ {
-		if encoder[i] == '\n' || encoder[i] == '\r' {
-			panic("encoding alphabet contains newline character")
-		}
-	}
 
 	e := new(Encoding)
 	e.padChar = StdPadding
@@ -75,7 +72,16 @@
 	copy(e.decodeMap[:], decodeMapInitialize)
 
 	for i := 0; i < len(encoder); i++ {
-		e.decodeMap[encoder[i]] = byte(i)
+		// Note: While we document that the alphabet cannot contain
+		// the padding character, we do not enforce it since we do not know
+		// if the caller intends to switch the padding from StdPadding later.
+		switch {
+		case encoder[i] == '\n' || encoder[i] == '\r':
+			panic("encoding alphabet contains newline character")
+		case e.decodeMap[encoder[i]] != invalidIndex:
+			panic("encoding alphabet includes duplicate symbols")
+		}
+		e.decodeMap[encoder[i]] = uint8(i)
 	}
 	return e
 }
@@ -88,16 +94,12 @@
 // Padding characters above '\x7f' are encoded as their exact byte value
 // rather than using the UTF-8 representation of the codepoint.
 func (enc Encoding) WithPadding(padding rune) *Encoding {
-	if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+	switch {
+	case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
 		panic("invalid padding")
+	case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+		panic("padding contained in alphabet")
 	}
-
-	for i := 0; i < len(enc.encode); i++ {
-		if rune(enc.encode[i]) == padding {
-			panic("padding contained in alphabet")
-		}
-	}
-
 	enc.padChar = padding
 	return &enc
 }

To view, visit change 520335. To unsubscribe, or for help writing mail filters, visit settings.

Reply all

Reply to author

Forward

0 new messages