[go] encoding: require unique alphabet for base32 and base64

460 views
Skip to first unread message

Gopher Robot (Gerrit)

unread,
Aug 17, 2023, 10:08:09 PM (8/17/23)
to Joseph Tsai, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Dmitri Shuralyov, Dmitri Shuralyov, Ian Lance Taylor, golang-co...@googlegroups.com

Gopher Robot submitted this change.

View Change



1 is the latest approved patch-set.
The change was submitted with unreviewed changes in the following files:

```
The name of the file: src/encoding/base32/base32.go
Insertions: 2, Deletions: 2.

@@ -7,6 +7,7 @@

import (
"io"
+ "slices"
"strconv"
)

@@ -77,7 +78,7 @@
case e.decodeMap[encoder[i]] != invalidIndex:
panic("encoding alphabet includes duplicate symbols")
}
- e.decodeMap[encoder[i]] = byte(i)
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}
@@ -92,14 +93,14 @@

// WithPadding creates a new encoding identical to enc except
// with a specified padding character, or NoPadding to disable padding.
-// The padding character must not be '\r' or '\n', must not
-// be contained in the encoding's alphabet and must be a rune equal or
-// below '\xff'.
+// The padding character must not be '\r' or '\n',
+// must not be contained in the encoding's alphabet,
+// must not be negative, and must be a rune equal or below '\xff'.
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
switch {
- case padding == '\r' || padding == '\n' || padding > 0xff:
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
panic("padding contained in alphabet")
@@ -186,6 +187,15 @@
}
}

+// AppendEncode appends the base32 encoded src to dst
+// and returns the extended buffer.
+func (enc *Encoding) AppendEncode(dst, src []byte) []byte {
+ n := enc.EncodedLen(len(src))
+ dst = slices.Grow(dst, n)
+ enc.Encode(dst[len(dst):][:n], src)
+ return dst[:len(dst)+n]
+}
+
// EncodeToString returns the base32 encoding of src.
func (enc *Encoding) EncodeToString(src []byte) string {
buf := make([]byte, enc.EncodedLen(len(src)))
@@ -388,6 +398,16 @@
return
}

+// AppendDecode appends the base32 decoded src to dst
+// and returns the extended buffer.
+// If the input is malformed, it returns the partially decoded src and an error.
+func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) {
+ n := enc.DecodedLen(len(src))
+ dst = slices.Grow(dst, n)
+ n, err := enc.Decode(dst[len(dst):][:n], src)
+ return dst[:len(dst)+n], err
+}
+
// DecodeString returns the bytes represented by the base32 string s.
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
buf := []byte(s)
```
```
The name of the file: src/encoding/base64/base64.go
Insertions: 2, Deletions: 2.

@@ -8,6 +8,7 @@
import (
"encoding/binary"
"io"
+ "slices"
"strconv"
)

@@ -80,21 +81,21 @@
case e.decodeMap[encoder[i]] != invalidIndex:
panic("encoding alphabet includes duplicate symbols")
}
- e.decodeMap[encoder[i]] = byte(i)
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}

// WithPadding creates a new encoding identical to enc except
// with a specified padding character, or NoPadding to disable padding.
-// The padding character must not be '\r' or '\n', must not
-// be contained in the encoding's alphabet and must be a rune equal or
-// below '\xff'.
+// The padding character must not be '\r' or '\n',
+// must not be contained in the encoding's alphabet,
+// must not be negative, and must be a rune equal or below '\xff'.
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
switch {
- case padding == '\r' || padding == '\n' || padding > 0xff:
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
panic("padding contained in alphabet")
@@ -193,6 +194,15 @@
}
}

+// AppendEncode appends the base64 encoded src to dst
+// and returns the extended buffer.
+func (enc *Encoding) AppendEncode(dst, src []byte) []byte {
+ n := enc.EncodedLen(len(src))
+ dst = slices.Grow(dst, n)
+ enc.Encode(dst[len(dst):][:n], src)
+ return dst[:len(dst)+n]
+}
+
// EncodeToString returns the base64 encoding of src.
func (enc *Encoding) EncodeToString(src []byte) string {
buf := make([]byte, enc.EncodedLen(len(src)))
@@ -397,6 +407,16 @@
return si, dlen - 1, err
}

+// AppendDecode appends the base64 decoded src to dst
+// and returns the extended buffer.
+// If the input is malformed, it returns the partially decoded src and an error.
+func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) {
+ n := enc.DecodedLen(len(src))
+ dst = slices.Grow(dst, n)
+ n, err := enc.Decode(dst[len(dst):][:n], src)
+ return dst[:len(dst)+n], err
+}
+
// DecodeString returns the bytes represented by the base64 string s.
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
dbuf := make([]byte, enc.DecodedLen(len(s)))
```

Approvals:
  Dmitri Shuralyov: Looks good to me, but someone else must approve
  Ian Lance Taylor: Looks good to me, approved
  Joseph Tsai: Run TryBots; Automatically submit change
  Gopher Robot: TryBots succeeded
encoding: require unique alphabet for base32 and base64

In order for decoding to faithfully reproduce the encoded input,
the symbols must be unique (i.e., provide a bijective mapping).
Thus, reject duplicate symbols in NewEncoding.

As a minor optimization, modify WithPadding to use the decodeMap
to quickly check whether the padding character is used in O(1)
instead of O(32) or O(64).

Change-Id: I5631f6ff9335c35d59d020dc0e307e3520786fbc
Reviewed-on: https://go-review.googlesource.com/c/go/+/520335
Reviewed-by: Dmitri Shuralyov <dmit...@google.com>
Auto-Submit: Joseph Tsai <joe...@digital-static.net>
TryBot-Result: Gopher Robot <go...@golang.org>
Reviewed-by: Ian Lance Taylor <ia...@google.com>
Run-TryBot: Joseph Tsai <joe...@digital-static.net>
---
M src/encoding/base32/base32.go
M src/encoding/base64/base64.go
2 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go
index 6e23607..de95df0 100644
--- a/src/encoding/base32/base32.go
+++ b/src/encoding/base32/base32.go
@@ -20,8 +20,8 @@
// introduced for SASL GSSAPI and standardized in RFC 4648.
// The alternate "base32hex" encoding is used in DNSSEC.
type Encoding struct {
- encode [32]byte
- decodeMap [256]byte
+ encode [32]byte // mapping of symbol index to symbol byte value
+ decodeMap [256]uint8 // mapping of symbol byte value to symbol index
padChar rune
}

@@ -45,14 +45,19 @@
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ invalidIndex = '\xff'
)

const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"

-// NewEncoding returns a new Encoding defined by the given alphabet,
-// which must be a 32-byte string. The alphabet is treated as sequence
-// of byte values without any special treatment for multi-byte UTF-8.
+// NewEncoding returns a new padded Encoding defined by the given alphabet,
+// which must be a 32-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
+// without any special treatment for multi-byte UTF-8.
+// The resulting Encoding uses the default padding character ('='),
+// which may be changed or disabled via WithPadding.
func NewEncoding(encoder string) *Encoding {
if len(encoder) != 32 {
panic("encoding alphabet is not 32-bytes long")
@@ -64,7 +69,16 @@
copy(e.decodeMap[:], decodeMapInitialize)

for i := 0; i < len(encoder); i++ {
- e.decodeMap[encoder[i]] = byte(i)
+ // Note: While we document that the alphabet cannot contain
+ // the padding character, we do not enforce it since we do not know
+ // if the caller intends to switch the padding from StdPadding later.
+ switch {
+ case encoder[i] == '\n' || encoder[i] == '\r':
+ panic("encoding alphabet contains newline character")
+ case e.decodeMap[encoder[i]] != invalidIndex:
+ panic("encoding alphabet includes duplicate symbols")
+ }
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}
@@ -85,16 +99,12 @@
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
- if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+ switch {
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
+ case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+ panic("padding contained in alphabet")
}
-
- for i := 0; i < len(enc.encode); i++ {
- if rune(enc.encode[i]) == padding {
- panic("padding contained in alphabet")
- }
- }
-
enc.padChar = padding
return &enc
}
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go
index 28ed7a0..802ef14 100644
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -22,8 +22,8 @@
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
- encode [64]byte
- decodeMap [256]byte
+ encode [64]byte // mapping of symbol index to symbol byte value
+ decodeMap [256]uint8 // mapping of symbol byte value to symbol index
padChar rune
strict bool
}
@@ -48,14 +48,16 @@
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ invalidIndex = '\xff'
)

const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

// NewEncoding returns a new padded Encoding defined by the given alphabet,
-// which must be a 64-byte string that does not contain the padding character
-// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
+// which must be a 64-byte string that contains unique byte values and
+// does not contain the padding character or CR / LF ('\r', '\n').
+// The alphabet is treated as a sequence of byte values
// without any special treatment for multi-byte UTF-8.
// The resulting Encoding uses the default padding character ('='),
// which may be changed or disabled via WithPadding.
@@ -63,11 +65,6 @@
if len(encoder) != 64 {
panic("encoding alphabet is not 64-bytes long")
}
- for i := 0; i < len(encoder); i++ {
- if encoder[i] == '\n' || encoder[i] == '\r' {
- panic("encoding alphabet contains newline character")
- }
- }

e := new(Encoding)
e.padChar = StdPadding
@@ -75,7 +72,16 @@
copy(e.decodeMap[:], decodeMapInitialize)

for i := 0; i < len(encoder); i++ {
- e.decodeMap[encoder[i]] = byte(i)
+ // Note: While we document that the alphabet cannot contain
+ // the padding character, we do not enforce it since we do not know
+ // if the caller intends to switch the padding from StdPadding later.
+ switch {
+ case encoder[i] == '\n' || encoder[i] == '\r':
+ panic("encoding alphabet contains newline character")
+ case e.decodeMap[encoder[i]] != invalidIndex:
+ panic("encoding alphabet includes duplicate symbols")
+ }
+ e.decodeMap[encoder[i]] = uint8(i)
}
return e
}
@@ -88,16 +94,12 @@
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
- if padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff {
+ switch {
+ case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff:
panic("invalid padding")
+ case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex:
+ panic("padding contained in alphabet")
}
-
- for i := 0; i < len(enc.encode); i++ {
- if rune(enc.encode[i]) == padding {
- panic("padding contained in alphabet")
- }
- }
-
enc.padChar = padding
return &enc
}

To view, visit change 520335. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: merged
Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I5631f6ff9335c35d59d020dc0e307e3520786fbc
Gerrit-Change-Number: 520335
Gerrit-PatchSet: 4
Gerrit-Owner: Joseph Tsai <joe...@digital-static.net>
Gerrit-Reviewer: Dmitri Shuralyov <dmit...@google.com>
Gerrit-Reviewer: Gopher Robot <go...@golang.org>
Gerrit-Reviewer: Ian Lance Taylor <ia...@golang.org>
Gerrit-Reviewer: Ian Lance Taylor <ia...@google.com>
Gerrit-Reviewer: Joseph Tsai <joe...@digital-static.net>
Gerrit-CC: Dmitri Shuralyov <dmit...@golang.org>
Reply all
Reply to author
Forward
0 new messages