[protobuf] encoding/protojson: improve marshal performance

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 2:34:28 PM12/19/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Seba Chamena has uploaded this change for review.

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements

Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 238 insertions(+), 75 deletions(-)

diff --git a/encoding/protojson/bench_test.go b/encoding/protojson/bench_test.go
index 8d5a0bb..fbeedad 100644
--- a/encoding/protojson/bench_test.go
+++ b/encoding/protojson/bench_test.go
@@ -5,11 +5,13 @@
 package protojson_test
 
 import (
+	"google.golang.org/protobuf/types/known/durationpb"
+	"math"
 	"testing"
 
-	"google.golang.org/protobuf/encoding/protojson"
+	pb3 "google.golang.org/protobuf/internal/testprotos/textpb3"
 
-	"google.golang.org/protobuf/types/known/durationpb"
+	"google.golang.org/protobuf/encoding/protojson"
 )
 
 func BenchmarkUnmarshal_Duration(b *testing.B) {
@@ -22,3 +24,39 @@
 		}
 	}
 }
+
+var scalarSample = pb3.Scalars{
+	SBool:     true,
+	SInt32:    math.MaxInt32,
+	SInt64:    math.MaxInt64,
+	SUint64:   math.MaxUint64,
+	SUint32:   2130321213,
+	SSint32:   23132131,
+	SSint64:   3213232123812,
+	SFixed32:  4232943192,
+	SFixed64:  2389182192312,
+	SSfixed32: 281432823,
+	SSfixed64: 23919321921,
+	SFloat:    3213213.2931,
+	SDouble:   39121.321321,
+	SBytes:    []byte("foo_sbytes"),
+	SString:   "foo_sstring",
+}
+
+func BenchmarkMarshal_ScalarsUnordered(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_, err := protojson.MarshalOptions{Unordered: true}.Marshal(&scalarSample)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkMarshal_Scalars(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_, err := protojson.MarshalOptions{Unordered: true}.Marshal(&scalarSample)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/encoding/protojson/encode.go b/encoding/protojson/encode.go
index ba971f0..b957656 100644
--- a/encoding/protojson/encode.go
+++ b/encoding/protojson/encode.go
@@ -5,8 +5,8 @@
 package protojson
 
 import (
-	"encoding/base64"
 	"fmt"
+	"sync"
 
 	"google.golang.org/protobuf/internal/encoding/json"
 	"google.golang.org/protobuf/internal/encoding/messageset"
@@ -88,6 +88,8 @@
 		protoregistry.ExtensionTypeResolver
 		protoregistry.MessageTypeResolver
 	}
+
+	Unordered bool
 }
 
 // Format formats the message as a string.
@@ -110,6 +112,30 @@
 	return o.marshal(m)
 }
 
+var encoderPool sync.Pool
+
+func getCachedEncoder() *json.Encoder {
+	v := encoderPool.Get()
+	if v == nil {
+		return nil
+	}
+	e := v.(*json.Encoder)
+	e.Reset()
+	return e
+}
+
+func newEncoder(indent string) (*json.Encoder, error) {
+	e := getCachedEncoder()
+	if e == nil {
+		e = json.NewEncoder()
+	}
+	err := e.SetIndent(indent)
+	if err != nil {
+		return nil, err
+	}
+	return e, nil
+}
+
 // marshal is a centralized function that all marshal operations go through.
 // For profiling purposes, avoid changing the name of this function or
 // introducing other code paths for marshal that do not go through this.
@@ -121,7 +147,7 @@
 		o.Resolver = protoregistry.GlobalTypes
 	}
 
-	internalEnc, err := json.NewEncoder(o.Indent)
+	internalEnc, err := newEncoder(o.Indent)
 	if err != nil {
 		return nil, err
 	}
@@ -139,6 +165,7 @@
 	if o.AllowPartial {
 		return enc.Bytes(), nil
 	}
+	encoderPool.Put(internalEnc)
 	return enc.Bytes(), proto.CheckInitialized(m)
 }
 
@@ -196,6 +223,13 @@
 	m.Message.Range(f)
 }
 
+func (e encoder) order() order.FieldOrder {
+	if e.opts.Unordered {
+		return nil
+	}
+	return order.IndexNameFieldOrder
+}
+
 // marshalMessage marshals the fields in the given protoreflect.Message.
 // If the typeURL is non-empty, then a synthetic "@type" field is injected
 // containing the URL as the value.
@@ -207,7 +241,6 @@
 	if marshal := wellKnownTypeMarshaler(m.Descriptor().FullName()); marshal != nil {
 		return marshal(e, m)
 	}
-
 	e.StartObject()
 	defer e.EndObject()
 
@@ -220,7 +253,7 @@
 	}
 
 	var err error
-	order.RangeFields(fields, order.IndexNameFieldOrder, func(fd pref.FieldDescriptor, v pref.Value) bool {
+	order.RangeFields(fields, e.order(), func(fd pref.FieldDescriptor, v pref.Value) bool {
 		name := fd.JSONName()
 		if e.opts.UseProtoNames {
 			name = fd.TextName()
@@ -272,22 +305,22 @@
 	case pref.Uint32Kind, pref.Fixed32Kind:
 		e.WriteUint(val.Uint())
 
-	case pref.Int64Kind, pref.Sint64Kind, pref.Uint64Kind,
-		pref.Sfixed64Kind, pref.Fixed64Kind:
-		// 64-bit integers are written out as JSON string.
-		e.WriteString(val.String())
+	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
+		e.WriteInt64(val.Int())
+
+	case pref.Uint64Kind, pref.Fixed64Kind:
+		e.WriteUint64(val.Uint())
 
 	case pref.FloatKind:
-		// Encoder.WriteFloat handles the special numbers NaN and infinites.
+		// Encoder.WriteFloat handles the special numbers NaN and infinities.
 		e.WriteFloat(val.Float(), 32)
 
 	case pref.DoubleKind:
-		// Encoder.WriteFloat handles the special numbers NaN and infinites.
+		// Encoder.WriteFloat handles the special numbers NaN and infinities.
 		e.WriteFloat(val.Float(), 64)
 
 	case pref.BytesKind:
-		e.WriteString(base64.StdEncoding.EncodeToString(val.Bytes()))
-
+		e.WriteByteSlice(val.Bytes())
 	case pref.EnumKind:
 		if fd.Enum().FullName() == genid.NullValue_enum_fullname {
 			e.WriteNull()
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
index fbdf348..8276ec5 100644
--- a/internal/encoding/json/encode.go
+++ b/internal/encoding/json/encode.go
@@ -5,10 +5,13 @@
 package json
 
 import (
+	"bytes"
+	"encoding/base64"
 	"math"
 	"math/bits"
 	"strconv"
 	"strings"
+	"sync"
 	"unicode/utf8"
 
 	"google.golang.org/protobuf/internal/detrand"
@@ -18,6 +21,8 @@
 // kind represents an encoding type.
 type kind uint8
 
+var encoderPool sync.Pool
+
 const (
 	_ kind = (1 << iota) / 2
 	name
@@ -34,42 +39,56 @@
 	indent   string
 	lastKind kind
 	indents  []byte
-	out      []byte
+	out      bytes.Buffer
+	scratch  [64]byte
+
+	ptrLevel uint
+	ptrSeen  map[interface{}]struct{}
+}
+
+func (e *Encoder) Reset() {
+	e.out.Reset()
+	e.lastKind = 0
+}
+
+func (e *Encoder) SetIndent(indent string) error {
+	if len(indent) == 0 {
+		e.indent = ""
+		return nil
+	}
+	if strings.Trim(indent, " \t") != "" {
+		return errors.New("indent may only be composed of space or tab characters")
+	}
+	e.indent = indent
+	return nil
 }
 
 // NewEncoder returns an Encoder.
 //
 // If indent is a non-empty string, it causes every entry for an Array or Object
 // to be preceded by the indent and trailed by a newline.
-func NewEncoder(indent string) (*Encoder, error) {
-	e := &Encoder{}
-	if len(indent) > 0 {
-		if strings.Trim(indent, " \t") != "" {
-			return nil, errors.New("indent may only be composed of space or tab characters")
-		}
-		e.indent = indent
-	}
-	return e, nil
+func NewEncoder() *Encoder {
+	return &Encoder{}
 }
 
 // Bytes returns the content of the written bytes.
 func (e *Encoder) Bytes() []byte {
-	return e.out
+	return e.out.Bytes()
 }
 
 // WriteNull writes out the null value.
 func (e *Encoder) WriteNull() {
 	e.prepareNext(scalar)
-	e.out = append(e.out, "null"...)
+	e.out.WriteString("null")
 }
 
 // WriteBool writes out the given boolean value.
 func (e *Encoder) WriteBool(b bool) {
 	e.prepareNext(scalar)
 	if b {
-		e.out = append(e.out, "true"...)
+		e.out.WriteString("true")
 	} else {
-		e.out = append(e.out, "false"...)
+		e.out.WriteString("false")
 	}
 }
 
@@ -78,51 +97,81 @@
 func (e *Encoder) WriteString(s string) error {
 	e.prepareNext(scalar)
 	var err error
-	if e.out, err = appendString(e.out, s); err != nil {
+	if err = e.appendString(s); err != nil {
 		return err
 	}
 	return nil
 }
 
+func (e *Encoder) WriteByteSlice(s []byte) {
+	e.prepareNext(scalar)
+
+	e.out.WriteByte('"')
+	encodedLen := base64.StdEncoding.EncodedLen(len(s))
+	if encodedLen <= len(e.scratch) {
+		// If the encoded bytes fit in e.scratch, avoid an extra
+		// allocation and use the cheaper Encoding.Encode.
+		dst := e.scratch[:encodedLen]
+		base64.StdEncoding.Encode(dst, s)
+		e.out.Write(dst)
+	} else if encodedLen <= 1024 {
+		// The encoded bytes are short enough to allocate for, and
+		// Encoding.Encode is still cheaper.
+		dst := make([]byte, encodedLen)
+		base64.StdEncoding.Encode(dst, s)
+		e.out.Write(dst)
+	} else {
+		// The encoded bytes are too long to cheaply allocate, and
+		// Encoding.Encode is no longer noticeably cheaper.
+		enc := base64.NewEncoder(base64.StdEncoding, &e.out)
+		enc.Write(s)
+		enc.Close()
+	}
+	e.out.WriteByte('"')
+}
+
 // Sentinel error used for indicating invalid UTF-8.
 var errInvalidUTF8 = errors.New("invalid UTF-8")
 
-func appendString(out []byte, in string) ([]byte, error) {
-	out = append(out, '"')
+func (e *Encoder) appendString(in string) error {
+	e.out.WriteByte('"')
 	i := indexNeedEscapeInString(in)
-	in, out = in[i:], append(out, in[:i]...)
+	e.out.WriteString(in[:i])
+	in = in[i:]
 	for len(in) > 0 {
 		switch r, n := utf8.DecodeRuneInString(in); {
 		case r == utf8.RuneError && n == 1:
-			return out, errInvalidUTF8
+			return errInvalidUTF8
 		case r < ' ' || r == '"' || r == '\\':
-			out = append(out, '\\')
+			e.out.WriteByte('\\')
 			switch r {
 			case '"', '\\':
-				out = append(out, byte(r))
+				e.out.WriteRune(r)
 			case '\b':
-				out = append(out, 'b')
+				e.out.WriteByte('b')
 			case '\f':
-				out = append(out, 'f')
+				e.out.WriteByte('f')
 			case '\n':
-				out = append(out, 'n')
+				e.out.WriteByte('n')
 			case '\r':
-				out = append(out, 'r')
+				e.out.WriteByte('r')
 			case '\t':
-				out = append(out, 't')
+				e.out.WriteByte('t')
 			default:
-				out = append(out, 'u')
-				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
-				out = strconv.AppendUint(out, uint64(r), 16)
+				e.out.WriteByte('u')
+				e.out.WriteString("0000"[1+(bits.Len32(uint32(r))-1)/4:])
+				b := strconv.AppendUint(e.scratch[:0], uint64(r), 16)
+				e.out.Write(b)
 			}
 			in = in[n:]
 		default:
 			i := indexNeedEscapeInString(in[n:])
-			in, out = in[n+i:], append(out, in[:n+i]...)
+			e.out.WriteString(in[:n+i])
+			in = in[n+i:]
 		}
 	}
-	out = append(out, '"')
-	return out, nil
+	e.out.WriteByte('"')
+	return nil
 }
 
 // indexNeedEscapeInString returns the index of the character that needs
@@ -139,22 +188,25 @@
 // WriteFloat writes out the given float and bitSize in JSON number value.
 func (e *Encoder) WriteFloat(n float64, bitSize int) {
 	e.prepareNext(scalar)
-	e.out = appendFloat(e.out, n, bitSize)
+	e.appendFloat(n, bitSize)
 }
 
-// appendFloat formats given float in bitSize, and appends to the given []byte.
-func appendFloat(out []byte, n float64, bitSize int) []byte {
+func (e *Encoder) appendFloat(n float64, bitSize int) {
 	switch {
 	case math.IsNaN(n):
-		return append(out, `"NaN"`...)
+		e.out.WriteString(`"NaN"`)
+		return
 	case math.IsInf(n, +1):
-		return append(out, `"Infinity"`...)
+		e.out.WriteString(`"Infinity"`)
+		return
 	case math.IsInf(n, -1):
-		return append(out, `"-Infinity"`...)
+		e.out.WriteString(`"-Infinity"`)
+		return
 	}
 
 	// JSON number formatting logic based on encoding/json.
 	// See floatEncoder.encode for reference.
+	b := e.scratch[:0]
 	fmt := byte('f')
 	if abs := math.Abs(n); abs != 0 {
 		if bitSize == 64 && (abs < 1e-6 || abs >= 1e21) ||
@@ -162,39 +214,62 @@
 			fmt = 'e'
 		}
 	}
-	out = strconv.AppendFloat(out, n, fmt, -1, bitSize)
+	b = strconv.AppendFloat(b, n, fmt, -1, bitSize)
 	if fmt == 'e' {
-		n := len(out)
-		if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
-			out[n-2] = out[n-1]
-			out = out[:n-1]
+		n := len(b)
+		if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' {
+			b[n-2] = b[n-1]
+			b = b[:n-1]
 		}
 	}
-	return out
+
+	e.out.Write(b)
 }
 
 // WriteInt writes out the given signed integer in JSON number value.
 func (e *Encoder) WriteInt(n int64) {
 	e.prepareNext(scalar)
-	e.out = append(e.out, strconv.FormatInt(n, 10)...)
+	b := strconv.AppendInt(e.scratch[:0], n, 10)
+	e.out.Write(b)
 }
 
 // WriteUint writes out the given unsigned integer in JSON number value.
 func (e *Encoder) WriteUint(n uint64) {
 	e.prepareNext(scalar)
-	e.out = append(e.out, strconv.FormatUint(n, 10)...)
+	b := strconv.AppendUint(e.scratch[:0], n, 10)
+	e.out.Write(b)
+}
+
+// WriteInt64 writes the given int64 as string (as it's specified by the I-JSON spec)
+func (e *Encoder) WriteInt64(n int64) error {
+	e.prepareNext(scalar)
+	b := strconv.AppendInt(e.scratch[:0], n, 10)
+	e.out.WriteByte('"')
+	e.out.Write(b)
+	e.out.WriteByte('"')
+	return nil
+}
+
+// WriteUint64 writes the given uint64 as string (as it's specified by the I-JSON spec)
+func (e *Encoder) WriteUint64(n uint64) error {
+	e.prepareNext(scalar)
+	b := strconv.AppendUint(e.scratch[:0], n, 10)
+	e.out.WriteByte('"')
+	e.out.Write(b)
+	e.out.WriteByte('"')
+	return nil
 }
 
 // StartObject writes out the '{' symbol.
 func (e *Encoder) StartObject() {
 	e.prepareNext(objectOpen)
-	e.out = append(e.out, '{')
+	e.out.WriteByte('{')
 }
 
 // EndObject writes out the '}' symbol.
 func (e *Encoder) EndObject() {
 	e.prepareNext(objectClose)
-	e.out = append(e.out, '}')
+	e.out.WriteByte('}')
 }
 
 // WriteName writes out the given string in JSON string value and the name
@@ -204,21 +279,21 @@
 	e.prepareNext(name)
 	var err error
 	// Append to output regardless of error.
-	e.out, err = appendString(e.out, s)
-	e.out = append(e.out, ':')
+	err = e.appendString(s)
+	e.out.WriteByte(':')
 	return err
 }
 
 // StartArray writes out the '[' symbol.
 func (e *Encoder) StartArray() {
 	e.prepareNext(arrayOpen)
-	e.out = append(e.out, '[')
+	e.out.WriteByte('[')
 }
 
 // EndArray writes out the ']' symbol.
 func (e *Encoder) EndArray() {
 	e.prepareNext(arrayClose)
-	e.out = append(e.out, ']')
+	e.out.WriteByte(']')
 }
 
 // prepareNext adds possible comma and indentation for the next value based
@@ -233,11 +308,11 @@
 		// Need to add comma on the following condition.
 		if e.lastKind&(scalar|objectClose|arrayClose) != 0 &&
 			next&(name|scalar|objectOpen|arrayOpen) != 0 {
-			e.out = append(e.out, ',')
+			e.out.WriteByte(',')
 			// For single-line output, add a random extra space after each
 			// comma to make output unstable.
 			if detrand.Bool() {
-				e.out = append(e.out, ' ')
+				e.out.WriteByte(' ')
 			}
 		}
 		return
@@ -248,29 +323,30 @@
 		// If next type is NOT closing, add indent and newline.
 		if next&(objectClose|arrayClose) == 0 {
 			e.indents = append(e.indents, e.indent...)
-			e.out = append(e.out, '\n')
-			e.out = append(e.out, e.indents...)
+			e.out.WriteByte('\n')
+			e.out.Write(e.indents)
 		}
 
 	case e.lastKind&(scalar|objectClose|arrayClose) != 0:
 		switch {
 		// If next type is either a value or name, add comma and newline.
 		case next&(name|scalar|objectOpen|arrayOpen) != 0:
-			e.out = append(e.out, ',', '\n')
+			e.out.WriteByte(',')
+			e.out.WriteByte('\n')
 
 		// If next type is a closing object or array, adjust indentation.
 		case next&(objectClose|arrayClose) != 0:
 			e.indents = e.indents[:len(e.indents)-len(e.indent)]
-			e.out = append(e.out, '\n')
+			e.out.WriteByte('\n')
 		}
-		e.out = append(e.out, e.indents...)
+		e.out.Write(e.indents)
 
 	case e.lastKind&name != 0:
-		e.out = append(e.out, ' ')
+		e.out.WriteByte(' ')
 		// For multi-line output, add a random extra space after key: to make
 		// output unstable.
 		if detrand.Bool() {
-			e.out = append(e.out, ' ')
+			e.out.WriteByte(' ')
 		}
 	}
 }

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 2:34:28 PM12/19/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Seba Chamena uploaded patch set #2 to this change.

View Change

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements

github issue: https://github.com/golang/protobuf/issues/1285



Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go

3 files changed, 240 insertions(+), 75 deletions(-)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 2:34:28 PM12/19/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Seba Chamena uploaded patch set #3 to this change.

View Change

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements

Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go

3 files changed, 238 insertions(+), 75 deletions(-)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 2:34:28 PM12/19/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Seba Chamena uploaded patch set #4 to this change.

View Change

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements

benchmarks: https://github.com/golang/protobuf/issues/1285#issuecomment-997442098



Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go

3 files changed, 241 insertions(+), 75 deletions(-)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 4:02:18 PM12/19/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

6 comments:

File internal/encoding/json/encode.go:
- Patch Set #4, Line 24: var encoderPool sync.Pool
  This does not appear to be used at all in this file.
  sry, old impl was using that on internal/json instead of the wrapper
- Patch Set #4, Line 70: func NewEncoder() *Encoder {
  This function signature changes. […]
  hmm, it's an internal module, why pushing backwards compatibility there?
  In case it's desired I think it can be achieved but just requires dirtier code (will add it in case you want to keep bw compat anyways).
- Patch Set #4, Line 100: if err = e.appendString(s); err != nil {
  The `var err error ; if _ = …` code was because we were also assigning into `e.out`. […]
  Ack
- Patch Set #4, Line 117: } else if encodedLen <= 1024 {
  This magic number might change in the future?
  right. It's related to perf reasons as the comment below says (Encoding.Encode is cheaper)
- Patch Set #4, Line 209: b := e.scratch[:0]
  We don’t use this value until line 217. […]
  Ack
- Patch Set #4, Line 246: b := strconv.AppendInt(e.scratch[:0], n, 10)
  Consider moving all of these `strconv.Append…` calls directly into the `e.out. […]
  Ack

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 4:06:47 PM12/19/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #4, Line 92: Unordered bool
  Adding this feature becomes something we cannot remove in the future without breaking backwards comp […]
  got it, but 2 questions then:
  1. How do you think is the way to use `unordered` then? When fields order were introduced (see the associated CR), the features were pushing it as something optional bc of performance reasons.
  2. Why fields ordering? Dont really know why its here. I understand it should be kept just for compatibility, but dont know why were introduced in the first place since it's not part of the IJSON std with proto3 afaik.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 4:07:41 PM12/19/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #4, Line 129: if e == nil {
  Should we be using the `New` field of the `encoderPool` so that we don’t have to test things against […]
  sounds good

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 19, 2021, 4:34:43 PM12/19/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #4, Line 92: Unordered bool
  With all the people asking us why we have non-deterministic spacing, I’m sure if we didn’t provide s […]
  Ok, got it, in the end is for consistency across encoders.
  The problem is that the change is quite heavy for performance (5 alloc and increases with bigger sizes).
  In the CR in which those were introduced, this was mentioned and measured, that's why it's `optional` (although there's no parameter that can be used outside the pkg): https://go-review.googlesource.com/c/protobuf/+/239837
  I don't see how this feature could be not supported by future modifications in proto3 tbh (although they push ordered jsons which is outside IJSON and not proto-related).

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 3:53:07 AM12/20/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:

- Patch Set #4, Line 92: Unordered bool

- The comments of “optional” are noting that the suggestion being made is optional, not that the featu […]
  Fair enough. I didn't realize that the order was preexistent.
  I'll revert this change and the related ones and focus on improving the performance on the ordered by index path.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 3:53:50 AM12/20/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

2 comments:

File internal/encoding/json/encode.go:

- Patch Set #4, Line 24: var encoderPool sync.Pool

- sry, old impl was using that on internal/json instead of the wrapper
  Ack

- Patch Set #4, Line 117: } else if encodedLen <= 1024 {

- right. It's related to perf reasons as the comment below says (Encoding. […]
  Done

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 3:54:19 AM12/20/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #4, Line 129: if e == nil {
  sounds good
  Ack

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 11:12:39 AM12/20/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

Seba Chamena uploaded patch set #5 to this change.

View Change

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)

- other small improvements



Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go

3 files changed, 211 insertions(+), 74 deletions(-)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 11:13:14 AM12/20/21

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

Seba Chamena uploaded patch set #6 to this change.

View Change

encoding/protojson: improve marshal performance

it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- other small improvements

Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go

3 files changed, 205 insertions(+), 75 deletions(-)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 20, 2021, 12:02:40 PM12/20/21

to goph...@pubsubhelper.golang.org, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #4, Line 92: Unordered bool
  Fair enough. I didn't realize that the order was preexistent. […]
  Removed.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 4:02:28 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

1 comment:

File internal/encoding/json/encode.go:
- Patch Set #4, Line 70: func NewEncoder() *Encoder {
  hmm, it's an internal module, why pushing backwards compatibility there? […]
  Ah, good point! 🤦‍♀️ Ignore this now.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 4:02:28 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

1 comment:

File encoding/protojson/encode.go:

- Patch Set #4, Line 92: Unordered bool

- Ok, got it, in the end is for consistency across encoders. […]
  The comments of “optional” are noting that the suggestion being made is optional, not that the feature was or should be optional.
  In the CR linked, it was purposed that they could drop ordering indexes, and instead just use the arbitrary ordering already there. And while it was agreed that might be a good idea, no change was made for that.
  Also, note that when adding the sort, they keep the existing ordering the same as it was before adding the sort. The fields were just naturally sorted before. Maps were always already ordered by key values. (The same as `encoding/json` of note.)

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 4:02:28 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

9 comments:

Patchset:
- Patch Set #4:
  Neat, mostly style considerations, but we really _really_ cannot break the function signature of `NewEncoder`.
File encoding/protojson/encode.go:

- Patch Set #4, Line 92: Unordered bool

- Adding this feature becomes something we cannot remove in the future without breaking backwards compatibility.
  We are generally cautious about adding new option tweaks, because we added `encoding/json` support early on because it would be so easy, right? Just add `json` tags. But then proto3 defined a canonical form, and this old “feature” became an obstacle, and we couldn’t remove it without breaking users depending on it.

- Patch Set #4, Line 129: if e == nil {

- Should we be using the `New` field of the `encoderPool` so that we don’t have to test things against `nil` here?
File internal/encoding/json/encode.go:
- Patch Set #4, Line 24: var encoderPool sync.Pool

- This does not appear to be used at all in this file.

- Patch Set #4, Line 70: func NewEncoder() *Encoder {
  This function signature changes. This would be a breaking change, and require us to release a new major version of the module.
  So, I see why, it’s so we can reuse `Encoder`s in a sync.Pool, right? I’m not sure how we could provide transparent use of a sync.Pool while keeping this signature…
  Perhaps, we could just provide the `Reset` function above, so that we empower callers to track/pool their one encoders if they’re concerned with performance?
  Though, I suppose we could keep this signature the same, but still also include the `SetIndent` above allowing us to do the transparent `sync.Pool´ usage?

- Patch Set #4, Line 100: if err = e.appendString(s); err != nil {

- The `var err error ; if _ = …` code was because we were also assigning into `e.out`. Since we are no longer assigning like that, this code can be `if err := e.appendString(s); err != nil { … }`

- Patch Set #4, Line 117: } else if encodedLen <= 1024 {

- This magic number might change in the future?
- Patch Set #4, Line 209: b := e.scratch[:0]
  We don’t use this value until line 217. Consider putting this value directly into the parameters of `strconv.AppendFloat` like you do for `WriteInt` and `WriteUint` below.

- Patch Set #4, Line 246: b := strconv.AppendInt(e.scratch[:0], n, 10)

- Consider moving all of these `strconv.Append…` calls directly into the `e.out.Write(…)` calls, then we don’t have to name a temporary variable.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 4:02:28 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

1 comment:

File encoding/protojson/encode.go:

- Patch Set #4, Line 92: Unordered bool

- got it, but 2 questions then: […]
  With all the people asking us why we have non-deterministic spacing, I’m sure if we didn’t provide some sort of deterministic ordering, people would complain about that as well.
  Since the module itself has golden file tests (because we’re the ones generating the output, this is fine), we can turn off that non-deterministic whitespace. So, we would have to provide a deterministic ordering for our own golden tests anyways.
  The built in `encoding/json` also does field ordering, which is why I think it got brought in.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 6:22:55 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

Patch set 6:Code-Review +1

View Change

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Joe Tsai (Gerrit)

unread,

Dec 20, 2021, 6:28:19 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena.

View Change

4 comments:

Patchset:
- Patch Set #6:
  Us

File encoding/protojson/encode.go:

Patch Set #6, Line 112:


var encoderPool = sync.Pool{New: func() interface{} { return json.NewEncoder() }}

func getCachedEncoder() *json.Encoder {
	e := encoderPool.Get().(*json.Encoder)
	e.Reset()
	return e
}

func newEncoder(indent string) (*json.Encoder, error) {
	e := getCachedEncoder()
	err := e.SetIndent(indent)

- ```
	if err != nil {
		return nil, err
	}
```

- ```
	return e, nil
}
```
  Pooling objects with variable length buffers is potentially dangerous.
  See https://github.com/golang/go/issues/23199
  Pooling variable length buffers is a hard problem. See https://github.com/golang/go/issues/27735#issuecomment-739169121 for one possible solution.
- Patch Set #6, Line 291:
```
	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
		e.WriteInt64(val.Int())

	case pref.Uint64Kind, pref.Fixed64Kind:
		e.WriteUint64(val.Uint())
```
  This is incorrect. The comment explicitly says that 64-bit integers should be written out as a string.
  At the very least we should be encoding a double quote before and after this.
File internal/encoding/json/encode.go:
- Patch Set #6, Line 39: out bytes.Buffer
  Using a bytes.Buffer seems more complicated than necessary. Why would this be faster than the built-in append function?

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 6:32:08 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Joe Tsai, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena, Joe Tsai.

View Change

2 comments:

Patchset:
- Patch Set #6:
  The rest of the other comments are also valid, I was just unsure about how to approach them.
File encoding/protojson/encode.go:
- Patch Set #6, Line 291:


	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
		e.WriteInt64(val.Int())

	case pref.Uint64Kind, pref.Fixed64Kind:
		e.WriteUint64(val.Uint())

This is incorrect. […]

- The CR adds `WriteInt64` and `WriteUint64` receiver methods to `Encoder` that write these values as strings with double quotes around them.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Joe Tsai (Gerrit)

unread,

Dec 20, 2021, 6:33:25 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena, Cassondra Foesch.

View Change

1 comment:

File encoding/protojson/encode.go:

Patch Set #6, Line 291:


	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
		e.WriteInt64(val.Int())

	case pref.Uint64Kind, pref.Fixed64Kind:
		e.WriteUint64(val.Uint())

- The CR adds `WriteInt64` and `WriteUint64` receiver methods to `Encoder` that write these values as […]
  Ah, I didn't catch that. In which case, we should name these as WriteQuotedUint64.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 20, 2021, 6:34:32 PM12/20/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Joe Tsai, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena, Joe Tsai.

View Change

1 comment:

File encoding/protojson/encode.go:

Patch Set #6, Line 291:


	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
		e.WriteInt64(val.Int())

	case pref.Uint64Kind, pref.Fixed64Kind:
		e.WriteUint64(val.Uint())

- Ah, I didn't catch that. In which case, we should name these as WriteQuotedUint64.
  I would second that. Make it explicitly clear that they’re quoted.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Damien Neil (Gerrit)

unread,

Dec 21, 2021, 2:24:40 AM12/21/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Seba Chamena, Joe Tsai.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
  This is returning a []byte that's been returned to the pool. Those bytes can be overwritten by a subsequent encode operation. I don't see how you can pool the encode buffers. Marshal must return a newly-allocated []byte.
  An alternative approach would be to add a MarshalAppend (as in the wire marshaler) that permits the caller to control the encode buffer.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 21, 2021, 5:51:29 AM12/21/21

to goph...@pubsubhelper.golang.org, Damien Neil, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.

View Change

4 comments:

File encoding/protojson/encode.go:
- Patch Set #6, Line 112:


var encoderPool = sync.Pool{New: func() interface{} { return json.NewEncoder() }}

func getCachedEncoder() *json.Encoder {
	e := encoderPool.Get().(*json.Encoder)
	e.Reset()
	return e
}

func newEncoder(indent string) (*json.Encoder, error) {
	e := getCachedEncoder()
	err := e.SetIndent(indent)
	if err != nil {
		return nil, err
	}
	return e, nil
}

- Pooling objects with variable length buffers is potentially dangerous. […]
  Interesting, I always thought about that possibility as well. However, here I underestimated it since here the only ones affected could be: grpc public apis that have highly variance between message sizes and are using protojson.
  I'll address it in the next patch.

- Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)

- This is returning a []byte that's been returned to the pool. […]
  Nice catch. I'll add a MarshalAppend.
  Btw: why there are not impls of protojson.NewEncoder/NewDecoder?

Patch Set #6, Line 291:


	case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
		e.WriteInt64(val.Int())

	case pref.Uint64Kind, pref.Fixed64Kind:
		e.WriteUint64(val.Uint())

- I would second that. Make it explicitly clear that they’re quoted.
  +1. I'll add it
File internal/encoding/json/encode.go:

- Patch Set #6, Line 39: out bytes.Buffer

- Using a bytes.Buffer seems more complicated than necessary. […]
  Indeed, it's not faster by its own, but it allows other optimizations.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 21, 2021, 6:16:40 AM12/21/21

to goph...@pubsubhelper.golang.org, Damien Neil, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.

View Change

1 comment:

File internal/encoding/json/encode.go:

- Patch Set #6, Line 39: out bytes.Buffer

- Indeed, it's not faster by its own, but it allows other optimizations.
  okay, the previous comment doesnt sound very solid.
  We use lot of WriteString and Write (almost all the fields types, except a few). WriteByte is also used (quoting, colons, escaping) which performs better with append, but the common usecase will been benefit from faster writes of []byte and string
  Benchmark: https://go.dev/play/p/S2hhTYm5X1w

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 21, 2021, 6:24:16 AM12/21/21

to goph...@pubsubhelper.golang.org, Damien Neil, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.

View Change

1 comment:

File internal/encoding/json/encode.go:

- Patch Set #6, Line 39: out bytes.Buffer

- okay, the previous comment doesnt sound very solid. […]
  finally, with 'it allows other optimizations' i mean accumulating bytes in a scratch []byte and then bulk inserting them instead of doing append byte by byte.
  Benchmark: https://go.dev/play/p/j2X_b7UmFMa
  Results:
  BenchmarkBytesAppendByteByByte-8 24911230 57.81 ns/op 64 B/op 0 allocs/op
  BenchmarkBytesAppendByteByByte-8 77653567 72.30 ns/op 63 B/op 0 allocs/op
  BenchmarkBytesAppendByteByByte-8 76234978 53.19 ns/op 64 B/op 0 allocs/op
  BenchmarkBytesAppendByteByByte-8 70499592 36.55 ns/op 55 B/op 0 allocs/op
  BenchmarkBytesAppendByteByByte-8 78313242 19.03 ns/op 63 B/op 0 allocs/op
  BenchmarkBufferWriteByteByByte-8 76573137 14.90 ns/op 32 B/op 0 allocs/op
  BenchmarkBufferWriteByteByByte-8 79204219 14.89 ns/op 31 B/op 0 allocs/op
  BenchmarkBufferWriteByteByByte-8 79709061 14.51 ns/op 31 B/op 0 allocs/op
  BenchmarkBufferWriteByteByByte-8 82899646 13.82 ns/op 30 B/op 0 allocs/op
  BenchmarkBufferWriteByteByByte-8 80310759 14.91 ns/op 31 B/op 0 allocs/op

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Cassondra Foesch (Gerrit)

unread,

Dec 21, 2021, 9:51:46 AM12/21/21

to Seba Chamena, goph...@pubsubhelper.golang.org, Damien Neil, Joe Tsai, golang-co...@googlegroups.com

Attention is currently required from: Damien Neil, Seba Chamena, Joe Tsai.

View Change

1 comment:

File encoding/protojson/encode.go:
- Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
  Nice catch. I'll add a MarshalAppend. […]
  We didn’t implement these because most of the uses of `encoding/json.NewEncoder` and `encoding/json.NewDecoder` are flawed. For instance, a microservice will use a `NewDecoder` on a `http.Request.Body` and then parse out one object, without realizing that the body could contain any arbitrary data after that initial request object. (I have seen this happen specifically in a passing test that was accidentally including garbage at the end from a malformed `Sprintf` argument disagreement, and thus the test was erroneously passing.)
  Likewise, using a `NewEncoder` on the `http.ResponseWriter` does not actually provide any performance benefits over `json.Marshal` and a `Write`, and worse if a JSON error occurs, it is likely that it is too late to respond with an HTTP status code error. Which can lead to weird things like spurious empty responses. (I have also seen this happen IRL.)
  Being that we were unsure how to write the right streaming API, and that the `encoding/json` one has been clearly misinterpreted by a lot of people, we kicked the can down the road.

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Seba Chamena (Gerrit)

unread,

Dec 21, 2021, 10:19:31 AM12/21/21

to goph...@pubsubhelper.golang.org, Damien Neil, Joe Tsai, Cassondra Foesch, golang-co...@googlegroups.com

Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.

View Change

1 comment:

File encoding/protojson/encode.go:

- Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)

- We didn’t implement these because most of the uses of `encoding/json.NewEncoder` and `encoding/json. […]
  Clear now, I thought it wasnt just 'we didnt have time/want to', so thanks for the explanation

To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.

Reply all

Reply to author

Forward