Seba Chamena has uploaded this change for review.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 238 insertions(+), 75 deletions(-)
diff --git a/encoding/protojson/bench_test.go b/encoding/protojson/bench_test.go
index 8d5a0bb..fbeedad 100644
--- a/encoding/protojson/bench_test.go
+++ b/encoding/protojson/bench_test.go
@@ -5,11 +5,13 @@
package protojson_test
import (
+ "google.golang.org/protobuf/types/known/durationpb"
+ "math"
"testing"
- "google.golang.org/protobuf/encoding/protojson"
+ pb3 "google.golang.org/protobuf/internal/testprotos/textpb3"
- "google.golang.org/protobuf/types/known/durationpb"
+ "google.golang.org/protobuf/encoding/protojson"
)
func BenchmarkUnmarshal_Duration(b *testing.B) {
@@ -22,3 +24,39 @@
}
}
}
+
+var scalarSample = pb3.Scalars{
+ SBool: true,
+ SInt32: math.MaxInt32,
+ SInt64: math.MaxInt64,
+ SUint64: math.MaxUint64,
+ SUint32: 2130321213,
+ SSint32: 23132131,
+ SSint64: 3213232123812,
+ SFixed32: 4232943192,
+ SFixed64: 2389182192312,
+ SSfixed32: 281432823,
+ SSfixed64: 23919321921,
+ SFloat: 3213213.2931,
+ SDouble: 39121.321321,
+ SBytes: []byte("foo_sbytes"),
+ SString: "foo_sstring",
+}
+
+func BenchmarkMarshal_ScalarsUnordered(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _, err := protojson.MarshalOptions{Unordered: true}.Marshal(&scalarSample)
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+}
+
+func BenchmarkMarshal_Scalars(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _, err := protojson.MarshalOptions{Unordered: true}.Marshal(&scalarSample)
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+}
diff --git a/encoding/protojson/encode.go b/encoding/protojson/encode.go
index ba971f0..b957656 100644
--- a/encoding/protojson/encode.go
+++ b/encoding/protojson/encode.go
@@ -5,8 +5,8 @@
package protojson
import (
- "encoding/base64"
"fmt"
+ "sync"
"google.golang.org/protobuf/internal/encoding/json"
"google.golang.org/protobuf/internal/encoding/messageset"
@@ -88,6 +88,8 @@
protoregistry.ExtensionTypeResolver
protoregistry.MessageTypeResolver
}
+
+ Unordered bool
}
// Format formats the message as a string.
@@ -110,6 +112,30 @@
return o.marshal(m)
}
+var encoderPool sync.Pool
+
+func getCachedEncoder() *json.Encoder {
+ v := encoderPool.Get()
+ if v == nil {
+ return nil
+ }
+ e := v.(*json.Encoder)
+ e.Reset()
+ return e
+}
+
+func newEncoder(indent string) (*json.Encoder, error) {
+ e := getCachedEncoder()
+ if e == nil {
+ e = json.NewEncoder()
+ }
+ err := e.SetIndent(indent)
+ if err != nil {
+ return nil, err
+ }
+ return e, nil
+}
+
// marshal is a centralized function that all marshal operations go through.
// For profiling purposes, avoid changing the name of this function or
// introducing other code paths for marshal that do not go through this.
@@ -121,7 +147,7 @@
o.Resolver = protoregistry.GlobalTypes
}
- internalEnc, err := json.NewEncoder(o.Indent)
+ internalEnc, err := newEncoder(o.Indent)
if err != nil {
return nil, err
}
@@ -139,6 +165,7 @@
if o.AllowPartial {
return enc.Bytes(), nil
}
+ encoderPool.Put(internalEnc)
return enc.Bytes(), proto.CheckInitialized(m)
}
@@ -196,6 +223,13 @@
m.Message.Range(f)
}
+func (e encoder) order() order.FieldOrder {
+ if e.opts.Unordered {
+ return nil
+ }
+ return order.IndexNameFieldOrder
+}
+
// marshalMessage marshals the fields in the given protoreflect.Message.
// If the typeURL is non-empty, then a synthetic "@type" field is injected
// containing the URL as the value.
@@ -207,7 +241,6 @@
if marshal := wellKnownTypeMarshaler(m.Descriptor().FullName()); marshal != nil {
return marshal(e, m)
}
-
e.StartObject()
defer e.EndObject()
@@ -220,7 +253,7 @@
}
var err error
- order.RangeFields(fields, order.IndexNameFieldOrder, func(fd pref.FieldDescriptor, v pref.Value) bool {
+ order.RangeFields(fields, e.order(), func(fd pref.FieldDescriptor, v pref.Value) bool {
name := fd.JSONName()
if e.opts.UseProtoNames {
name = fd.TextName()
@@ -272,22 +305,22 @@
case pref.Uint32Kind, pref.Fixed32Kind:
e.WriteUint(val.Uint())
- case pref.Int64Kind, pref.Sint64Kind, pref.Uint64Kind,
- pref.Sfixed64Kind, pref.Fixed64Kind:
- // 64-bit integers are written out as JSON string.
- e.WriteString(val.String())
+ case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
+ e.WriteInt64(val.Int())
+
+ case pref.Uint64Kind, pref.Fixed64Kind:
+ e.WriteUint64(val.Uint())
case pref.FloatKind:
- // Encoder.WriteFloat handles the special numbers NaN and infinites.
+ // Encoder.WriteFloat handles the special numbers NaN and infinities.
e.WriteFloat(val.Float(), 32)
case pref.DoubleKind:
- // Encoder.WriteFloat handles the special numbers NaN and infinites.
+ // Encoder.WriteFloat handles the special numbers NaN and infinities.
e.WriteFloat(val.Float(), 64)
case pref.BytesKind:
- e.WriteString(base64.StdEncoding.EncodeToString(val.Bytes()))
-
+ e.WriteByteSlice(val.Bytes())
case pref.EnumKind:
if fd.Enum().FullName() == genid.NullValue_enum_fullname {
e.WriteNull()
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
index fbdf348..8276ec5 100644
--- a/internal/encoding/json/encode.go
+++ b/internal/encoding/json/encode.go
@@ -5,10 +5,13 @@
package json
import (
+ "bytes"
+ "encoding/base64"
"math"
"math/bits"
"strconv"
"strings"
+ "sync"
"unicode/utf8"
"google.golang.org/protobuf/internal/detrand"
@@ -18,6 +21,8 @@
// kind represents an encoding type.
type kind uint8
+var encoderPool sync.Pool
+
const (
_ kind = (1 << iota) / 2
name
@@ -34,42 +39,56 @@
indent string
lastKind kind
indents []byte
- out []byte
+ out bytes.Buffer
+ scratch [64]byte
+
+ ptrLevel uint
+ ptrSeen map[interface{}]struct{}
+}
+
+func (e *Encoder) Reset() {
+ e.out.Reset()
+ e.lastKind = 0
+}
+
+func (e *Encoder) SetIndent(indent string) error {
+ if len(indent) == 0 {
+ e.indent = ""
+ return nil
+ }
+ if strings.Trim(indent, " \t") != "" {
+ return errors.New("indent may only be composed of space or tab characters")
+ }
+ e.indent = indent
+ return nil
}
// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry for an Array or Object
// to be preceded by the indent and trailed by a newline.
-func NewEncoder(indent string) (*Encoder, error) {
- e := &Encoder{}
- if len(indent) > 0 {
- if strings.Trim(indent, " \t") != "" {
- return nil, errors.New("indent may only be composed of space or tab characters")
- }
- e.indent = indent
- }
- return e, nil
+func NewEncoder() *Encoder {
+ return &Encoder{}
}
// Bytes returns the content of the written bytes.
func (e *Encoder) Bytes() []byte {
- return e.out
+ return e.out.Bytes()
}
// WriteNull writes out the null value.
func (e *Encoder) WriteNull() {
e.prepareNext(scalar)
- e.out = append(e.out, "null"...)
+ e.out.WriteString("null")
}
// WriteBool writes out the given boolean value.
func (e *Encoder) WriteBool(b bool) {
e.prepareNext(scalar)
if b {
- e.out = append(e.out, "true"...)
+ e.out.WriteString("true")
} else {
- e.out = append(e.out, "false"...)
+ e.out.WriteString("false")
}
}
@@ -78,51 +97,81 @@
func (e *Encoder) WriteString(s string) error {
e.prepareNext(scalar)
var err error
- if e.out, err = appendString(e.out, s); err != nil {
+ if err = e.appendString(s); err != nil {
return err
}
return nil
}
+func (e *Encoder) WriteByteSlice(s []byte) {
+ e.prepareNext(scalar)
+
+ e.out.WriteByte('"')
+ encodedLen := base64.StdEncoding.EncodedLen(len(s))
+ if encodedLen <= len(e.scratch) {
+ // If the encoded bytes fit in e.scratch, avoid an extra
+ // allocation and use the cheaper Encoding.Encode.
+ dst := e.scratch[:encodedLen]
+ base64.StdEncoding.Encode(dst, s)
+ e.out.Write(dst)
+ } else if encodedLen <= 1024 {
+ // The encoded bytes are short enough to allocate for, and
+ // Encoding.Encode is still cheaper.
+ dst := make([]byte, encodedLen)
+ base64.StdEncoding.Encode(dst, s)
+ e.out.Write(dst)
+ } else {
+ // The encoded bytes are too long to cheaply allocate, and
+ // Encoding.Encode is no longer noticeably cheaper.
+ enc := base64.NewEncoder(base64.StdEncoding, &e.out)
+ enc.Write(s)
+ enc.Close()
+ }
+ e.out.WriteByte('"')
+}
+
// Sentinel error used for indicating invalid UTF-8.
var errInvalidUTF8 = errors.New("invalid UTF-8")
-func appendString(out []byte, in string) ([]byte, error) {
- out = append(out, '"')
+func (e *Encoder) appendString(in string) error {
+ e.out.WriteByte('"')
i := indexNeedEscapeInString(in)
- in, out = in[i:], append(out, in[:i]...)
+ e.out.WriteString(in[:i])
+ in = in[i:]
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
case r == utf8.RuneError && n == 1:
- return out, errInvalidUTF8
+ return errInvalidUTF8
case r < ' ' || r == '"' || r == '\\':
- out = append(out, '\\')
+ e.out.WriteByte('\\')
switch r {
case '"', '\\':
- out = append(out, byte(r))
+ e.out.WriteRune(r)
case '\b':
- out = append(out, 'b')
+ e.out.WriteByte('b')
case '\f':
- out = append(out, 'f')
+ e.out.WriteByte('f')
case '\n':
- out = append(out, 'n')
+ e.out.WriteByte('n')
case '\r':
- out = append(out, 'r')
+ e.out.WriteByte('r')
case '\t':
- out = append(out, 't')
+ e.out.WriteByte('t')
default:
- out = append(out, 'u')
- out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
+ e.out.WriteByte('u')
+ e.out.WriteString("0000"[1+(bits.Len32(uint32(r))-1)/4:])
+ b := strconv.AppendUint(e.scratch[:0], uint64(r), 16)
+ e.out.Write(b)
}
in = in[n:]
default:
i := indexNeedEscapeInString(in[n:])
- in, out = in[n+i:], append(out, in[:n+i]...)
+ e.out.WriteString(in[:n+i])
+ in = in[n+i:]
}
}
- out = append(out, '"')
- return out, nil
+ e.out.WriteByte('"')
+ return nil
}
// indexNeedEscapeInString returns the index of the character that needs
@@ -139,22 +188,25 @@
// WriteFloat writes out the given float and bitSize in JSON number value.
func (e *Encoder) WriteFloat(n float64, bitSize int) {
e.prepareNext(scalar)
- e.out = appendFloat(e.out, n, bitSize)
+ e.appendFloat(n, bitSize)
}
-// appendFloat formats given float in bitSize, and appends to the given []byte.
-func appendFloat(out []byte, n float64, bitSize int) []byte {
+func (e *Encoder) appendFloat(n float64, bitSize int) {
switch {
case math.IsNaN(n):
- return append(out, `"NaN"`...)
+ e.out.WriteString(`"NaN"`)
+ return
case math.IsInf(n, +1):
- return append(out, `"Infinity"`...)
+ e.out.WriteString(`"Infinity"`)
+ return
case math.IsInf(n, -1):
- return append(out, `"-Infinity"`...)
+ e.out.WriteString(`"-Infinity"`)
+ return
}
// JSON number formatting logic based on encoding/json.
// See floatEncoder.encode for reference.
+ b := e.scratch[:0]
fmt := byte('f')
if abs := math.Abs(n); abs != 0 {
if bitSize == 64 && (abs < 1e-6 || abs >= 1e21) ||
@@ -162,39 +214,62 @@
fmt = 'e'
}
}
- out = strconv.AppendFloat(out, n, fmt, -1, bitSize)
+ b = strconv.AppendFloat(b, n, fmt, -1, bitSize)
if fmt == 'e' {
- n := len(out)
- if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
- out[n-2] = out[n-1]
- out = out[:n-1]
+ n := len(b)
+ if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' {
+ b[n-2] = b[n-1]
+ b = b[:n-1]
}
}
- return out
+
+ e.out.Write(b)
}
// WriteInt writes out the given signed integer in JSON number value.
func (e *Encoder) WriteInt(n int64) {
e.prepareNext(scalar)
- e.out = append(e.out, strconv.FormatInt(n, 10)...)
+ b := strconv.AppendInt(e.scratch[:0], n, 10)
+ e.out.Write(b)
}
// WriteUint writes out the given unsigned integer in JSON number value.
func (e *Encoder) WriteUint(n uint64) {
e.prepareNext(scalar)
- e.out = append(e.out, strconv.FormatUint(n, 10)...)
+ b := strconv.AppendUint(e.scratch[:0], n, 10)
+ e.out.Write(b)
+}
+
+// WriteInt64 writes the given int64 as string (as it's specified by the I-JSON spec)
+func (e *Encoder) WriteInt64(n int64) error {
+ e.prepareNext(scalar)
+ b := strconv.AppendInt(e.scratch[:0], n, 10)
+ e.out.WriteByte('"')
+ e.out.Write(b)
+ e.out.WriteByte('"')
+ return nil
+}
+
+// WriteUint64 writes the given uint64 as string (as it's specified by the I-JSON spec)
+func (e *Encoder) WriteUint64(n uint64) error {
+ e.prepareNext(scalar)
+ b := strconv.AppendUint(e.scratch[:0], n, 10)
+ e.out.WriteByte('"')
+ e.out.Write(b)
+ e.out.WriteByte('"')
+ return nil
}
// StartObject writes out the '{' symbol.
func (e *Encoder) StartObject() {
e.prepareNext(objectOpen)
- e.out = append(e.out, '{')
+ e.out.WriteByte('{')
}
// EndObject writes out the '}' symbol.
func (e *Encoder) EndObject() {
e.prepareNext(objectClose)
- e.out = append(e.out, '}')
+ e.out.WriteByte('}')
}
// WriteName writes out the given string in JSON string value and the name
@@ -204,21 +279,21 @@
e.prepareNext(name)
var err error
// Append to output regardless of error.
- e.out, err = appendString(e.out, s)
- e.out = append(e.out, ':')
+ err = e.appendString(s)
+ e.out.WriteByte(':')
return err
}
// StartArray writes out the '[' symbol.
func (e *Encoder) StartArray() {
e.prepareNext(arrayOpen)
- e.out = append(e.out, '[')
+ e.out.WriteByte('[')
}
// EndArray writes out the ']' symbol.
func (e *Encoder) EndArray() {
e.prepareNext(arrayClose)
- e.out = append(e.out, ']')
+ e.out.WriteByte(']')
}
// prepareNext adds possible comma and indentation for the next value based
@@ -233,11 +308,11 @@
// Need to add comma on the following condition.
if e.lastKind&(scalar|objectClose|arrayClose) != 0 &&
next&(name|scalar|objectOpen|arrayOpen) != 0 {
- e.out = append(e.out, ',')
+ e.out.WriteByte(',')
// For single-line output, add a random extra space after each
// comma to make output unstable.
if detrand.Bool() {
- e.out = append(e.out, ' ')
+ e.out.WriteByte(' ')
}
}
return
@@ -248,29 +323,30 @@
// If next type is NOT closing, add indent and newline.
if next&(objectClose|arrayClose) == 0 {
e.indents = append(e.indents, e.indent...)
- e.out = append(e.out, '\n')
- e.out = append(e.out, e.indents...)
+ e.out.WriteByte('\n')
+ e.out.Write(e.indents)
}
case e.lastKind&(scalar|objectClose|arrayClose) != 0:
switch {
// If next type is either a value or name, add comma and newline.
case next&(name|scalar|objectOpen|arrayOpen) != 0:
- e.out = append(e.out, ',', '\n')
+ e.out.WriteByte(',')
+ e.out.WriteByte('\n')
// If next type is a closing object or array, adjust indentation.
case next&(objectClose|arrayClose) != 0:
e.indents = e.indents[:len(e.indents)-len(e.indent)]
- e.out = append(e.out, '\n')
+ e.out.WriteByte('\n')
}
- e.out = append(e.out, e.indents...)
+ e.out.Write(e.indents)
case e.lastKind&name != 0:
- e.out = append(e.out, ' ')
+ e.out.WriteByte(' ')
// For multi-line output, add a random extra space after key: to make
// output unstable.
if detrand.Bool() {
- e.out = append(e.out, ' ')
+ e.out.WriteByte(' ')
}
}
}
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Seba Chamena uploaded patch set #2 to this change.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements
github issue: https://github.com/golang/protobuf/issues/1285
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 240 insertions(+), 75 deletions(-)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Seba Chamena uploaded patch set #3 to this change.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 238 insertions(+), 75 deletions(-)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Seba Chamena uploaded patch set #4 to this change.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- `MarshalOpts.Unordered` to ignore fields order
- other small improvements
benchmarks: https://github.com/golang/protobuf/issues/1285#issuecomment-997442098
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 241 insertions(+), 75 deletions(-)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
6 comments:
File internal/encoding/json/encode.go:
Patch Set #4, Line 24: var encoderPool sync.Pool
This does not appear to be used at all in this file.
sry, old impl was using that on internal/json instead of the wrapper
Patch Set #4, Line 70: func NewEncoder() *Encoder {
This function signature changes. […]
hmm, it's an internal module, why pushing backwards compatibility there?
In case it's desired I think it can be achieved but just requires dirtier code (will add it in case you want to keep bw compat anyways).
Patch Set #4, Line 100: if err = e.appendString(s); err != nil {
The `var err error ; if _ = …` code was because we were also assigning into `e.out`. […]
Ack
Patch Set #4, Line 117: } else if encodedLen <= 1024 {
This magic number might change in the future?
right. It's related to perf reasons as the comment below says (Encoding.Encode is cheaper)
Patch Set #4, Line 209: b := e.scratch[:0]
We don’t use this value until line 217. […]
Ack
Patch Set #4, Line 246: b := strconv.AppendInt(e.scratch[:0], n, 10)
Consider moving all of these `strconv.Append…` calls directly into the `e.out. […]
Ack
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena.
1 comment:
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
Adding this feature becomes something we cannot remove in the future without breaking backwards comp […]
got it, but 2 questions then:
1. How do you think is the way to use `unordered` then? When fields order were introduced (see the associated CR), the features were pushing it as something optional bc of performance reasons.
2. Why fields ordering? Dont really know why its here. I understand it should be kept just for compatibility, but dont know why were introduced in the first place since it's not part of the IJSON std with proto3 afaik.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
1 comment:
File encoding/protojson/encode.go:
Patch Set #4, Line 129: if e == nil {
Should we be using the `New` field of the `encoderPool` so that we don’t have to test things against […]
sounds good
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
With all the people asking us why we have non-deterministic spacing, I’m sure if we didn’t provide s […]
Ok, got it, in the end is for consistency across encoders.
The problem is that the change is quite heavy for performance (5 alloc and increases with bigger sizes).
In the CR in which those were introduced, this was mentioned and measured, that's why it's `optional` (although there's no parameter that can be used outside the pkg): https://go-review.googlesource.com/c/protobuf/+/239837
I don't see how this feature could be not supported by future modifications in proto3 tbh (although they push ordered jsons which is outside IJSON and not proto-related).
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
The comments of “optional” are noting that the suggestion being made is optional, not that the featu […]
Fair enough. I didn't realize that the order was preexistent.
I'll revert this change and the related ones and focus on improving the performance on the ordered by index path.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
2 comments:
File internal/encoding/json/encode.go:
Patch Set #4, Line 24: var encoderPool sync.Pool
sry, old impl was using that on internal/json instead of the wrapper
Ack
Patch Set #4, Line 117: } else if encodedLen <= 1024 {
right. It's related to perf reasons as the comment below says (Encoding. […]
Done
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
1 comment:
File encoding/protojson/encode.go:
Patch Set #4, Line 129: if e == nil {
sounds good
Ack
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
Seba Chamena uploaded patch set #5 to this change.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- other small improvements
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 211 insertions(+), 74 deletions(-)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
Seba Chamena uploaded patch set #6 to this change.
encoding/protojson: improve marshal performance
it introduces:
- buffered encoder instead of using []byte directly
- pool to recycle buffer
- significant improvement marshaling uint64, int64 (~10alloc/op)
- other small improvements
Change-Id: I797dd50a8878ed282c7623da8a5d4899cd468cb3
---
M encoding/protojson/bench_test.go
M encoding/protojson/encode.go
M internal/encoding/json/encode.go
3 files changed, 205 insertions(+), 75 deletions(-)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Cassondra Foesch.
1 comment:
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
Fair enough. I didn't realize that the order was preexistent. […]
Removed.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena.
1 comment:
File internal/encoding/json/encode.go:
Patch Set #4, Line 70: func NewEncoder() *Encoder {
hmm, it's an internal module, why pushing backwards compatibility there? […]
Ah, good point! 🤦♀️ Ignore this now.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
Ok, got it, in the end is for consistency across encoders. […]
The comments of “optional” are noting that the suggestion being made is optional, not that the feature was or should be optional.
In the CR linked, it was purposed that they could drop ordering indexes, and instead just use the arbitrary ordering already there. And while it was agreed that might be a good idea, no change was made for that.
Also, note that when adding the sort, they keep the existing ordering the same as it was before adding the sort. The fields were just naturally sorted before. Maps were always already ordered by key values. (The same as `encoding/json` of note.)
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena.
9 comments:
Patchset:
Neat, mostly style considerations, but we really _really_ cannot break the function signature of `NewEncoder`.
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
Adding this feature becomes something we cannot remove in the future without breaking backwards compatibility.
We are generally cautious about adding new option tweaks, because we added `encoding/json` support early on because it would be so easy, right? Just add `json` tags. But then proto3 defined a canonical form, and this old “feature” became an obstacle, and we couldn’t remove it without breaking users depending on it.
Patch Set #4, Line 129: if e == nil {
Should we be using the `New` field of the `encoderPool` so that we don’t have to test things against `nil` here?
File internal/encoding/json/encode.go:
Patch Set #4, Line 24: var encoderPool sync.Pool
This does not appear to be used at all in this file.
Patch Set #4, Line 70: func NewEncoder() *Encoder {
This function signature changes. This would be a breaking change, and require us to release a new major version of the module.
So, I see why, it’s so we can reuse `Encoder`s in a sync.Pool, right? I’m not sure how we could provide transparent use of a sync.Pool while keeping this signature…
Perhaps, we could just provide the `Reset` function above, so that we empower callers to track/pool their one encoders if they’re concerned with performance?
Though, I suppose we could keep this signature the same, but still also include the `SetIndent` above allowing us to do the transparent `sync.Pool´ usage?
Patch Set #4, Line 100: if err = e.appendString(s); err != nil {
The `var err error ; if _ = …` code was because we were also assigning into `e.out`. Since we are no longer assigning like that, this code can be `if err := e.appendString(s); err != nil { … }`
Patch Set #4, Line 117: } else if encodedLen <= 1024 {
This magic number might change in the future?
Patch Set #4, Line 209: b := e.scratch[:0]
We don’t use this value until line 217. Consider putting this value directly into the parameters of `strconv.AppendFloat` like you do for `WriteInt` and `WriteUint` below.
Patch Set #4, Line 246: b := strconv.AppendInt(e.scratch[:0], n, 10)
Consider moving all of these `strconv.Append…` calls directly into the `e.out.Write(…)` calls, then we don’t have to name a temporary variable.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena.
1 comment:
File encoding/protojson/encode.go:
Patch Set #4, Line 92: Unordered bool
got it, but 2 questions then: […]
With all the people asking us why we have non-deterministic spacing, I’m sure if we didn’t provide some sort of deterministic ordering, people would complain about that as well.
Since the module itself has golden file tests (because we’re the ones generating the output, this is fine), we can turn off that non-deterministic whitespace. So, we would have to provide a deterministic ordering for our own golden tests anyways.
The built in `encoding/json` also does field ordering, which is why I think it got brought in.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena.
Patch set 6:Code-Review +1
Attention is currently required from: Seba Chamena.
4 comments:
Patchset:
File encoding/protojson/encode.go:
var encoderPool = sync.Pool{New: func() interface{} { return json.NewEncoder() }}
func getCachedEncoder() *json.Encoder {
e := encoderPool.Get().(*json.Encoder)
e.Reset()
return e
}
func newEncoder(indent string) (*json.Encoder, error) {
e := getCachedEncoder()
err := e.SetIndent(indent)
if err != nil {
return nil, err
}
return e, nil
}
Pooling objects with variable length buffers is potentially dangerous.
See https://github.com/golang/go/issues/23199
Pooling variable length buffers is a hard problem. See https://github.com/golang/go/issues/27735#issuecomment-739169121 for one possible solution.
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
e.WriteInt64(val.Int())
case pref.Uint64Kind, pref.Fixed64Kind:
e.WriteUint64(val.Uint())
This is incorrect. The comment explicitly says that 64-bit integers should be written out as a string.
At the very least we should be encoding a double quote before and after this.
File internal/encoding/json/encode.go:
Patch Set #6, Line 39: out bytes.Buffer
Using a bytes.Buffer seems more complicated than necessary. Why would this be faster than the built-in append function?
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena, Joe Tsai.
2 comments:
Patchset:
The rest of the other comments are also valid, I was just unsure about how to approach them.
File encoding/protojson/encode.go:
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
e.WriteInt64(val.Int())
case pref.Uint64Kind, pref.Fixed64Kind:
e.WriteUint64(val.Uint())
This is incorrect. […]
The CR adds `WriteInt64` and `WriteUint64` receiver methods to `Encoder` that write these values as strings with double quotes around them.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena, Cassondra Foesch.
1 comment:
File encoding/protojson/encode.go:
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
e.WriteInt64(val.Int())
case pref.Uint64Kind, pref.Fixed64Kind:
e.WriteUint64(val.Uint())
The CR adds `WriteInt64` and `WriteUint64` receiver methods to `Encoder` that write these values as […]
Ah, I didn't catch that. In which case, we should name these as WriteQuotedUint64.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Seba Chamena, Joe Tsai.
1 comment:
File encoding/protojson/encode.go:
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
e.WriteInt64(val.Int())
case pref.Uint64Kind, pref.Fixed64Kind:
e.WriteUint64(val.Uint())
Ah, I didn't catch that. In which case, we should name these as WriteQuotedUint64.
I would second that. Make it explicitly clear that they’re quoted.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
File encoding/protojson/encode.go:
Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
This is returning a []byte that's been returned to the pool. Those bytes can be overwritten by a subsequent encode operation. I don't see how you can pool the encode buffers. Marshal must return a newly-allocated []byte.
An alternative approach would be to add a MarshalAppend (as in the wire marshaler) that permits the caller to control the encode buffer.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.
4 comments:
File encoding/protojson/encode.go:
var encoderPool = sync.Pool{New: func() interface{} { return json.NewEncoder() }}
func getCachedEncoder() *json.Encoder {
e := encoderPool.Get().(*json.Encoder)
e.Reset()
return e
}
func newEncoder(indent string) (*json.Encoder, error) {
e := getCachedEncoder()
err := e.SetIndent(indent)
if err != nil {
return nil, err
}
return e, nil
}
Pooling objects with variable length buffers is potentially dangerous. […]
Interesting, I always thought about that possibility as well. However, here I underestimated it since here the only ones affected could be: grpc public apis that have highly variance between message sizes and are using protojson.
I'll address it in the next patch.
Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
This is returning a []byte that's been returned to the pool. […]
Nice catch. I'll add a MarshalAppend.
Btw: why there are not impls of protojson.NewEncoder/NewDecoder?
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
e.WriteInt64(val.Int())
case pref.Uint64Kind, pref.Fixed64Kind:
e.WriteUint64(val.Uint())
I would second that. Make it explicitly clear that they’re quoted.
+1. I'll add it
File internal/encoding/json/encode.go:
Patch Set #6, Line 39: out bytes.Buffer
Using a bytes.Buffer seems more complicated than necessary. […]
Indeed, it's not faster by its own, but it allows other optimizations.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.
1 comment:
File internal/encoding/json/encode.go:
Patch Set #6, Line 39: out bytes.Buffer
Indeed, it's not faster by its own, but it allows other optimizations.
okay, the previous comment doesnt sound very solid.
We use lot of WriteString and Write (almost all the fields types, except a few). WriteByte is also used (quoting, colons, escaping) which performs better with append, but the common usecase will been benefit from faster writes of []byte and string
Benchmark: https://go.dev/play/p/S2hhTYm5X1w
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.
1 comment:
File internal/encoding/json/encode.go:
Patch Set #6, Line 39: out bytes.Buffer
okay, the previous comment doesnt sound very solid. […]
finally, with 'it allows other optimizations' i mean accumulating bytes in a scratch []byte and then bulk inserting them instead of doing append byte by byte.
Benchmark: https://go.dev/play/p/j2X_b7UmFMa
Results:
BenchmarkBytesAppendByteByByte-8 24911230 57.81 ns/op 64 B/op 0 allocs/op
BenchmarkBytesAppendByteByByte-8 77653567 72.30 ns/op 63 B/op 0 allocs/op
BenchmarkBytesAppendByteByByte-8 76234978 53.19 ns/op 64 B/op 0 allocs/op
BenchmarkBytesAppendByteByByte-8 70499592 36.55 ns/op 55 B/op 0 allocs/op
BenchmarkBytesAppendByteByByte-8 78313242 19.03 ns/op 63 B/op 0 allocs/op
BenchmarkBufferWriteByteByByte-8 76573137 14.90 ns/op 32 B/op 0 allocs/op
BenchmarkBufferWriteByteByByte-8 79204219 14.89 ns/op 31 B/op 0 allocs/op
BenchmarkBufferWriteByteByByte-8 79709061 14.51 ns/op 31 B/op 0 allocs/op
BenchmarkBufferWriteByteByByte-8 82899646 13.82 ns/op 30 B/op 0 allocs/op
BenchmarkBufferWriteByteByByte-8 80310759 14.91 ns/op 31 B/op 0 allocs/op
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Damien Neil, Seba Chamena, Joe Tsai.
1 comment:
File encoding/protojson/encode.go:
Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
Nice catch. I'll add a MarshalAppend. […]
We didn’t implement these because most of the uses of `encoding/json.NewEncoder` and `encoding/json.NewDecoder` are flawed. For instance, a microservice will use a `NewDecoder` on a `http.Request.Body` and then parse out one object, without realizing that the body could contain any arbitrary data after that initial request object. (I have seen this happen specifically in a passing test that was accidentally including garbage at the end from a malformed `Sprintf` argument disagreement, and thus the test was erroneously passing.)
Likewise, using a `NewEncoder` on the `http.ResponseWriter` does not actually provide any performance benefits over `json.Marshal` and a `Write`, and worse if a JSON error occurs, it is likely that it is too late to respond with an HTTP status code error. Which can lead to weird things like spurious empty responses. (I have also seen this happen IRL.)
Being that we were unsure how to write the right streaming API, and that the `encoding/json` one has been clearly misinterpreted by a lot of people, we kicked the can down the road.
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.
Attention is currently required from: Damien Neil, Cassondra Foesch, Joe Tsai.
1 comment:
File encoding/protojson/encode.go:
Patch Set #6, Line 160: return enc.Bytes(), proto.CheckInitialized(m)
We didn’t implement these because most of the uses of `encoding/json.NewEncoder` and `encoding/json. […]
Clear now, I thought it wasnt just 'we didnt have time/want to', so thanks for the explanation
To view, visit change 373356. To unsubscribe, or for help writing mail filters, visit settings.