proto: wrap un/marshal error with full name
This PR wraps un/marshal errors with message and field metadata.
If field could not be obtained then message full name is used
in other case the full name of the field is used as it also
contains the message name.
Example error:
```
failed to unmarshal "opaque.goproto.proto.test3.TestAllTypes.singular_bytes":
string field contains invalid UTF-8
```
Fixes:
- https://github.com/golang/protobuf/issues/1228
diff --git a/internal/impl/decode.go b/internal/impl/decode.go
index e0dd21f..077469c 100644
--- a/internal/impl/decode.go
+++ b/internal/impl/decode.go
@@ -5,6 +5,7 @@
package impl
import (
+ "fmt"
"math/bits"
"google.golang.org/protobuf/encoding/protowire"
@@ -218,7 +219,7 @@
}
if err != nil {
if err != errUnknown {
- return out, err
+ return out, wrapUnmarshalErrorForField(err, mi.Desc, f)
}
n = protowire.ConsumeFieldValue(num, wtyp, b)
if n < 0 {
@@ -245,6 +246,16 @@
return out, nil
}
+func wrapUnmarshalErrorForField(err error, desc protoreflect.MessageDescriptor, f *coderFieldInfo) error {
+ if err == nil {
+ return nil
+ }
+ if f == nil || desc.Fields() != nil && int(f.num) >= desc.Fields().Len() {
+ return fmt.Errorf("failed to unmarshal %q: %w", desc.FullName(), err)
+ }
+ return fmt.Errorf("failed to unmarshal %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)
+}
+
func (mi *MessageInfo) unmarshalExtension(b []byte, num protowire.Number, wtyp protowire.Type, exts map[int32]ExtensionField, opts unmarshalOptions) (out unmarshalOutput, err error) {
x := exts[int32(num)]
xt := x.Type()
diff --git a/internal/impl/encode.go b/internal/impl/encode.go
index b2e2122..1f24895 100644
--- a/internal/impl/encode.go
+++ b/internal/impl/encode.go
@@ -5,6 +5,7 @@
package impl
import (
+ "fmt"
"math"
"sort"
"sync/atomic"
@@ -12,6 +13,7 @@
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/internal/protolazy"
"google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/reflect/protoreflect"
piface "google.golang.org/protobuf/runtime/protoiface"
)
@@ -196,7 +198,7 @@
b, err = f.funcs.marshal(b, fptr, f, opts)
if err != nil {
- return b, err
+ return b, wrapMarshalErrorForField(err, mi.Desc, f.num)
}
continue
} else if f.isPointer && fptr.Elem().IsNil() {
@@ -204,7 +206,7 @@
}
b, err = f.funcs.marshal(b, fptr, f, opts)
if err != nil {
- return b, err
+ return b, wrapMarshalErrorForField(err, mi.Desc, f.num)
}
continue
}
@@ -214,7 +216,7 @@
}
b, err = f.funcs.marshal(b, fptr, f, opts)
if err != nil {
- return b, err
+ return b, wrapMarshalErrorForField(err, mi.Desc, f.num)
}
}
if mi.unknownOffset.IsValid() && !mi.isMessageSet {
@@ -225,6 +227,16 @@
return b, nil
}
+func wrapMarshalErrorForField(err error, desc protoreflect.MessageDescriptor, num protoreflect.FieldNumber) error {
+ if err == nil {
+ return nil
+ }
+ if int(num) >= desc.Fields().Len() {
+ return fmt.Errorf("failed to marshal %q: %w", desc.FullName(), err)
+ }
+ return fmt.Errorf("failed to marshal %q: %w", desc.Fields().Get(int(num)).FullName(), err)
+}
+
// fullyLazyExtensions returns true if we should attempt to keep extensions lazy over size and marshal.
func fullyLazyExtensions(opts marshalOptions) bool {
// When deterministic marshaling is requested, force an unmarshal for lazy
diff --git a/internal/impl/lazy.go b/internal/impl/lazy.go
index c7de31e..4bd9447 100644
--- a/internal/impl/lazy.go
+++ b/internal/impl/lazy.go
@@ -115,7 +115,7 @@
continue
}
if err != errUnknown {
- return err
+ return wrapUnmarshalErrorForField(err, mi.Desc, f)
}
}
n := protowire.ConsumeFieldValue(num, wtyp, b)
@@ -356,7 +356,8 @@
}
if err != nil {
if err != errUnknown {
- return out, err
+ return out, wrapUnmarshalErrorForField(err, mi.Desc, f)
+
}
n = protowire.ConsumeFieldValue(num, wtyp, b)
if n < 0 {
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Congratulations on opening your first change. Thank you for your contribution!
Next steps:
A maintainer will review your change and provide feedback. See
https://go.dev/doc/contribute#review for more info and tips to get your
patch through code review.
Most changes in the Go project go through a few rounds of revision. This can be
surprising to people new to the project. The careful, iterative review process
is our way of helping mentor contributors and ensuring that their contributions
have a lasting impact.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Thank you for your contribution!
return b, wrapMarshalErrorForField(err, mi.Desc, f)By changing `impl.MessageInfo.marshalAppendPointer`, you are only updating the fast path, but not the slow path (similar for unmarshaling).
I think you need to change `proto.MarshalOptions.marshal()` instead, which is the central point for marshaling.
There, you’ll only have access to the message name, not the individual field, but I think wrapping should happen in two places: the code that generates the field-related error should include the field’s name, and the message name is included in `proto.MarshalOptions.marshal()`
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return out, wrapUnmarshalErrorForField(err, mi.Desc, f)This is going to annotate every error produced during decoding a field.
Most decode errors are produced by passing the wrong data to the decoder. For example, passing a text-format data rather than a wire-format to Unmarshal. Since the protobuf wire format has very little in the way of internal consistency checks, ascribing this error to a specific field is likely to be misleading.
(See also Joe's comment here: https://github.com/golang/protobuf/issues/1228#issuecomment-714798874)
While it may be reasonable to mention specific fields in UTF-8 validation errors, I suspect that doing so for other errors will add significantly more confusing than it will solve.
return fmt.Errorf("failed to unmarshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)All errors returned in the protobuf module should be constructed by the internal/errors package, which adds a common error prefix among other things.
return fmt.Errorf("failed to unmarshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)This change should include a test which demonstrates the exact text of the new errors.
return fmt.Errorf("failed to marshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)All errors returned in the protobuf module should be constructed by the internal/errors package, which adds a common error prefix among other things.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Thank you for your comments. I think I addressed all of them
This is going to annotate every error produced during decoding a field.
Most decode errors are produced by passing the wrong data to the decoder. For example, passing a text-format data rather than a wire-format to Unmarshal. Since the protobuf wire format has very little in the way of internal consistency checks, ascribing this error to a specific field is likely to be misleading.
(See also Joe's comment here: https://github.com/golang/protobuf/issues/1228#issuecomment-714798874)
While it may be reasonable to mention specific fields in UTF-8 validation errors, I suspect that doing so for other errors will add significantly more confusing than it will solve.
Acknowledged
return fmt.Errorf("failed to unmarshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)This change should include a test which demonstrates the exact text of the new errors.
I thought about it but I'm not sure how to achieve it. The perfect place will be TestDecodeInvalidMessages but currently error string is not checked there. I thought about extending testInvalidMessages with expectedError string field but the slice is used in multiple tests with different expected errors so we need something that is closed to the test. Another aproach could be a slice of expected error messages but it looks quite ugly to me as it is coupled with testInvalidMessages. What will be the best approach?
return fmt.Errorf("failed to unmarshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)All errors returned in the protobuf module should be constructed by the internal/errors package, which adds a common error prefix among other things.
Done
return b, wrapMarshalErrorForField(err, mi.Desc, f)By changing `impl.MessageInfo.marshalAppendPointer`, you are only updating the fast path, but not the slow path (similar for unmarshaling).
I think you need to change `proto.MarshalOptions.marshal()` instead, which is the central point for marshaling.
There, you’ll only have access to the message name, not the individual field, but I think wrapping should happen in two places: the code that generates the field-related error should include the field’s name, and the message name is included in `proto.MarshalOptions.marshal()`
Done
return fmt.Errorf("failed to marshal message %q: %w", desc.Fields().Get(int(f.num)).FullName(), err)All errors returned in the protobuf module should be constructed by the internal/errors package, which adds a common error prefix among other things.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
if goerrors.Is(err, errDecode) {Why errors.Is? When will errDecode be wrapped?
errors.Is is more expensive than a simple equality check, and protobuf decoding is highly performance-sensitive. If we're using the more expensive check, there should be a reason for it.
The motivation for this change is to annotate UTF-8 validation failures. What errors other than errDecode and are UTF-8 validation failures appear here, and will now be annotated? Should this check for a list of errors that should be annotated instead?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Fixed, only utf-8 errors will be wrapped in decode
Why errors.Is? When will errDecode be wrapped?
errors.Is is more expensive than a simple equality check, and protobuf decoding is highly performance-sensitive. If we're using the more expensive check, there should be a reason for it.
The motivation for this change is to annotate UTF-8 validation failures. What errors other than errDecode and are UTF-8 validation failures appear here, and will now be annotated? Should this check for a list of errors that should be annotated instead?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return b, wrapMarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiBy changing `impl.MessageInfo.marshalAppendPointer`, you are only updating the fast path, but not the slow path (similar for unmarshaling).
I think you need to change `proto.MarshalOptions.marshal()` instead, which is the central point for marshaling.
There, you’ll only have access to the message name, not the individual field, but I think wrapping should happen in two places: the code that generates the field-related error should include the field’s name, and the message name is included in `proto.MarshalOptions.marshal()`
Done
I don’t think this is done…? Your change still modifies files in internal/impl/, but no files in proto/ (aside from tests). As a specific example, using dynamicpb would still result in the old error behavior. Similarly, have you run `./test.bash`? It tests the slow paths by running tests with the protoreflect build tag.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return b, wrapMarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiBy changing `impl.MessageInfo.marshalAppendPointer`, you are only updating the fast path, but not the slow path (similar for unmarshaling).
I think you need to change `proto.MarshalOptions.marshal()` instead, which is the central point for marshaling.
There, you’ll only have access to the message name, not the individual field, but I think wrapping should happen in two places: the code that generates the field-related error should include the field’s name, and the message name is included in `proto.MarshalOptions.marshal()`
Michael StapelbergDone
I don’t think this is done…? Your change still modifies files in internal/impl/, but no files in proto/ (aside from tests). As a specific example, using dynamicpb would still result in the old error behavior. Similarly, have you run `./test.bash`? It tests the slow paths by running tests with the protoreflect build tag.
Thank you! I fixed tests. In `proto/` we do not need to change much as there is already `errors.InvalidUTF8` function that adds field name to error. Maybe we should move errInvalidUTF8 from internal to errors and wrap it the same way in both places to have identical behaviour but I'm not sure if this refactoring should be done in this PR.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return out, wrapUnmarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiThis is going to annotate every error produced during decoding a field.
Most decode errors are produced by passing the wrong data to the decoder. For example, passing a text-format data rather than a wire-format to Unmarshal. Since the protobuf wire format has very little in the way of internal consistency checks, ascribing this error to a specific field is likely to be misleading.
(See also Joe's comment here: https://github.com/golang/protobuf/issues/1228#issuecomment-714798874)
While it may be reasonable to mention specific fields in UTF-8 validation errors, I suspect that doing so for other errors will add significantly more confusing than it will solve.
Acknowledged
Unless I miss something, I think the current version of this CL is still annotating all errors with field names.
I still think this going to be never useful, and often confusing. A decode failure on an uint64 field, for example, almost certainly indicates that the input data isn't a protobuf message at all. Failing that, it almost certainly indicates that the data is corrupted in some way nonspecific to the individual field.
(The one possible scenario I can think of where field-specific reporting for integer fields might conceivably be useful is to report the case where a 64-bit field was redefined as a 32-bit one, and we're trying to decode a value that doesn't fit. I don't think I've ever seen this happen in practice.)
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return out, wrapUnmarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiThis is going to annotate every error produced during decoding a field.
Most decode errors are produced by passing the wrong data to the decoder. For example, passing a text-format data rather than a wire-format to Unmarshal. Since the protobuf wire format has very little in the way of internal consistency checks, ascribing this error to a specific field is likely to be misleading.
(See also Joe's comment here: https://github.com/golang/protobuf/issues/1228#issuecomment-714798874)
While it may be reasonable to mention specific fields in UTF-8 validation errors, I suspect that doing so for other errors will add significantly more confusing than it will solve.
Damien NeilAcknowledged
Unless I miss something, I think the current version of this CL is still annotating all errors with field names.
I still think this going to be never useful, and often confusing. A decode failure on an uint64 field, for example, almost certainly indicates that the input data isn't a protobuf message at all. Failing that, it almost certainly indicates that the data is corrupted in some way nonspecific to the individual field.
(The one possible scenario I can think of where field-specific reporting for integer fields might conceivably be useful is to report the case where a 64-bit field was redefined as a 32-bit one, and we're trying to decode a value that doesn't fit. I don't think I've ever seen this happen in practice.)
I think `wrapUnmarshalErrorForField` name might be misleading as there is a check for `errInvalidUTF8` error and only it will be wrapped. Please take a look at lint 252
return b, wrapMarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiBy changing `impl.MessageInfo.marshalAppendPointer`, you are only updating the fast path, but not the slow path (similar for unmarshaling).
I think you need to change `proto.MarshalOptions.marshal()` instead, which is the central point for marshaling.
There, you’ll only have access to the message name, not the individual field, but I think wrapping should happen in two places: the code that generates the field-related error should include the field’s name, and the message name is included in `proto.MarshalOptions.marshal()`
Michael StapelbergDone
Tomasz JaniszewskiI don’t think this is done…? Your change still modifies files in internal/impl/, but no files in proto/ (aside from tests). As a specific example, using dynamicpb would still result in the old error behavior. Similarly, have you run `./test.bash`? It tests the slow paths by running tests with the protoreflect build tag.
Thank you! I fixed tests. In `proto/` we do not need to change much as there is already `errors.InvalidUTF8` function that adds field name to error. Maybe we should move errInvalidUTF8 from internal to errors and wrap it the same way in both places to have identical behaviour but I'm not sure if this refactoring should be done in this PR.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return out, wrapUnmarshalErrorForField(err, mi.Desc, f)Tomasz JaniszewskiThis is going to annotate every error produced during decoding a field.
Most decode errors are produced by passing the wrong data to the decoder. For example, passing a text-format data rather than a wire-format to Unmarshal. Since the protobuf wire format has very little in the way of internal consistency checks, ascribing this error to a specific field is likely to be misleading.
(See also Joe's comment here: https://github.com/golang/protobuf/issues/1228#issuecomment-714798874)
While it may be reasonable to mention specific fields in UTF-8 validation errors, I suspect that doing so for other errors will add significantly more confusing than it will solve.
Damien NeilAcknowledged
Tomasz JaniszewskiUnless I miss something, I think the current version of this CL is still annotating all errors with field names.
I still think this going to be never useful, and often confusing. A decode failure on an uint64 field, for example, almost certainly indicates that the input data isn't a protobuf message at all. Failing that, it almost certainly indicates that the data is corrupted in some way nonspecific to the individual field.
(The one possible scenario I can think of where field-specific reporting for integer fields might conceivably be useful is to report the case where a 64-bit field was redefined as a 32-bit one, and we're trying to decode a value that doesn't fit. I don't think I've ever seen this happen in practice.)
I think `wrapUnmarshalErrorForField` name might be misleading as there is a check for `errInvalidUTF8` error and only it will be wrapped. Please take a look at lint 252
Ahh, yes, I did misread this. Apologies. This only wraps UTF-8 decoding errors.
return out, errHow about we add the message type here, which will cover both fast-path and slow-path?
return out, errors.Wrap(err, "failed to unmarshal message %q", m.Descriptor().FullName())
return out, checkInitialized(m)...and also add "failed to unmarshal message" here as well for consistency.
return errors.New("failed to unmarshal message %q: cannot parse invalid wire-format data", name)This says "message %q", but its inconsistent whether this is being passed a message name or a field name.
How about we leave errDecode as-is, and add the error annotation in unmarshal (see comment above)?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
How about we add the message type here, which will cover both fast-path and slow-path?
return out, errors.Wrap(err, "failed to unmarshal message %q", m.Descriptor().FullName())
Acknowledged
return out, checkInitialized(m)...and also add "failed to unmarshal message" here as well for consistency.
I think it should be `invalid message` as it's not about marshaling but verifying if required fields are set.
return errors.New("failed to unmarshal message %q: cannot parse invalid wire-format data", name)This says "message %q", but its inconsistent whether this is being passed a message name or a field name.
How about we leave errDecode as-is, and add the error annotation in unmarshal (see comment above)?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
return out, checkInitialized(m)Tomasz Janiszewski...and also add "failed to unmarshal message" here as well for consistency.
I think it should be `invalid message` as it's not about marshaling but verifying if required fields are set.
Acknowledged
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I’ll run this change through our Google-internal test suite and report back once I have the results…
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
The fuzz tests are still failing. Can you update your change with a fix please?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I’ll run this change through our Google-internal test suite and report back once I have the results…
We have about 10 failing test targets (total) in various projects that need to be fixed before we can accept this change into the repository.
Unfortunately, I cannot prioritize this any time soon.
If any Google employee is reading this, take a look at cl/781882427 and reach out if you want to help get this change in.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
The fuzz tests are still failing. Can you update your change with a fix please?
Fixed. The failures were because we compare different message types. I think unwrapping twice should solve the issue although it looks a bit weird.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
I’ll run this change through our Google-internal test suite and report back once I have the results…
We have about 10 failing test targets (total) in various projects that need to be fixed before we can accept this change into the repository.
Unfortunately, I cannot prioritize this any time soon.
If any Google employee is reading this, take a look at cl/781882427 and reach out if you want to help get this change in.
Acknowledged
nit: move this import down to the other google.golang.org imports
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
"google.golang.org/protobuf/internal/errors"This import should be in the other import paragraph.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |