[go] time: optimize Parse for RFC3339 and RFC3339Nano

170 views
Skip to first unread message

Gopher Robot (Gerrit)

unread,
Sep 13, 2022, 1:36:04 PM9/13/22
to Joseph Tsai, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Jenny Rakoczy, Michael Knyszek, Rob Pike, Amarjeet Anand, Joe Tsai, Russ Cox, golang-co...@googlegroups.com

Gopher Robot submitted this change.

View Change



9 is the latest approved patch-set.
The change was submitted with unreviewed changes in the following files:

```
The name of the file: src/time/format.go
Insertions: 4, Deletions: 4.

@@ -1046,20 +1046,20 @@
func parseRFC3339(s string, local *Location) (Time, bool) {
// parseUint parses s as an unsigned decimal integer and
// verifies that it is within some range.
- // If it is invalid or out-of-range, it sets ok to false and
- // returns 1 (a valid value to avoid panic when calling daysIn).
+ // If it is invalid or out-of-range,
+ // it sets ok to false and returns the min value.
ok := true
parseUint := func(s string, min, max int) (x int) {
for _, c := range []byte(s) {
if c < '0' || '9' < c {
ok = false
- return 1
+ return min
}
x = x*10 + int(c) - '0'
}
if x < min || max < x {
ok = false
- return 1
+ return min
}
return x
}
```

Approvals: Joseph Tsai: Run TryBots; Automatically submit change Gopher Robot: TryBots succeeded Michael Knyszek: Looks good to me, but someone else must approve Jenny Rakoczy: Looks good to me, but someone else must approve Rob Pike: Looks good to me, approved
time: optimize Parse for RFC3339 and RFC3339Nano

RFC 3339 is the most common time representation,
being used in an overwhelming 57.3% of all specified formats,
while the next competitor only holds 7.5% usage.
Specially optimize parsing to handle the RFC 3339 format.
To reduce the complexity of error checking,
parseRFC3339 simply returns a bool indicating parsing success.
It leaves error handling to the general parse path.

To assist in fuzzing, the internal parse function was left unmodified
so that we could test that parseRFC3339 and parse agree with each other.

Performance:

name old time/op new time/op delta
ParseRFC3339UTC 112ns ± 1% 37ns ± 1% -67.37% (p=0.000 n=9+9)
ParseRFC3339TZ 259ns ± 2% 67ns ± 1% -73.92% (p=0.000 n=10+9)

Credit goes to Amarjeet Anand for a prior CL attemping to optimize this.
See CL 425014.

Fixes #54093

Change-Id: I14f4e8c52b092d44ceef6863f261842ed7e83f4c
Reviewed-on: https://go-review.googlesource.com/c/go/+/425197
Reviewed-by: Rob Pike <r...@golang.org>
Run-TryBot: Joseph Tsai <joe...@digital-static.net>
Reviewed-by: Michael Knyszek <mkny...@google.com>
TryBot-Result: Gopher Robot <go...@golang.org>
Auto-Submit: Joseph Tsai <joe...@digital-static.net>
Reviewed-by: Jenny Rakoczy <je...@golang.org>
---
M src/time/export_test.go
M src/time/format.go
M src/time/format_test.go
M src/time/time_test.go
4 files changed, 227 insertions(+), 31 deletions(-)

diff --git a/src/time/export_test.go b/src/time/export_test.go
index afe1560..b75d06c 100644
--- a/src/time/export_test.go
+++ b/src/time/export_test.go
@@ -135,3 +135,5 @@

var AppendFormatAny = Time.appendFormat
var AppendFormatRFC3339 = Time.appendFormatRFC3339
+var ParseAny = parse
+var ParseRFC3339 = parseRFC3339
diff --git a/src/time/format.go b/src/time/format.go
index 6d5da32..afb130c 100644
--- a/src/time/format.go
+++ b/src/time/format.go
@@ -618,6 +618,7 @@
// AppendFormat is like Format but appends the textual
// representation to b and returns the extended buffer.
func (t Time) AppendFormat(b []byte, layout string) []byte {
+ // Optimize for RFC3339 as it accounts for over half of all representations.
switch layout {
case RFC3339:
return t.appendFormatRFC3339(b, false)
@@ -1018,6 +1019,12 @@
// differ by the actual zone offset. To avoid such problems, prefer time layouts
// that use a numeric zone offset, or use ParseInLocation.
func Parse(layout, value string) (Time, error) {
+ // Optimize for RFC3339 as it accounts for over half of all representations.
+ if layout == RFC3339 || layout == RFC3339Nano {
+ if t, ok := parseRFC3339(value, Local); ok {
+ return t, nil
+ }
+ }
return parse(layout, value, UTC, Local)
}

@@ -1027,9 +1034,88 @@
// Second, when given a zone offset or abbreviation, Parse tries to match it
// against the Local location; ParseInLocation uses the given location.
func ParseInLocation(layout, value string, loc *Location) (Time, error) {
+ // Optimize for RFC3339 as it accounts for over half of all representations.
+ if layout == RFC3339 || layout == RFC3339Nano {
+ if t, ok := parseRFC3339(value, loc); ok {
+ return t, nil
+ }
+ }
return parse(layout, value, loc, loc)
}

+func parseRFC3339(s string, local *Location) (Time, bool) {
+ // parseUint parses s as an unsigned decimal integer and
+ // verifies that it is within some range.
+ // If it is invalid or out-of-range,
+ // it sets ok to false and returns the min value.
+ ok := true
+ parseUint := func(s string, min, max int) (x int) {
+ for _, c := range []byte(s) {
+ if c < '0' || '9' < c {
+ ok = false
+ return min
+ }
+ x = x*10 + int(c) - '0'
+ }
+ if x < min || max < x {
+ ok = false
+ return min
+ }
+ return x
+ }
+
+ // Parse the date and time.
+ if len(s) < len("2006-01-02T15:04:05") {
+ return Time{}, false
+ }
+ year := parseUint(s[0:4], 0, 9999) // e.g., 2006
+ month := parseUint(s[5:7], 1, 12) // e.g., 01
+ day := parseUint(s[8:10], 1, daysIn(Month(month), year)) // e.g., 02
+ hour := parseUint(s[11:13], 0, 23) // e.g., 15
+ min := parseUint(s[14:16], 0, 59) // e.g., 04
+ sec := parseUint(s[17:19], 0, 59) // e.g., 05
+ if !ok || !(s[4] == '-' && s[7] == '-' && s[10] == 'T' && s[13] == ':' && s[16] == ':') {
+ return Time{}, false
+ }
+ s = s[19:]
+
+ // Parse the fractional second.
+ var nsec int
+ if len(s) >= 2 && s[0] == '.' && isDigit(s, 1) {
+ n := 2
+ for ; n < len(s) && isDigit(s, n); n++ {
+ }
+ nsec, _, _ = parseNanoseconds(s, n)
+ s = s[n:]
+ }
+
+ // Parse the time zone.
+ t := Date(year, Month(month), day, hour, min, sec, nsec, UTC)
+ if s != "Z" {
+ if len(s) != len("-07:00") {
+ return Time{}, false
+ }
+ hr := parseUint(s[1:3], 0, 23) // e.g., 07
+ mm := parseUint(s[4:6], 0, 59) // e.g., 00
+ if !ok || !((s[0] == '-' || s[0] == '+') && s[3] == ':') {
+ return Time{}, false
+ }
+ zoneOffset := (hr*60 + mm) * 60
+ if s[0] == '-' {
+ zoneOffset *= -1
+ }
+ t.addSec(-int64(zoneOffset))
+
+ // Use local zone with the given offset if possible.
+ if _, offset, _, _, _ := local.lookup(t.unixSec()); offset == zoneOffset {
+ t.setLoc(local)
+ } else {
+ t.setLoc(FixedZone("", zoneOffset))
+ }
+ }
+ return t, true
+}
+
func parse(layout, value string, defaultLocation, local *Location) (Time, error) {
alayout, avalue := layout, value
rangeErrString := "" // set if a value is out of range
diff --git a/src/time/format_test.go b/src/time/format_test.go
index 4880e17..ae2dc90 100644
--- a/src/time/format_test.go
+++ b/src/time/format_test.go
@@ -871,44 +871,44 @@
}
}

+var longFractionalDigitsTests = []struct {
+ value string
+ want int
+}{
+ // 9 digits
+ {"2021-09-29T16:04:33.000000000Z", 0},
+ {"2021-09-29T16:04:33.000000001Z", 1},
+ {"2021-09-29T16:04:33.100000000Z", 100_000_000},
+ {"2021-09-29T16:04:33.100000001Z", 100_000_001},
+ {"2021-09-29T16:04:33.999999999Z", 999_999_999},
+ {"2021-09-29T16:04:33.012345678Z", 12_345_678},
+ // 10 digits, truncates
+ {"2021-09-29T16:04:33.0000000000Z", 0},
+ {"2021-09-29T16:04:33.0000000001Z", 0},
+ {"2021-09-29T16:04:33.1000000000Z", 100_000_000},
+ {"2021-09-29T16:04:33.1000000009Z", 100_000_000},
+ {"2021-09-29T16:04:33.9999999999Z", 999_999_999},
+ {"2021-09-29T16:04:33.0123456789Z", 12_345_678},
+ // 11 digits, truncates
+ {"2021-09-29T16:04:33.10000000000Z", 100_000_000},
+ {"2021-09-29T16:04:33.00123456789Z", 1_234_567},
+ // 12 digits, truncates
+ {"2021-09-29T16:04:33.000123456789Z", 123_456},
+ // 15 digits, truncates
+ {"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
+}
+
// Issue 48685 and 54567.
func TestParseFractionalSecondsLongerThanNineDigits(t *testing.T) {
- tests := []struct {
- s string
- want int
- }{
- // 9 digits
- {"2021-09-29T16:04:33.000000000Z", 0},
- {"2021-09-29T16:04:33.000000001Z", 1},
- {"2021-09-29T16:04:33.100000000Z", 100_000_000},
- {"2021-09-29T16:04:33.100000001Z", 100_000_001},
- {"2021-09-29T16:04:33.999999999Z", 999_999_999},
- {"2021-09-29T16:04:33.012345678Z", 12_345_678},
- // 10 digits, truncates
- {"2021-09-29T16:04:33.0000000000Z", 0},
- {"2021-09-29T16:04:33.0000000001Z", 0},
- {"2021-09-29T16:04:33.1000000000Z", 100_000_000},
- {"2021-09-29T16:04:33.1000000009Z", 100_000_000},
- {"2021-09-29T16:04:33.9999999999Z", 999_999_999},
- {"2021-09-29T16:04:33.0123456789Z", 12_345_678},
- // 11 digits, truncates
- {"2021-09-29T16:04:33.10000000000Z", 100_000_000},
- {"2021-09-29T16:04:33.00123456789Z", 1_234_567},
- // 12 digits, truncates
- {"2021-09-29T16:04:33.000123456789Z", 123_456},
- // 15 digits, truncates
- {"2021-09-29T16:04:33.9999999999999999Z", 999_999_999},
- }
-
- for _, tt := range tests {
+ for _, tt := range longFractionalDigitsTests {
for _, format := range []string{RFC3339, RFC3339Nano} {
- tm, err := Parse(format, tt.s)
+ tm, err := Parse(format, tt.value)
if err != nil {
- t.Errorf("Parse(%q, %q) error: %v", format, tt.s, err)
+ t.Errorf("Parse(%q, %q) error: %v", format, tt.value, err)
continue
}
if got := tm.Nanosecond(); got != tt.want {
- t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.s, got, tt.want)
+ t.Errorf("Parse(%q, %q) = got %d, want %d", format, tt.value, got, tt.want)
}
}
}
@@ -955,3 +955,61 @@
}
})
}
+
+func FuzzParseRFC3339(f *testing.F) {
+ for _, tt := range formatTests {
+ f.Add(tt.result)
+ }
+ for _, tt := range parseTests {
+ f.Add(tt.value)
+ }
+ for _, tt := range parseErrorTests {
+ f.Add(tt.value)
+ }
+ for _, tt := range longFractionalDigitsTests {
+ f.Add(tt.value)
+ }
+
+ f.Fuzz(func(t *testing.T, s string) {
+ // equalTime is like time.Time.Equal, but also compares the time zone.
+ equalTime := func(t1, t2 Time) bool {
+ name1, offset1 := t1.Zone()
+ name2, offset2 := t2.Zone()
+ return t1.Equal(t2) && name1 == name2 && offset1 == offset2
+ }
+
+ for _, tz := range []*Location{UTC, Local} {
+ // Parsing as RFC3339 or RFC3339Nano should be identical.
+ t1, err1 := ParseAny(RFC3339, s, UTC, tz)
+ t2, err2 := ParseAny(RFC3339Nano, s, UTC, tz)
+ switch {
+ case (err1 == nil) != (err2 == nil):
+ t.Fatalf("ParseAny(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, err1, err2)
+ case !equalTime(t1, t2):
+ t.Fatalf("ParseAny(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, t1, t2)
+ }
+
+ // TODO(https://go.dev/issue/54580):
+ // Remove these checks after ParseAny rejects all invalid RFC 3339.
+ if err1 == nil {
+ num2 := func(s string) byte { return 10*(s[0]-'0') + (s[1] - '0') }
+ switch {
+ case len(s) > 12 && s[12] == ':':
+ t.Skipf("ParseAny(%q) incorrectly allows single-digit hour fields", s)
+ case len(s) > 19 && s[19] == ',':
+ t.Skipf("ParseAny(%q) incorrectly allows comma as sub-second separator", s)
+ case !strings.HasSuffix(s, "Z") && len(s) > 4 && (num2(s[len(s)-5:]) >= 24 || num2(s[len(s)-2:]) >= 60):
+ t.Skipf("ParseAny(%q) incorrectly allows out-of-range zone offset", s)
+ }
+ }
+
+ // Customized parser should be identical to general parser.
+ switch got, ok := ParseRFC3339(s, tz); {
+ case ok != (err1 == nil):
+ t.Fatalf("ParseRFC3339(%q) error mismatch:\n\tgot: %v\n\twant: %v", s, ok, err1 == nil)
+ case !equalTime(got, t1):
+ t.Fatalf("ParseRFC3339(%q) value mismatch:\n\tgot: %v\n\twant: %v", s, got, t2)
+ }
+ }
+ })
+}
diff --git a/src/time/time_test.go b/src/time/time_test.go
index f2c6c39..f0fed62 100644
--- a/src/time/time_test.go
+++ b/src/time/time_test.go
@@ -1445,6 +1445,18 @@
}
}

+func BenchmarkParseRFC3339UTC(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Parse(RFC3339, "2020-08-22T11:27:43.123456789Z")
+ }
+}
+
+func BenchmarkParseRFC3339TZ(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Parse(RFC3339, "2020-08-22T11:27:43.123456789-02:00")
+ }
+}
+
func BenchmarkParseDuration(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseDuration("9007199254.740993ms")

To view, visit change 425197. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I14f4e8c52b092d44ceef6863f261842ed7e83f4c
Gerrit-Change-Number: 425197
Gerrit-PatchSet: 11
Gerrit-Owner: Joseph Tsai <joe...@digital-static.net>
Gerrit-Reviewer: Amarjeet Anand <amarjeeta...@gmail.com>
Gerrit-Reviewer: Gopher Robot <go...@golang.org>
Gerrit-Reviewer: Jenny Rakoczy <je...@golang.org>
Gerrit-Reviewer: Joseph Tsai <joe...@digital-static.net>
Gerrit-Reviewer: Michael Knyszek <mkny...@google.com>
Gerrit-Reviewer: Rob Pike <r...@golang.org>
Gerrit-Reviewer: Russ Cox <r...@golang.org>
Gerrit-CC: Joe Tsai <thebroke...@gmail.com>
Gerrit-MessageType: merged
Reply all
Reply to author
Forward
0 new messages