net/netip: avoid allocation in Addr and AddrPort UnmarshalText
Addr.UnmarshalText and AddrPort.UnmarshalText currently convert the
input []byte to a string, causing an unnecessary heap allocation.
Use unsafe.String to create a string view backed by the input []byte,
then parse it with a new parseAddrParts helper that returns the
address and zone separately. The zone string is cloned via
strings.Clone to ensure it does not alias the caller's input buffer.
Similarly, introduce parseAddrPortParts to share the split/parse
logic between ParseAddrPort and AddrPort.UnmarshalText.
│ old │ new │
│ sec/op │ sec/op vs base │
AddrUnmarshalText/v4-10 15.77n ± 1% 10.09n ± 1% -36.00% (p=0.000 n=10)
AddrUnmarshalText/v6-10 25.86n ± 1% 17.06n ± 0% -34.04% (p=0.000 n=10)
AddrUnmarshalText/v6_zone-10 39.02n ± 2% 32.18n ± 1% -17.52% (p=0.000 n=10)
AddrPortUnmarshalText/v4-10 25.38n ± 0% 18.82n ± 1% -25.85% (p=0.000 n=10)
AddrPortUnmarshalText/v6-10 36.65n ± 0% 25.78n ± 1% -29.67% (p=0.000 n=10)
AddrPortUnmarshalText/v6_zone-10 50.09n ± 1% 41.54n ± 0% -17.06% (p=0.000 n=10)
geomean 30.08n 21.94n -27.06%
│ old │ new │
│ B/op │ B/op vs base │
AddrUnmarshalText/v4-10 8.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
AddrUnmarshalText/v6-10 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
AddrUnmarshalText/v6_zone-10 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
AddrPortUnmarshalText/v4-10 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
AddrPortUnmarshalText/v6-10 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
AddrPortUnmarshalText/v6_zone-10 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
Fixes #77095
diff --git a/src/net/netip/netip.go b/src/net/netip/netip.go
index 908d292..ee5351f 100644
--- a/src/net/netip/netip.go
+++ b/src/net/netip/netip.go
@@ -18,7 +18,9 @@
"internal/byteorder"
"math"
"strconv"
+ "strings"
"unique"
+ "unsafe"
)
// Sizes: (64-bit)
@@ -112,19 +114,31 @@
// s can be in dotted decimal ("192.0.2.1"), IPv6 ("2001:db8::68"),
// or IPv6 with a scoped addressing zone ("fe80::1cc0:3e8c:119f:c2e1%ens18").
func ParseAddr(s string) (Addr, error) {
+ ip, zone, err := parseAddrParts(s)
+ if err != nil {
+ return Addr{}, err
+ }
+ return ip.WithZone(zone), nil
+}
+
+func parseAddrParts(s string) (Addr, string, error) {
for i := 0; i < len(s); i++ {
switch s[i] {
case '.':
- return parseIPv4(s)
+ ip, err := parseIPv4(s)
+ if err != nil {
+ return Addr{}, "", err
+ }
+ return ip, "", nil
case ':':
- return parseIPv6(s)
+ return parseIPv6Parts(s)
case '%':
// Assume that this was trying to be an IPv6 address with
// a zone specifier, but the address is missing.
- return Addr{}, parseAddrError{in: s, msg: "missing IPv6 address"}
+ return Addr{}, "", parseAddrError{in: s, msg: "missing IPv6 address"}
}
}
- return Addr{}, parseAddrError{in: s, msg: "unable to parse IP"}
+ return Addr{}, "", parseAddrError{in: s, msg: "unable to parse IP"}
}
// MustParseAddr calls [ParseAddr](s) and panics on error.
@@ -203,6 +217,16 @@
// parseIPv6 parses s as an IPv6 address (in form "2001:db8::68").
func parseIPv6(in string) (Addr, error) {
+ ip, zone, err := parseIPv6Parts(in)
+ if err != nil {
+ return Addr{}, err
+ }
+ return ip.WithZone(zone), nil
+}
+
+// parseIPv6Parts parses in as an IPv6 address, splitting out the optional zone.
+// It returns an Addr without a zone and the parsed zone string.
+func parseIPv6Parts(in string) (Addr, string, error) {
s := in
// Split off the zone right from the start. Yes it's a second scan
@@ -215,7 +239,7 @@
s, zone = s[:i], s[i+1:]
if zone == "" {
// Not allowed to have an empty zone if explicitly specified.
- return Addr{}, parseAddrError{in: in, msg: "zone must be a non-empty string"}
+ return Addr{}, "", parseAddrError{in: in, msg: "zone must be a non-empty string"}
}
}
@@ -228,7 +252,7 @@
s = s[2:]
// Might be only ellipsis
if len(s) == 0 {
- return IPv6Unspecified().WithZone(zone), nil
+ return IPv6Unspecified(), zone, nil
}
}
@@ -252,27 +276,27 @@
}
if off > 3 {
//more than 4 digits in group, fail.
- return Addr{}, parseAddrError{in: in, msg: "each group must have 4 or less digits", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "each group must have 4 or less digits", at: s}
}
if acc > math.MaxUint16 {
// Overflow, fail.
- return Addr{}, parseAddrError{in: in, msg: "IPv6 field has value >=2^16", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "IPv6 field has value >=2^16", at: s}
}
}
if off == 0 {
// No digits found, fail.
- return Addr{}, parseAddrError{in: in, msg: "each colon-separated field must have at least one digit", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "each colon-separated field must have at least one digit", at: s}
}
// If followed by dot, might be in trailing IPv4.
if off < len(s) && s[off] == '.' {
if ellipsis < 0 && i != 12 {
// Not the right place.
- return Addr{}, parseAddrError{in: in, msg: "embedded IPv4 address must replace the final 2 fields of the address", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "embedded IPv4 address must replace the final 2 fields of the address", at: s}
}
if i+4 > 16 {
// Not enough room.
- return Addr{}, parseAddrError{in: in, msg: "too many hex fields to fit an embedded IPv4 at the end of the address", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "too many hex fields to fit an embedded IPv4 at the end of the address", at: s}
}
end := len(in)
@@ -281,7 +305,7 @@
}
err := parseIPv4Fields(in, end-len(s), end, ip[i:i+4])
if err != nil {
- return Addr{}, err
+ return Addr{}, "", err
}
s = ""
i += 4
@@ -301,16 +325,16 @@
// Otherwise must be followed by colon and more.
if s[0] != ':' {
- return Addr{}, parseAddrError{in: in, msg: "unexpected character, want colon", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "unexpected character, want colon", at: s}
} else if len(s) == 1 {
- return Addr{}, parseAddrError{in: in, msg: "colon must be followed by more characters", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "colon must be followed by more characters", at: s}
}
s = s[1:]
// Look for ellipsis.
if s[0] == ':' {
if ellipsis >= 0 { // already have one
- return Addr{}, parseAddrError{in: in, msg: "multiple :: in address", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "multiple :: in address", at: s}
}
ellipsis = i
s = s[1:]
@@ -322,13 +346,13 @@
// Must have used entire string.
if len(s) != 0 {
- return Addr{}, parseAddrError{in: in, msg: "trailing garbage after address", at: s}
+ return Addr{}, "", parseAddrError{in: in, msg: "trailing garbage after address", at: s}
}
// If didn't parse enough, expand ellipsis.
if i < 16 {
if ellipsis < 0 {
- return Addr{}, parseAddrError{in: in, msg: "address string too short"}
+ return Addr{}, "", parseAddrError{in: in, msg: "address string too short"}
}
n := 16 - i
for j := i - 1; j >= ellipsis; j-- {
@@ -337,9 +361,9 @@
clear(ip[ellipsis : ellipsis+n])
} else if ellipsis >= 0 {
// Ellipsis must represent at least one 0 group.
- return Addr{}, parseAddrError{in: in, msg: "the :: must expand to at least one field of zeros"}
+ return Addr{}, "", parseAddrError{in: in, msg: "the :: must expand to at least one field of zeros"}
}
- return AddrFrom16(ip).WithZone(zone), nil
+ return AddrFrom16(ip), zone, nil
}
// AddrFromSlice parses the 4- or 16-byte byte slice as an IPv4 or IPv6 address.
@@ -1005,6 +1029,15 @@
*ip = Addr{}
return nil
}
+ s := unsafe.String(unsafe.SliceData(text), len(text))
+ if parsed, zone, err := parseAddrParts(s); err == nil {
+ if zone != "" {
+ zone = strings.Clone(zone)
+ parsed = parsed.WithZone(zone)
+ }
+ *ip = parsed
+ return nil
+ }
var err error
*ip, err = ParseAddr(string(text))
return err
@@ -1109,31 +1142,39 @@
return ip, port, v6, nil
}
+func parseAddrPortParts(s string) (ip Addr, zone string, port uint16, v6 bool, err error) {
+ ipStr, portStr, v6, err := splitAddrPort(s)
+ if err != nil {
+ return Addr{}, "", 0, false, err
+ }
+ port16, err := strconv.ParseUint(portStr, 10, 16)
+ if err != nil {
+ return Addr{}, "", 0, false, errors.New("invalid port " + strconv.Quote(portStr) + " parsing " + strconv.Quote(s))
+ }
+ ip, zone, err = parseAddrParts(ipStr)
+ if err != nil {
+ return Addr{}, "", 0, false, err
+ }
+ if v6 && ip.Is4() {
+ return Addr{}, "", 0, false, errors.New("invalid ip:port " + strconv.Quote(s) + ", square brackets can only be used with IPv6 addresses")
+ }
+ if !v6 && ip.Is6() {
+ return Addr{}, "", 0, false, errors.New("invalid ip:port " + strconv.Quote(s) + ", IPv6 addresses must be surrounded by square brackets")
+ }
+ return ip, zone, uint16(port16), v6, nil
+}
+
// ParseAddrPort parses s as an [AddrPort].
//
// It doesn't do any name resolution: both the address and the port
// must be numeric.
func ParseAddrPort(s string) (AddrPort, error) {
- var ipp AddrPort
- ip, port, v6, err := splitAddrPort(s)
- if err != nil {
- return ipp, err
- }
- port16, err := strconv.ParseUint(port, 10, 16)
- if err != nil {
- return ipp, errors.New("invalid port " + strconv.Quote(port) + " parsing " + strconv.Quote(s))
- }
- ipp.port = uint16(port16)
- ipp.ip, err = ParseAddr(ip)
+ ip, zone, port, _, err := parseAddrPortParts(s)
if err != nil {
return AddrPort{}, err
}
- if v6 && ipp.ip.Is4() {
- return AddrPort{}, errors.New("invalid ip:port " + strconv.Quote(s) + ", square brackets can only be used with IPv6 addresses")
- } else if !v6 && ipp.ip.Is6() {
- return AddrPort{}, errors.New("invalid ip:port " + strconv.Quote(s) + ", IPv6 addresses must be surrounded by square brackets")
- }
- return ipp, nil
+ ip = ip.WithZone(zone)
+ return AddrPortFrom(ip, port), nil
}
// MustParseAddrPort calls [ParseAddrPort](s) and panics on error.
@@ -1242,6 +1283,15 @@
*p = AddrPort{}
return nil
}
+ s := unsafe.String(unsafe.SliceData(text), len(text))
+ if parsedIP, zone, port, _, err := parseAddrPortParts(s); err == nil {
+ if zone != "" {
+ zone = strings.Clone(zone)
+ parsedIP = parsedIP.WithZone(zone)
+ }
+ *p = AddrPortFrom(parsedIP, port)
+ return nil
+ }
var err error
*p, err = ParseAddrPort(string(text))
return err
diff --git a/src/net/netip/netip_test.go b/src/net/netip/netip_test.go
index f5ce31d..71121d3 100644
--- a/src/net/netip/netip_test.go
+++ b/src/net/netip/netip_test.go
@@ -1434,6 +1434,46 @@
}
}
+func TestAddrUnmarshalTextZoneCopiesInput(t *testing.T) {
+ text := []byte("fe80::1%eth0")
+ var ip Addr
+ if err := ip.UnmarshalText(text); err != nil {
+ t.Fatal(err)
+ }
+ if got := ip.Zone(); got != "eth0" {
+ t.Fatalf("Zone() = %q, want %q", got, "eth0")
+ }
+
+ i := bytes.IndexByte(text, '%')
+ if i < 0 || i+1 >= len(text) {
+ t.Fatal("bad test input")
+ }
+ text[i+1] = 'x'
+ if got := ip.Zone(); got != "eth0" {
+ t.Fatalf("Zone() after input mutation = %q, want %q", got, "eth0")
+ }
+}
+
+func TestAddrPortUnmarshalTextZoneCopiesInput(t *testing.T) {
+ text := []byte("[fe80::1%eth0]:443")
+ var p AddrPort
+ if err := p.UnmarshalText(text); err != nil {
+ t.Fatal(err)
+ }
+ if got := p.Addr().Zone(); got != "eth0" {
+ t.Fatalf("Addr().Zone() = %q, want %q", got, "eth0")
+ }
+
+ i := bytes.IndexByte(text, '%')
+ if i < 0 || i+1 >= len(text) {
+ t.Fatal("bad test input")
+ }
+ text[i+1] = 'x'
+ if got := p.Addr().Zone(); got != "eth0" {
+ t.Fatalf("Addr().Zone() after input mutation = %q, want %q", got, "eth0")
+ }
+}
+
func TestIs4AndIs6(t *testing.T) {
tests := []struct {
ip Addr
@@ -1900,6 +1940,28 @@
{"v6_zone", "1:2::ffff:192.168.140.255%eth1"},
}
+var unmarshalTextBenchInputs = []struct {
+ name string
+ addr []byte
+ addrPort []byte
+}{
+ {
+ name: "v4",
+ addr: []byte("10.1.1.1"),
+ addrPort: []byte("10.1.1.1:443"),
+ },
+ {
+ name: "v6",
+ addr: []byte("2001:db8::1"),
+ addrPort: []byte("[2001:db8::1]:443"),
+ },
+ {
+ name: "v6_zone",
+ addr: []byte("fe80::1%eth0"),
+ addrPort: []byte("[fe80::1%eth0]:443"),
+ },
+}
+
func BenchmarkParseAddr(b *testing.B) {
sinkInternValue = unique.Make(MakeAddrDetail(true, "eth1")) // Pin to not benchmark the intern package
for _, test := range parseBenchInputs {
@@ -1959,6 +2021,22 @@
}
}
+func BenchmarkAddrUnmarshalText(b *testing.B) {
+ for _, test := range unmarshalTextBenchInputs {
+ text := test.addr
+ b.Run(test.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ var ip Addr
+ if err := ip.UnmarshalText(text); err != nil {
+ b.Fatal(err)
+ }
+ sinkIP = ip
+ }
+ })
+ }
+}
+
func BenchmarkAddrPortString(b *testing.B) {
for _, test := range parseBenchInputs {
ip := MustParseAddr(test.ip)
@@ -1985,6 +2063,22 @@
}
}
+func BenchmarkAddrPortUnmarshalText(b *testing.B) {
+ for _, test := range unmarshalTextBenchInputs {
+ text := test.addrPort
+ b.Run(test.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ var ipp AddrPort
+ if err := ipp.UnmarshalText(text); err != nil {
+ b.Fatal(err)
+ }
+ sinkAddrPort = ipp
+ }
+ })
+ }
+}
+
func BenchmarkPrefixMasking(b *testing.B) {
tests := []struct {
name string
@@ -2201,6 +2295,11 @@
sinkString string
sinkBytes []byte
sinkUDPAddr = &net.UDPAddr{IP: make(net.IP, 0, 16)}
+
+ testAddrUnmarshalTextV4 = []byte("1.2.3.4")
+ testAddrUnmarshalTextV6 = []byte("2001:db8::1")
+ testAddrPortUnmarshalTextV4 = []byte("1.2.3.4:1234")
+ testAddrPortUnmarshalTextV6 = []byte("[2001:db8::1]:1234")
)
func TestNoAllocs(t *testing.T) {
@@ -2284,11 +2383,39 @@
test("Addr.As4", func() { sinkIP4 = MustParseAddr("1.2.3.4").As4() })
test("Addr.Next", func() { sinkIP = MustParseAddr("1.2.3.4").Next() })
test("Addr.Prev", func() { sinkIP = MustParseAddr("1.2.3.4").Prev() })
+ test("Addr.UnmarshalText/4", func() {
+ var ip Addr
+ if err := ip.UnmarshalText(testAddrUnmarshalTextV4); err != nil {
+ panic(err)
+ }
+ sinkIP = ip
+ })
+ test("Addr.UnmarshalText/6", func() {
+ var ip Addr
+ if err := ip.UnmarshalText(testAddrUnmarshalTextV6); err != nil {
+ panic(err)
+ }
+ sinkIP = ip
+ })
// AddrPort constructors
test("AddrPortFrom", func() { sinkAddrPort = AddrPortFrom(IPv4(1, 2, 3, 4), 22) })
test("ParseAddrPort", func() { sinkAddrPort = panicIPP(ParseAddrPort("[::1]:1234")) })
test("MustParseAddrPort", func() { sinkAddrPort = MustParseAddrPort("[::1]:1234") })
+ test("AddrPort.UnmarshalText/4", func() {
+ var ipp AddrPort
+ if err := ipp.UnmarshalText(testAddrPortUnmarshalTextV4); err != nil {
+ panic(err)
+ }
+ sinkAddrPort = ipp
+ })
+ test("AddrPort.UnmarshalText/6", func() {
+ var ipp AddrPort
+ if err := ipp.UnmarshalText(testAddrPortUnmarshalTextV6); err != nil {
+ panic(err)
+ }
+ sinkAddrPort = ipp
+ })
// Prefix constructors
test("PrefixFrom", func() { sinkPrefix = PrefixFrom(IPv4(1, 2, 3, 4), 32) })
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Hold | +1 |
return Addr{}, "", parseAddrError{in: s, msg: "missing IPv6 address"}
This is returning s as part of parseAddrError, but s can now be an unsafe alias to a []byte.
s := unsafe.String(unsafe.SliceData(text), len(text))
I believe the compiler can generally avoid an allocation when converting a []byte to a string as a function parameter, so long as the value does not escape. For example, `strconv.ParseInt(string(byteSlice), 10, 16)` does not allocate.
If ParseAddr can avoid escaping the string, then I don't think there should be a need to use unsafe here. If ParseAddr escapes the string, then the unsafe conversion is indeed unsafe.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Ack. Done. Thanks for catching the aliasing risk!
In Patchset 2, I completely removed unsafe while maintaining the 0 allocs/op performance:
Eliminated Interface Boxing: Internal parsers now return the concrete parseAddrError struct instead of the error interface.
Fast/Slow Path: Evaluated string(text) on a fast-path to naturally leverage the compiler's 0-alloc string conversion, falling back to the slow-path to allocate a safe error only if parsing fails.
PTAL!
return Addr{}, "", parseAddrError{in: s, msg: "missing IPv6 address"}
This is returning s as part of parseAddrError, but s can now be an unsafe alias to a []byte.
Done
I believe the compiler can generally avoid an allocation when converting a []byte to a string as a function parameter, so long as the value does not escape. For example, `strconv.ParseInt(string(byteSlice), 10, 16)` does not allocate.
If ParseAddr can avoid escaping the string, then I don't think there should be a need to use unsafe here. If ParseAddr escapes the string, then the unsafe conversion is indeed unsafe.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Hold | +1 |
// the parsed string from escaping to the heap on the fast path.
Never return a concrete value of a type which implements error. Doing so makes it entirely too easy to produce https://go.dev/doc/faq#nil_error.
I believe you can get the effect you're looking for by cloning the string when constructing a parseAddrError: `return parseAddrError{in: strings.Clone(s), msg: "..."}`.
*ip, err = ParseAddr(string(text))
ParseAddr just calls parseAddrParts, so UnmarshalText is now parsing twice on failure.
}
Returning parseAddrError here causes ParseAddrPort("127.0.0.1") to return "ParseAddr("127.0.0.1"): not an ip:port", which is incorrect: The failing function call is ParseAddrPort, not ParseAddr.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// the parsed string from escaping to the heap on the fast path.
Never return a concrete value of a type which implements error. Doing so makes it entirely too easy to produce https://go.dev/doc/faq#nil_error.
I believe you can get the effect you're looking for by cloning the string when constructing a parseAddrError: `return parseAddrError{in: strings.Clone(s), msg: "..."}`.
Done. Thanks for the brilliant suggestion! I've reverted all internal parsers to return the standard error interface.
By wrapping the inputs in strings.Clone on the slow path, it successfully breaks the escape chain. This perfectly preserves the 0 allocs/op on the fast path without risking typed nil issues.
ParseAddr just calls parseAddrParts, so UnmarshalText is now parsing twice on failure.
Done. Since parseAddrParts now returns a standard error interface, UnmarshalText simply returns the error directly, eliminating the double parsing on the slow path.
Returning parseAddrError here causes ParseAddrPort("127.0.0.1") to return "ParseAddr("127.0.0.1"): not an ip:port", which is incorrect: The failing function call is ParseAddrPort, not ParseAddr.
Done. Replaced it with standard errors.New in splitAddrPort and parseAddrPortParts so the error context remains accurate without the misleading "ParseAddr" prefix.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |