diff --git a/src/net/url/gen_table.go b/src/net/url/gen_table.go
new file mode 100644
index 0000000..67990d1
--- /dev/null
+++ b/src/net/url/gen_table.go
@@ -0,0 +1,216 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "go/format"
+ "io"
+ "log"
+ "maps"
+ "os"
+ "slices"
+)
+
+const filename = "table.go"
+
+func main() {
+	var out bytes.Buffer
+	fmt.Fprintln(&out, "// Code generated from gen_table.go using 'go generate'; DO NOT EDIT.")
+	fmt.Fprintln(&out)
+	fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.")
+	fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style")
+	fmt.Fprintln(&out, "// license that can be found in the LICENSE file.")
+	fmt.Fprintln(&out)
+	fmt.Fprintln(&out, "package url")
+	fmt.Fprintln(&out)
+	generateTable(&out)
+	// Format the buffered source before anything is written to disk.
+	formatted, err := format.Source(out.Bytes())
+	if err != nil {
+		log.Fatalf("format: %v", err)
+	}
+	// filename is relative, so the table is written to the working directory.
+	err = os.WriteFile(filename, formatted, 0644)
+	if err != nil {
+		log.Fatalf("WriteFile: %v", err)
+	}
+}
+
+func generateTable(w io.Writer) {
+	fmt.Fprintln(w, "var table = [256]encoding{")
+
+	// Map iteration order is random, so walk the modes through a sorted
+	// slice (highest bit first) to keep the generated file reproducible.
+	modes := slices.Sorted(maps.Keys(encNames))
+	slices.Reverse(modes)
+
+	for i := range 256 {
+		c := byte(i)
+		var line bytes.Buffer
+
+		// Key: bytes from '!' through '~' are written as rune literals,
+		// every other byte as its decimal value.
+		switch {
+		case c == '\'' || c == '\\':
+			fmt.Fprintf(&line, "'\\%c'", c)
+		case '!' <= c && c <= '~':
+			fmt.Fprintf(&line, "%q", c)
+		default:
+			fmt.Fprintf(&line, "%d", c)
+		}
+		line.WriteByte(':')
+
+		// Value: the names of all bits set for c, joined with '|'.
+		nflags := 0
+		addFlag := func(name string) {
+			if nflags > 0 {
+				line.WriteByte('|')
+			}
+			line.WriteString(name)
+			nflags++
+		}
+		if ishex(c) {
+			// hexChar is not a mode; it marks hexadecimal digits.
+			addFlag(hexCharName)
+		}
+		for _, enc := range modes {
+			// A mode's bit is set when c must NOT be escaped in that mode.
+			if !shouldEscape(c, enc) {
+				addFlag(encNames[enc])
+			}
+		}
+		// Bytes with no bits set keep the array's zero value, so their
+		// entry is left out of the literal altogether.
+		if nflags == 0 {
+			continue
+		}
+		line.WriteString(",\n")
+		w.Write(line.Bytes())
+	}
+	fmt.Fprintln(w, "}")
+}
+
+// encoding is a set of per-byte flag bits. Keep in sync with the definition in url.go.
+type encoding uint8
+
+// Encoding modes, one bit each so several can be OR-ed into one table entry. Keep in sync with the definition in url.go.
+const (
+ encodePath encoding = 1 << iota
+ encodePathSegment
+ encodeHost
+ encodeZone
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+
+ // hexChar is actually NOT an encoding mode, but there are only seven
+ // encoding modes. We might as well abuse the otherwise unused most
+ // significant bit in uint8 to indicate whether a character is
+ // hexadecimal.
+ hexChar
+)
+
+// encNames maps each encoding mode to the identifier written for it in table.go. Keep in sync with the definitions in url.go.
+var encNames = map[encoding]string{
+ encodePath: "encodePath",
+ encodePathSegment: "encodePathSegment",
+ encodeHost: "encodeHost",
+ encodeZone: "encodeZone",
+ encodeUserPassword: "encodeUserPassword",
+ encodeQueryComponent: "encodeQueryComponent",
+ encodeFragment: "encodeFragment",
+}
+
+// hexCharName is the identifier written for the hexChar bit. Keep in sync with the definition of hexChar in url.go.
+const hexCharName = "hexChar"
+
+// shouldEscape reports whether the specified character should be escaped
+// when appearing in a URL string in the given mode, according to RFC 3986.
+//
+// Please be informed that for now shouldEscape does not check all
+// reserved characters correctly. See golang.org/issue/5684.
+func shouldEscape(c byte, mode encoding) bool {
+ // §2.3 Unreserved characters (alphanum)
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+ return false
+ }
+ if mode == encodeHost || mode == encodeZone {
+ // §3.2.2 Host allows
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ // as part of reg-name.
+ // We add : because we include :port as part of host.
+ // We add [ ] because we include [ipv6]:port as part of host.
+ // We add < > because they're the only characters left that we could
+ // possibly allow, and Parse will reject them if we escape them
+ // (because hosts can't use %-encoding for ASCII bytes).
+ // The double quote is left unescaped here as well (last entry in the case list).
+ switch c {
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
+ return false
+ }
+ }
+ switch c {
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
+ return false
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments. This package
+ // only manipulates the path as a whole, so we allow those
+ // last three as well. That leaves only ? to escape.
+ return c == '?'
+ case encodePathSegment: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments.
+ return c == '/' || c == ';' || c == ',' || c == '?'
+ case encodeUserPassword: // §3.2.1
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
+ // userinfo, so we must escape only '@', '/', and '?'.
+ // The parsing of userinfo treats ':' as special so we must escape
+ // that too.
+ return c == '@' || c == '/' || c == '?' || c == ':'
+ case encodeQueryComponent: // §3.4
+ // The RFC reserves (so we must escape) everything.
+ return true
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing.
+ return false
+ }
+ }
+ if mode == encodeFragment {
+ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
+ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
+ // need to be escaped. To minimize potential breakage, we apply two restrictions:
+ // (1) we always escape sub-delims outside of the fragment, and (2) we always
+ // escape single quote to avoid breaking callers that had previously assumed that
+ // single quotes would be escaped. See issue #19917.
+ switch c {
+ case '!', '(', ')', '*':
+ return false
+ }
+ }
+ // Everything else must be escaped.
+ return true
+}
+
+// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
+// generateTable consults it to decide which entries get the hexChar bit.
+func ishex(c byte) bool {
+	if '0' <= c && c <= '9' {
+		return true
+	}
+	if 'a' <= c && c <= 'f' {
+		return true
+	}
+	return 'A' <= c && c <= 'F'
+}
diff --git a/src/net/url/table.go b/src/net/url/table.go
new file mode 100644
index 0000000..b0be6a7
--- /dev/null
+++ b/src/net/url/table.go
@@ -0,0 +1,96 @@
+// Code generated from gen_table.go using 'go generate'; DO NOT EDIT.
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package url
+
+var table = [256]encoding{
+ '!': encodeFragment | encodeZone | encodeHost,
+ '"': encodeZone | encodeHost,
+ '$': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '&': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '\'': encodeZone | encodeHost,
+ '(': encodeFragment | encodeZone | encodeHost,
+ ')': encodeFragment | encodeZone | encodeHost,
+ '*': encodeFragment | encodeZone | encodeHost,
+ '+': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ',': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '-': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '.': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '/': encodeFragment | encodePath,
+ '0': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '1': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '2': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '3': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '4': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '5': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '6': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '7': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '8': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '9': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ':': encodeFragment | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ';': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '<': encodeZone | encodeHost,
+ '=': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '>': encodeZone | encodeHost,
+ '?': encodeFragment,
+ '@': encodeFragment | encodePathSegment | encodePath,
+ 'A': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'B': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'C': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'D': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'E': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'F': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'G': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'H': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'I': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'J': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'K': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'L': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'M': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'N': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'O': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'P': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'R': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'S': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'T': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'U': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'V': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'W': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'X': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '[': encodeZone | encodeHost,
+ ']': encodeZone | encodeHost,
+ '_': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'a': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'b': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'c': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'd': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'e': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'f': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'g': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'h': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'i': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'j': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'k': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'l': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'm': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'n': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'o': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'p': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'r': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 's': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 't': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'u': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'v': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'w': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'x': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '~': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+}
diff --git a/src/net/url/url.go b/src/net/url/url.go
index 6afa30f..d839f29 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -7,6 +7,9 @@
// See RFC 3986. This package generally follows RFC 3986, except where
// it deviates for compatibility reasons.
// RFC 6874 followed for IPv6 zone literals.
package url
+
+//go:generate go run gen_table.go
+
// When sending changes, first search old issues for history on decisions.
@@ -51,15 +54,7 @@
const upperhex = "0123456789ABCDEF"
+// ishex reports whether c is a hexadecimal digit, as recorded by the
+// hexChar bit of c's entry in the generated table.
func ishex(c byte) bool {
- switch {
- case '0' <= c && c <= '9':
- return true
- case 'a' <= c && c <= 'f':
- return true
- case 'A' <= c && c <= 'F':
- return true
- }
- return false
+ return table[c]&hexChar != 0
}
func unhex(c byte) byte {
@@ -75,16 +70,17 @@
}
}
-type encoding int
+// encoding is a bit set: each mode below owns one bit, so a single table
+// entry can record several modes (plus hexChar) for the same byte.
+// Keep in sync with the copies in gen_table.go.
+type encoding uint8
const (
- encodePath encoding = 1 + iota
+ encodePath encoding = 1 << iota
 encodePathSegment
 encodeHost
 encodeZone
 encodeUserPassword
 encodeQueryComponent
 encodeFragment
+
+ // hexChar is not an encoding mode. It takes the one bit of the uint8
+ // left over by the seven modes and marks hexadecimal digits in table
+ // (see ishex).
+ hexChar
)
type EscapeError string
@@ -105,80 +101,7 @@
// Please be informed that for now shouldEscape does not check all
// reserved characters correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
- // §2.3 Unreserved characters (alphanum)
- if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
- return false
- }
-
- if mode == encodeHost || mode == encodeZone {
- // §3.2.2 Host allows
- // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
- // as part of reg-name.
- // We add : because we include :port as part of host.
- // We add [ ] because we include [ipv6]:port as part of host.
- // We add < > because they're the only characters left that
- // we could possibly allow, and Parse will reject them if we
- // escape them (because hosts can't use %-encoding for
- // ASCII bytes).
- switch c {
- case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
- return false
- }
- }
-
- switch c {
- case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
- return false
-
- case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
- // Different sections of the URL allow a few of
- // the reserved characters to appear unescaped.
- switch mode {
- case encodePath: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments. This package
- // only manipulates the path as a whole, so we allow those
- // last three as well. That leaves only ? to escape.
- return c == '?'
-
- case encodePathSegment: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments.
- return c == '/' || c == ';' || c == ',' || c == '?'
-
- case encodeUserPassword: // §3.2.1
- // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
- // userinfo, so we must escape only '@', '/', and '?'.
- // The parsing of userinfo treats ':' as special so we must escape
- // that too.
- return c == '@' || c == '/' || c == '?' || c == ':'
-
- case encodeQueryComponent: // §3.4
- // The RFC reserves (so we must escape) everything.
- return true
-
- case encodeFragment: // §4.1
- // The RFC text is silent but the grammar allows
- // everything, so escape nothing.
- return false
- }
- }
-
- if mode == encodeFragment {
- // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
- // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
- // need to be escaped. To minimize potential breakage, we apply two restrictions:
- // (1) we always escape sub-delims outside of the fragment, and (2) we always
- // escape single quote to avoid breaking callers that had previously assumed that
- // single quotes would be escaped. See issue #19917.
- switch c {
- case '!', '(', ')', '*':
- return false
- }
- }
-
- // Everything else must be escaped.
- return true
+ // The generated table sets mode's bit for c when c may appear
+ // unescaped in that mode, so a clear bit means c must be escaped.
+ return table[c]&mode == 0
}
// QueryUnescape does the inverse transformation of [QueryEscape],