Although we'd intended to interpret paths with a leading "/./" as
relative for path-oriented URLs, the implementation went missing.
Have parse_bytes_path_url handle the interpretation, rename the
function to parse_path_oriented_url, and add tests. Improve some of
the related documentation.
Thanks to Greg Troxel for prompting some of the documentation
improvements, and Johannes Berg for help sorting out the issues.
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
Pushed to main.
Documentation/bup-get.1.md | 2 +-
Documentation/bup.1.md | 37 +++++++++++++++++++++++--------------
lib/bup/config.py | 4 ++--
lib/bup/repo/__init__.py | 4 ++--
lib/bup/url.py | 17 ++++++++++++++---
note/main.md | 3 ++-
test/int/test_url.py | 11 +++++++++--
7 files changed, 53 insertions(+), 25 deletions(-)
diff --git a/Documentation/bup-get.1.md b/Documentation/bup-get.1.md
index 514bffdd..824ea3c5 100644
--- a/Documentation/bup-get.1.md
+++ b/Documentation/bup-get.1.md
@@ -120,7 +120,7 @@ used to help test before/after results.)
-r, \--remote=[*user*@]*host*:[*path*], \--remote=URL
: write the *ref*s to the specified remote repository, by default
- via SSH. See bup(1) REMOTE OPTIONS for further information.
+ via SSH. See `bup`(1) REMOTE OPTIONS for further information.
-c, \--print-commits
: for each updated branch, print the new git commit id.
diff --git a/Documentation/bup.1.md b/Documentation/bup.1.md
index 3fa20b51..a6e22d4d 100644
--- a/Documentation/bup.1.md
+++ b/Documentation/bup.1.md
@@ -196,6 +196,11 @@ constituent bytes are not decoded (e.g. percent decoded). This allows
URLs provided on the command line to work naturally. So
`ssh://host/x?z` has a path of `/x?z`.
+And since URLs with an authority cannot represent relative paths,
+path-oriented schemes interpret a leading `/./` as a relative path.
+So `ssh://host/./x`, `file:///./x`, and `file:/./x` all indicate the
+path `x`.
+
`ssh:`
: A path-oriented scheme (see above) that specifies access to a
repository via a `bup-server(1)` launched on a host via SSH. This
@@ -204,25 +209,29 @@ URLs provided on the command line to work naturally. So
(e.g. `ssh://user@host:2222/some/repo`), and the user and host can
be percent encoded.
- As an extension to the standard, because URLs with an authority
- cannot specify a relative path when there's an authority, a
- leading `/./` is taken to indicate a relative path. So
- `ssh://host/./x` indicates the path `x`.
-
`bup:`
-: Specifies a direct network connection to to an existing
- `bup-server(1)`. Otherwise identical to `ssh:`, except that it
- does not support a user.
+: A path-oriented scheme (see above) specifying a direct network
+ connection to an existing `bup-server(1)`. Otherwise identical
+ to `ssh:`, except that it does not support a user. This
+ connection has no authentication or encryption of its own so it's
+ unlikely you'll want to use it; prefer `file:` or `ssh:`.
`file:`
: A path-oriented scheme (see above) that specifies a repository's
filesystem path. This scheme has syntax and semantics matching a
- typical `file:` URL, except that it does not allow an authority
- (i.e. user, host, etc.). So when constructing a URL from an
- arbitrary PATH, you can use `file:PATH` if the path starts with
- `///`, if it is a single character, or if the second character is
- not `/`. Otherwise use `file://PATH` after ensuring the path is
- absolute, or dot-encoding it.
+ typical `file:` URL, except that it only allows an empty authority
+ (i.e. no user, host, etc.).
+
+ In most cases, you will probably want to include the empty
+ authority so you don't have to consider the contents of the path
+ carefully, i.e. use `file://PATH` or `ssh://user@hostPATH` when
+ the `PATH` begins with a slash, and `file:///./PATH` or
+ `ssh://user@host/./PATH` when it doesn't.
+
+ It is possible to omit the authority, but only if the path does
+ not begin with two slashes. For example `file:/` and
+ `file:some/where` are fine, but `file://some/where` is not because
+ `some` will be read as the authority.
# ENVIRONMENT
diff --git a/lib/bup/config.py b/lib/bup/config.py
index f5d5bb17..3476546e 100644
--- a/lib/bup/config.py
+++ b/lib/bup/config.py
@@ -1,6 +1,6 @@
from bup.io import path_msg as pm
-from bup.url import URL, parse_bytes_path_url
+from bup.url import URL, parse_path_oriented_url
class ConfigError(Exception):
@@ -20,7 +20,7 @@ def url_for_remote_opt(remote):
if not host:
return f'remote {pm(remote)} has no host'
return URL(scheme=b'ssh', host=host, user=user, path=path)
- url = parse_bytes_path_url(remote, require_auth=True)
+ url = parse_path_oriented_url(remote, require_auth=True)
if isinstance(url, (str, type(None))):
return parse_non_url(remote)
if url.scheme == b'bup':
diff --git a/lib/bup/repo/__init__.py b/lib/bup/repo/__init__.py
index bef4dfbc..284cee45 100644
--- a/lib/bup/repo/__init__.py
+++ b/lib/bup/repo/__init__.py
@@ -9,7 +9,7 @@ from bup.io import path_msg as pm
from bup.path import defaultrepo
from bup.repo.local import LocalRepo
from bup.repo.remote import RemoteRepo
-from bup.url import URL, parse_bytes_path_url
+from bup.url import URL, parse_path_oriented_url
public_schemes = frozenset([b'file', b'ssh', b'bup'])
@@ -69,7 +69,7 @@ def repo_for_location(location, **kwargs):
def parse_repo_url_arg(arg, val, misuse):
"""Call misuse(err_msg) if val is not a valid URL, otherwise
return a corresponding URL instance."""
- res = parse_bytes_path_url(val)
+ res = parse_path_oriented_url(val)
if not res:
misuse(f'invalid {arg} {pm(val)}')
if isinstance(res, str):
diff --git a/lib/bup/url.py b/lib/bup/url.py
index 71188aba..1b7a7ab8 100644
--- a/lib/bup/url.py
+++ b/lib/bup/url.py
@@ -96,7 +96,7 @@ _port_int_rx = re.compile(br'[0-9]+')
class ParseError(Exception): pass
-def parse_bytes_path_url(url, require_auth=False):
+def parse_path_oriented_url(url, require_auth=False):
"""Parse URL mostly according to RFC 3986. Return None if it
doesn't appear to be a URL at all (or doesn't start with a scheme
and authority when require_auth is true). Return a string
@@ -110,6 +110,10 @@ def parse_bytes_path_url(url, require_auth=False):
Parse the rest of the URL mostly according to the RFC, including
percent decoding the host and user.
+ Treat any path starting with /./ as a relative path,
+ i.e. "ssh://h/./x" produces path "x", so that relative paths can
+ be represented even when there's an authority.
+
RFC 3986 Uniform Resource Identifier (URI): Generic Syntax
https://datatracker.ietf.org/doc/html/rfc3986
@@ -126,9 +130,16 @@ def parse_bytes_path_url(url, require_auth=False):
if not slashes: # no authority (not even an empty one) x:... not x://...
if require_auth:
return None
- return URL(scheme=scheme, path=rest)
+ path = rest
+ if path.startswith(b'/./'):
+ path = path[3:].lstrip(b'/')
+ return URL(scheme=scheme, path=path)
auth, slash, path = rest.partition(b'/')
- if slash: path = b'/' + path
+ if slash:
+ if path.startswith(b'./'):
+ path = path[2:].lstrip(b'/')
+ else:
+ path = b'/' + path
if not auth: # empty authority, e.g. x:///path
return URL(scheme=scheme, path=path)
m = _userinfo_host_port_rx.fullmatch(auth)
diff --git a/note/main.md b/note/main.md
index e406447e..d8517cf7 100644
--- a/note/main.md
+++ b/note/main.md
@@ -163,7 +163,8 @@ General
"repair files". See `bup-get`(1) for additional information.
* `bup get` has added a `-S, --source-url URL` option which can
- specify a remote source repository. This should usually be
+ specify a local or remote source repository (as compared to
+ `-s/--source` which only allows a path). This should usually be
preferred to `bup on HOST get` since it requires less trust in the
remote, and avoids the need for a remote index-cache. See
`bup-get(1)` for additional information.
diff --git a/test/int/test_url.py b/test/int/test_url.py
index b958eb63..c5b672f9 100644
--- a/test/int/test_url.py
+++ b/test/int/test_url.py
@@ -57,8 +57,8 @@ def test_render_url():
assert rdot(urlx(host=b'h', path=b'p')) == b'x://h/./p'
# FIXME: more negative tests
-def test_parse_bytes_path_url():
- parse = bup.url.parse_bytes_path_url
+def test_parse_path_oriented_url():
+ parse = bup.url.parse_path_oriented_url
assert parse(b'x:', require_auth=True) is None
assert parse(b'x:/', require_auth=True) is None
@@ -69,6 +69,13 @@ def test_parse_bytes_path_url():
assert parse(b'-:') is None
assert parse(b'x://h:x') == 'invalid host h:x'
+ # dot-encoded relative paths
+ assert parse(b'x:/./p') == URL(scheme=b'x', path=b'p')
+ assert parse(b'x:///./p') == URL(scheme=b'x', path=b'p')
+ assert parse(b'x:///.//p') == URL(scheme=b'x', path=b'p')
+ assert parse(b'x://h/./p') == URL(scheme=b'x', host=b'h', path=b'p')
+ assert parse(b'x://h/.//p') == URL(scheme=b'x', host=b'h', path=b'p')
+
# Test the second rendered form for URLs with two options (other
# is in semetric_cases above).
assert parse(b'x://') == URL(scheme=b'x') # i.e. x:
--
2.47.3