[PATCH 1/1] client: fix index-cache name differences with 0.33.x

0 views
Skip to first unread message

Rob Browning

unread,
Feb 16, 2026, 5:38:07 PM (5 days ago) Feb 16
to bup-...@googlegroups.com
Test for expected (i.e. as of 0.33.x) index cache IDs, and fix the
incompatibilities revealed. Handle the synthetic bup-rev://'s content
as a non-URL, and append a colon the way that 0.33.x does. (We may
also do away with the synthetic URL soon.)

Retore cache ID mangling (e.g. replacing characters like : with _).

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Proposed for main.

lib/bup/client.py | 49 ++++++++++++++++++++---------------------
test/int/test_client.py | 29 +++++++++++++++++++++++-
2 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/lib/bup/client.py b/lib/bup/client.py
index d6358a0d..4634289e 100644
--- a/lib/bup/client.py
+++ b/lib/bup/client.py
@@ -125,30 +125,33 @@ _url_rx = re.compile(br'%s(?:%s%s)?%s' % (_protocol_rs, _host_rs, _port_rs, _pat
re.I)

def parse_remote(remote):
+ def parse_non_url(remote):
+ rs = remote.split(b':', 1)
+ if len(rs) == 1 or rs[0] in (b'', b'-'):
+ return b'file', None, None, rs[-1]
+ else:
+ return b'ssh', rs[0], None, rs[1]
assert remote is not None
+ if remote and remote.startswith(b'bup-rev://'):
+ parts = parse_non_url(remote[len(b'bup-rev://'):] + b':')
+ return (b'bup-rev',) + parts[1:]
url_match = _url_rx.match(remote)
if url_match:
- # Backward compatibility: version of bup prior to this patch
- # passed "hostname:" to parse_remote, which wasn't url_match
- # and thus went into the else, where the ssh version was then
- # returned, and thus the dir (last component) was the empty
- # string instead of None from the regex.
- # This empty string was then put into the name of the index-
- # cache directory, so we need to preserve that to avoid the
- # index-cache being in a different location after updates.
- if url_match.group(1) == b'bup-rev':
- if url_match.group(5) is None:
- return url_match.group(1, 3, 4) + (b'', )
- elif not url_match.group(1) in (b'ssh', b'bup', b'file'):
+ if url_match.group(1) not in (b'ssh', b'bup', b'file'):
raise ClientError('unexpected protocol: %s'
% url_match.group(1).decode('ascii'))
return url_match.group(1,3,4,5)
- else:
- rs = remote.split(b':', 1)
- if len(rs) == 1 or rs[0] in (b'', b'-'):
- return b'file', None, None, rs[-1]
- else:
- return b'ssh', rs[0], None, rs[1]
+ return parse_non_url(remote)
+
+
+def _legacy_cache_id(remote):
+ scheme, host, port, path = parse_remote(remote)
+ # The b'None' here matches python2's behavior of b'%s' % None == 'None',
+ # python3 will (as of version 3.7.5) do the same for str ('%s' % None),
+ # but crashes instead when doing b'%s' % None.
+ return re.sub(br'[^@\w]', b'_',
+ b':'.join((b'None' if host is None else host,
+ b'None' if path is None else path)))


class Client:
@@ -242,7 +245,7 @@ class Client:
closing(self._sockw):
pass

- def _prep_cache(self, host, port, path):
+ def _prep_cache(self, remote):
# Set up the index-cache directory, prefer repo-id derived
# dirs when the remote repo has one (that can be accessed).
repo_id = None
@@ -251,11 +254,7 @@ class Client:
repo_id = self.config_get(b'bup.repo.id')
except PermissionError:
pass
- # The b'None' here matches python2's behavior of b'%s' % None == 'None',
- # python3 will (as of version 3.7.5) do the same for str ('%s' % None),
- # but crashes instead when doing b'%s' % None.
- legacy = index_cache(b':'.join((b'None' if host is None else host,
- b'None' if path is None else path)))
+ legacy = index_cache(_legacy_cache_id(remote))
if repo_id is None:
return legacy
# legacy ids can't include -, so avoid aliasing with an id--
@@ -294,7 +293,7 @@ class Client:
else:
self.conn.write(b'set-dir %s\n' % self.dir)
self.check_ok()
- self.cachedir = self._prep_cache(self.host, self.port, self.dir)
+ self.cachedir = self._prep_cache(remote)
self.sync_indexes()
ctx.pop_all()
self.closed = False
diff --git a/test/int/test_client.py b/test/int/test_client.py
index 71ae03ae..8a41b1cf 100644
--- a/test/int/test_client.py
+++ b/test/int/test_client.py
@@ -179,7 +179,7 @@ def test_remote_parsing():
(b'bup://[ff:fe::1]/bup', (b'bup', b'ff:fe::1', None, b'/bup')),
(b'bup://[ff:fe::1]/bup', (b'bup', b'ff:fe::1', None, b'/bup')),
(b'bup-rev://', (b'bup-rev', None, None, b'')),
- (b'bup-rev://host/dir', (b'bup-rev', b'host', None, b'/dir')),
+ (b'bup-rev://host/dir', (b'bup-rev', b'host/dir', None, b'')),
)
for remote, values in tests:
assert client.parse_remote(remote) == values
@@ -187,6 +187,33 @@ def test_remote_parsing():
with pytest.raises(client.ClientError):
client.parse_remote(b'http://asdf.com/bup')

+
+def test_legacy_cache_ids():
+ def cid(reverse, remote):
+ if reverse: # see derive_repo_addr
+ assert not remote, remote
+ return client._legacy_cache_id(b'bup-rev://' + reverse)
+ return client._legacy_cache_id(remote)
+ with pytest.raises(AssertionError):
+ assert cid(b'x', b'y')
+ with pytest.raises(AssertionError):
+ assert cid(None, None)
+ # remotes
+ assert cid(None, b'') == b'None_'
+ assert cid(None, b':') == b'None_'
+ assert cid(None, b'-') == b'None__'
+ assert cid(None, b'h:') == b'h_'
+ assert cid(None, b':p') == b'None_p'
+ # reverses
+ with pytest.raises(AssertionError): # currently same as None, None case
+ assert cid(b'', None)
+ assert cid(b':', None) == b'None__'
+ assert cid(b'-', None) == b'None_'
+ assert cid(b'x:', None) == b'x__'
+ assert cid(b':x', None) == b'None_x_'
+ assert cid(b'xy', None) == b'xy_'
+
+
def test_config(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir
environ[b'GIT_DIR'] = bupdir = tmpdir
--
2.47.3

Johannes Berg

unread,
Feb 17, 2026, 2:34:40 AM (5 days ago) Feb 17
to Rob Browning, bup-...@googlegroups.com
On Mon, 2026-02-16 at 16:37 -0600, Rob Browning wrote:
> Test for expected (i.e. as of 0.33.x) index cache IDs, and fix the
> incompatibilities revealed. Handle the synthetic bup-rev://'s content
> as a non-URL, and append a colon the way that 0.33.x does. (We may
> also do away with the synthetic URL soon.)
>
> Retore cache ID mangling (e.g. replacing characters like : with _).

^ Typo, "Restore"

johannes

Rob Browning

unread,
Feb 17, 2026, 10:06:23 PM (4 days ago) Feb 17
to bup-...@googlegroups.com
Test for expected (i.e. as of 0.33.x) index cache IDs, and fix the
incompatibilities revealed. Handle the synthetic bup-rev://'s content
as a non-URL, and append a colon the way that 0.33.x does. (We may
also do away with the synthetic URL soon.)

Restore cache ID mangling (e.g. replacing characters like : with _).

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Fixed commit message typo, added some test cases and added some
comments to the existing test cases.

e.g.
file:x is actually a path named "file:x"
ssh://h treats h as the path
...

lib/bup/client.py | 49 ++++++++++++++++++++--------------------
test/int/test_client.py | 50 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 73 insertions(+), 26 deletions(-)
index 71ae03ae..c98f0a2a 100644
--- a/test/int/test_client.py
+++ b/test/int/test_client.py
@@ -179,7 +179,7 @@ def test_remote_parsing():
(b'bup://[ff:fe::1]/bup', (b'bup', b'ff:fe::1', None, b'/bup')),
(b'bup://[ff:fe::1]/bup', (b'bup', b'ff:fe::1', None, b'/bup')),
(b'bup-rev://', (b'bup-rev', None, None, b'')),
- (b'bup-rev://host/dir', (b'bup-rev', b'host', None, b'/dir')),
+ (b'bup-rev://host/dir', (b'bup-rev', b'host/dir', None, b'')),
)
for remote, values in tests:
assert client.parse_remote(remote) == values
@@ -187,6 +187,54 @@ def test_remote_parsing():
with pytest.raises(client.ClientError):
client.parse_remote(b'http://asdf.com/bup')

+
+def test_legacy_cache_ids():
+ # Now that we prefer the repo-id, if you add one, and add a
+ # repo-id to new repositories, this should only matter for legacy
+ # repositories. If we get this wrong (inadvertently change legacy
+ # id), then the client will create a duplicate index-cache.
+
+ def cid(reverse, remote):
+ if reverse: # see derive_repo_addr
+ assert not remote, remote
+ return client._legacy_cache_id(b'bup-rev://' + reverse)
+ return client._legacy_cache_id(remote)
+ with pytest.raises(AssertionError):
+ assert cid(b'x', b'y')
+ with pytest.raises(AssertionError):
+ cid(None, None)
+ # remotes
+ assert cid(None, b'') == b'None_'
+ assert cid(None, b':') == b'None_'
+ assert cid(None, b'-') == b'None__'
+ assert cid(None, b'p') == b'None_p'
+ assert cid(None, b'h:') == b'h_'
+ assert cid(None, b':p') == b'None_p'
+ assert cid(None, b'h:p') == b'h_p'
+ # FIXME: document unusual -r behavior if we're not going to change it, e.g.
+ # file:p means a file named "file:p"
+ # file://p means a "file" with path b'' and host b'p'
+ assert cid(None, b'file:p') == b'file_p'
+ assert cid(None, b'file:/p') == b'file__p'
+ assert cid(None, b'file://p') == b'p_None' # bug if not rejected elsewhere?
+ assert cid(None, b'file:///p') == b'None__p'
+ # bup: takes the same path as ssh:
+ assert cid(None, b'ssh:/h') == b'ssh__h' # bug if not rejected elsewhere?
+ assert cid(None, b'ssh:/h/p') == b'ssh__h_p' # bug if not rejected elsewhere?
+ assert cid(None, b'ssh://h') == b'h_None'
+ assert cid(None, b'ssh://h/p') == b'h__p'
+
+ # reverses - note that on__server always sets BUP_SERVER_REVERSE
+ # to the hostname so most of these cases *should* be irrelevant.
+ with pytest.raises(AssertionError):
+ cid(b'', None)
+ assert cid(b':', None) == b'None__'
+ assert cid(b'-', None) == b'None_'
+ assert cid(b'x:', None) == b'x__'
+ assert cid(b':x', None) == b'None_x_'
+ assert cid(b'xy', None) == b'xy_' # perhaps only "real" case
Reply all
Reply to author
Forward
0 new messages